
    OfeC                         S SK r S SKJr  S SKJr  S SK7  S SK7  S SKJr  S SK	J
r
   " S S\5      r " S	 S
5      r " S S5      r " S S\5      r " S S\5      r\ " S S\5      5       r " S S5      rg)    N)total_ordering)ElementTree)*)raise_unorderable_types)Treec                   `    \ rS rSrSr     SS jrSS jrS rS rSS jr	S	 r
S
 4S jrSrg)PropbankCorpusReader   ai  
Corpus reader for the propbank corpus, which augments the Penn
Treebank with information about the predicate argument structure
of every verb instance.  The corpus consists of two parts: the
predicate-argument annotations themselves, and a set of "frameset
files" which define the argument labels used by the annotations,
on a per-verb basis.  Each "frameset file" contains one or more
predicates, such as ``'turn'`` or ``'turn_on'``, each of which is
divided into coarse-grained word senses called "rolesets".  For
each "roleset", the frameset file provides descriptions of the
argument roles, along with examples.
Nc                     [        U[        5      (       a  [        X5      n[        U5      n[        R                  XX$/U-   U5        X l        X0l        X@l        XPl	        X`l
        g)at  
:param root: The root directory for this corpus.
:param propfile: The name of the file containing the predicate-
    argument annotations (relative to ``root``).
:param framefiles: A list or regexp specifying the frameset
    fileids for this corpus.
:param parse_fileid_xform: A transform that should be applied
    to the fileids in this corpus.  This should be a function
    of one argument (a fileid) that returns a string (the new
    fileid).
:param parse_corpus: The corpus containing the parse trees
    corresponding to this corpus.  These parse trees are
    necessary to resolve the tree pointers used by propbank.
N)
isinstancestrfind_corpus_fileidslistCorpusReader__init__	_propfile_framefiles
_verbsfile_parse_fileid_xform_parse_corpus)selfrootpropfile
framefiles	verbsfileparse_fileid_xformparse_corpusencodings           =/usr/lib/python3/dist-packages/nltk/corpus/reader/propbank.pyr   PropbankCorpusReader.__init__    s]    2 j#&&,T>J*%
d8*?**LhW "%##5 )    c                    ^ ^^ 0 mTb	  U4S jTS'   [        T R                  T R                  5      UU 4S jT R                  T R                  5      S9$ )zr
:return: a corpus view that acts as a list of
    ``PropBankInstance`` objects, one for each noun in the corpus.
c                 "   > U R                   T:H  $ N)baseform)instr%   s    r   <lambda>0PropbankCorpusReader.instances.<locals>.<lambda>M   s    T]]h5Nr!   instance_filterc                 *   > TR                   " U 40 TD6$ r$   )_read_instance_block)streamkwargsr   s    r   r'   r(   P   s    444VFvFr!   r   )StreamBackedCorpusViewabspathr   r   )r   r%   r-   s   ``@r   	instancesPropbankCorpusReader.instancesF   sK    
 (NF$%%LL(F]]4>>2
 	
r!   c                     [        U R                  U R                  5      [        U R	                  U R                  5      S9$ )zy
:return: a corpus view that acts as a list of strings, one for
    each line in the predicate-argument annotation file.
r.   )r/   r0   r   read_line_blockr   r   s    r   linesPropbankCorpusReader.linesT   s4    
 &LL(]]4>>2
 	
r!   c                    UR                  S5      S   nSU-  nX0R                  ;  a  [        SU-  5      eU R                  U5      R	                  5        n[
        R                  " U5      R                  5       nSSS5        WR                  S5       H  nUR                  S   U:X  d  M  Us  $    [        SU S	U 35      e! , (       d  f       NN= f)
z5
:return: the xml description for the given roleset.
.r   frames/%s.xmlFrameset file for %s not foundNpredicate/rolesetidzRoleset z not found in )
splitr   
ValueErrorr0   openr   parsegetrootfindallattrib)r   
roleset_idr%   	framefilefpetreerolesets          r   rI   PropbankCorpusReader.roleset_   s     ##C(+#h.	,,,=
JKK \\)$))+r%%b)113E ,}}%89G~~d#z1 : 8J<~i[IJJ ,+s   %C
Cc                    Ub&  SU-  nX R                   ;  a  [        SU-  5      eU/nOU R                   n/ nU Ho  nU R                  U5      R                  5        n[        R
                  " U5      R                  5       nSSS5        UR                  WR                  S5      5        Mq     [        U5      $ ! , (       d  f       N== f)z1
:return: list of xml descriptions for rolesets.
Nr:   r;   r<   )
r   r?   r0   r@   r   rA   rB   appendrC   LazyConcatenation)r   r%   rF   r   rsetsrG   rH   s          r   rolesetsPropbankCorpusReader.rolesetsq   s     '(2I 0 00 !AH!LMM#J))J#I i(--/2#))"-557 0LL':;< $ !'' 0/s   %B88
C	c                     [        U R                  U R                  5      [        U R	                  U R                  5      S9$ )zm
:return: a corpus view that acts as a list of all verb lemmas
    in this corpus (from the verbs.txt file).
r.   )r/   r0   r   r4   r   r5   s    r   verbsPropbankCorpusReader.verbs   s4    
 &LL)]]4??3
 	
r!   c                     g)NT )r&   s    r   r'   PropbankCorpusReader.<lambda>   s    r!   c                    / n[        S5       Ht  nUR                  5       R                  5       nU(       d  M*  [        R	                  XPR
                  U R                  5      nU" U5      (       d  Mc  UR                  U5        Mv     U$ )Nd   )rangereadlinestripPropbankInstancerA   r   r   rL   )r   r,   r)   blockiliner&   s          r   r+   )PropbankCorpusReader._read_instance_block   sr     sA??$**,Dt'--22D4F4F #4((LL&  r!   )r   r   r   r   r   ) NNNutf8r$   )__name__
__module____qualname____firstlineno____doc__r   r1   r6   rI   rO   rR   r+   __static_attributes__rU   r!   r   r	   r	      sC    " $*L
	
K$(*	
 <M r!   r	   c                       \ rS rSr SS jr\S 5       r\S 5       r\S 5       rS r	S r
S	 r\" \S
S9r\SS j5       rSrg)r\      Nc
                     Xl          X l         X0l         X@l         XPl         X`l         Xpl         [        U5      U l         Xl	        g r$   )
fileidsentnumwordnumtaggerrI   
inflection	predicatetuple	argumentsr   )
r   rl   rm   rn   ro   rI   rp   rq   rs   r   s
             r   r   PropbankInstance.__init__   sx     	  	& 	D 	: 	2 %	& #	@ y)	 )	.r!   c                 >    U R                   R                  S5      S   $ )zThe baseform of the predicate.r9   r   rI   r>   r5   s    r   r%   PropbankInstance.baseform        ||!!#&q))r!   c                 >    U R                   R                  S5      S   $ )z"The sense number of the predicate.r9      rv   r5   s    r   sensenumberPropbankInstance.sensenumber   rx   r!   c                     g)zIdentifier of the predicate.relrU   r5   s    r   predidPropbankInstance.predid   s     r!   c                 d    SR                  U R                  U R                  U R                  5      $ )Nz(<PropbankInstance: {}, sent {}, word {}>)formatrl   rm   rn   r5   s    r   __repr__PropbankInstance.__repr__   s*    9@@KKLLLL
 	
r!   c                     SR                  U R                  U R                  U R                  U R                  U R
                  U R                  5      nU R                  U R                  S44-   n[        U5       H  u  p4USU SU 3-  nM     U$ )Nz{} {} {} {} {} {}r~    -)
r   rl   rm   rn   ro   rI   rp   rs   rq   sorted)r   sitemsarglocargids        r   __str__PropbankInstance.__str__   s    &&KKLLLLKKLLOO
 4>>5"9!;;#E]MF1VHAeW%%A +r!   c                     U R                   c  g U R                  U R                   R                  5       ;  a  g U R                   R                  U R                  5      U R                     $ r$   )r   rl   fileidsparsed_sentsrm   r5   s    r   	_get_treePropbankInstance._get_tree   sS    $;;d//7799  --dkk:4<<HHr!   zs
        The parse tree corresponding to this instance, or None if
        the corresponding tree is not available.)docc                    U R                  5       n[        U5      S:  a  [        SU -  5      eUS S u  pEpgpUSS   V
s/ s H  oR                  S5      (       d  M  U
PM     nn
USS   V
s/ s H  oR                  S5      (       a  M  U
PM     nn
[        U5      S:w  a  [        SU -  5      eUb  U" U5      n[	        U5      n[	        U5      n[
        R                  U	5      n	[        R                  US   S S 5      n/ nU H>  nUR                  SS5      u  nnUR                  [        R                  U5      U45        M@     [        UUUUUU	UUU5	      $ s  sn
f s  sn
f )	N   z!Badly formatted propbank line: %r   z-relrz   r   r   )
r>   lenr?   endswithintPropbankInflectionrA   PropbankTreePointerrL   r\   )r   r   r   piecesrl   rm   rn   ro   rI   rp   pr~   argsrq   rs   argr   r   s                     r   rA   PropbankInstance.parse  sk   v;?@1DEE CI!*?'7 *;*Q

6(:q*;!!":@:aZZ-?:@s8q=@1DEE )'/F g,g, (--j9
 (--c!fSbk:	 	CIIc1-MFE177?GH 
  

 
	
5 <@s   EE*EE)	rs   rl   rp   r   rq   rI   rm   ro   rn   r$   )NN)rc   rd   re   rf   r   propertyr%   r{   r   r   r   r   treestaticmethodrA   rh   rU   r!   r   r\   r\      s     3.j * * * *  
I 4D +
 +
r!   r\   c                       \ rS rSrSrS rSrg)PropbankPointeri=  aX  
A pointer used by propbank to identify one or more constituents in
a parse tree.  ``PropbankPointer`` is an abstract base class with
three concrete subclasses:

  - ``PropbankTreePointer`` is used to point to single constituents.
  - ``PropbankSplitTreePointer`` is used to point to 'split'
    constituents, which consist of a sequence of two or more
    ``PropbankTreePointer`` pointers.
  - ``PropbankChainTreePointer`` is used to point to entire trace
    chains in a tree.  It consists of a sequence of pieces, which
    can be ``PropbankTreePointer`` or ``PropbankSplitTreePointer`` pointers.
c                 @    U R                   [        :X  a
  [        5       eg r$   )	__class__r   NotImplementedErrorr5   s    r   r   PropbankPointer.__init__L  s    >>_,%'' -r!   rU   N)rc   rd   re   rf   rg   r   rh   rU   r!   r   r   r   =  s    (r!   r   c                   ,    \ rS rSrS rS rS rS rSrg)PropbankChainTreePointeriQ  c                     Xl         g r$   r   r   r   s     r   r   !PropbankChainTreePointer.__init__R  s    	0r!   c                 F    SR                  S U R                   5       5      $ )Nr   c              3   ,   #    U  H
  nS U-  v   M     g7fz%sNrU   .0r   s     r   	<genexpr>3PropbankChainTreePointer.__str__.<locals>.<genexpr>Y       6+Qq+   joinr   r5   s    r   r    PropbankChainTreePointer.__str__X      xx6$++666r!   c                     SU -  $ )Nz<PropbankChainTreePointer: %s>rU   r5   s    r   r   !PropbankChainTreePointer.__repr__[      /$66r!   c           	          Uc  [        S5      e[        SU R                   Vs/ s H  o"R                  U5      PM     sn5      $ s  snf )NParse tree not availablez*CHAIN*r?   r   r   selectr   r   r   s      r   r   PropbankChainTreePointer.select^  <    <788ID1DEED   A
r   N	rc   rd   re   rf   r   r   r   r   rh   rU   r!   r   r   r   Q  s    077Fr!   r   c                   ,    \ rS rSrS rS rS rS rSrg)PropbankSplitTreePointerid  c                     Xl         g r$   r   r   s     r   r   !PropbankSplitTreePointer.__init__e  s    	4r!   c                 F    SR                  S U R                   5       5      $ )N,c              3   ,   #    U  H
  nS U-  v   M     g7fr   rU   r   s     r   r   3PropbankSplitTreePointer.__str__.<locals>.<genexpr>k  r   r   r   r5   s    r   r    PropbankSplitTreePointer.__str__j  r   r!   c                     SU -  $ )Nz<PropbankSplitTreePointer: %s>rU   r5   s    r   r   !PropbankSplitTreePointer.__repr__m  r   r!   c           	          Uc  [        S5      e[        SU R                   Vs/ s H  o"R                  U5      PM     sn5      $ s  snf )Nr   z*SPLIT*r   r   s      r   r   PropbankSplitTreePointer.selectp  r   r   r   Nr   rU   r!   r   r   r   d  s    4
77Fr!   r   c                   X    \ rS rSrSrS r\S 5       rS rS r	S r
S rS	 rS
 rS rSrg)r   iv  z4
wordnum:height*wordnum:height*...
wordnum:height,

c                     Xl         X l        g r$   rn   height)r   rn   r   s      r   r   PropbankTreePointer.__init__~  s    r!   c                    U R                  S5      n[        U5      S:  a/  [        U Vs/ s H  n[        R	                  U5      PM     sn5      $ U R                  S5      n[        U5      S:  a/  [        U Vs/ s H  n[        R	                  U5      PM     sn5      $ U R                  S5      n[        U5      S:w  a  [        SU -  5      e[        [        US   5      [        US   5      5      $ s  snf s  snf )Nr   rz   r   :   zbad propbank pointer %rr   )r>   r   r   r   rA   r   r?   r   )r   r   elts      r   rA   PropbankTreePointer.parse  s     v;?+;AB6C$**3/6B 
 v;?+;AB6C$**3/6B 
 v;!6:;;"3vay>3vay>BB C Cs   C19C6c                 8    U R                    SU R                   3$ )Nr   r   r5   s    r   r   PropbankTreePointer.__str__  s    ,,q..r!   c                 8    SU R                   U R                  4-  $ )NzPropbankTreePointer(%d, %d)r   r5   s    r   r   PropbankTreePointer.__repr__  s    ,dkk/JJJr!   c                 2   [        U[        [        45      (       a,  UR                  S   n[        U[        [        45      (       a  M,  [        U[        5      (       d  XL $ U R
                  UR
                  :H  =(       a    U R                  UR                  :H  $ Nr   )r   r   r   r   r   rn   r   r   others     r   __eq__PropbankTreePointer.__eq__  sv    !9;S TUULLOE !9;S TUU %!455= ||u}},L1LLr!   c                     X:X  + $ r$   rU   r   s     r   __ne__PropbankTreePointer.__ne__  s      r!   c                 N   [        U[        [        45      (       a,  UR                  S   n[        U[        [        45      (       a  M,  [        U[        5      (       d  [        U 5      [        U5      :  $ U R                  U R                  * 4UR                  UR                  * 4:  $ r   )r   r   r   r   r   r=   rn   r   r   s     r   __lt__PropbankTreePointer.__lt__  s    !9;S TUULLOE !9;S TUU %!455d8bi''t{{l+u}}u||m.LLLr!   c                 D    Uc  [        S5      eXR                  U5         $ )Nr   )r?   treepos)r   r   s     r   r   PropbankTreePointer.select  s%    <788LL&''r!   c                    Uc  [        S5      eU/n/ nSn [        US   [        5      (       a  [        U5      [        U5      :  a  UR	                  S5        OUS==   S-  ss'   US   [        US   5      :  a  UR	                  US   US      5        OlUR                  5         UR                  5         OKX@R                  :X  a'  [        US[        U5      U R                  -
  S-
   5      $ US-  nUR                  5         M  )ze
Convert this pointer to a standard 'tree position' pointer,
given that it points to the given tree.
Nr   r   rz   )	r?   r   r   r   rL   poprn   rr   r   )r   r   stackr   rn   s        r   r   PropbankTreePointer.treepos  s    
 <788%)T**w<#e*,NN1%BK1$K2;U2Y/LLr72;!78 IIKKKM ll* )I3w<$+++E+I!JKKqLGIIK+ r!   )r   rn   N)rc   rd   re   rf   rg   r   r   rA   r   r   r   r   r   r   r   rh   rU   r!   r   r   r   v  sH     C C*/KM!M(
  r!   r   c                       \ rS rSrSrSrSrSrSrSr	Sr
SrSrS	rS
rSrSrSrSS jrS rS r\R,                  " S5      r\S 5       rSrg)r   i  r^   gr   vfnob3ar   c                 @    Xl         X l        X0l        X@l        XPl        g r$   formtenseaspectpersonvoice)r   r  r  r  r  r	  s         r   r   PropbankInflection.__init__  s    	

r!   c                     U R                   U R                  -   U R                  -   U R                  -   U R                  -   $ r$   r  r5   s    r   r   PropbankInflection.__str__  s0    yy4::%3dkkADJJNNr!   c                     SU -  $ )Nz<PropbankInflection: %s>rU   r5   s    r   r   PropbankInflection.__repr__  s    )D00r!   z"[igpv\-][fpn\-][pob\-][3\-][ap\-]$c                     [        U [        5      (       d  [        S5      e[        U 5      S:w  d$  [        R
                  R                  U 5      (       d  [        SU -  5      e[	        U 6 $ )Nzexpected a string   z!Bad propbank inflection string %r)r   r   	TypeErrorr   r   	_VALIDATEmatchr?   )r   s    r   rA   PropbankInflection.parse  sY    !S!!/00q6Q;0::@@CC@1DEE!1%%r!   )r  r  r  r  r	  N)r   r   r   r   r   )rc   rd   re   rf   
INFINITIVEGERUND
PARTICIPLEFINITEFUTUREPASTPRESENTPERFECTPROGRESSIVEPERFECT_AND_PROGRESSIVETHIRD_PERSONACTIVEPASSIVENONEr   r   r   recompiler  r   rA   rh   rU   r!   r   r   r     s|    JFJFFDGGK!LFGDO1 

@AI& &r!   r   )r#  	functoolsr   	xml.etreer   nltk.corpus.reader.apinltk.corpus.reader.utilnltk.internalsr   	nltk.treer   r   r	   r\   r   r   r   r   r   rU   r!   r   <module>r+     s    
 $ ! $ % 2 L< LhT
 T
n( ((F F&F F$ a / a  a H,& ,&r!   