
    Of=                         S SK Jr  S SKJr  S SK7  S SK7  S SKJr  S SKJ	r	   " S S\
5      r " S S	5      r " S
 S5      r " S S\5      r " S S\5      r\ " S S\5      5       rg)    )total_ordering)ElementTree)*)raise_unorderable_types)Treec                   `    \ rS rSrSr     SS jrSS jrS rS rSS jr	S	 r
S
 4S jrSrg)NombankCorpusReader   ah  
Corpus reader for the nombank corpus, which augments the Penn
Treebank with information about the predicate argument structure
of every noun instance.  The corpus consists of two parts: the
predicate-argument annotations themselves, and a set of "frameset
files" which define the argument labels used by the annotations,
on a per-noun basis.  Each "frameset file" contains one or more
predicates, such as ``'turn'`` or ``'turn_on'``, each of which is
divided into coarse-grained word senses called "rolesets".  For
each "roleset", the frameset file provides descriptions of the
argument roles, along with examples.
Nc                     [        U[        5      (       a  [        X5      U l        [	        U5      U l        [
        R                  XX75        X l        X@l        XPl	        X`l
        g)ar  
:param root: The root directory for this corpus.
:param nomfile: The name of the file containing the predicate-
    argument annotations (relative to ``root``).
:param framefiles: A list or regexp specifying the frameset
    fileids for this corpus.
:param parse_fileid_xform: A transform that should be applied
    to the fileids in this corpus.  This should be a function
    of one argument (a fileid) that returns a string (the new
    fileid).
:param parse_corpus: The corpus containing the parse trees
    corresponding to this corpus.  These parse trees are
    necessary to resolve the tree pointers used by nombank.
N)
isinstancestrfind_corpus_fileids_fileidslistCorpusReader__init___nomfile
_nounsfile_parse_fileid_xform_parse_corpus)selfrootnomfile
framefiles	nounsfileparse_fileid_xformparse_corpusencodings           </usr/lib/python3/dist-packages/nltk/corpus/reader/nombank.pyr   NombankCorpusReader.__init__    sQ    4 j#&&/ADMZ(d*?  ##5 )    c                    ^ ^^ 0 mTb	  U4S jTS'   [        T R                  T R                  5      UU 4S jT R                  T R                  5      S9$ )zq
:return: a corpus view that acts as a list of
    ``NombankInstance`` objects, one for each noun in the corpus.
c                 "   > U R                   T:H  $ N)baseform)instr%   s    r   <lambda>/NombankCorpusReader.instances.<locals>.<lambda>M   s    T]]h5Nr!   instance_filterc                 *   > TR                   " U 40 TD6$ r$   )_read_instance_block)streamkwargsr   s    r   r'   r(   P   s    444VFvFr!   r   )StreamBackedCorpusViewabspathr   r   )r   r%   r-   s   ``@r   	instancesNombankCorpusReader.instancesF   sK    
 (NF$%%LL'F]]4==1
 	
r!   c                     [        U R                  U R                  5      [        U R	                  U R                  5      S9$ )zy
:return: a corpus view that acts as a list of strings, one for
    each line in the predicate-argument annotation file.
r.   )r/   r0   r   read_line_blockr   r   s    r   linesNombankCorpusReader.linesT   s4    
 &LL']]4==1
 	
r!   c                    UR                  S5      S   nUR                  SS5      nUR                  SS5      R                  SS5      nSU-  nX0R                  5       ;  a  [        S	U-  5      eU R	                  U5      R                  5        n[        R                  " U5      R                  5       nS
S
S
5        WR                  S5       H  nUR                  S   U:X  d  M  Us  $    [        SU SU 35      e! , (       d  f       NN= f)z5
:return: the xml description for the given roleset.
.r   	perc-sign%oneslashonezero1/10
1-slash-10frames/%s.xmlFrameset file for %s not foundNpredicate/rolesetidzRoleset z not found in )splitreplacefileids
ValueErrorr0   openr   parsegetrootfindallattrib)r   
roleset_idr%   	framefilefpetreerolesets          r   rP   NombankCorpusReader.roleset_   s     ##C(+##K5##$5v>FFL
 $h.	LLN*=
JKK \\)$))+r%%b)113E ,}}%89G~~d#z1 : 8J<~i[IJJ ,+s   %C;;
D	c                    Ub*  SU-  nX R                  5       ;  a  [        SU-  5      eU/nOU R                  5       n/ nU Ho  nU R                  U5      R                  5        n[        R
                  " U5      R                  5       nSSS5        UR                  WR                  S5      5        Mq     [        U5      $ ! , (       d  f       N== f)z1
:return: list of xml descriptions for rolesets.
Nr?   r@   rA   )
rE   rF   r0   rG   r   rH   rI   appendrJ   LazyConcatenation)r   r%   rM   r   rsetsrN   rO   s          r   rolesetsNombankCorpusReader.rolesetsu   s     '(2I. !AH!LMM#JJ#I i(--/2#))"-557 0LL':;< $ !'' 0/s   $%C  
C	c                     [        U R                  U R                  5      [        U R	                  U R                  5      S9$ )zu
:return: a corpus view that acts as a list of all noun lemmas
    in this corpus (from the nombank.1.0.words file).
r.   )r/   r0   r   r4   r   r5   s    r   nounsNombankCorpusReader.nouns   s4    
 &LL)]]4??3
 	
r!   c                     g)NT )r&   s    r   r'   NombankCorpusReader.<lambda>   s    r!   c                    / n[        S5       Ht  nUR                  5       R                  5       nU(       d  M*  [        R	                  XPR
                  U R                  5      nU" U5      (       d  Mc  UR                  U5        Mv     U$ )Nd   )rangereadlinestripNombankInstancerH   r   r   rS   )r   r,   r)   blockiliner&   s          r   r+   (NombankCorpusReader._read_instance_block   sr     sA??$**,Dt&,,22D4F4F #4((LL&  r!   )r   r   r   r   r   ) NNNutf8r$   )__name__
__module____qualname____firstlineno____doc__r   r1   r6   rP   rV   rY   r+   __static_attributes__r\   r!   r   r	   r	      sC    " $*L
	
K,(*	
 <M r!   r	   c                   d    \ rS rSr SS jr\S 5       rS rS rS r	\" \	SS	9r
\SS
 j5       rSrg)rc      Nc
                     Xl          X l         X0l         X@l         XPl         X`l         Xpl         [        U5      U l         Xl	        g r$   )
fileidsentnumwordnumr%   sensenumber	predicatepredidtuple	argumentsr   )
r   rs   rt   ru   r%   rv   rw   rx   rz   r   s
             r   r   NombankInstance.__init__   sm     	  	& 	D !,&0"	@ *y)	 )	-r!   c                     U R                   R                  SS5      nUR                  SS5      R                  SS5      nU SU R                   3$ )zThe name of the roleset used by this instance's predicate.
Use ``nombank.roleset() <NombankCorpusReader.roleset>`` to
look up information about the roleset.r;   r:   r=   r>   r<   r9   )r%   rD   rv   )r   rs     r   rP   NombankInstance.roleset   sP    
 MM!!#{3IIfl+33LBSTAd&&'((r!   c                 d    SR                  U R                  U R                  U R                  5      $ )Nz'<NombankInstance: {}, sent {}, word {}>)formatrs   rt   ru   r5   s    r   __repr__NombankInstance.__repr__   s*    8??KKLLLL
 	
r!   c                 
   SR                  U R                  U R                  U R                  U R                  U R
                  5      nU R                  U R                  S44-   n[        U5       H  u  p4USU SU 3-  nM     U$ )Nz{} {} {} {} {}rel -)	r   rs   rt   ru   r%   rv   rz   rw   sorted)r   sitemsarglocargids        r   __str__NombankInstance.__str__   s    ##KKLLLLMM
 4>>5"9!;;#E]MF1VHAeW%%A +r!   c                     U R                   c  g U R                  U R                   R                  5       ;  a  g U R                   R                  U R                  5      U R                     $ r$   )r   rs   rE   parsed_sentsrt   r5   s    r   	_get_treeNombankInstance._get_tree   sS    $;;d//7799  --dkk:4<<HHr!   zs
        The parse tree corresponding to this instance, or None if
        the corresponding tree is not available.)docc                 v   U R                  5       n[        U5      S:  a  [        SU -  5      eUS S u  pEpgnUSS  n	[        U	5       V
Vs/ s H  u  pSU;   d  M  U	R	                  U
5      PM      nn
n[        U5      S:w  a  [        SU -  5      eUb  U" U5      n[        U5      n[        U5      nUS   R                  SS5      u  p[        R                  U5      n/ nU	 H>  nUR                  SS5      u  nnUR                  [        R                  U5      U45        M@     [        UUUUUUUUU5	      $ s  snn
f )N   z Badly formatted nombank line: %r   z-rel   r   r   )
rC   lenrF   	enumeratepopintNombankTreePointerrH   rS   rc   )r   r   r   piecesrs   rt   ru   r%   rv   argsre   pr   predlocrx   rw   rz   argr   r   s                       r   rH   NombankInstance.parse  sQ   v;??!CDD =C2AJ9'[abz'0Ftq&A+{txx{Fs8q=?!CDD )'/F g,g, a&,,sA.&,,W5	 	CIIc1-MFE066v>FG 
 

 
	
1 Gs   
D5D5)	rz   r%   rs   r   rw   rx   rv   rt   ru   r$   )NN)rj   rk   rl   rm   r   propertyrP   r   r   r   treestaticmethodrH   ro   r\   r!   r   rc   rc      sY     /-b ) )
I 4D +
 +
r!   rc   c                       \ rS rSrSrS rSrg)NombankPointeri6  aB  
A pointer used by nombank to identify one or more constituents in
a parse tree.  ``NombankPointer`` is an abstract base class with
three concrete subclasses:

- ``NombankTreePointer`` is used to point to single constituents.
- ``NombankSplitTreePointer`` is used to point to 'split'
  constituents, which consist of a sequence of two or more
  ``NombankTreePointer`` pointers.
- ``NombankChainTreePointer`` is used to point to entire trace
  chains in a tree.  It consists of a sequence of pieces, which
  can be ``NombankTreePointer`` or ``NombankSplitTreePointer`` pointers.
c                 @    U R                   [        :X  a
  [        5       eg r$   )	__class__r   NotImplementedErrorr5   s    r   r   NombankPointer.__init__E  s    >>^+%'' ,r!   r\   N)rj   rk   rl   rm   rn   r   ro   r\   r!   r   r   r   6  s    (r!   r   c                   ,    \ rS rSrS rS rS rS rSrg)NombankChainTreePointeriJ  c                     Xl         g r$   r   r   r   s     r   r    NombankChainTreePointer.__init__K  s    	/r!   c                 F    SR                  S U R                   5       5      $ )Nr   c              3   ,   #    U  H
  nS U-  v   M     g7fz%sNr\   .0r   s     r   	<genexpr>2NombankChainTreePointer.__str__.<locals>.<genexpr>R       6+Qq+   joinr   r5   s    r   r   NombankChainTreePointer.__str__Q      xx6$++666r!   c                     SU -  $ )Nz<NombankChainTreePointer: %s>r\   r5   s    r   r    NombankChainTreePointer.__repr__T      .55r!   c           	          Uc  [        S5      e[        SU R                   Vs/ s H  o"R                  U5      PM     sn5      $ s  snf )NParse tree not availablez*CHAIN*rF   r   r   selectr   r   r   s      r   r   NombankChainTreePointer.selectW  <    <788ID1DEED   A
r   N	rj   rk   rl   rm   r   r   r   r   ro   r\   r!   r   r   r   J  s    /76Fr!   r   c                   ,    \ rS rSrS rS rS rS rSrg)NombankSplitTreePointeri]  c                     Xl         g r$   r   r   s     r   r    NombankSplitTreePointer.__init__^  s    	3r!   c                 F    SR                  S U R                   5       5      $ )N,c              3   ,   #    U  H
  nS U-  v   M     g7fr   r\   r   s     r   r   2NombankSplitTreePointer.__str__.<locals>.<genexpr>d  r   r   r   r5   s    r   r   NombankSplitTreePointer.__str__c  r   r!   c                     SU -  $ )Nz<NombankSplitTreePointer: %s>r\   r5   s    r   r    NombankSplitTreePointer.__repr__f  r   r!   c           	          Uc  [        S5      e[        SU R                   Vs/ s H  o"R                  U5      PM     sn5      $ s  snf )Nr   z*SPLIT*r   r   s      r   r   NombankSplitTreePointer.selecti  r   r   r   Nr   r\   r!   r   r   r   ]  s    3
76Fr!   r   c                   X    \ rS rSrSrS r\S 5       rS rS r	S r
S rS	 rS
 rS rSrg)r   io  z4
wordnum:height*wordnum:height*...
wordnum:height,

c                     Xl         X l        g r$   ru   height)r   ru   r   s      r   r   NombankTreePointer.__init__w  s    r!   c                    U R                  S5      n[        U5      S:  a/  [        U Vs/ s H  n[        R	                  U5      PM     sn5      $ U R                  S5      n[        U5      S:  a/  [        U Vs/ s H  n[        R	                  U5      PM     sn5      $ U R                  S5      n[        U5      S:w  a  [        SU -  5      e[        [        US   5      [        US   5      5      $ s  snf s  snf )Nr   r   r   :   zbad nombank pointer %rr   )rC   r   r   r   rH   r   rF   r   )r   r   elts      r   rH   NombankTreePointer.parse{  s     v;?*:@A&3#))#.&A 
 v;?*:@A&3#))#.&A 
 v;!59::!#fQi.#fQi.AA B Bs   C19C6c                 8    U R                    SU R                   3$ )Nr   r   r5   s    r   r   NombankTreePointer.__str__  s    ,,q..r!   c                 8    SU R                   U R                  4-  $ )NzNombankTreePointer(%d, %d)r   r5   s    r   r   NombankTreePointer.__repr__  s    +t||T[[.IIIr!   c                 2   [        U[        [        45      (       a,  UR                  S   n[        U[        [        45      (       a  M,  [        U[        5      (       d  XL $ U R
                  UR
                  :H  =(       a    U R                  UR                  :H  $ Nr   )r   r   r   r   r   ru   r   r   others     r   __eq__NombankTreePointer.__eq__  sv    !8:Q RSSLLOE !8:Q RSS %!344= ||u}},L1LLr!   c                     X:X  + $ r$   r\   r   s     r   __ne__NombankTreePointer.__ne__  s      r!   c                 N   [        U[        [        45      (       a,  UR                  S   n[        U[        [        45      (       a  M,  [        U[        5      (       d  [        U 5      [        U5      :  $ U R                  U R                  * 4UR                  UR                  * 4:  $ r   )r   r   r   r   r   rB   ru   r   r   s     r   __lt__NombankTreePointer.__lt__  s    !8:Q RSSLLOE !8:Q RSS %!344d8bi''t{{l+u}}u||m.LLLr!   c                 D    Uc  [        S5      eXR                  U5         $ )Nr   )rF   treepos)r   r   s     r   r   NombankTreePointer.select  s%    <788LL&''r!   c                    Uc  [        S5      eU/n/ nSn [        US   [        5      (       a  [        U5      [        U5      :  a  UR	                  S5        OUS==   S-  ss'   US   [        US   5      :  a  UR	                  US   US      5        OlUR                  5         UR                  5         OKX@R                  :X  a'  [        US[        U5      U R                  -
  S-
   5      $ US-  nUR                  5         M  )ze
Convert this pointer to a standard 'tree position' pointer,
given that it points to the given tree.
Nr   r   r   )	rF   r   r   r   rS   r   ru   ry   r   )r   r   stackr   ru   s        r   r   NombankTreePointer.treepos  s    
 <788%)T**w<#e*,NN1%BK1$K2;U2Y/LLr72;!78 IIKKKM ll* )I3w<$+++E+I!JKKqLGIIK+ r!   )r   ru   N)rj   rk   rl   rm   rn   r   r   rH   r   r   r   r   r   r   r   ro   r\   r!   r   r   r   o  sH     B B*/JM!M(
  r!   r   N)	functoolsr   	xml.etreer   nltk.corpus.reader.apinltk.corpus.reader.utilnltk.internalsr   	nltk.treer   r   r	   rc   r   r   r   r   r\   r!   r   <module>r      sy    % ! $ % 2 P, PpI
 I
X( ((Fn F&Fn F$ a  a  a r!   