
    Of.                     6   S SK 7  S SKJr  \R	                  S5      r\R	                  S5      r\R	                  S5      r\R	                  S5      r\R	                  S5      r	\R	                  S5      r
\R	                  S	5      r " S
 S\5      r " S S\\5      rg)    )*)XMLCorpusReaderz<p(?: [^>]*){0,1}>(.*?)</p>z<s(?: [^>]*){0,1}>(.*?)</s>z#<([wc](?: [^>]*){0,1}>)(.*?)</[wc]>z!<[wc](?: [^>]*){0,1}>(.*?)</[wc]>ztype="(.*?)"zana="(.*?)"ztext id="(.*?)"c                   4    \ rS rSr   SS jrSrS rS rSrg)	TEICorpusView   Nc                 \    X l         Xpl        X0l        X@l        [        R                  XUS9  g )N)startpos)_tagged_textids_group_by_sent_group_by_paraStreamBackedCorpusView__init__)selfcorpus_filetaggedgroup_by_sentgroup_by_paratagsethead_lentextidss           ;/usr/lib/python3/dist-packages/nltk/corpus/reader/pl196x.pyr   TEICorpusView.__init__   s-     ++''H'M    i   c           
         UR                  U R                  5      n[        U5      nUR                  S5      UR                  S5      :  d  UR                  S5      S:X  aa  UR	                  5       n[        U5      S::  a  OAX#-  nUR                  S5      UR                  S5      :  a  MJ  UR                  S5      S:X  a  Ma  UR                  SS5      n[        R                  U5      nU R                  (       aZ  U HT  nXPR                  ;  d  M  UR                  U5      S-
  nX&S  R                  S5      [        S5      -   nUS U X&U-   S  -   nMV     / n[        R                  U5       H  n	/ n
[        R                  U	5       H  nU R                  (       d  [        R                  U5      nO2[        [!        U R"                  [$        R                  U5      5      5      nU R&                  (       a  U
R)                  U5        M  U
R+                  U5        M     U R,                  (       a  UR)                  U
5        M  UR+                  U
5        M     U$ )Nz<text idz</text>r   
    )	readlines	_pagesizeconcatcountreadlinelenreplaceTEXTIDfindallr   findPARASENTr
   WORDlistmap
_parse_tag
TAGGEDWORDr   appendextendr   )r   streamblocktmpr   tidbegendoutputpara_strparasent_strsents                r   
read_blockTEICorpusView.read_block,   s     0u{{:&Y)??EKKE
E //#C3x1}LE {{:&Y)??EKKE
E dB'..'==mm+**S/A-C+**95IFC!$3K%c	*<<E	  U+HD LL2||<<1DDOOZ5G5G5Q RSD&&KK%KK% 3 ""d#d# , r   c                     Uu  p#UR                  S5      (       a'  [        R                  U5      R                  S5      nX24$ [        R                  U5      R                  S5      nX24$ )Nwr   )
startswithANAsearchgroupTYPE)r   tag_word_tupletagwords       r   r.   TEICorpusView._parse_tagS   s]    $>>#**S/''*C y ++c"((+Cyr   )r   r   r
   r   )Nr   N)	__name__
__module____qualname____firstlineno__r   r    r=   r.   __static_attributes__ r   r   r   r      s%     N$ I%Nr   r   c                       \ rS rSrSrS rS rS rSS jrS r	SS	 jr
SS
 jrSS jrSS jrSS jrSS jrSS jrSS jrSrg)Pl196xCorpusReader\   i
  c                     SU;   a  US   U l         OS U l         [        R                  " U /UQ76   [        R                  X5        U R	                  5         g )Ntextid_file)r   r   r   CategorizedCorpusReader_init_textids)r   argskwargss      r   r   Pl196xCorpusReader.__init___   sJ    F""=1DM DM  --((6r   c           	         [        [        5      U l        [        [        5      U l        U R                  b  [        U R                  5       nU H  nUR                  5       nUR                  SS5      u  p4X0R                  5       ;  a  [        SU R                  < SU< S35      eUR                  U R                  5       H  nU R                  X55        M     M     S S S 5        g g ! , (       d  f       g = f)N r   zIn text_id mapping file z: z
 not found)defaultdictr,   _f2t_t2fr   openstripsplitfileids
ValueError
_delimiter_add_textids)r   fplinefile_idtext_idstext_ids         r   rV    Pl196xCorpusReader._init_textidsj   s    %	%	==$dmm$D::<D(,

3(:%Glln4(#}}g7  $,>>$//#B))'; $C  %$ %$$s   BC''
C5c                 |    U R                   U   R                  U5        U R                  U   R                  U5        g N)r]   r0   r^   )r   rh   rj   s      r   re   Pl196xCorpusReader._add_textidsz   s0    		'!!'*		'!!'*r   Nc           
      ~  ^  S n[        [        [        S XU45      5      5      S:w  a  [        S5      eUb  US 4$ Ub  T R	                  U5      S 4$ Ubj  [        U[        5      (       a  U/n[        U 4S jU 5       / 5      n[        5       nU H)  n[        T R                  U   5      [        U5      -  Xg'   M+     XV4$ g )Nc                 
    U S L $ rm   rO   )accessors    r   <lambda>-Pl196xCorpusReader._resolve.<locals>.<lambda>   s	    T)9r   r   z6Specify exactly one of: fileids, categories or textidsc              3   B   >#    U  H  nTR                   U   v   M     g 7frm   )r^   ).0tr   s     r   	<genexpr>.Pl196xCorpusReader._resolve.<locals>.<genexpr>   s     7w!1w   )r$   r,   filterrc   rb   
isinstancestrsumdictsetr]   )r   rb   
categoriesr   r4   filestdictfs   `       r   _resolvePl196xCorpusReader._resolve~   s    9 g6  K  D= !<<
+T11'3''")7w7<EFEtyy|,s7|; < r   c                     U$ rm   rO   )r   rG   s     r   
decode_tagPl196xCorpusReader.decode_tag   s    
r   c                    ^  T R                  X5      u  pUc  [        T R                  5      $ [        U[        5      (       a  U/n[        [        U 4S jU 5       / 5      5      $ )a>  
In the pl196x corpus each category is stored in single
file and thus both methods provide identical functionality. In order
to accommodate finer granularity, a non-standard textids() method was
implemented. All the main functions can be supplied with a list
of required chunks---giving much more control to the user.
c              3   B   >#    U  H  nTR                   U   v   M     g 7frm   )r]   )ru   dr   s     r   rw   -Pl196xCorpusReader.textids.<locals>.<genexpr>   s     9A499Q<ry   )r   sortedr^   r{   r|   r}   r   rb   r   _s   `   r   r   Pl196xCorpusReader.textids   sU     ]]77
?$))$$gs##iGc992>??r   c                    U R                  XU5      u  pUc  U R                  nO[        U[        5      (       a  U/nU(       aC  [	        U Vs/ s H,  n[        U R                  U5      SSSU R                  X4   S9PM.     sn5      $ [	        U Vs/ s H)  n[        U R                  U5      SSSU R                  S9PM+     sn5      $ s  snf s  snf )NFr   r   r   r   _fileidsr{   r|   r!   r   abspathr   r   rb   r   r   fileids        r   wordsPl196xCorpusReader.words   s    ==gF?mmG%%iG #*
 #* "V,!% ' #*
   #*	 #* "V,!% #*	 
	   3C0Cc                    U R                  XU5      u  pUc  U R                  nO[        U[        5      (       a  U/nU(       aC  [	        U Vs/ s H,  n[        U R                  U5      SSSU R                  X4   S9PM.     sn5      $ [	        U Vs/ s H)  n[        U R                  U5      SSSU R                  S9PM+     sn5      $ s  snf s  snf NFTr   r   r   r   s        r   sentsPl196xCorpusReader.sents   s    ==gF?mmG%%iG #*
 #* "V,!% ' #*
  
 #*	 #* "V,eT54== #*	 
r   c                    U R                  XU5      u  pUc  U R                  nO[        U[        5      (       a  U/nU(       aC  [	        U Vs/ s H,  n[        U R                  U5      SSSU R                  X4   S9PM.     sn5      $ [	        U Vs/ s H)  n[        U R                  U5      SSSU R                  S9PM+     sn5      $ s  snf s  snf r   r   r   s        r   parasPl196xCorpusReader.paras   s    ==gF?mmG%%iG #*
 #* "V,!% ' #*
  
 #*	 #* "V,eT4$-- #*	 
r   c                    U R                  XU5      u  pUc  U R                  nO[        U[        5      (       a  U/nU(       aC  [	        U Vs/ s H,  n[        U R                  U5      SSSU R                  X4   S9PM.     sn5      $ [	        U Vs/ s H)  n[        U R                  U5      SSSU R                  S9PM+     sn5      $ s  snf s  snf NTFr   r   r   r   s        r   tagged_wordsPl196xCorpusReader.tagged_words  s    ==gF?mmG%%iG #*
 #* "V,!% ' #*
  
 #*	 #* "V,dE54== #*	 
r   c                    U R                  XU5      u  pUc  U R                  nO[        U[        5      (       a  U/nU(       aC  [	        U Vs/ s H,  n[        U R                  U5      SSSU R                  X4   S9PM.     sn5      $ [	        U Vs/ s H)  n[        U R                  U5      SSSU R                  S9PM+     sn5      $ s  snf s  snf r   r   r   s        r   tagged_sentsPl196xCorpusReader.tagged_sents2  s    ==gF?mmG%%iG #*
 #* "V,!% ' #*
  
 #*	 #* "V,dD%$-- #*	 
r   c                    U R                  XU5      u  pUc  U R                  nO[        U[        5      (       a  U/nU(       aC  [	        U Vs/ s H,  n[        U R                  U5      SSSU R                  X4   S9PM.     sn5      $ [	        U Vs/ s H)  n[        U R                  U5      SSSU R                  S9PM+     sn5      $ s  snf s  snf )NTr   r   r   r   s        r   tagged_parasPl196xCorpusReader.tagged_parasQ  s    ==gF?mmG%%iG #*
 #* "V,!% ' #*
  
 #*	 #* "V,dD$ #*	 
r   c                     U R                  X5      u  p[        U5      S:X  a  [        R                  " XS   5      $ [	        S5      e)Nr   r   zExpected a single file)r   r$   r   xml	TypeErrorr   s       r   r   Pl196xCorpusReader.xmlp  s?    ]]77
w<1"&&tQZ88455r   )r]   r^   r   rm   )NN)NNN)rJ   rK   rL   rM   r   r   rV   re   r   r   r   r   r   r   r   r   r   r   rN   rO   r   r   rQ   rQ   \   sK    H	< + @@ !F>>>>>6r   rQ   N)nltk.corpus.reader.apinltk.corpus.reader.xmldocsr   recompiler)   r*   r/   r+   rE   rB   r&   r   r   rU   rQ   rO   r   r   <module>r      s    % 6	zz01	zz01ZZ>?
	zz67	zz/"jj 	&	'B* BJY60/ Y6r   