
    Of                         S SK r S SKJr  S SK7  S SK7   " S S\5      r " S S\5      rS r	S	 r
\S
:X  a  \	" 5         \
" 5         gg)    N)util)*c                   Z    \ rS rSrSS jrSS jrSS jrSS jrSS jrSS jr	SS	 jr
S
rg)ChasenCorpusReader   Nc                 <    X@l         [        R                  XX#5        g N)_sent_splitterCorpusReader__init__)selfrootfileidsencodingsent_splitters        ;/usr/lib/python3/dist-packages/nltk/corpus/reader/chasen.pyr   ChasenCorpusReader.__init__   s    +d'<    c                     [        U R                  US5       VVs/ s H  u  p#[        X#SSSU R                  5      PM      snn5      $ s  snnf NTFconcatabspathsChasenCorpusViewr
   r   r   fileidencs       r   wordsChasenCorpusReader.words   sQ     &*]]7D%A%AMV !eUE4CVCVW%A
 	
   %A
c                     [        U R                  US5       VVs/ s H  u  p#[        X#SSSU R                  5      PM      snn5      $ s  snnf r   r   r   s       r   tagged_wordsChasenCorpusReader.tagged_words   sQ     &*]]7D%A%AMV !dE5$BUBUV%A
 	
r    c                     [        U R                  US5       VVs/ s H  u  p#[        X#SSSU R                  5      PM      snn5      $ s  snnf r   r   r   s       r   sentsChasenCorpusReader.sents#   sQ     &*]]7D%A%AMV !eT5$BUBUV%A
 	
r    c                     [        U R                  US5       VVs/ s H  u  p#[        X#SSSU R                  5      PM      snn5      $ s  snnf r   r   r   s       r   tagged_sentsChasenCorpusReader.tagged_sents+   sQ     &*]]7D%A%AMV !dD%ATATU%A
 	
r    c                     [        U R                  US5       VVs/ s H  u  p#[        X#SSSU R                  5      PM      snn5      $ s  snnf r   r   r   s       r   parasChasenCorpusReader.paras3   sQ     &*]]7D%A%AMV !eT4ATATU%A
 	
r    c                     [        U R                  US5       VVs/ s H  u  p#[        X#SSSU R                  5      PM      snn5      $ s  snnf )NTr   r   s       r   tagged_parasChasenCorpusReader.tagged_paras;   sQ     &*]]7D%A%AMV !dD$@S@ST%A
 	
r    )r
   )utf8Nr	   )__name__
__module____qualname____firstlineno__r   r   r"   r%   r(   r+   r.   __static_attributes__ r   r   r   r      s%    =





r   r   c                   *    \ rS rSrSr SS jrS rSrg)r   D   z
A specialized corpus view for ChasenReader. Similar to ``TaggedCorpusView``,
but this'll use fixed sets of word and sentence tokenizer.
Nc                 \    X0l         X@l        XPl        X`l        [        R                  XUS9  g )Nr   )_tagged_group_by_sent_group_by_parar
   StreamBackedCorpusViewr   )r   corpus_filer   taggedgroup_by_sentgroup_by_parar   s          r   r   ChasenCorpusView.__init__J   s.     +++''H'Mr   c                    / n[        USS5       GH  n/ n/ nUR                  5        H  nUR                  5       S:H  nUR                  S5      nUS   SR	                  USS 5      4n	U(       d  UR                  U	5        U(       d+  U R                  (       d  Mr  U R                  U	5      (       d  M  U R                  (       d  U V	V
s/ s H  u  pU	PM	     nn	n
U R                  (       a  UR                  U5        OUR                  U5        / nM     [        U5      S:  a[  U R                  (       d  U V	V
s/ s H  u  pU	PM	     nn	n
U R                  (       a  UR                  U5        OUR                  U5        U R                  (       a  UR                  U5        GM  UR                  U5        GM     U$ s  sn
n	f s  sn
n	f )zReads one paragraph at a time..z^EOS\nEOS	r      N)read_regexp_block
splitlinesstripsplitjoinappendr
   r;   r<   extendlenr=   )r   streamblockpara_strparasentline_eos_cellswts              r   
read_blockChasenCorpusView.read_blockY   sa   )&$	BHDD ++-zz|u,D)AY		&* 56KKND///D4G4G4J4J<<045fq5**D)D)D .  4y1}||,01D&1ADD1&&KK%KK%""T"T"A CD +  6 2s   F:
8G )r=   r<   r
   r;   r	   )r1   r2   r3   r4   __doc__r   r[   r5   r6   r   r   r   r   D   s     N%r   r   c                      SS K n SSKJn  U" S[        SSS9n[	        SR                  UR                  5       SS	 5      5        [	        S
R                  S UR                  5       SS  5       5      5        g )Nr   LazyCorpusLoaderjeita.*chasenutf-8r:   /iTV  i|V  z
EOS
c              3   R   #    U  H  nS R                  S U 5       5      v   M     g7f)
c              3   v   #    U  H/  nS R                  US   US   R                  S5      S   5      v   M1     g7f)z{}/{}r   rH   rG      N)formatrL   ).0rY   s     r   	<genexpr>!demo.<locals>.<genexpr>.<genexpr>   s4     NAgnnQqT1Q4::d+;A+>??s   79N)rM   )rj   rU   s     r   rk   demo.<locals>.<genexpr>   s)      
7 IINNNN7s   %'iz  i}  )nltknltk.corpus.utilr`   r   printrM   r   r(   )rn   r`   ra   s      r   demorq      si    1W&8+PWXE	#((5;;=u-
./	 
**,T$7
 	
r   c                      SSK Jn   U " S[        SSS9n[        UR	                  5       S   S   [
        5      (       d   eg )Nr   r_   ra   rb   rc   r:   rH   )ro   r`   r   
isinstancer"   str)r`   ra   s     r   testru      s=    1W&8+PWXEe((*1-a0#6666r   __main__)sysnltk.corpus.readerr   nltk.corpus.reader.apinltk.corpus.reader.utilr   r   r>   r   rq   ru   r1   r6   r   r   <module>r{      sQ     # $ %3
 3
l:- :z7 zFF r   