
    Of-                        S SK Jr  S SKJrJr  S SKJr  S SKJr  S SK	J
r
Jr  S SKJrJrJr  S rS r " S	 S
5      r " S S\5      r " S S\5      r\" SS5      r\" SS5      r\" SS5      r " S S\5      r " S S\\5      rg)    )
namedtuple)partialwraps)CategorizedCorpusReader)PlaintextCorpusReader)concatread_blankline_block)blankline_tokenizesent_tokenizeword_tokenizec                 0   ^  [        T 5      U 4S j5       nU$ )z
A decorator that allows a function to be called with
a single string of comma-separated values which become
individual function arguments.
c                  >  > [        5       nU  H  n[        U[        5      (       a@  UR                  UR	                  S5       Vs1 s H  oDR                  5       iM     sn5        MX  [        U[         5      (       a  UR                  [        U5      5        M  UR                  U5        M     UR                  5        HM  u  pV[        U[        5      (       d  M  UR	                  S5       Vs1 s H  oDR                  5       iM     snX'   MO     T" U0 UD6$ s  snf s  snf )N,)list
isinstancestrappendsplitstripsetitems)argskwargs_argsargpartnamevaluefuncs          =/usr/lib/python3/dist-packages/nltk/corpus/reader/markdown.pywrapper,comma_separated_string_args.<locals>.wrapper   s    C#s##syy~F~tjjl~FGC&&SX&S!  "<<>KD%%%9>S9IJ9I

9IJ * U%f%% G  Ks   D
+D)r   )r   r!   s   ` r    comma_separated_string_argsr#   
   s"     4[& & N    c                 T    [        U 5      nU(       a  UR                  US   5      /$ U$ Nr   )r	   render)streamparserblocks      r    read_parse_blankline_blockr+   #   s*     (EeAh'((Lr$   c                   f    \ rS rSrS rS rS r\S 5       r\S 5       r	\S 5       r
\S 5       rS	rg
)MarkdownBlock*   c                     Xl         SU l        g )N   contenttruncate_at)selfr2   s     r    __init__MarkdownBlock.__init__+   s    r$   c                 ^    U R                   R                   S[        [        U 5      5       S3$ )Nz	(content=))	__class____name__reprr   r4   s    r    __repr__MarkdownBlock.__repr__/   s)    ..))*)DTO3DAFFr$   c                     U R                   S U R                    [        U R                   5      U R                  :  a  S 3$ S 3$ )Nz... )r2   r3   lenr<   s    r    __str__MarkdownBlock.__str__2   sR    ||-T--./DLL)D,<,<<uEG	
BDEG	
r$   c                     U R                   $ N)r2   r<   s    r    rawMarkdownBlock.raw8   s    ||r$   c                 ,    [        U R                  5      $ rE   )r   r2   r<   s    r    wordsMarkdownBlock.words<   s    T\\**r$   c                 j    [        U R                  5       Vs/ s H  n[        U5      PM     sn$ s  snf rE   )r   r2   r   )r4   sents     r    sentsMarkdownBlock.sents@   s*    0=dll0KL0Kd#0KLLLs   0c           
          [        U R                  5       VVs/ s H(  n[        U5       Vs/ s H  n[        U5      PM     snPM*     snn$ s  snf s  snnf rE   )r
   r2   r   r   )r4   pararL   s      r    parasMarkdownBlock.parasD   sP     +4<<8
8 .;4-@A-@T]4 -@A8
 	
A
s   AAAAr1   N)r:   
__module____qualname____firstlineno__r5   r=   rB   propertyrF   rI   rM   rQ   __static_attributes__ r$   r    r-   r-   *   sd    G
   + + M M 
 
r$   r-   c                   X   ^  \ rS rSrU 4S jr\S 5       r\S 5       r\S 5       rSr	U =r
$ )	CodeBlockL   c                 ,   > Xl         [        TU ]  " U6   g rE   )languagesuperr5   )r4   r]   r   r9   s      r    r5   CodeBlock.__init__M   s     $r$   c                 t    U R                   R                  5        Vs/ s H  n[        U5      PM     sn$ s  snf rE   )r2   
splitlinesr   )r4   lines     r    rM   CodeBlock.sentsQ   s.    040G0G0IJ0Id#0IJJJs   5c                 6    U R                   R                  5       $ rE   )r2   ra   r<   s    r    linesCodeBlock.linesU   s    ||&&((r$   c           
          [        U R                  5       VVs/ s H-  nUR                  5        Vs/ s H  n[        U5      PM     snPM/     snn$ s  snf s  snnf rE   )r
   r2   ra   r   )r4   rP   rb   s      r    rQ   CodeBlock.parasY   sR     +4<<8
8 .2__->?->T]4 ->?8
 	
?
s   AAAA)r]   )r:   rS   rT   rU   r5   rV   rM   re   rQ   rW   __classcell__r9   s   @r    rZ   rZ   L   sF      K K ) ) 
 
r$   rZ   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )MarkdownSectiona   c                 8   > Xl         X l        [        TU ]  " U6   g rE   )headinglevelr^   r5   )r4   ro   rp   r   r9   s       r    r5   MarkdownSection.__init__b   s    
$r$   )ro   rp   )r:   rS   rT   rU   r5   rW   ri   rj   s   @r    rl   rl   a   s       r$   rl   Imagezlabel, src, titleLinkzlabel, href, titleListzis_ordered, itemsc                   6   ^  \ rS rSrSS.U 4S jjrS rSrU =r$ )MarkdownCorpusReaderm   Nr)   c                  > SSK Jn  SSKJn  SSKJn  Xl        U R                  c'  U" SUS9U l        U R                  R                  U5        UR                  S[        [        U R                  S95        [        TU ]0  " U0 UD6  g )	Nr   )
MarkdownIt)RendererPlain)front_matter_plugin
commonmark)renderer_clspara_block_readerrx   )markdown_itrz   mdit_plain.rendererr{   mdit_py_plugins.front_matterr|   r)   use
setdefaultr   r+   r^   r5   )r4   r)   r   r   rz   r{   r|   r9   s          r    r5   MarkdownCorpusReader.__init__n   sm    *5D;;$\NDKKKOO/0)CDKK!X	
 	$)&)r$   c                     [        5       nU R                  U5       H-  nUR                  U R                  R	                  U5      5        M/     U$ rE   )r   _para_block_readerextend_word_tokenizertokenize)r4   r(   rI   rP   s       r    _read_word_block%MarkdownCorpusReader._read_word_block~   s@    ++F3DLL--66t<= 4r$   )r:   rS   rT   rU   r5   r   rW   ri   rj   s   @r    rv   rv   m   s    %) * *  r$   rv   c                     ^  \ rS rSrSrSS.S jr\SU 4S jj5       r\SU 4S jj5       r\SU 4S jj5       r	\SU 4S	 jj5       r
\SU 4S
 jj5       r\SU 4S jj5       rS rS r\SS j5       rS r\SS j5       rS r\SS j5       rS r\SS j5       rS r\SS j5       rS r\SS j5       rS r\SS j5       rSrU =r$ )CategorizedMarkdownCorpusReader   a  
A reader for markdown corpora whose documents are divided into
categories based on their file identifiers.

Based on nltk.corpus.reader.plaintext.CategorizedPlaintextCorpusReader:
https://www.nltk.org/_modules/nltk/corpus/reader/api.html#CategorizedCorpusReader
tags)	cat_fieldc                  ^ / SQn[        U4S jU 5       5      (       d  [        5       TS'   [        R                  " U T5        [        R                  " U /UQ70 TD6  U R
                  bb  U R
                  (       dP  U R                   H?  nU R                  U5      nU(       d  M  US   R                  U/ 5      U R
                  U'   MA     ggg)z
Initialize the corpus reader. Categorization arguments
(``cat_pattern``, ``cat_map``, and ``cat_file``) are passed to
the ``CategorizedCorpusReader`` constructor.  The remaining arguments
are passed to the ``MarkdownCorpusReader`` constructor.
)cat_patterncat_mapcat_filec              3   ,   >#    U  H	  oT;   v   M     g 7frE   rX   ).0r   r   s     r    	<genexpr>;CategorizedMarkdownCorpusReader.__init__.<locals>.<genexpr>   s     5HS&=Hs   r   Nr   )	anydictr   r5   rv   _map_fileidsmetadataget)r4   r   r   r   cat_argsfile_idr   s      `   r    r5   (CategorizedMarkdownCorpusReader.__init__   s     :5H555 !%F9((v6%%d<T<V< 99 ====18)1!B)GDIIg& ) *3 r$   c                 "   > [         TU ]  U5      $ rE   )r^   
categories)r4   fileidsr9   s     r    r   *CategorizedMarkdownCorpusReader.categories   s    w!'**r$   c                 @   > Uc  U R                   $ [        TU ]	  U5      $ rE   )r   r^   r   )r4   r   r9   s     r    r   'CategorizedMarkdownCorpusReader.fileids   s"    == wz**r$   c                 @   > [         TU ]  U R                  X5      5      $ rE   )r^   rF   _resolver4   r   r   r9   s      r    rF   #CategorizedMarkdownCorpusReader.raw   s    w{4===>>r$   c                 @   > [         TU ]  U R                  X5      5      $ rE   )r^   rI   r   r   s      r    rI   %CategorizedMarkdownCorpusReader.words       w}T]]7?@@r$   c                 @   > [         TU ]  U R                  X5      5      $ rE   )r^   rM   r   r   s      r    rM   %CategorizedMarkdownCorpusReader.sents   r   r$   c                 @   > [         TU ]  U R                  X5      5      $ rE   )r^   rQ   r   r   s      r    rQ   %CategorizedMarkdownCorpusReader.paras   r   r$   c                     [        U R                  U R                  X#5      SS9 VVs/ s H  u  pEU R                  XAUS9PM     snn5      $ s  snnf )NT)include_encoding)encoding)r   abspathsr   
CorpusView)r4   readerr   r   pathencs         r    concatenated_view1CategorizedMarkdownCorpusReader.concatenated_view   s`     $(==MM'6 $1 $$KT s;$
 	
s   A
c                     SSK Jn  U R                  R                  UR	                  5       5       Vs/ s H'  nUR
                  S:X  d  M  U" UR                  5      PM)     sn$ s  snf )Nr   )	safe_loadfront_matter)yamlr   r)   parsereadtyper2   )r4   r(   r   ts       r    metadata_reader/CategorizedMarkdownCorpusReader.metadata_reader   sX    " [[&&v{{}5
5vv' !Iaii 5
 	
 
s   A#
A#c                 :    U R                  U R                  X5      $ rE   )r   r   r4   r   r   s      r    r   (CategorizedMarkdownCorpusReader.metadata   s    %%d&:&:GPPr$   c                    U R                   R                  UR                  5       5      n[        S U5      n[        S U5      n[	        5       n[        X45       H=  u  pgUR                  U5      nUR                  Xx5      n	UR                  X(U	S-    5        M?     U V
s/ s HD  n
[        U R                   R                  R                  XR                   R                  S S95      PMF     sn
$ s  sn
f )Nc                 L    U R                   S:H  =(       a    U R                  S:H  $ )Nr   blockquote_openrp   r   r   s    r    <lambda>CCategorizedMarkdownCorpusReader.blockquote_reader.<locals>.<lambda>   s    agglBqvv1B'BBr$   c                 L    U R                   S:H  =(       a    U R                  S:H  $ )Nr   blockquote_closer   r   s    r    r   r      s    agglCqvv1C'CCr$      env)r)   r   r   filterr   zipindexr   r-   rendererr'   options)r4   r(   tokensopening_tokensclosing_tokensblockquotesocopening_indexclosing_indexr*   s              r    blockquote_reader1CategorizedMarkdownCorpusReader.blockquote_reader   s    ""6;;=1BF
  CV
 f7DA"LLOM"LL:Mvma6GHI 8 %	
 % $$++E;;3F3FD+Q %	
 	
 
s   AC,c                 :    U R                  U R                  X5      $ rE   )r   r   r   s      r    r   +CategorizedMarkdownCorpusReader.blockquotes       %%d&<&<gRRr$   c                    U R                   R                  UR                  5       5       Vs/ s HG  nUR                  S:X  d  M  UR                  S;   d  M'  [        UR                  UR                  5      PMI     sn$ s  snf )Nr   )fence
code_block)r)   r   r   rp   r   rZ   infor2   )r4   r(   r   s      r    code_block_reader1CategorizedMarkdownCorpusReader.code_block_reader   sv     [[&&v{{}5

 6ww!|
 !"*A AI		 6
 	
 
s   A=A=$A=c                 :    U R                  U R                  X5      $ rE   )r   r   r   s      r    code_blocks+CategorizedMarkdownCorpusReader.code_blocks   r   r$   c                 N   [        S U R                  R                  UR                  5       5      5       VVs/ s H^  nUR                    HJ  nUR
                  S:X  d  M  [        UR                  UR                  S5      UR                  S5      5      PML     M`     snn$ s  snnf )Nc                      U R                   S:H  $ Ninliner   r   s    r    r   >CategorizedMarkdownCorpusReader.image_reader.<locals>.<lambda>	      !&&H,r$   imagesrctitle)	r   r)   r   r   childrenr   rr   r2   attrGet)r4   r(   inline_tokenchild_tokens       r    image_reader,CategorizedMarkdownCorpusReader.image_reader  s     !',dkk.?.?.N!
!  ,447*E####E*##G,  5
!
 	
 
s   $B! =B!c                 :    U R                  U R                  X5      $ rE   )r   r   r   s      r    images&CategorizedMarkdownCorpusReader.images  s    %%d&7&7MMr$   c                    [        S U R                  R                  UR                  5       5      5       VVVs/ s Hy  n[	        UR
                  5        H\  u  p4UR                  S:X  d  M  [        UR
                  US-      R                  UR                  S5      UR                  S5      5      PM^     M{     snnn$ s  snnnf )Nc                      U R                   S:H  $ r   r   r   s    r    r   =CategorizedMarkdownCorpusReader.link_reader.<locals>.<lambda>  r   r$   	link_openr   hrefr   )
r   r)   r   r   	enumerater   r   rs   r2   r   )r4   r(   r   ir   s        r    link_reader+CategorizedMarkdownCorpusReader.link_reader  s     !',dkk.?.?.N!
! #,L,A,A"B;.D%%a!e,44##F+##G, #C
!
 	
 
s   /B>,AB>c                 :    U R                  U R                  X5      $ rE   )r   r  r   s      r    links%CategorizedMarkdownCorpusReader.links!      %%d&6&6LLr$   c                   ^^ U R                   R                  UR                  5       5      nSm[        U4S jU5      nSm[        U4S jU5      n[	        5       n[        X45       H=  u  pgUR                  U5      nUR                  Xx5      n	UR                  X(U	S-    5        M?     U VV
s/ s HL  n[        US   R                  S:H  U V
s/ s H!  oR                  (       d  M  U
R                  PM#     sn
5      PMN     sn
n$ s  sn
f s  sn
nf )N)bullet_list_openordered_list_openc                 N   > U R                   S:H  =(       a    U R                  T;   $ r&   r   )r   opening_typess    r    r   =CategorizedMarkdownCorpusReader.list_reader.<locals>.<lambda>)      aggl>qvv'>>r$   )bullet_list_closeordered_list_closec                 N   > U R                   S:H  =(       a    U R                  T;   $ r&   r   )r   closing_typess    r    r   r  -  r  r$   r   r   r  )r)   r   r   r   r   r   r   r   rt   r   r2   )r4   r(   r   r   r   list_blocksr   r   r   r   r   r  r  s              @@r    list_reader+CategorizedMarkdownCorpusReader.list_reader%  s    ""6;;=1A>
 D>
 f7DA"LLOM"LL:Mvma6GHI 8 &

 &	 q	"55$*8FqiiF8 &
 	
 9
s   + DD#D3DDc                 :    U R                  U R                  X5      $ rE   )r   r  r   s      r    lists%CategorizedMarkdownCorpusReader.lists<  r  r$   c                    [        5       [        5       p2U R                  R                  UR                  5       5       Hm  nUR                  S:X  a@  UR
                  S:X  a0  U(       d  UR                  U5        M=  UR                  U5        U/nMS  U(       d  M\  UR                  U5        Mo     U(       a  UR                  U5        U Vs/ s Ho  n[        US   R                  US   R                  R                  S5      U R                  R                  R                  X0R                  R                  S S95      PMq     sn$ s  snf )Nr   heading_openr   #r   )r   r)   r   r   rp   r   r   rl   r2   markupcountr   r'   r   )r4   r(   section_blocksr*   r   s        r    section_reader.CategorizedMarkdownCorpusReader.section_reader@  s    $""6;;=1Aww!|. 8LLO"))%0CEQ 2 !!%( (
 ( a  a%%c*$$++E;;3F3FD+Q
 (
 	
 
s   
A6Ec                 :    U R                  U R                  X5      $ rE   )r   r)  r   s      r    sections(CategorizedMarkdownCorpusReader.sectionsV  s    %%d&9&97OOr$   rX   rE   )NN)r:   rS   rT   rU   __doc__r5   r#   r   r   rF   rI   rM   rQ   r   r   r   r   r   r   r   r   r  r  r  r  r!  r)  r,  rW   ri   rj   s   @r    r   r      sS    )/ H. !+ !+ !+ !+ !? !? !A !A !A !A !A !A


 !Q !Q
( !S !S
 !S !S
 !N !N
 !M !M
. !M !M
, !P !Pr$   r   N)collectionsr   	functoolsr   r   nltk.corpus.reader.apir   nltk.corpus.reader.plaintextr   nltk.corpus.reader.utilr   r	   nltk.tokenizer
   r   r   r#   r+   r-   rZ   rl   rr   rs   rt   rv   r   rX   r$   r    <module>r5     s    " $ : > @ J J2
 
D
 
* m   	7/0&./&-.0 0SP&=?S SPr$   