
    Of                     p    S SK Jr  S SKJrJrJr  S SKJrJr  S SK	J
r
Jr   " S S\5      r " S S\5      rg	)
    )CorpusReader)StreamBackedCorpusViewconcatread_alignedsent_block)RegexpTokenizerWhitespaceTokenizer)AlignedSent	Alignmentc                   ^    \ rS rSrSrS\" 5       \" SSS9\S4S jrSS
 jr	SS jr
SS jrSrg	)AlignedCorpusReader   z
Reader for corpora of word-aligned sentences.  Tokens are assumed
to be separated by whitespace.  Sentences begin on separate lines.
/
T)gapslatin1c                 b    [         R                  " XX'5        X0l        X@l        XPl        X`l        g)a`  
Construct a new Aligned Corpus reader for a set of documents
located at the given root directory.  Example usage:

    >>> root = '/...path to corpus.../'
    >>> reader = AlignedCorpusReader(root, '.*', '.txt') # doctest: +SKIP

:param root: The root directory for this corpus.
:param fileids: A list or regexp specifying the fileids in this corpus.
N)r   __init___sep_word_tokenizer_sent_tokenizer_alignedsent_block_reader)selfrootfileidssepword_tokenizersent_tokenizeralignedsent_block_readerencodings           </usr/lib/python3/dist-packages/nltk/corpus/reader/aligned.pyr   AlignedCorpusReader.__init__   s+    ( 	d'<	--)A&    Nc                     [        U R                  US5       VVs/ s H4  u  p#[        UUSSU R                  U R                  U R
                  5      PM6     snn5      $ s  snnf )z^
:return: the given file(s) as a list of words
    and punctuation symbols.
:rtype: list(str)
TFr   abspathsAlignedSentCorpusViewr   r   r   r   r   fileidencs       r    wordsAlignedCorpusReader.words2   sr      &*]]7D%A &BMV &((((22 &B
 	
   ;A
c                     [        U R                  US5       VVs/ s H4  u  p#[        UUSSU R                  U R                  U R
                  5      PM6     snn5      $ s  snnf )z
:return: the given file(s) as a list of
    sentences or utterances, each encoded as a list of word
    strings.
:rtype: list(list(str))
TFr$   r'   s       r    sentsAlignedCorpusReader.sentsG   sr      &*]]7D%A &BMV &((((22 &B
 	
r,   c                     [        U R                  US5       VVs/ s H4  u  p#[        UUSSU R                  U R                  U R
                  5      PM6     snn5      $ s  snnf )zX
:return: the given file(s) as a list of AlignedSent objects.
:rtype: list(AlignedSent)
Tr$   r'   s       r    aligned_sents!AlignedCorpusReader.aligned_sents]   sr    
  &*]]7D%A &BMV &((((22 &B
 	
r,   )r   r   r   r   )N)__name__
__module____qualname____firstlineno____doc__r   r   r   r   r*   r.   r1   __static_attributes__ r"   r    r   r      s7     *,&t$7!7B4
*
,
r"   r   c                   $    \ rS rSrSrS rS rSrg)r&   r   z
A specialized corpus view for aligned sentences.
``AlignedSentCorpusView`` objects are typically created by
``AlignedCorpusReader`` (not directly by nltk users).
c                 j    X0l         X@l        XPl        X`l        Xpl        [
        R                  " XUS9  g )N)r   )_aligned_group_by_sentr   r   r   r   r   )r   corpus_filer   alignedgroup_by_sentr   r   r   s           r    r   AlignedSentCorpusView.__init__y   s1      +--)A&''HMr"   c                    U R                  U5       VVs/ s HA  nU R                  R                  U5        H  nU R                  R                  U5      PM      MC     nnnU R                  (       a6  [
        R                  " SR                  US   5      5      US'   [        U6 /nU$ U R                  (       a  US   /nU$ US   nU$ s  snnf )N    r   )
r   r   tokenizer   r=   r
   
fromstringjoinr	   r>   )r   streamalignedsent_strsent_strblocks        r    
read_block AlignedSentCorpusView.read_block   s     $(#A#A&#I
#I 0099/J   ))(3J 4#I 	 

 == ++q"E!H !%()E    1XJE  !HE
s   AC)r=   r   r>   r   r   N)r3   r4   r5   r6   r7   r   rM   r8   r9   r"   r    r&   r&   r   s    N"r"   r&   N)nltk.corpus.reader.apir   nltk.corpus.reader.utilr   r   r   nltk.tokenizer   r   nltk.translater	   r
   r   r&   r9   r"   r    <module>rS      s8    0 
 ? 1]
, ]
@(2 (r"   