
    Ofx                     t    S SK r S SKJrJr  S SK7  S SK7  S SKJr  S SKJ	r	  \" S/ SQ5      r
 " S S	\5      rg)
    N)defaultdict
namedtuple)*)WordListCorpusReader)line_tokenizePanlexLanguage)
panlex_uidiso639iso639_typescriptnamelangvar_uidc                   Z   ^  \ rS rSrSrU 4S jrS rS rS rS r	S r
S	 rSS
 jrSrU =r$ )PanlexSwadeshCorpusReader   aU  
This is a class to read the PanLex Swadesh list from

David Kamholz, Jonathan Pool, and Susan M. Colowick (2014).
PanLex: Building a Resource for Panlingual Lexical Translation.
In LREC. http://www.lrec-conf.org/proceedings/lrec2014/pdf/1029_Paper.pdf

License: CC0 1.0 Universal
https://creativecommons.org/publicdomain/zero/1.0/legalcode
c                 2  > [         TU ]  " U0 UD6  [        R                  " SU R	                  5       S   5      R                  S5      U l        U R                  5        Vs0 s H  o3R                  U_M     snU l	        U R                  5       U l        g s  snf )Nzswadesh([0-9].*)\/r      )super__init__rematchfileidsgroupswadesh_sizeget_languagesr	   
_languagesget_macrolanguages_macro_langauges)selfargskwargslang	__class__s       C/usr/lib/python3/dist-packages/nltk/corpus/reader/panlex_swadesh.pyr   "PanlexSwadeshCorpusReader.__init__+   s    $)&)HH%:DLLN1<MNTTUVW=A=O=O=QR=QT??D0=QR $ 7 7 9 Ss    Bc                     g)NzCC0 1.0 Universal r   s    r$   license!PanlexSwadeshCorpusReader.license2   s    "    c                 6    U R                   R                  5       $ N)r   keysr(   s    r$   language_codes(PanlexSwadeshCorpusReader.language_codes5   s    ##%%r+   c              #      #    U R                  SU R                   S35      R                  S5       HA  nUR                  5       (       d  M  [	        UR                  5       R                  S5      6 v   MC     g 7f)Nlangs.txt
	)rawr   splitstripr   )r   lines     r$   r   'PanlexSwadeshCorpusReader.get_languages8   s^     HHuT%6%6$7t<=CCDID::<< $**,"4"4T":;; Js   A5A7c                     [        [        5      nU R                  R                  5        H*  nXR                     R                  UR                  5        M,     U$ r-   )r   listr   valuesr
   appendr	   )r   macro_langaugesr"   s      r$   r   ,PanlexSwadeshCorpusReader.get_macrolanguages>   sB    %d+OO**,DKK(//@ -r+   c                     SU R                    SU S3nU R                  U5       Vs/ s H  o3R                  S5      PM     sn$ s  snf z
:return: a list of list(str)
swadesh/r3   r5   )r   wordsr7   )r   	lang_codefileidconcepts       r$   words_by_lang'PanlexSwadeshCorpusReader.words_by_langD   sI     4,,-Qyk>37::f3EF3Ed#3EFFFs   Ac                     U R                   U    Vs/ s H  nSU R                   SU S3PM     nnU VVs/ s H+  o@R                  U5        H  oUR                  S5      PM     M-     snn$ s  snf s  snnf rB   )r   r   rE   r7   )r   
iso63_coderF   r   rG   rH   s         r$   words_by_iso639)PanlexSwadeshCorpusReader.words_by_iso639K   s     "22:>
>	 d''()D9> 	 

 /6
.5FZZPVEW'MM$EWg
 	
	

s
   A,2A1c                     U(       d  U R                  5       nU Vs/ s H  o R                  U5      PM     nn[        [        U6 5      $ s  snf )z6
:return: a tuple of words for the specified fileids.
)r   rE   r<   zip)r   r   f	wordlistss       r$   entries!PanlexSwadeshCorpusReader.entriesW   s@     llnG,34GqZZ]G	4CO$$ 5s   A	)r   r   r   r-   )__name__
__module____qualname____firstlineno____doc__r   r)   r/   r   r   rI   rM   rS   __static_attributes____classcell__)r#   s   @r$   r   r      s6    	:#&<G

% %r+   r   )r   collectionsr   r   nltk.corpus.reader.apinltk.corpus.reader.utilnltk.corpus.reader.wordlistr   nltk.tokenizer   r   r   r'   r+   r$   <module>ra      s:    
 / $ % < '
@% 4 @%r+   