
    OfG                     h    S SK r S SKJr  S SKJr  S SKJr   " S S\5      rS r\	S:X  a  \" 5         gg)	    N)defaultdict)reduce)CorpusReaderc                      ^  \ rS rSrSr\R                  " S5      r\S 5       r	SU 4S jjr
SS jrSS jrSS jrS	 rS
rU =r$ )LinThesaurusCorpusReader   zEWrapper for the LISP-formatted thesauruses distributed by Dekang Lin.z \("?([^"]+)"? \(desc [0-9.]+\).+c                       [        [        5      $ )z6Factory for creating defaultdict of defaultdict(dict)s)r   dict     8/usr/lib/python3/dist-packages/nltk/corpus/reader/lin.py__defaultdict_factory.LinThesaurusCorpusReader.__defaultdict_factory   s     4  r   c                 4  > [         TU ]  US5        [        [        R                  5      U l        X l        U R                  SSS9 H  u  p4n[        U5       nSnU H  nUR                  5       nU(       a$  [        R                  R                  SU5      n	SnM>  US:X  a  SnMH  UR                  S5      n
[        U
5      S:X  d  Mj  U
u  p[        U5      U R
                  U   W	   UR                  S	5      '   M     S
S
S
5        M     g
! , (       d  f       M  = f)z
Initialize the thesaurus.

:param root: root directory containing thesaurus LISP files
:type root: C{string}
:param badscore: the score to give to words which do not appear in each other's sets of synonyms
:type badscore: C{float}
zsim[A-Z]\.lspT)include_encodinginclude_fileidz\1Fz))	   "N)super__init__r   r   ._LinThesaurusCorpusReader__defaultdict_factory
_thesaurus	_badscoreabspathsopenstrip_key_resubsplitlenfloat)selfrootbadscorepathencodingfileidlin_filefirstlinekey
split_linengramscore	__class__s                r   r   !LinThesaurusCorpusReader.__init__   s     	/0%&>&T&TU!&*mm!$ '4 '
"DF dx$D::<D6>>BB5$O % $ &*ZZ%5
z?a/+5LEMR %NDOOF3C8S9IJ % '
 s   A+D5D
D	c                    X:X  a&  U(       a  gU R                    Vs/ s H  oDS4PM     sn$ U(       a6  X R                  U   U   ;   a  U R                  U   U   U   $ U R                  $ U R                    Vs/ s H;  nUX R                  U   U   ;   a  U R                  U   U   U   OU R                  4PM=     sn$ s  snf s  snf )a  
Returns the similarity score for two ngrams.

:param ngram1: first ngram to compare
:type ngram1: C{string}
:param ngram2: second ngram to compare
:type ngram2: C{string}
:param fileid: thesaurus fileid to search in. If None, search all fileids.
:type fileid: C{string}
:return: If fileid is specified, just the score for the two ngrams; otherwise,
         list of tuples of fileids and scores.
g      ?)_fileidsr   r   )r#   ngram1ngram2r(   fids        r   
similarity#LinThesaurusCorpusReader.similarity?   s     .2mm<msc
m<< !8!@@ OOF+F3F;   $}}
  -   &)=f)EE !OOC08@!%  -
 
 =
s   B<7ACc                     U(       a   U R                   U   U   R                  5       $ U R                   Vs/ s H$  nX R                   U   U   R                  5       4PM&     sn$ s  snf )a  
Returns a list of scored synonyms (tuples of synonyms and scores) for the current ngram

:param ngram: ngram to lookup
:type ngram: C{string}
:param fileid: thesaurus fileid to search in. If None, search all fileids.
:type fileid: C{string}
:return: If fileid is specified, list of tuples of scores and synonyms; otherwise,
         list of tuples of fileids and lists, where inner lists consist of tuples of
         scores and synonyms.
)r   itemsr3   r#   r.   r(   s      r   scored_synonyms(LinThesaurusCorpusReader.scored_synonymsf   si     ??6*517799 #mm+F 07==?@+     +A$c                     U(       a   U R                   U   U   R                  5       $ U R                   Vs/ s H$  nX R                   U   U   R                  5       4PM&     sn$ s  snf )a\  
Returns a list of synonyms for the current ngram.

:param ngram: ngram to lookup
:type ngram: C{string}
:param fileid: thesaurus fileid to search in. If None, search all fileids.
:type fileid: C{string}
:return: If fileid is specified, list of synonyms; otherwise, list of tuples of fileids and
         lists, where inner lists contain synonyms.
)r   keysr3   r;   s      r   synonyms!LinThesaurusCorpusReader.synonymsz   si     ??6*516688 #mm+F 07<<>?+  r>   c                 >   ^ ^ [        UU 4S jT R                  S5      $ )z
Determines whether or not the given ngram is in the thesaurus.

:param ngram: ngram to lookup
:type ngram: C{string}
:return: whether the given ngram is in the thesaurus.
c                 :   > U =(       d    TTR                   U   ;   $ N)r   )accumr(   r.   r#   s     r   <lambda>7LinThesaurusCorpusReader.__contains__.<locals>.<lambda>   s    %"MET__V5L,L"Mr   F)r   r3   )r#   r.   s   ``r   __contains__%LinThesaurusCorpusReader.__contains__   s     MMM
 	
r   )r   r   )g        rE   )__name__
__module____qualname____firstlineno____doc__recompiler   staticmethodr   r   r7   r<   rA   rI   __static_attributes____classcell__)r0   s   @r   r   r      sI    O jj<=G! !"H%N(&
 
r   r   c                     SSK Jn   SnSn[        SU-   5        [        U R                  U5      5        [        SU-   5        [        U R	                  U5      5        [        SU-   5        [        U R                  USS	95        [        SU-   5        [        U R                  USS	95        [        S
U SU S35        [        U R                  X5      5        g )Nr   )lin_thesaurusbusiness
enterprisezGetting synonyms for zGetting scored synonyms for z5Getting synonyms from simN.lsp (noun subsection) for zsimN.lsp)r(   zSimilarity score for z and :)nltk.corpusrV   printrA   r<   r7   )thesword1word2s      r   demor_      s    1EE	
!E
)*	$--
	
(5
01	$

u
%&	
AE
IJ	$--j-
12	
AE
IJ	$--j-
12	!%eWA
67	$//%
'(r   __main__)
rP   collectionsr   	functoolsr   nltk.corpus.readerr   r   r_   rK   r   r   r   <module>rd      s;    
 #  +J
| J
d)* zF r   