
    [g$                          " S  S5      r g)c                   z    \ rS rSrSrSrS rS rS r\	S 5       r
\	S 5       r\	S 5       r\	S	 5       rS
 rS rSrg)TokenSet   aG  
A token set is used to store the unique list of all tokens
within an index. Token sets are also used to represent an
incoming query to the index, this query token set and index
token set are then intersected to find which tokens to look
up in the inverted index.

A token set can hold multiple tokens, as in the case of the
index token set, or it can hold a single token as in the
case of a simple query token set.

Additionally token sets are used to perform wildcard matching.
Leading, contained and trailing wildcards are supported, and
from this edit distance matching can also be provided.

Token sets are implemented as a minimal finite state automata,
where both common prefixes and suffixes are shared between tokens.
This helps to reduce the space used for storing the token set.

TODO: consider https://github.com/glyph/automat
c                     SU l         0 U l        U R                  U l        U R                  =R                  S-  sl        g )NFr   )finaledges_next_idid	__class__selfs    0/usr/lib/python3/dist-packages/lunr/token_set.py__init__TokenSet.__init__   s0    

--1$    c                 N    U R                   $ ! [         a     Of = fU R                  (       a  SOSn[        [	        U R
                  R                  5       5      5       HB  nU R
                  U   n [        UR                  5      nO! [         a    Sn Of = fX-   U-   nMD     U$ )N10 )	_stringAttributeErrorr   sortedlistr   keysstrr	   )r   stringlabelnodenode_ids        r   __str__TokenSet.__str__    s    	<< 		 

D!234E::e$Ddgg,!  ^g-F 5 s    
2BBBc                 6    SR                  [        U 5      5      $ )Nz<TokenSet "{}">)formatr   r   s    r   __repr__TokenSet.__repr__3   s     ''D	22r   c                     [        5       nUn[        U5       HR  u  pEU[        U5      S-
  :H  nUS:X  a  X"R                  U'   Xbl        M2  [        5       nXgl        XrR                  U'   UnMT     U$ )zCreates a TokenSet from a string.

The string may contain one or more wildcard characters (*) that will
allow wildcard matching when intersecting with another TokenSet
r   *)r   	enumeratelenr   r   )r   r   r   rooticharr   next_s           r   from_stringTokenSet.from_string6   sq     z !(GAVq(Es{#'

4 "
 
##(

4  ) r   c                    [        5       nX2US./nU(       Ga  UR                  5       n[        US   5      S:  a  US   S   nSnXeS   R                  ;   a  US   R                  U   nO[        5       nXuS   R                  U'   [        US   5      S:X  a  SUl        UR                  UUS   US   SS S.5        US   S:X  a  M  S	US   R                  ;   a  US   R                  S	   nO[        5       nXS   R                  S	'   [        US   5      S:X  a  SUl        UR                  UUS   S-
  US   S.5        [        US   5      S:  a$  UR                  US   US   S-
  US   SS S.5        [        US   5      S:X  a
  SUS   l        [        US   5      S:  a{  S	US   R                  ;   a  US   R                  S	   n	O[        5       n	XS   R                  S	'   [        US   5      S:X  a  SU	l        UR                  U	US   S-
  US   SS S.5        US   (       a  [        US   5      S:  a  US   S   n
US   S   nSnXS   R                  ;   a  US   R                  U   nO[        5       nXS   R                  U'   [        US   5      S:X  a  SUl        UR                  UUS   S-
  XS   S
S -   S.5        U(       a  GM  U$ )ar  Creates a token set representing a single string with a specified
edit distance.

Insertions, deletions, substitutions and transpositions are each
treated as an edit distance of 1.

Increasing the allowed edit distance will have a dramatic impact
on the performance of both creating and intersecting these TokenSets.
It is advised to keep the edit distance less than 3.
)r   edits_remainingr   r       Nr   r   Tr0   r&      )r   popr(   r   r   append)clsr   edit_distancer)   stackframer+   no_edit_nodeinsertion_nodesubstitution_nodechar_achar_btranspose_nodes                r   from_fuzzy_stringTokenSet.from_fuzzy_stringQ   s:    zFSTIIKE5?#a'Xq)#=...#(=#6#6t#<L#+:L0<&M''-uX'1,)-L& ,+01B+C"'/!""5 &'1, eFm)))!&v!4!4S!9!)+9f##C(5?#q('+$LL*',->'?!'C#Ho 5?#a' %f+01B+Ca+G"'/!""5 5?#q(&*f# 5?#q(%----(-f(;(;C(@%(0
%/@&M'',uX'1,.2%+ 1+01B+Ca+G"'/!""5 &'Ch,@1,Dx+x+!%6]000%*6]%8%8%@N%-ZN2@&M''/uX'1,+/N( .+01B+Ca+G"(?12+>"> eN r   c                     SSK Jn  U" 5       nU H  nUR                  U5        M     UR                  5         UR                  $ )Nr1   )TokenSetBuilder)lunr.token_set_builderrB   insertfinishr)   )r5   list_of_wordsrB   builderwords        r   	from_listTokenSet.from_list   s9    :!#!DNN4  " 	||r   c                     UR                   (       a&  U R                  UR                  UR                   5      $ U R                  UR                  5      $ )N)r6   r?   termr-   )r5   clauses     r   from_clauseTokenSet.from_clause   s;    ((f6J6JKK??6;;//r   c                 T   / nSU S./nU(       a  UR                  5       nUS   R                  (       a  UR                  US   5        US   R                  R	                  5        H6  nUR                  US   [        U5      -   US   R                  U   S.5        M8     U(       a  M  U$ )Nr   )prefixr   r   rQ   )r3   r   r4   r   r   r   )r   wordsr7   r8   edges        r   to_listTokenSet.to_list   s    -.IIKEV}""U8_-f++002"'/CI"= %f 3 3D 9 3 e r   c                    [        5       nXUS./nU(       Ga(  UR                  5       nUS   R                  R                  5        H  nUS   R                  R                  5        H  nXe:X  d  US:X  d  M  US   R                  U   nUS   R                  U   nUR                  =(       a    UR                  n	Sn
XdS   R                  ;   a-  US   R                  U   n
U
R                  =(       d    U	U
l        O![        5       n
Xl        XS   R                  U'   UR                  XxU
S.5        M     M     U(       a  GM(  U$ )zReturns a new TokenSet that is the intersection of this TokenSet
and the passed TokenSet.

This intersection will take into account any wildcards contained within
the TokenSet.
)r   q_nodeoutputrW   r   r&   NrX   )r   r3   r   r   r   r4   )r   otherrX   r7   r8   q_edgen_edger   rW   r   r,   s              r   	intersectTokenSet.intersect   s    6BCIIKE///446#Fm11668F'6S=$V}226:!&x!6!6v!> $

 ;v|| $!8_%:%::$)(O$9$9&$AE*/++*>EK$,JE*/K<A(O11&9dPU%VW 9 7 e( r   )r   r   r	   N)__name__
__module____qualname____firstlineno____doc__r   r   r   r#   classmethodr-   r?   rI   rN   rT   r\   __static_attributes__ r   r   r   r      sz    , H%&3  4 v vp   0 0&r   r   N)r   re   r   r   <module>rf      s   L Lr   