
    [g(                         S SK Jr  S SKJr  S SKJr  S SKJr  S SKJ	r	  S SK
Jr  S SKJr  S SKJr   " S	 S
5      r " S S5      rg)    )defaultdict)Pipeline)	Tokenizer)TokenSet)FieldRef)Index)Vector)idfc                   .    \ rS rSrSrSS jrS rS rSrg)	Field   z6Represents a field with boost and extractor functions.Nc                 (    Xl         X l        X0l        g N)nameboost	extractorself
field_namer   r   s       ./usr/lib/python3/dist-packages/lunr/builder.py__init__Field.__init__   s    	
"    c                 $    SR                  U 5      $ )Nz$<Field "{0.name}" boost="{0.boost}">)formatr   s    r   __repr__Field.__repr__   s    5<<TBBr   c                 ,    [        U R                  5      $ r   )hashr   r   s    r   __hash__Field.__hash__   s    DIIr   )r   r   r      N)	__name__
__module____qualname____firstlineno____doc__r   r   r!   __static_attributes__ r   r   r   r      s    @#
Cr   r   c                   b    \ rS rSrSrS rS rSS jrS rS r	SS	 jr
S
 rS rS rS rS rSrg)Builder   a/  Performs indexing on a set of documents and returns instances of
lunr.Index ready for querying.

All configuration of the index is done via the builder, the fields to
index, the document reference, the text processing pipeline and document
scoring parameters are all set on the builder before indexing.
c                     SU l         0 U l        0 U l        0 U l        0 U l        [        5       U l        [        5       U l        0 U l        SU l	        SU l
        SU l        SU l        / U l        g )Nidr   g      ?g333333?)_ref_fieldsinverted_indexfield_term_frequenciesfield_lengthsr   pipelinesearch_pipeline
_documentsdocument_count_b_k1
term_indexmetadata_whitelistr   s    r   r   Builder.__init__%   sg    	 &(# 
'z"$r   c                     Xl         g)a  Sets the document field used as the document reference.

Every document must have this field. The type of this field in the
document should be a string, if it is not a string it will be coerced
into a string by calling `str`.

The default ref is 'id'. The ref should _not_ be changed during
indexing, it should be set before any documents are added to the index.
Changing it during indexing can lead to inconsistent results.

N)r1   )r   refs     r   r@   Builder.ref4   s	     	r   Nc                 X    SU;   a  [        S5      e[        XU5      U R                  U'   g)a  Adds a field to the list of document fields that will be indexed.

Every document being indexed should have this field. None values for
this field in indexed documents will not cause errors but will limit
the chance of that document being retrieved by searches.

All fields should be added before adding documents to the index. Adding
fields after a document has been indexed will have no effect on already
indexed documents.

Fields can be boosted at build time. This allows terms within that
field to have more importance on search results. Use a field boost to
specify that matches within one field are more important that other
fields.

Args:
    field_name (str): Name of the field to be added, must not include
        a forward slash '/'.
    boost (int): Optional boost factor to apply to field.
    extractor (callable): Optional function to extract a field from
        the document.

Raises:
    ValueError: If the field name contains a `/`.
/z'Field {} contains illegal character `/`N)
ValueErrorr   r2   r   s       r   fieldBuilder.fieldB   s-    4 *FGG#(I#FZ r   c                 H    US:  a  SU l         gUS:  a  SU l         gXl         g)a"  A parameter to tune the amount of field length normalisation that is
applied when calculating relevance scores.

A value of 0 will completely disable any normalisation and a value of 1
will fully normalise field lengths. The default is 0.75. Values of b
will be clamped to the range 0 - 1.
r   r$   N)r:   r   numbers     r   b	Builder.ba   s%     A:DGaZDGGr   c                     Xl         g)zA parameter that controls the speed at which a rise in term
frequency results in term frequency saturation.

The default value is 1.2. Setting this to a higher value will give
slower saturation levels, a lower value will result in quicker
saturation.
N)r;   rH   s     r   k1
Builder.k1p   s	     r   c                     [        XR                     5      nU=(       d    0 U R                  U'   U =R                  S-  sl        U R                  R                  5        GH  u  pEUR                  nUc  X   OU" U5      n[        U5      nU R                  R                  X5      n	[        X45      n
[        [        5      nXR                  [        U
5      '   [        U	5      U R                  [        U
5      '   U	 H  n[        U5      nX==   S-  ss'   XR                   ;  aN  U R                   Vs0 s H  o0 _M     nnU R"                  US'   U =R"                  S-  sl        XR                   U'   X0R                   U   U   ;  a"  [        [$        5      U R                   U   U   U'   U R&                   H9  nUR(                  U   nU R                   U   U   U   U   R+                  U5        M;     M     GM     gs  snf )aC  Adds a document to the index.

Before adding documents to the index it should have been fully
setup, with the document ref and all fields to index already having
been specified.

The document must have a field name as specified by the ref (by default
this is 'id') and it should have all fields defined for indexing,
though None values will not cause errors.

Args:
    - doc (dict): The document to be added to the index.
    - attributes (dict, optional): A set of attributes corresponding
    to the document, currently a single `boost` -> int will be
    taken into account.
r$   N_index)strr1   r8   r9   r2   itemsr   r   r6   runr   r   intr4   lenr5   r3   r<   listr=   metadataappend)r   doc
attributesdoc_refr   rE   r   field_valuetokensterms	field_reffield_termstermterm_key_field_namepostingmetadata_keyrW   s                     r   addBuilder.addz   s   " c))n%#-#3 q !%!3!3!5JI-6->#/IcNK{+FMM%%f9E 5I%c*K ;F''I714UDs9~.t9%*%#6#66BF,,O,;B,GO(,GH%OOq(O4;''1"5"5h"?
"KKITJD''1*=gF %)$;$;L#}}\:H''1*=gF$fX&	 %<!  "6& Ps   1G;c           	         U R                  5         U R                  5         U R                  5         [        U R                  U R
                  U R                  [        U R                  R                  5       5      U R                  S9$ )zBuilds the index, creating an instance of `lunr.Index`.

This completes the indexing process and should only be called once all
documents have been added to the index.
)r3   field_vectors	token_setfieldsr6   ) _calculate_average_field_lengths_create_field_vectors_create_token_setr   r3   ri   rj   rV   r2   keysr7   r   s    r   buildBuilder.build   sl     	--/""$ ..,,nn))+,))
 	
r   c                     [         R                  " [        [        U R                  R                  5       5      5      5      U l        g)zDCreates a token set of all tokens in the index using `lunr.TokenSet`N)r   	from_listsortedrV   r3   ro   rj   r   s    r   rn   Builder._create_token_set   s-    !++F48K8K8P8P8R3S,TUr   c                 H   [        [        5      n[        [        5      nU R                  R                  5        H?  u  p4[        R
                  " U5      nUR                  nX&==   S-  ss'   X==   U-  ss'   MA     U R                   H  nX==   X'   -  ss'   M     Xl        g)z5Calculates the average document length for this indexr$   N)	r   rT   r5   rR   r   from_stringr   r2   average_field_length)r   accumulatordocuments_with_fieldr_   length
_field_refrE   r   s           r   rl   (Builder._calculate_average_field_lengths   s    !#&*3/!%!3!3!9!9!;I!--i8J))E '1,'&( "< ,,J#';'GG# ' %0!r   c           	         0 n0 nU R                   R                  5        GH]  u  p4[        R                  " U5      nUR                  nU R
                  U   n[        5       nU R                  U   R                  n	U R                  UR                     R                  SS5      n
UR                  5        H  u  pU R                  U   S   nX;  a(  [        U R                  U   U R                  5      nXU'   OX+   nUU R                  S-   U-  -  U R                  SU R                   -
  U R                   XpR"                  U   -  -  -   -  U-   -  nX-  nX-  n[%        US5      nUR'                  UU5        M     XU'   GM`     Xl        g)z@Builds a vector space model of every document using lunr.Vector.r   r$   rP      N)r4   rR   r   rw   r   r5   r	   r2   r   r8   r[   getr3   Idfr9   r;   r:   rx   roundinsertri   )r   ri   term_idf_cacher_   term_frequenciesr|   r   field_lengthfield_vectorfield_boost	doc_boostra   tfr<   r
   scorescore_with_precisions                    r   rm   Builder._create_field_vectors   s   +/+F+F+L+L+N'I!--i8J#..J--i8L!8L,,z288K
(:(:;??KI,224!006x@
-d11$79L9LMC+.4((.C 1*, "gg&"gg+.G.G
.SSUU   $"',UA$##J0DE7 5: (4)$K ,ON +r   c                     U" U /UQ70 UD6  g)a  Applies a plugin to the index builder.

A plugin is a function that is called with the index builder as its
context. Plugins can be used to customise or extend the behaviour of
the index in some way.

A plugin is just a function, that encapsulated the custom behaviour
that should be applied when building the index. The plugin function
will be called with the index builder as its argument, additional
arguments can also be passed when calling use.
Nr+   )r   fnargskwargss       r   useBuilder.use  s     	4!$!&!r   )r:   r8   r2   r;   r1   rx   r9   r5   r4   ri   r3   r=   r6   r7   r<   rj   r#   r   )r%   r&   r'   r(   r)   r   r@   rE   rJ   rM   rf   rp   rn   rl   rm   r   r*   r+   r   r   r-   r-      sB    %G>6'p
$V0",+\"r   r-   N)collectionsr   lunr.pipeliner   lunr.tokenizerr   lunr.token_setr   lunr.field_refr   
lunr.indexr   lunr.vectorr	   lunr.idfr
   r   r   r-   r+   r   r   <module>r      s5    # # $ # #    w" w"r   