from lunr.pipeline import Pipeline

# Stop words handed to `generate_stop_word_filter` to build the module's
# default `stop_word_filter`. Kept as a plain (mutable) set of lowercase
# strings, grouped below by initial letter.
WORDS = set(
    """
    a able about across after all almost also am among an and any are as at
    be because been but by
    can cannot could
    dear did do does
    either else ever every
    for from
    get got
    had has have he her hers him his how however
    i if in into is it its
    just
    least let like likely
    may me might most must my
    neither no nor not
    of off often on only or other our own
    rather
    said say says she should since so some
    than that the their them then there these they this tis to too twas
    us
    wants was we were what when where which while who whom why will with would
    yet you your
    """.split()
)


def generate_stop_word_filter(stop_words, language=None):
    """Build a stop word filter function from the provided stop words.

    This factory creates the built in `stop_word_filter` and can also be used
    to generate a custom stop word filter for applications or non English
    languages.

    Args:
        stop_words: Container of words to drop. A list or tuple is copied
            into a frozenset once, so each token costs a hash lookup rather
            than a linear scan; later changes to that sequence are therefore
            not seen by the filter. Any other container (e.g. a set) is used
            as given.
        language: Optional language identifier. When provided it is appended
            to the label the filter is registered under
            ("stopWordFilter-<language>"); otherwise the label is
            "stopWordFilter".

    Returns:
        The filter function. Called with a token, it returns the token
        unchanged when the token is truthy and its string form is not a stop
        word, and None otherwise. The function has already been registered
        with `Pipeline` under the label described above.
    """
    if isinstance(stop_words, (list, tuple)):
        try:
            stop_words = frozenset(stop_words)
        except TypeError:
            # Unhashable entries: keep the original sequence so membership
            # tests behave exactly as before, just without the speed-up.
            pass

    def stop_word_filter(token, i=None, tokens=None):
        # `i` and `tokens` are accepted but not used by this filter.
        if token and str(token) not in stop_words:
            return token
        return None

    # camelCased for compatibility with lunr.js
    label = (
        "stopWordFilter-{}".format(language)
        if language is not None
        else "stopWordFilter"
    )
    Pipeline.register_function(stop_word_filter, label)
    return stop_word_filter


# Default filter built from the module-level WORDS set. No language is passed,
# so the factory registers it with Pipeline under the label "stopWordFilter".
stop_word_filter = generate_stop_word_filter(WORDS)
