import json
import re
from urllib.parse import quote

import requests
import wikitextparser as wtp

from .utils import get_languages, remove_sortkey


# Base URL of the French Wiktionary "parse" API; the page title is appended
# to it. Implicit string concatenation is used instead of a backslash
# continuation inside the literal, which would embed the next line's
# indentation in the URL.
API_QUERY = ("https://fr.wiktionary.org/w/api.php?"
             "action=parse&prop=wikitext&format=json&page=")


class Page:
    """
    A Wiktionary page built from its raw wikitext.

    The wikitext is kept in `wikitext` and parsed once with wikitextparser;
    the accessors below look up language, etymology and part-of-speech
    sections in the parsed result.
    """

    # Heading of the etymology section inside a language section.
    _ETYMOLOGY_RE = re.compile(r"\ *\{\{S\|étymologie\}\} *")
    # Heading of a part-of-speech section, e.g. `{{S|nom|fr|num=2}}`.
    # Raw string: the pattern relies on backslash escapes that are not valid
    # Python string escapes.
    _PART_OF_SPEECH_RE = re.compile(
        r"""\ *{{S\|
            (?P<lexcat>[^\|=]+)\|
            (?P<lang>[^\||}=]+)
        """,
        re.VERBOSE,
    )
    _IS_INFLECTED_RE = re.compile(r"\|flexion")
    _NUM_RE = re.compile(r"\|num=(?P<num>\d+)")

    # Content of "lexical_categories.json", loaded on first use and shared
    # by all instances so the file is not re-read for every section.
    _lexical_categories = None

    def __init__(self, wikitext):
        self.wikitext = wikitext
        self._parsed = wtp.parse(wikitext)

    @classmethod
    def from_api(cls, title, timeout=10):
        """
        Builds a page by downloading the wikitext of `title` from the API.

        `timeout` (seconds) is passed to `requests.get` so that a stalled
        connection cannot block forever.

        Raises `requests.HTTPError` on a non-success HTTP status, and
        `KeyError` when the JSON answer has no `parse.wikitext` entry
        (presumably when the API reports an error such as a missing page).
        """
        # Percent-encode the title so characters such as "&", "#" or "+"
        # cannot alter the query string.
        query = API_QUERY + quote(title, safe="")
        response = requests.get(query, timeout=timeout)
        response.raise_for_status()
        wikitext = response.json()['parse']['wikitext']['*']
        return cls(wikitext)

    @property
    def get_languages(self):
        """
        Returns a list of all languages present on the page.

        This is a property: read it as `page.get_languages`, without
        parentheses.
        """
        return get_languages(self.wikitext)

    def get_language(self, language_code):
        """
        Extracts a language section and returns its contents, or None when
        the page has no section for `language_code`.
        To know the languages present on the page, use the `get_languages`
        property.
        """
        language_section = self._extract_lang(language_code)
        if language_section:
            return language_section.contents
        return None

    def _extract_lang(self, language_code):
        """
        Returns the level-2 section whose title is `{{langue|<code>}}`,
        or None when there is none.
        """
        # The code is escaped so that regex metacharacters in it are
        # matched literally.
        heading = re.compile(
            r"\ *\{\{langue\|%s\}\} *" % re.escape(str(language_code))
        )
        for section in self._parsed.sections:
            if section.level == 2 and heading.match(section.title):
                return section
        return None

    def get_etymology(self, language_code):
        """
        Extracts the content of the "etymology" section of a given language.
        On the french wiktionary, there is only one section per language.
        Returns None when the language or its etymology section is missing.
        """
        etymology_section = self._extract_etymology(language_code)
        if etymology_section:
            return etymology_section.contents
        return None

    def _extract_etymology(self, language_code):
        """
        Returns the `{{S|étymologie}}` section of the given language,
        or None when the language or the section is missing.
        """
        language_section = self._extract_lang(language_code)
        if not language_section:
            return None
        for section in language_section.sections:
            # Sections without a title cannot be an etymology heading.
            if section.title and self._ETYMOLOGY_RE.match(section.title):
                return section
        return None

    def get_parts_of_speech(self, language_code, part_of_speech=None):
        """
        Returns the list of `PartOfSpeech` found in a language section.

        When `part_of_speech` is given, only the sections whose lexical
        category (as written in the heading) equals it are kept.
        The list is empty when the page has no section for `language_code`.
        """
        return list(self._extract_parts_of_speech(language_code, part_of_speech))

    def _available_lexical_category(self, lexcat):
        """
        Returns the key of "lexical_categories.json" whose list of variants
        contains `lexcat`, or None when no key lists it.
        """
        if Page._lexical_categories is None:
            # NOTE(review): the path is relative to the current working
            # directory, as before -- confirm this is intended.
            # The encoding is explicit so that the result does not depend
            # on the platform default.
            with open("lexical_categories.json", "r", encoding="utf-8") as f:
                Page._lexical_categories = json.load(f)
        for category, variants in Page._lexical_categories.items():
            if any(lexcat == variant for variant in variants):
                return category
        return None

    def _extract_parts_of_speech(self, language_code, part_of_speech=None):
        """
        Yields a `PartOfSpeech` for each level-3 part-of-speech section of
        the given language. Yields nothing when the language is missing.

        `num` is always an int: the value of `|num=` in the heading,
        or 1 when the heading has none.

        Parts of speech currently in the wiktionnaire :
        https://fr.wiktionary.org/wiki/Wiktionnaire:Liste_des_sections_de_types_de_mots

        Table generated by this lua script:
        https://fr.wiktionary.org/wiki/Module:types_de_mots/data
        """
        lang_section = self._extract_lang(language_code)
        if lang_section is None:
            # Same convention as the other accessors: a missing language is
            # not an error, there is simply nothing to return.
            return

        for section in lang_section.sections:
            if section.level != 3:
                continue

            title = remove_sortkey(section.title)
            match = self._PART_OF_SPEECH_RE.match(title)
            if not match:
                continue

            lexcat = match.group("lexcat")
            if part_of_speech:
                if lexcat != part_of_speech:
                    continue
                lexical_category = part_of_speech
            else:
                lexical_category = self._available_lexical_category(lexcat)

            num_match = self._NUM_RE.search(title)
            num = int(num_match.group("num")) if num_match else 1

            is_inflected = self._IS_INFLECTED_RE.search(title) is not None

            yield PartOfSpeech(self.wikitext, language_code, lexical_category,
                               section, is_inflected, num)


class PartOfSpeech(Page):
    """
    A page narrowed down to one part-of-speech section of one language.

    On top of what `Page` stores, it records the language code, the part of
    speech, the matching section, the inflection flag and the number given
    at construction.
    """

    def __init__(self, wikitext, language_code, part_of_speech,
                 part_of_speech_section, is_inflected, num):
        # The base class stores and parses the wikitext.
        super(PartOfSpeech, self).__init__(wikitext)
        self.part_of_speech_section = part_of_speech_section
        self.part_of_speech = part_of_speech
        self.language_code = language_code
        self.num = num
        self.is_inflected = is_inflected

    def __repr__(self):
        # Rendered as "<language code>-<part of speech>-<num>".
        fields = (self.language_code, self.part_of_speech, self.num)
        return "-".join(str(field) for field in fields)
