# -*- coding: utf-8 -*-

# MIT License
#
# Copyright 2018-2022 New York University Abu Dhabi
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


"""contains the CAMeL Tools default tagger.
"""


from collections import OrderedDict

from camel_tools.tagger.common import Tagger
from camel_tools.disambig.common import Disambiguator


def _tag_passthrough(feat, word):
    """Return the value of `feat` from the first analysis of `word`, falling
    back to the original word string.

    The fallback (`word.word`) is used when `word` has no analyses, when the
    first analysis has no entry for `feat`, or when that entry is `None` or
    the string `'NOAN'`.
    """
    candidates = word.analyses

    if len(candidates) > 0:
        value = candidates[0].analysis.get(feat)
        # Only a real value is passed on; None and 'NOAN' fall through.
        if value is not None and value != 'NOAN':
            return value

    return word.word


def _tag_none(feat, word):
    """Return the value of `feat` from the first analysis of `word`, or
    `None` when `word` has no analyses or the first analysis has no entry for
    `feat`.
    """
    candidates = word.analyses

    return candidates[0].analysis.get(feat) if len(candidates) > 0 else None


def _tag_lex(feat, word):
    """Return the `'lex'` value from the first analysis of `word`, falling
    back to a default lemma derived from the original word string.

    The default is the word string (`word.word`) followed by `'_0'`. It is
    returned when `word` has no analyses or when the first analysis has no
    `'lex'` entry.

    Note that `feat` is ignored: this action always looks up `'lex'`. The
    parameter is kept so that the function shares the call signature of the
    other tag actions.
    """
    # Build the fallback from the surface string, not from the whole
    # disambiguated-word object (formatting the object would embed its repr,
    # analyses included, into the returned lemma).
    default_lex = '{}_0'.format(word.word)

    if len(word.analyses) == 0:
        return default_lex

    return word.analyses[0].analysis.get('lex', default_lex)


# Dispatch table mapping every supported feature name to the function that
# produces its tag from a disambiguated word. Each action is called as
# `action(feat, word)`:
#   * `_tag_passthrough` falls back to the original word string when no usable
#     value is available.
#   * `_tag_none` yields `None` when there are no analyses or no value.
#   * `_tag_lex` looks up 'lex' and falls back to a '{}_0'-formatted default.
# The keys of this table are also the feature names accepted by
# `DefaultTagger` and reported by `DefaultTagger.feature_list()`.
_FEAT_ACTIONS = {
    'diac': _tag_passthrough,
    'bw': _tag_none,
    'lex': _tag_lex,
    'gloss': _tag_none,
    'pos': _tag_none,
    'asp': _tag_none,
    'cas': _tag_none,
    'mod': _tag_none,
    'num': _tag_none,
    'gen': _tag_none,
    'form_num': _tag_none,
    'form_gen': _tag_none,
    'stt': _tag_none,
    'vox': _tag_none,
    'per': _tag_none,
    'enc0': _tag_none,
    'enc1': _tag_none,
    'enc2': _tag_none,
    'prc0': _tag_none,
    'prc1': _tag_none,
    'prc2': _tag_none,
    'prc3': _tag_none,
    'atbtok': _tag_passthrough,
    'atbseg': _tag_passthrough,
    'bwtok': _tag_passthrough,
    'd1tok': _tag_passthrough,
    'd1seg': _tag_passthrough,
    'd2tok': _tag_passthrough,
    'd2seg': _tag_passthrough,
    'd3tok': _tag_passthrough,
    'd3seg': _tag_passthrough,
    'catib6': _tag_none,
    'ud': _tag_none,
    'caphi': _tag_none
}


class DefaultTaggerError(Exception):
    """Common base class for every error raised by :obj:`DefaultTagger`.

    Catching this type catches all errors specific to the default tagger.
    """


class InvalidDefaultTaggerDisambiguator(DefaultTaggerError, ValueError):
    """Error raised when a DefaultTagger is initialized with an object that
    does not implement :obj:`~camel_tools.disambig.common.Disambiguator`.
    """

    def __str__(self):
        # The message is fixed; constructor arguments are not reflected in it.
        message = 'Invalid disambiguator.'
        return message


class InvalidDefaultTaggerFeature(DefaultTaggerError, ValueError):
    """Error raised when a DefaultTagger is initialized with an invalid feature
    name.

    Args:
        feature: The feature name that was rejected. It is included (via
            `repr`) in the error message.
    """

    def __init__(self, feature):
        # Forward the feature to the base initializer so that `args` carries
        # it. Without this, `args` is empty: `repr()` of the error omits the
        # feature and pickle/copy round-trips fail because the exception is
        # rebuilt by calling `__init__(*args)` with no arguments.
        super().__init__(feature)
        self._feature = feature

    def __str__(self):
        return 'Invalid feature {}'.format(repr(self._feature))


class DefaultTagger(Tagger):
    """The default camel_tools tagger. It generates tags for a given feature by
    first disambiguating a word using a given disambiguator and then returning
    the associated value for that feature. It also provides sensible default
    values for when no analyses are generated by the disambiguator or when a
    feature is not present in the disambiguation.

    Args:
        disambiguator (:obj:`~camel_tools.disambig.common.Disambiguator`): The
            disambiguator used for disambiguating input.
        feature (:obj:`str`): The feature to be produced.

    Raises:
        :obj:`InvalidDefaultTaggerDisambiguator`: If `disambiguator` is not an
            instance of :obj:`~camel_tools.disambig.common.Disambiguator`.
        :obj:`InvalidDefaultTaggerFeature`: If `feature` is not a valid feature
            name.
    """

    def __init__(self, disambiguator, feature):
        # Validate in the same order as before: the disambiguator is checked
        # first, so an invalid disambiguator is reported even when the feature
        # is invalid too.
        if not isinstance(disambiguator, Disambiguator):
            raise InvalidDefaultTaggerDisambiguator()
        if feature not in _FEAT_ACTIONS:
            raise InvalidDefaultTaggerFeature(feature)

        self._disambiguator = disambiguator
        self._feature = feature

    def _tag_disambiguated_word(self, word):
        """Apply the action registered for this tagger's feature to a single
        disambiguated word and return the resulting tag.
        """
        action = _FEAT_ACTIONS[self._feature]
        return action(self._feature, word)

    def tag(self, sentence):
        """Generate a tag for each token in a given sentence.

        Args:
            sentence (:obj:`list` of :obj:`str`): The sentence to be tagged.

        Returns:
            :obj:`list`: The list of tags corresponding to each token in
            `sentence`.
        """

        disambig_words = self._disambiguator.disambiguate(sentence)

        return [self._tag_disambiguated_word(word) for word in disambig_words]

    @staticmethod
    def feature_list():
        """Returns list of valid features producible by :obj:`DefaultTagger`.

        Returns:
            :obj:`list` of :obj:`str`: A new list of the supported feature
            names. Modifying it does not affect the tagger.
        """

        # Return a real list as documented, rather than a dict keys view.
        return list(_FEAT_ACTIONS)
