import re

import numpy as np
import pandas as pd

from .types import PyStvError, RaceInfo, RaceMetadata

# The Google Form header pattern is the text of the question followed by the option in
# brackets. An optional parenthetical indicating the number of winners is allowed in
# between the question and the option.
# Examples:
#   What is your favorite season? [Spring]
#   City Council (4 winners) [Darth Vader]
#   Mayor (1 winner) [Luke Skywalker]
#
# Named groups: "question", "num_winners" (None when the parenthetical is absent)
# and "option".
QUESTION_PATTERN = re.compile(
    # Question text, matched lazily so the optional pieces below get the first
    # chance to claim the "(N winners)" part.
    r"^(?P<question>.*?)"
    # Optional "(N winner)" / "(N winners)" parenthetical.
    r"(\s+\((?P<num_winners>\d+)\s+winners?\))?"
    # NOTE: this piece ends in a literal space, so a space character is required
    # directly before the opening bracket.
    r"\s*? "
    # Option text: greedy, so it extends to the final "]" of the header.
    r"\[(?P<option>.*)\]$"
)


def parse_google_form_csv(buffer):
    """Parse a Google Forms CSV export of ranked-choice responses.

    Every race found by ``parse_header`` is converted from a table of ranks
    (one row per response, one column per option) into a list of distinct
    ballots and the number of times each was cast.

    Args:
        buffer: Anything ``pandas.read_csv`` accepts.

    Returns:
        A list with one ``RaceInfo`` per race, each built from the race
        metadata, the distinct ballots and their counts. A ballot lists
        1-based option numbers from most to least preferred; options a voter
        left unranked show up as trailing zeros.
    """
    df = pd.read_csv(buffer)
    race_infos = []
    for metadata, slice_ in parse_header(df.columns):
        # Convert cell by cell through Series.map rather than
        # DataFrame.applymap: applymap is deprecated since pandas 2.1 (renamed
        # to DataFrame.map, which older releases lack), whereas Series.map is
        # available on both sides of that change.
        ranks = df.iloc[:, slice_].apply(lambda column: column.map(coerce)).values
        # A rank of 0 means "not ranked"; mask those cells so they sort last.
        ranks = np.ma.array(ranks, mask=(ranks == 0))

        # Sorting each row by rank yields option indices in preference order.
        order = ranks.argsort(axis=1)
        # Carry the mask along so unranked options stay marked after sorting.
        unranked = np.take_along_axis(ranks.mask, order, axis=1)
        # Options are numbered from 1 so that 0 can stand for "no choice".
        ballots = np.ma.array(order, mask=unranked) + 1
        ballots = ballots.filled(0)
        # Collapse identical ballots and count how often each one occurs.
        ballots, votes = np.unique(ballots, axis=0, return_counts=True)
        race_infos.append(RaceInfo(metadata, ballots.tolist(), votes.tolist()))
    return race_infos


def parse_header(header):
    """Split the CSV column labels into one column range per race.

    Consecutive labels that match ``QUESTION_PATTERN`` and share the same
    question text form one race. A label that does not match the pattern, or
    that carries a different question, ends the race in progress.

    Args:
        header: Iterable of column labels, in column order.

    Returns:
        A list of ``(RaceMetadata, slice)`` pairs in column order. The
        metadata is built from the question text, the number of winners (1
        when the label has no "(N winners)" part) and the option names; the
        slice covers the race's columns as a half-open positional range.
    """
    finished = []
    active = None  # race whose columns are currently being collected

    for position, label in enumerate(header):
        found = QUESTION_PATTERN.match(label)
        title = found.group("question").strip() if found else None

        # Close the race in progress as soon as a column stops belonging to it.
        if active is not None and (found is None or title != active["question"]):
            active["stop"] = position
            finished.append(active)
            active = None

        if found is None:
            continue

        if active is None:
            # The winner count is taken from the first column of a race only.
            winners = found.group("num_winners")
            active = {
                "question": title,
                "winners": int(winners) if winners else 1,
                "options": [],
                "start": position,
            }
        active["options"].append(found.group("option").strip())

    # A race that runs up to the last column is closed just past it.
    if active is not None:
        active["stop"] = position + 1
        finished.append(active)

    return [
        (
            RaceMetadata(race["question"], race["winners"], race["options"]),
            slice(race["start"], race["stop"]),
        )
        for race in finished
    ]


def coerce(x):
    """Convert one spreadsheet cell into an integer rank.

    Args:
        x: The cell value: a float (NaN for a blank cell), an integer, or a
            string containing exactly one number, such as "1" or "2nd".

    Returns:
        The rank as an ``int``; ``0`` when the cell is NaN.

    Raises:
        PyStvError: If ``x`` is a string holding no number or more than one
            number, or if ``x`` is neither a number nor a string.
    """
    if isinstance(x, float):
        if np.isnan(x):
            return 0
        return int(x)
    # A column in which every cell is filled is read as integers rather than
    # floats, so integer cells must be accepted too. bool is excluded because
    # it is an int subclass but never a meaningful rank.
    if isinstance(x, (int, np.integer)) and not isinstance(x, bool):
        return int(x)
    if not isinstance(x, str):
        raise PyStvError(f"Could not determine number: {x}")
    # Only the digit runs matter; an ordinal suffix ("st", "nd", "rd", "th")
    # or any other surrounding text is ignored.
    numbers = re.findall(r"\d+", x)
    if len(numbers) != 1:
        raise PyStvError(f"Could not determine number: {x}")
    return int(numbers[0])
