# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/example_nbs/validated/nbdev_high_quality.ipynb.

# %% auto 0
# Public names of this module: these are what `from <module> import *` binds.
__all__ = ['traffic_percent', 'workers', 'model_level', 'min_date', 'echo', 'get_traffic_text', 'get_experiment_segment',
           'get_utterances', 'preprocess', 'Topics', 'fit', 'evaluate', 'serve_num_topics']

# %% ../../nbs/example_nbs/validated/nbdev_high_quality.ipynb 3
def echo(message):
    """Return `message` exactly as it was passed in."""
    echoed = message
    return echoed

# %% ../../nbs/example_nbs/validated/nbdev_high_quality.ipynb 7
import numpy as np
import pandas as pd

# %% ../../nbs/example_nbs/validated/nbdev_high_quality.ipynb 9
# Module-level parameter values.
# NOTE(review): presumably these are supplied by the caller as the same-named
# arguments of the functions below (which declare their own defaults) — confirm.
traffic_percent = 1
# Captured as the default of the `workers` parameter of `fit` when `fit` is defined.
workers = 8
# `get_utterances` and `preprocess` accept `model_level` and `min_date` arguments;
# the synthetic-data code in this file does not read them.
model_level = "dispatcher"
min_date = "2021-01-01"

# %% ../../nbs/example_nbs/validated/nbdev_high_quality.ipynb 10
def get_traffic_text(percent):
    """Return `percent` as text, prefixed with "0" when `int(percent)` is below 10.

    Values whose integer form is 10 or more are returned as `str(percent)`
    with no prefix.
    """
    if int(percent) < 10:
        return "0" + str(percent)
    return str(percent)

# %% ../../nbs/example_nbs/validated/nbdev_high_quality.ipynb 14
def get_experiment_segment(traffic_percent):
    """Return a tuple with the traffic text of every integer in `range(traffic_percent)`.

    Each element is produced by `get_traffic_text`, in ascending order.
    """
    buckets = range(traffic_percent)
    return tuple(map(get_traffic_text, buckets))

# %% ../../nbs/example_nbs/validated/nbdev_high_quality.ipynb 16
def get_utterances(model_level=None, min_date=None, traffic_percent=100):
    """Return a synthetic `pd.Series` of 100 utterances named "utterance".

    Real data preparation code would normally be called here; synthetic data
    is generated instead to keep dependencies simple. `model_level` and
    `min_date` are accepted but not read. `traffic_percent` is only passed to
    `get_experiment_segment`, whose result is discarded.

    The utterances are drawn with `np.random.choice` from a fixed list of
    five phrases, so the content varies with NumPy's global random state.
    """
    # Called for parity with a real pipeline; the returned segment is not used.
    get_experiment_segment(traffic_percent)

    phrases = [
        "Hello",
        "Goodbye",
        "Hi",
        "Can you help?",
        "I have an issue, can you help me?",
    ]
    sampled = np.random.choice(phrases, 100)
    return pd.Series(sampled, name="utterance")

# %% ../../nbs/example_nbs/validated/nbdev_high_quality.ipynb 18
def preprocess(message, model_level=None, min_date=None, traffic_percent=100):
    """Print the upper-cased `message` and return the utterance data.

    Returns a dict with a single key, "documents", holding the result of
    `get_utterances(model_level, min_date, traffic_percent)`.
    """
    shouted = message.upper()
    print(f"I captialised the message: {shouted}")
    documents = get_utterances(model_level, min_date, traffic_percent)
    return {"documents": documents}

# %% ../../nbs/example_nbs/validated/nbdev_high_quality.ipynb 21
class Topics:
    """Stand-in topic model whose methods all return fixed values."""

    def __init__(self, documents, workers):
        """Accept `documents` and `workers`; neither is stored or used."""

    def get_num_topics(self):
        """Return the fixed number of topics, 6."""
        num_topics = 6
        return num_topics

    def get_topic_sizes(self):
        """Return a pair of separate lists, each holding the integers 1 to 6."""
        first = list(range(1, 7))
        second = list(range(1, 7))
        return first, second

    def get_topics(self, num_topics):
        """Return a fixed `(words, scores, numbers)` tuple; `num_topics` is not read.

        `words` is a list of six strings, `scores` a NumPy array of six ones,
        and `numbers` the list 1 to 6.
        """
        words = ["cat", "sat", "mat", "mouse", "house", "grouse"]
        scores = np.asarray([1] * 6)
        numbers = list(range(1, 7))
        return words, scores, numbers

    def plot_wordcloud(self):
        """Print a reminder instead of plotting anything."""
        reminder = "you may want to remove plotting code from testing to speed things up"
        print(reminder)

# %% ../../nbs/example_nbs/validated/nbdev_high_quality.ipynb 23
def fit(documents, workers=workers):
    """Build a `Topics` model from `documents` and return it under the "model" key.

    The default for `workers` is the value of the module-level `workers`
    at the time this function is defined.
    """
    return {"model": Topics(documents, workers=workers)}

# %% ../../nbs/example_nbs/validated/nbdev_high_quality.ipynb 35
def evaluate(model):
    """Sanity-check a topic model's output and collect an artifact and metrics.

    Args:
        model: Object providing `get_num_topics()`, `get_topics(num_topics)`
            returning `(topic_words, word_scores, topic_nums)`, and
            `get_topic_sizes()` returning an indexable pair. `word_scores`
            must support `.min()`, `.max()` and `.shape`.

    Returns:
        A dict with three keys:

        * "word_summaries": a plain `bool`, True only when every entry of
          `topic_words` is non-empty, all word scores lie in [0, 1], and the
          topic count matches both `len(topic_words)` and
          `word_scores.shape[0]`.
        * "artifacts": list with the local path of the CSV written by this
          call ("/tmp/dataframe_artifact.csv").
        * "metrics": list of `(name, value, step)` 3-tuples.
    """
    num_topics = model.get_num_topics()
    topic_words, word_scores, _topic_nums = model.get_topics(num_topics)

    topic_contains_non_empty_words = all(len(tw) > 0 for tw in topic_words)
    word_scores_in_range = word_scores.min() >= 0.0 and word_scores.max() <= 1.0
    as_many_items_as_topics = num_topics == len(topic_words) == word_scores.shape[0]
    # When `word_scores` is a NumPy array (as `Topics.get_topics` returns), the
    # range check is a `numpy.bool_`, and an `and` chain returns one of its
    # operands. Coerce so callers always get a built-in `bool`, which also
    # keeps the result JSON-serialisable.
    word_summaries = bool(
        topic_contains_non_empty_words
        and word_scores_in_range
        and as_many_items_as_topics
    )

    # Artifacts are files on the local filesystem; a step returns their paths
    # so that they can be uploaded to remote block storage.
    topic_sizes = model.get_topic_sizes()
    sample_df = pd.DataFrame({"a": topic_sizes[0], "b": topic_sizes[1]})
    artifact_path = "/tmp/dataframe_artifact.csv"
    sample_df.to_csv(artifact_path, index=False)
    artifacts = [artifact_path]

    # Step metrics are a list of 3-tuples ordered as (name, value, step).
    metrics = [("mae", 100, 0), ("mae", 67, 1), ("mae", 32, 2)]

    return {
        "word_summaries": word_summaries,
        "artifacts": artifacts,
        "metrics": metrics,
    }

# %% ../../nbs/example_nbs/validated/nbdev_high_quality.ipynb 38
def serve_num_topics(model):
    """Return whatever `model.get_num_topics()` reports."""
    num_topics = model.get_num_topics()
    return num_topics
