import numpy as np
import pickle
import csv
import pandas as pd
import requests
import json

class Nutrition_Estimator:
    def __init__(self,
                 food_data_filepath = 'https://github.com/Big-Ideas-Lab/food2vec/raw/master/demo_data.csv',
                 food_embeddings_filepath = 'https://github.com/Big-Ideas-Lab/food2vec/raw/master/data.json',
                 demo_warning = True):
        '''
        Build an estimator by loading the word embeddings and the nutrition database.

        PARAMS
        -------
        food_data_filepath : Str
            Location of the nutrition CSV, handed to load_database_pandas.
        food_embeddings_filepath : Str
            Location of the embeddings JSON, handed to load_embeddings.
        demo_warning : Bool
            When True and the demonstration CSV is in use, an alert is printed.
        '''
        # The embedding dictionary must exist first: load_database_pandas
        # embeds every food name through self.embed, which reads it.
        self.embedding_dictionary = self.load_embeddings(food_embeddings_filepath)

        loaded_tables = self.load_database_pandas(food_data_filepath)
        self.database_embeddings, self.database_values = loaded_tables

        demo_url = 'https://github.com/Big-Ideas-Lab/food2vec/raw/master/demo_data.csv'
        if food_data_filepath == demo_url and demo_warning == True:
            alert_lines = (
                "------ALERT--------",
                "You're using demonstration nutrition data. This is a random subset (~100 items) from our annotated database.",
                "We do not have permission to distribute our full dataset, and we recommend processing your own.",
                "The USDA provides an excellent start: https://fdc.nal.usda.gov/",
                "You can suppress this warning by setting the 'demo_warning' parameter to False.",
                "-------------------",
            )
            for alert_line in alert_lines:
                print(alert_line)


    def load_database_pandas(self, filepath):
        '''
        Method to load a remote or local file containing nutrition information.
        The file is then converted into two numpy arrays linked by common food IDs 
        (embeddings with food ID, and nutrition values with food ID)

        PARAMS
        filepath : Str. Handles both local and remote.

        RETURNS
        database_embeddings : np.array([Float])
        database_values : np.array([Float])
        '''
        # download demo data from GitHub
        df = pd.read_csv(filepath)

        #remove unnamed columns generated by pandas, perform quality checks
        df.drop(df.columns[df.columns.str.contains('unnamed',case = False)], axis = 1, inplace = True)

        #save column names
        self.columns = df.columns

        # separate food name column
        foods = df['food_name'].values

        # get index values for food names for later reference
        foodsidx = df.index.values

        # get just nutrients from csv
        nutrient_table = df.values[:, 1:]

        # embed all food names, save as a list
        embeddings = [self.embed(food) for food in foods]

        # reshape arrays for concatenation into optimized numpy arrays
        foods_reshaped = foods.reshape(-1, 1)
        foodsidx_reshaped = foodsidx.reshape(-1, 1)
    
        # convert data to numpy arrays
        # This is just a demo. Arrays will be stored in memory for now.
        database_embeddings = np.concatenate((foodsidx_reshaped, embeddings), axis = 1)
        database_values = np.concatenate((foodsidx_reshaped, foods_reshaped, nutrient_table), axis = 1)

        return database_embeddings, database_values

    def load_embeddings(self, filepath):
        '''
        Method to load embedding dictionary from remote location.

        PARAMS
        -------
        filepath : Str (url)

        RETURNS
        -------
        embedding_dictionary : Dict {food : embedding}
        '''

        #download embeddings from Github
        embeddings_dict = dict(requests.get(filepath).json())

        #convert json strings to dictionary of key:value (string : np.array)
        #return dictionary of embeddings
        return {k:self.convert_json_string_to_numpy(v) for k,      v in embeddings_dict.items()}


    def semantic_search(self, food_string):
        '''
        Method to find best matching string to database using cosine similarity.
        Multiple entries are detected by the presence of "and" or "with"

        When finding multiple entries, both matches for full string and substrings 
        are searched and returned.

        PARAMS
        -------
        food_string : Str
            food_string can be natural language. E.g. "I ate a kale salad".
 
        RETURNS
        -------
        json_dict : JSON
            {
                food_name : value, 
                nutrient1 : value, 
                nutrient2 : value, 
                ...
                match: value,
            }
        '''
        #apply a cosine similarity function to full numpy matrix of database embeddings, compare to embedded food string for cosine sim array
        cosines = np.apply_along_axis(self.cosine, 1, self.database_embeddings[:, 1:], self.embed(food_string))

        #find best match value
        highest_match = cosines.max()

        #find index of best match value
        maxvalidx = np.argmax(cosines)

        #return nutrition data for match, alongside the cosine similarity of the best match. Exclude index column.
        food_match = self.database_values[maxvalidx][1:]

        #process data for return
        json_dict = self.convert_to_json(food_match, highest_match)

        return json_dict

    def convert_to_json(self, food_row, val):
       
        '''
        Method to convert food row and match value to a json format for API
        PARAMS
        -------
        food_match : List
            Matched row from database including food name and nutrients
        maxval : Float
            Cosine similarity of original item to matched item
 
        RETURNS
        -------
        food_match : List
            Matched row from database including food name and nutrients
        maxval : Float
            Cosine similarity of original item to matched item

        '''
        key_names = list(self.columns) + ['match']
        value_names = list(food_row) + [val]
     
        return json.dumps(dict(zip(key_names, value_names)))

    def embed(self, string):
        '''
        A method to embed a string of arbitrary size

        PARAMS
        string : String

        RETURNS
        embedding : np.array([Float]) of size 300
        '''
        base = np.zeros(300) 
        array = string.split(' ')
        for word in array:
            try:
                base += self.embedding_dictionary[word]
            except:
                continue
        return base

    @staticmethod
    def convert_json_string_to_numpy(json_string):
        ''' 
        A utility method to convert a json string to a numpy array
        '''
        return np.array(json.loads(json_string))

    @staticmethod
    def cosine(vA, vB):
        '''
        A utility method to find the cosine similarity between two vectors of equal length

        PARAMS

        vA : np.array([Float])
        vB : np.array([Float])

        RETURNS

        similarity : Float
        '''
        similarity = np.dot(vA, vB) / (np.linalg.norm(vA) * np.linalg.norm(vB))
        if np.isnan(similarity):
            return 0
        else:
            return similarity
