# -*- coding: utf-8 -*-

# @Time    : 2020/9/8 23:51
# @Email   : 986798607@qq.com
# @Software: PyCharm
# @License: BSD 3-Clause
# -*- coding: utf-8 -*-

"""
Created on Sun Jan 28 15:24:10 2018

@author: ww
"""
import gc

import numpy as np
import pandas as pd
import sklearn
import sklearn.utils
from mgetool.tool import parallelize
from scipy import stats
from sklearn.utils import check_array

# Usage banner written to stdout when the module is imported: grid building,
# then the ranking call, then the layout of the returned table.
print(
    '\t\tFor grid building\n'
    'Example:\n'
    'space=searchspace(li1,li2,li3)\n'
    'Note:parameters should be no more than 6'
)
print(
    '\n\t\tFor ego,kg,maxp\n'
    'Example:\n'
    'results=egosearch(X=?,y=?,searchspace=space,number=500,regclf=RandomForestRegressor(),'
    'rankway="ego"or"kg"or"maxp"or"No")'
)
print(
    'return:\n'
    'Result is 2 dimentions array\n'
    '1st column = sequence number,\n'
    '2nd part = your searchspace,\n'
    '3rd part = mean,std,ego,kg,maxp,sequentially'
)


def search_space(*arg):
    """
    Build the full Cartesian grid of the given 1D value arrays.

    Parameters
    ----------
    arg: list of np.ndarray
        One array of candidate values per variable, e.g.:
            arg = [
            np.arange(0.1,0.35,0.1),
            np.arange(0.1, 2.1, 0.5),
            np.arange(0,1.3,0.3),
            np.array([0.5,1,1.2,1.3]),]

    Returns
    -------
    np.ndarray
        2D array with one row per grid point and one column per input array
        (column i holds values taken from the i-th argument). Row order is
        the one produced by ``np.meshgrid`` with its default indexing.

    """
    # Flatten every coordinate matrix, stack them as rows, then transpose so
    # that each variable ends up in its own column.
    flattened = [axis.reshape(-1) for axis in np.meshgrid(*arg)]
    return np.transpose(np.asarray(flattened))


class Ego:
    """
    EGO (Efficient global optimization).

    The regressor is refitted on ``number`` bootstrap resamples of ``(X, y)``
    and each refit predicts every row of ``searchspace``. The per-candidate
    mean and standard deviation of those predictions feed the acquisition
    scores ("ego", "kg", "maxp") that are used to rank the candidates.

    References:
        Jones, D. R., Schonlau, M. & Welch, W. J. Efficient global optimization of expensive black-box functions. J.
        Global Optim. 13, 455–492 (1998)

    Examples:

        searchspace_list = [
            np.arange(0.1,0.35,0.1),
            np.arange(0.1, 1.3, 0.3),
            np.arange(0.1, 2.1, 0.5),
            np.arange(0,1.3,0.3),
            np.arange(0,7.5,1.5),
            np.arange(0,7.5,1.5),
            np.arange(800, 1300, 50),
            np.arange(200, 600, 40),
            np.array([20, 80, 138, 250]),]

        searchspace = search_space(*searchspace_list)

        me = Ego(SVR(), searchspace, X, y, 500, n_jobs=8)

        result = me.Rank()

    """

    def __init__(self, regclf, searchspace=None, X=None, y=None, number=500, n_jobs=2):
        """
        Parameters
        ----------
        regclf: sklearn.Mode
            sklearn method, with "fit" and "predict".
        searchspace: np.ndarray
            Custom or generate by .search_space() function.
        X: np.ndarray
            X data (2D).
        y: np.ndarray
            y data (1D).
        number: int>100
            Repeat number (bootstrap resamples), default is 500.
        n_jobs: int
            Parallelize number.
        """

        self.n_jobs = n_jobs
        self.searchspace = searchspace
        self.X = X
        self.y = y
        self.regclf = regclf
        self.meanandstd_all = []
        self.predict_y_all = []
        self.number = number

    def fit(self, searchspace=None, X=None, y=None, *args):
        """
        Fit the regressor on bootstrap resamples and predict the search space.

        Arguments left as None fall back to the values stored on the instance;
        arguments that are given replace the stored values. The raw
        predictions are stored in ``self.predict_dataj`` (one row per
        candidate, one column per resample) and any previously cached
        mean/std table is discarded.

        Parameters
        ----------
        searchspace: np.ndarray
            Custom or generate by .search_space() function.
        X: np.ndarray
            X data (2D).
        y: np.ndarray
            y data (1D).
        args:
            Ignored; accepted for call compatibility.

        Raises
        ------
        AssertionError
            If the regressor lacks "fit"/"predict", if any of searchspace, X,
            y is missing, or if X and searchspace differ in column count.
        """
        assert hasattr(self.regclf, "fit")
        assert hasattr(self.regclf, "predict")

        if searchspace is not None:
            self.searchspace = searchspace
        if X is not None:
            self.X = X
        if y is not None:
            self.y = y

        assert self.searchspace is not None and self.X is not None and self.y is not None, \
            "searchspace, X, y should be np.array"

        # Finite-value checking is check_array's default, so the keyword (which
        # was renamed in newer scikit-learn releases) is deliberately not passed.
        # The validated arrays are used below so list-like input works too.
        X = check_array(self.X, ensure_2d=True)
        y = check_array(self.y, ensure_2d=False)
        searchspace = check_array(self.searchspace, ensure_2d=True)
        assert X.shape[1] == searchspace.shape[1], "X and searchspace must have the same number of columns"

        regclf0 = self.regclf

        def fit_parllize(random_state):
            # One bootstrap round: resample with replacement (seeded by the
            # round index), refit, and predict every candidate.
            data_train, y_train = sklearn.utils.resample(X, y, n_samples=None, replace=True,
                                                         random_state=random_state)
            regclf0.fit(data_train, y_train)
            # Flatten so that (n, 1)-shaped predictions stack like 1D ones.
            return np.ravel(regclf0.predict(searchspace))

        predict_dataj = parallelize(n_jobs=self.n_jobs, func=fit_parllize, iterable=range(self.number))
        self.predict_dataj = np.array(predict_dataj).T

        # A mean/std table cached from an earlier fit no longer matches.
        if hasattr(self, "ms"):
            del self.ms

    def meanandstd(self, predict_dataj=None):
        """
        Reduce the bootstrap predictions to per-candidate mean and std.

        Parameters
        ----------
        predict_dataj: np.ndarray, None
            Prediction matrix (candidates x resamples). When None, the matrix
            stored by ``fit`` is used.

        Returns
        -------
        np.ndarray
            Two columns: mean, std. Also cached as ``self.ms``.

        Raises
        ------
        NotImplementedError
            If no prediction matrix is available (``fit`` has not been run).
        """
        if predict_dataj is not None:
            self.predict_dataj = predict_dataj
        if getattr(self, "predict_dataj", None) is None:
            raise NotImplementedError("Please fit first")

        mean = np.mean(self.predict_dataj, axis=1)
        std = np.std(self.predict_dataj, axis=1)

        # The raw matrix can be large; drop it once it has been summarised.
        del self.predict_dataj
        gc.collect()

        self.ms = np.column_stack((mean, std))
        return self.ms

    @staticmethod
    def CalculateEi(y, meanstd0):
        """
        Calculate the acquisition scores for every candidate.

        Parameters
        ----------
        y: np.ndarray
            Observed targets; only their maximum is used.
        meanstd0: np.ndarray
            Two columns: predicted mean and predicted std per candidate.

        Returns
        -------
        np.ndarray
            Five columns: mean, std, ego, kg, maxp.
            "ego" is the expected improvement over max(y); "kg" uses the
            larger of max(y) and the best predicted mean as the reference.
            For candidates with std == 0 both reduce to
            max(mean - reference, 0) instead of NaN.
        """
        meanstd0 = np.asarray(meanstd0, dtype=float)
        mean = meanstd0[:, 0]
        std = meanstd0[:, 1]

        # Rows with zero spread would divide by zero; compute them with a
        # placeholder scale and overwrite with the analytic limit afterwards.
        zero_std = std == 0
        safe_std = np.where(zero_std, 1.0, std)

        def improvement(best):
            gain = mean - best
            z = gain / safe_std
            ei = safe_std * z * stats.norm.cdf(z) + safe_std * stats.norm.pdf(z)
            return z, np.where(zero_std, np.maximum(gain, 0.0), ei)

        y_best = np.max(y)
        ego, ei_ego = improvement(y_best)
        _, ei_kg = improvement(max(np.max(mean), y_best))

        # NOTE(review): the standardized score is passed through a normal CDF
        # that is parameterized by loc/scale again; the usual probability of
        # improvement would be norm.cdf(ego). Kept as-is - confirm intent.
        max_P = stats.norm.cdf(ego, loc=mean, scale=std)

        ei = np.column_stack((meanstd0, ei_ego, ei_kg, max_P))
        print('ego is done')
        return ei

    def egosearch(self, rankway="ego", meanstd=None, return_type="pd", reverse=True):
        """
        Score and rank the search space.

        Result is a 2-dimensional table:
        1st column = sequence number, 2nd part = your searchspace,
        3rd part = mean, std, ego, kg, maxp, sequentially.

        When ``meanstd`` is not given, fresh predictions from ``fit`` are
        summarised first; otherwise the cached mean/std table is used; if
        neither exists, ``fit`` is run with the stored data.

        Parameters
        ----------
        rankway : str
            ["ego","kg","maxp","No"]; the column to sort by ("No"/"no" keeps
            the search-space order). Any other value prints a message and
            returns None.

        meanstd:np.ndarray, None
            Precomputed mean/std table (two columns).

        return_type:str
            "pd" returns a pandas.DataFrame, anything else a numpy.ndarray.

        reverse:bool
            If True the row order is flipped after sorting (largest score
            first); the flip is applied for rankway "No" as well.
        """
        if rankway not in ('ego', 'kg', 'maxp', 'no', 'No'):
            print('Don\'t kidding me,checking rankway=what?\a')
            return None

        if meanstd is None:
            if getattr(self, "predict_dataj", None) is not None:
                # Unsummarised predictions are newer than any cached table.
                meanstd = self.meanandstd()
            elif hasattr(self, "ms"):
                meanstd = self.ms
            else:
                self.fit()
                meanstd = self.meanandstd()

        scores = self.CalculateEi(self.y, meanstd)
        sequence = np.arange(0, len(scores))
        table = np.column_stack((sequence, self.searchspace, scores))

        # Score columns sit at the end of the table: ..., ego, kg, maxp.
        sort_column = {"ego": -3, "kg": -2, "maxp": -1}.get(rankway)
        if sort_column is not None:
            table = table[np.argsort(table[:, sort_column])]

        if reverse:
            table = np.flipud(table)

        if return_type == "pd":
            table = pd.DataFrame(table)
            # 1 sequence column + 5 score columns surround the feature columns.
            n_features = table.shape[1] - 6
            feature_names = ["feature%d" % i for i in range(n_features)]
            table.columns = ["number"] + feature_names + ["mean", "std", "ego", "kg", "maxp"]
        return table

    def Rank(self, rankway="ego", meanstd=None, return_type="pd", reverse=True):
        """
        The same as egosearch method.
        Result is a 2-dimensional table.
        1st column = sequence number, 2nd part = your searchspace,
        3rd part = mean, std, ego, kg, maxp, sequentially.

        Parameters
        ----------
        rankway : str
            ["ego","kg","maxp","No"]

        meanstd:np.ndarray,None
            Precomputed mean/std table (two columns).

        return_type:str
            "pd" returns a pandas.DataFrame, anything else a numpy.ndarray.

        reverse:bool
            If True the row order is flipped after sorting.
        """
        return self.egosearch(rankway, meanstd=meanstd, return_type=return_type, reverse=reverse)

# if __name__ == "__main__":
#     from sklearn.datasets import load_boston
#     import numpy as np
#     from sklearn.svm import SVR
#
#     #####model1#####
#     model = SVR()
#     ###
#
#     #####model2#####
#     # parameters = {'C': [0.1, 1, 10]}
#     # model = GridSearchCV(SVR(), parameters)
#     ###
#
#     X, y = load_boston(return_X_y=True)
#     X = X[:, :5]  # (keep only the first 5 features to simplify the computation; for illustration)
#     searchspace_list = [
#         np.arange(0.01, 1, 0.1),
#         np.array([0, 20, 30, 50, 70, 90]),
#         np.arange(1, 10, 1),
#         np.array([0, 1]),
#         np.arange(0.4, 0.6, 0.02),
#     ]
#     searchspace = search_space(*searchspace_list)
#     #
#
#     me = Ego(model, searchspace, X, y, 500, n_jobs=6)
#     me.fit()
#     re = me.egosearch()
