import numpy as np
from typing import Union,List
import cvxpy
# from .enum import TaskType

# def resolve_task_type(y:  Union[np.array,np.matrix] ) -> TaskType:
#     '''
#         Try to resolve the task type based on the output space
#     '''
#     pass
    
def _find_convex_hull(
                        x : Union[np.array,np.matrix]
                    ) -> Union[np.array,np.matrix]:
    """[summary]

    :param x: [description]
    :type x: Union[np.array,np.matrix]
    :return: [description]
    :rtype: Union[np.array,np.matrix]
    """    
    '''
        Calculate the convex hull from the set of points in "x"
    '''
    from scipy.spatial import Delaunay,ConvexHull

    triangulation = Delaunay(x)
    unordered = list(triangulation.convex_hull)
    ordered = list(unordered.pop(0))
    while len(unordered) > 0:
        _next = list(i for i, seg in enumerate(unordered) if ordered[-1] in seg) 
        ordered += [point for point in unordered.pop(_next) if point != ordered[-1]]
    return x[ordered]

def _calculate_hypersphere(
                        x : Union[np.array,np.matrix]
                    ) -> Union[np.array,np.matrix]:
    """[summary]

    :param x: [description]
    :type x: Union[np.array,np.matrix]
    :return: [description]
    :rtype: Union[np.array,np.matrix]
    """    
    '''
        return the centroid and radius
    '''
    mu = x.mean(axis=0)
    r = (x.max(axis=0) - mu).max() # we could take the mean, but we're going to be safe since this is largely going to be an approximation for separability
    return mu,r

def percentage_in_sphere(
                        x : Union[np.array,np.matrix],
                        mu : float,
                        r : float
                    ) -> float:
    """Return the fraction of points of ``x`` lying strictly inside a hypersphere.

    A point counts as inside when its squared distance to ``mu`` is strictly
    smaller than ``r ** 2``; points exactly on the surface are not counted.

    :param x: points to test, iterated one at a time
    :type x: Union[np.array,np.matrix]
    :param mu: centre of the sphere (subtracted from every point)
    :type mu: float
    :param r: radius of the sphere
    :type r: float
    :return: number of points inside the sphere divided by ``len(x)``
    :rtype: float
    """
    radius_sq = r**2
    inside = 0
    for point in x:
        offset = point - mu
        if (offset**2).sum() < radius_sq:
            inside += 1
    return inside / len(x)

def doi(
        X : List[Union[np.array,np.matrix]]
    ) -> np.array:
    """Degrees Of Intersection between two or more point sets.

    Each point set is approximated by a hypersphere (exact convex hulls take
    too long for high dimensional data). Entry ``[i, j]`` with ``i < j`` is
    the fraction of the points of ``X[j]`` that fall inside the sphere fitted
    to ``X[i]``. This is valuable for measuring separability in the input
    space.

    :param X: sequence of point sets of shape [b, ...] where "b" is the batch
        size, i.e. the number of hulls to compare
    :type X: List[Union[np.array,np.matrix]]
    :return: a ``(b, b)`` matrix with ones on the diagonal; the lower triangle
        mirrors the upper triangle
    :rtype: np.array
    """
    batch_size = len(X)
    spheres = [_calculate_hypersphere(points) for points in X]

    # Identity start: every set fully intersects itself.
    m = np.eye(batch_size)
    for i in range(batch_size):
        for j in range(i + 1, batch_size):
            overlap = percentage_in_sphere(X[j], *spheres[i])
            m[i, j] = overlap
            # The measure is not really symmetric, but mirroring it halves
            # the work and is accepted as an approximation.
            m[j, i] = overlap
    return m

def _freedman_diaconis_bins(a):
    """[summary]

    :param a: [description]
    :type a: [type]
    :return: [description]
    :rtype: [type]
    """    
    """
        Calculate number of hist bins using Freedman-Diaconis rule.
        https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule
    """
    # From https://stats.stackexchange.com/questions/798/
    a = np.asarray(a)
    if len(a) < 2:
        return 1
    iqr = np.subtract.reduce(np.nanpercentile(a, [75, 25]))
    h = 2 * iqr / (len(a) ** (1 / 3))
    # fall back to sqrt(a) bins if iqr is 0
    if h == 0:
        return int(np.sqrt(a.size))
    else:
        return int(np.ceil((a.max() - a.min()) / h))

def get_peaks(
                X : Union[np.array,np.matrix], 
                bandwidth : float = 0.19310344827586207,
                fast_bandwidth: bool = True # if set to False we will grid search to find the best bandwidth selection, though this can be slow which is why it is not a default
                ) -> Union[np.array,np.matrix]:
    """Locate the peaks of the density histogram of ``X``.

    The values are binned with ``np.histogram(..., density=True)`` using the
    Freedman-Diaconis bin count capped at 25, and the local maxima of the
    resulting densities are located with ``scipy.signal.find_peaks``.

    :param X: sample values (``np.histogram`` works on the flattened input)
    :type X: Union[np.array,np.matrix]
    :param bandwidth: currently unused; kept for the planned KDE variant,
        defaults to 0.19310344827586207
    :type bandwidth: float, optional
    :param fast_bandwidth: currently unused; intended to switch off a grid
        search for the best bandwidth, defaults to True
    :type fast_bandwidth: bool, optional
    :return: the left bin edge of every histogram bin that is a local maximum;
        the first and last bins are never reported by ``find_peaks``
    :rtype: Union[np.array,np.matrix]
    """
    from scipy.signal import find_peaks

    # TODO: implement the KDE path (with optional bandwidth grid search); until
    # then no density estimator is imported and both bandwidth arguments are ignored.
    n_bins = min(_freedman_diaconis_bins(X), 25)
    densities, edges = np.histogram(X, bins=n_bins, density=True)
    peak_idx, _ = find_peaks(densities)
    return edges[peak_idx]

            


def remove_outliers(
                        M : Union[np.array,np.matrix],
                        return_params : bool = False
                    ) -> Union[np.array,np.matrix,tuple]:
    """Replace outliers with the median, using the 1.5 * IQR fences.

    A value is an outlier when it lies below ``q25 - 1.5 * iqr`` or above
    ``q75 + 1.5 * iqr``. Quartiles and median are computed before any value
    is replaced. ``M`` is modified in place and also returned.

    A 1-D input is treated as one series. For inputs with more dimensions
    each slice along the first axis (``for s in M``) is cleaned independently.
    NOTE(review): that means rows of a 2-D array; transpose beforehand if the
    series are stored column-wise.

    :param M: data to clean; must expose ``.shape``
    :type M: Union[np.array,np.matrix]
    :param return_params: also return the statistics used, defaults to False
    :type return_params: bool, optional
    :return: ``M``, or ``(M, q25s, q75s, medians)`` when ``return_params`` is
        true, where each list holds one entry per processed series
    :rtype: Union[np.array,np.matrix,tuple]
    """
    q25s, q75s, medians = [], [], []

    # Iterating M yields views for arrays, so the replacement below lands in M.
    series = M if len(M.shape) > 1 else [M]
    for values in series:
        q25 = np.quantile(values, 0.25)
        q75 = np.quantile(values, 0.75)
        iqr = q75 - q25
        median_ = np.median(values)
        medians.append(median_)
        q25s.append(q25)
        q75s.append(q75)

        # Outliers sit outside EITHER fence: below the lower one or above the
        # upper one.
        outliers = (values < (q25 - 1.5 * iqr)) | (values > (q75 + 1.5 * iqr))
        values[outliers] = median_

    return M if not return_params else (M, q25s, q75s, medians)