# ===========================================================
# ⚙️ NeuraPython_ML — Unified Scikit-learn Wrapper
# Author: Ibrahim Shahid
# ===========================================================

import numpy as np
import pandas as pd
import joblib

from sklearn import (
    datasets, preprocessing, model_selection, metrics,
    linear_model, tree, neighbors, svm, ensemble, cluster,
    decomposition
)

class NeuraPython_ML:
    """Thin convenience wrapper around common scikit-learn workflows.

    The instance keeps three pieces of state:

    * ``models``  -- dict mapping a user-chosen name to an estimator object.
    * ``scaler``  -- the scaler fitted by the most recent call to
      ``normalize`` / ``standardize`` / ``minmax_scale`` (or ``None``).
    * ``encoder`` -- the ``LabelEncoder`` fitted by the most recent call to
      ``encode_labels`` (or ``None``).
    """

    def __init__(self):
        print("🧠 NeuraPython_ML initialized ")
        self.models = {}
        self.scaler = None
        self.encoder = None

    # -----------------------------------------------------------
    # 🔧 INTERNAL HELPERS
    # -----------------------------------------------------------
    def _get_model(self, model_name):
        """Return the registered model called *model_name* or raise ValueError."""
        model = self.models.get(model_name)
        if model is None:
            raise ValueError(f"❌ Model '{model_name}' not found.")
        return model

    def _fit_scaler(self, scaler, X):
        """Remember *scaler* on the instance, fit it on *X*, return scaled *X*."""
        self.scaler = scaler
        return scaler.fit_transform(X)

    # -----------------------------------------------------------
    # 📥 DATASET LOADING
    # -----------------------------------------------------------
    def load_builtin_dataset(self, name="iris"):
        """Load one of scikit-learn's bundled toy datasets.

        Args:
            name: Case-insensitive dataset name; one of ``"iris"``,
                ``"digits"``, ``"wine"`` or ``"breast_cancer"``.

        Returns:
            ``(features, target)`` where *features* is a ``pandas.DataFrame``
            whose columns are the dataset's feature names and *target* is the
            dataset's ``target`` array.

        Raises:
            ValueError: If *name* is not one of the supported datasets.
        """
        loaders = {
            "iris": datasets.load_iris,
            "digits": datasets.load_digits,
            "wine": datasets.load_wine,
            "breast_cancer": datasets.load_breast_cancer,
        }
        loader = loaders.get(name.lower())
        if loader is None:
            raise ValueError("❌ Unknown dataset name.")
        data = loader()
        return pd.DataFrame(data.data, columns=data.feature_names), data.target

    # -----------------------------------------------------------
    # 🔢 DATA PREPROCESSING
    # -----------------------------------------------------------
    def normalize(self, X):
        """Fit a ``Normalizer`` on *X*, store it in ``self.scaler``, return scaled *X*."""
        return self._fit_scaler(preprocessing.Normalizer(), X)

    def standardize(self, X):
        """Fit a ``StandardScaler`` on *X*, store it in ``self.scaler``, return scaled *X*."""
        return self._fit_scaler(preprocessing.StandardScaler(), X)

    def minmax_scale(self, X):
        """Fit a ``MinMaxScaler`` on *X*, store it in ``self.scaler``, return scaled *X*."""
        return self._fit_scaler(preprocessing.MinMaxScaler(), X)

    def transform_with_scaler(self, X):
        """Scale *X* with the already-fitted ``self.scaler`` without refitting.

        Use this for validation/test data so that it is scaled with the
        statistics learned from the training data instead of its own.

        Raises:
            ValueError: If no scaler has been fitted yet.
        """
        if self.scaler is None:
            raise ValueError(
                "❌ No scaler fitted. Call normalize, standardize or minmax_scale first."
            )
        return self.scaler.transform(X)

    def encode_labels(self, y):
        """Fit a ``LabelEncoder`` on *y*, store it in ``self.encoder``, return the codes."""
        self.encoder = preprocessing.LabelEncoder()
        return self.encoder.fit_transform(y)

    def one_hot_encode(self, X):
        """Return *X* one-hot encoded as a dense array.

        The fitted ``OneHotEncoder`` is not retained on the instance.
        """
        encoded = preprocessing.OneHotEncoder().fit_transform(X)
        return encoded.toarray()

    # -----------------------------------------------------------
    # 📊 TRAIN/TEST SPLIT
    # -----------------------------------------------------------
    def split(self, X, y, test_size=0.2, random_state=42):
        """Split *X* and *y* via ``train_test_split``.

        Returns:
            The list produced by ``train_test_split(X, y, ...)``.
        """
        return model_selection.train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

    # -----------------------------------------------------------
    # 🤖 MODEL CREATION (Unified Interface)
    # -----------------------------------------------------------
    def create_model(self, model_name, **kwargs):
        """Instantiate an estimator by name and register it in ``self.models``.

        Args:
            model_name: Case-insensitive model identifier. Supported values:
                ``linear_regression``, ``logistic_regression``,
                ``decision_tree``, ``random_forest``, ``svm``, ``knn``,
                ``gradient_boosting``, ``naive_bayes``, ``kmeans``, ``pca``.
            **kwargs: Forwarded unchanged to the estimator's constructor.

        Returns:
            The newly created (unfitted) estimator.

        Raises:
            ValueError: If *model_name* is not supported.

        Note:
            Matching is case-insensitive, but the model is stored under
            *model_name* exactly as given, so later lookups must use the same
            spelling.
        """
        key = model_name.lower()
        if key == "naive_bayes":
            # Imported lazily: sklearn.naive_bayes is not among the
            # module-level sklearn imports.
            from sklearn.naive_bayes import GaussianNB
            factory = GaussianNB
        else:
            factory = {
                "linear_regression": linear_model.LinearRegression,
                "logistic_regression": linear_model.LogisticRegression,
                "decision_tree": tree.DecisionTreeClassifier,
                "random_forest": ensemble.RandomForestClassifier,
                "svm": svm.SVC,
                "knn": neighbors.KNeighborsClassifier,
                "gradient_boosting": ensemble.GradientBoostingClassifier,
                "kmeans": cluster.KMeans,
                "pca": decomposition.PCA,
            }.get(key)
        if factory is None:
            raise ValueError(f"❌ Unsupported model: {model_name}")

        model = factory(**kwargs)
        self.models[model_name] = model
        print(f"✅ Model '{model_name}' created.")
        return model

    # -----------------------------------------------------------
    # 🧩 MODEL TRAINING & PREDICTION
    # -----------------------------------------------------------
    def train(self, model_name, X_train, y_train=None):
        """Fit the registered model *model_name*.

        Args:
            model_name: Key of a model previously stored in ``self.models``.
            X_train: Training features.
            y_train: Training targets. May be omitted for estimators whose
                ``fit`` ignores ``y`` (e.g. the ``kmeans`` and ``pca`` models
                offered by ``create_model``).

        Returns:
            The fitted model object.

        Raises:
            ValueError: If no model is registered under *model_name*.
        """
        model = self._get_model(model_name)
        # y is always passed positionally so explicit callers behave exactly
        # as before; unsupervised estimators accept and ignore y=None.
        model.fit(X_train, y_train)
        print(f"🚀 Model '{model_name}' trained successfully.")
        return model

    def predict(self, model_name, X_test):
        """Return ``model.predict(X_test)`` for the registered model.

        Raises:
            ValueError: If no model is registered under *model_name*.
            AttributeError: If the estimator has no ``predict`` method
                (e.g. PCA).
        """
        return self._get_model(model_name).predict(X_test)

    # -----------------------------------------------------------
    # 📈 MODEL EVALUATION
    # -----------------------------------------------------------
    def evaluate(self, y_true, y_pred):
        """Compute classification metrics for *y_pred* against *y_true*.

        Returns:
            Dict with keys ``accuracy``, ``precision``, ``recall`` and
            ``f1_score``. Precision, recall and F1 use weighted averaging
            with ``zero_division=0``.
        """
        weighted = {"average": "weighted", "zero_division": 0}
        return {
            "accuracy": metrics.accuracy_score(y_true, y_pred),
            "precision": metrics.precision_score(y_true, y_pred, **weighted),
            "recall": metrics.recall_score(y_true, y_pred, **weighted),
            "f1_score": metrics.f1_score(y_true, y_pred, **weighted),
        }

    def confusion_matrix(self, y_true, y_pred):
        """Return ``sklearn.metrics.confusion_matrix(y_true, y_pred)``."""
        return metrics.confusion_matrix(y_true, y_pred)

    def classification_report(self, y_true, y_pred):
        """Return ``sklearn.metrics.classification_report(y_true, y_pred)``."""
        return metrics.classification_report(y_true, y_pred)

    # -----------------------------------------------------------
    # 🧮 DIMENSIONALITY REDUCTION
    # -----------------------------------------------------------
    def apply_pca(self, X, n_components=2, return_model=False):
        """Fit a PCA on *X* and return the projected data.

        Args:
            X: Input features.
            n_components: Passed to ``PCA(n_components=...)``.
            return_model: When true, also return the fitted PCA so it can be
                reused to transform other data.

        Returns:
            The transformed data, or ``(pca, transformed)`` when
            *return_model* is true.
        """
        pca = decomposition.PCA(n_components=n_components)
        reduced = pca.fit_transform(X)
        if return_model:
            return pca, reduced
        return reduced

    # -----------------------------------------------------------
    # 💾 MODEL SAVING & LOADING
    # -----------------------------------------------------------
    def save_model(self, model_name, path):
        """Serialize the registered model *model_name* to *path* with joblib.

        Raises:
            ValueError: If no model is registered under *model_name*.
        """
        model = self._get_model(model_name)
        joblib.dump(model, path)
        print(f"💾 Model '{model_name}' saved at {path}")

    def load_model(self, path, model_name="loaded_model"):
        """Load a joblib file from *path* and register it as *model_name*.

        Warning:
            joblib files are pickle-based; loading one can execute arbitrary
            code. Only load files from a trusted source.

        Returns:
            The loaded object.
        """
        # SECURITY: joblib.load unpickles the file -- never call this on
        # untrusted input.
        model = joblib.load(path)
        self.models[model_name] = model
        print(f"📂 Model '{model_name}' loaded from {path}")
        return model

    # -----------------------------------------------------------
    # 🧠 CROSS VALIDATION & GRID SEARCH
    # -----------------------------------------------------------
    def cross_validate(self, model, X, y, cv=5):
        """Run ``cross_val_score`` on an estimator object.

        Args:
            model: An estimator object (not a name from ``self.models``).
            X: Features.
            y: Targets.
            cv: Passed to ``cross_val_score(cv=...)``.

        Returns:
            Dict with the per-fold ``scores`` and their ``mean``.
        """
        scores = model_selection.cross_val_score(model, X, y, cv=cv)
        return {"mean": np.mean(scores), "scores": scores}

    def grid_search(self, model, params, X, y, cv=5):
        """Run ``GridSearchCV`` over *params* for an estimator object.

        Returns:
            ``(best_estimator, best_params, best_score)``.
        """
        search = model_selection.GridSearchCV(model, params, cv=cv)
        search.fit(X, y)
        return search.best_estimator_, search.best_params_, search.best_score_

    # -----------------------------------------------------------
    # 🔍 CLUSTERING UTILITIES
    # -----------------------------------------------------------
    def kmeans_cluster(self, X, n_clusters=3, random_state=None):
        """Fit KMeans on *X* and return the model with its cluster labels.

        Args:
            X: Input features.
            n_clusters: Passed to ``KMeans(n_clusters=...)``.
            random_state: Optional seed for reproducible clustering; the
                default ``None`` keeps KMeans' own default.

        Returns:
            ``(model, labels)`` where *labels* is the result of
            ``model.fit_predict(X)``.
        """
        model = cluster.KMeans(n_clusters=n_clusters, random_state=random_state)
        labels = model.fit_predict(X)
        return model, labels

    # -----------------------------------------------------------
    # 🧩 FEATURE SELECTION
    # -----------------------------------------------------------
    def feature_importances(self, model_name, feature_names=None):
        """Return the ``feature_importances_`` of a registered model.

        Args:
            model_name: Key of a model previously stored in ``self.models``.
            feature_names: Optional iterable of names, one per importance.

        Returns:
            The raw ``feature_importances_`` value, or a ``{name: importance}``
            dict when *feature_names* is given.

        Raises:
            ValueError: If the model is not registered, or if the number of
                *feature_names* differs from the number of importances.
            AttributeError: If the model exposes no ``feature_importances_``.
        """
        model = self._get_model(model_name)
        if not hasattr(model, "feature_importances_"):
            raise AttributeError("⚠️ Model has no feature_importances_ attribute.")

        importances = model.feature_importances_
        if feature_names is None:
            return importances

        names = list(feature_names)
        # zip() would silently drop the surplus and mislabel features.
        if len(names) != len(importances):
            raise ValueError(
                f"❌ Got {len(names)} feature names for "
                f"{len(importances)} feature importances."
            )
        return dict(zip(names, importances))

    # -----------------------------------------------------------
    # 🧾 SUMMARY
    # -----------------------------------------------------------
    def summary(self):
        """Print each registered model name with its estimator class name."""
        print("=== NeuraPython_ML Models ===")
        for name, model in self.models.items():
            print(f"• {name}: {type(model).__name__}")