#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 26 19:00:58 2025

@author: pablo
"""
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Dense, RepeatVector, TimeDistributed, Input
from tensorflow.keras.optimizers import Adam
from bic2 import calcula_BICs

# Data loading and preprocessing.
df = pd.read_csv('/Users/pablo/Documents/IEIA/TFG/datos_csv/fault_free_testing.csv')

# Keep the first 240k rows and drop the three leading columns, which were
# noted as carrying no useful information.
X = df.iloc[:240000, 3:]

# Per-variable minimum and maximum of the raw (un-normalised) data.
Xmin = X.min(axis=0).values
Xmax = X.max(axis=0).values

# Min-max normalisation; the transformed data comes back as a numpy ndarray.
scaler = MinMaxScaler().fit(X)
Xn = scaler.transform(X)

# The normalised matrix is split into the blocks obtained with the mRMR
# method. The table below uses 1-based column numbers:
# Block | Columns
#    1     1, 44, 15, 49, 12, 48, 30, 37, 45
#    2     3, 4, 7, 8, 10, 11, 13, 16, 18, 19, 22, 25, 31, 35, 43, 47, 50
#    3     5, 17, 42, 46, 52
#    4     2, 9, 21, 51
#    5     20, 27, 28, 33, 34, 36
#    6     6, 23, 24, 29, 38, 39, 41
#    7     14, 26, 32, 40

# Convert each 1-based block of the table into 0-based Python indices.
cols_X1, cols_X2, cols_X3, cols_X4, cols_X5, cols_X6, cols_X7 = (
    [columna - 1 for columna in bloque]
    for bloque in (
        (1, 44, 15, 49, 12, 48, 30, 37, 45),
        (3, 4, 7, 8, 10, 11, 13, 16, 18, 19, 22, 25, 31, 35, 43, 47, 50),
        (5, 17, 42, 46, 52),
        (2, 9, 21, 51),
        (20, 27, 28, 33, 34, 36),
        (6, 23, 24, 29, 38, 39, 41),
        (14, 26, 32, 40),
    )
)

# Column-select each block out of the normalised matrix.
X1n, X2n, X3n, X4n, X5n, X6n, X7n = (
    Xn[:, indices]
    for indices in (cols_X1, cols_X2, cols_X3, cols_X4, cols_X5, cols_X6, cols_X7)
)

# To verify the dimensions:
# print("X1n:", X1n.shape)
# print("X2n:", X2n.shape)
# print("X3n:", X3n.shape)
# print("X4n:", X4n.shape)
# print("X5n:", X5n.shape)
# print("X6n:", X6n.shape)
# print("X7n:", X7n.shape)


# Build one autoencoder model per variable subgroup

def _plot_statistic(values, threshold, label, nombre):
    """Plot a monitoring statistic together with its threshold line."""
    plt.figure(figsize=(10, 5))
    plt.plot(values, label=label, color='blue')
    plt.axhline(y=threshold, color='red', linestyle='--', label='Umbral (99%)')
    plt.title(f'Gráfico {label} - {nombre}')
    plt.xlabel('Observación')
    plt.ylabel(label)
    plt.legend()
    plt.grid()
    plt.show()


def train_lstm_autoencoder_group(Xgroup, nombre="Grupo", time_steps=5, epochs=4, batch_size=32):
    """
    Train an LSTM autoencoder for one group of variables.

    The data is cut into overlapping windows of ``time_steps`` consecutive
    rows, an LSTM encoder/decoder is fitted to reconstruct those windows, and
    the T² (latent space) and Q (reconstruction residual) statistics are
    computed on the training windows. Three figures are shown: the loss
    curves, the T² chart and the Q chart.

    Parameters
    ----------
    Xgroup : numpy.ndarray
        Normalised data of the group, shaped (observations x variables).
    nombre : str
        Group name, used only in the plot titles.
    time_steps : int
        Length of the sliding window.
    epochs : int
        Number of training epochs.
    batch_size : int
        Training batch size.

    Returns
    -------
    autoencoder : keras Model
        The trained window-to-window autoencoder.
    encoder : keras Model
        Model mapping an input window to its latent vector; it is built from
        the same layers as ``autoencoder``.
    history : keras History
        Object returned by ``autoencoder.fit``.
    T2 : numpy.ndarray
        T² value of every training window.
    uT2 : float
        99th percentile of ``T2``.
    Q : numpy.ndarray
        Squared reconstruction error of every training window.
    uQ : float
        99th percentile of ``Q``.
    hm : numpy.ndarray
        Mean latent vector over the training windows.
    hdesv : numpy.ndarray
        Covariance matrix of the latent vectors plus ``1e-5`` on the diagonal.
    time_steps : int
        The window length that was used.

    Raises
    ------
    ValueError
        If ``Xgroup`` is not 2-D, ``time_steps`` is smaller than 1, or
        ``Xgroup`` does not have more rows than ``time_steps``.
    """
    Xgroup = np.asarray(Xgroup)
    if Xgroup.ndim != 2:
        raise ValueError(
            f"Xgroup must be 2-D (observations x variables), got {Xgroup.ndim}-D"
        )
    if time_steps < 1:
        raise ValueError(f"time_steps must be >= 1, got {time_steps}")

    # NOTE: the window starting at row len(Xgroup) - time_steps is not
    # generated; this count is kept so the length of T2/Q does not change.
    n_windows = len(Xgroup) - time_steps
    if n_windows < 1:
        raise ValueError(
            f"Xgroup needs more than time_steps={time_steps} rows, got {len(Xgroup)}"
        )

    # Sliding windows shaped (n_windows, time_steps, variables)
    X_lstm = np.stack([Xgroup[i:i + time_steps] for i in range(n_windows)])

    input_dim = X_lstm.shape[2]

    # Layer sizes derived from the number of variables, with lower bounds of
    # 4 and 2 units so that small groups still get a usable network.
    lstm_units = [max(int(input_dim / 1.3), 4), max(int(input_dim / 3), 2)]

    # Encoder: the second LSTM returns only its last state (the latent vector)
    input_layer = Input(shape=(time_steps, input_dim))
    encoded = LSTM(lstm_units[0], activation='selu', return_sequences=True)(input_layer)
    encoded = LSTM(lstm_units[1], activation='selu', return_sequences=False)(encoded)

    # Decoder: repeat the latent vector once per time step and mirror the encoder
    decoded = RepeatVector(time_steps)(encoded)
    decoded = LSTM(lstm_units[1], activation='selu', return_sequences=True)(decoded)
    decoded = LSTM(lstm_units[0], activation='selu', return_sequences=True)(decoded)
    decoded = TimeDistributed(Dense(input_dim))(decoded)

    autoencoder = Model(input_layer, decoded)
    autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

    history = autoencoder.fit(X_lstm, X_lstm, epochs=epochs, batch_size=batch_size, validation_split=0.2, verbose=1)

    plt.figure()
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.xlabel('Épocas')
    plt.ylabel('Loss')
    plt.title(f'Loss - {nombre}')
    plt.legend()
    plt.grid()
    plt.show()

    # Encoder model built on the already-trained layers
    encoder = Model(input_layer, encoded)

    # T² diagnostic: Mahalanobis-type distance of each latent vector to the mean
    h = encoder.predict(X_lstm)
    hm = h.mean(axis=0)
    # Small diagonal term added before inverting the covariance matrix
    hdesv = np.cov(h.T) + np.eye(h.shape[1]) * 1e-5
    covin = np.linalg.inv(hdesv)

    # Row-wise quadratic form d_i · covin · d_i, computed without a Python loop
    centered = h - hm
    T2 = np.einsum('ij,ij->i', centered @ covin, centered)
    uT2 = np.percentile(T2, 99)

    _plot_statistic(T2, uT2, 'T²', nombre)

    # Q diagnostic: squared reconstruction error summed over each whole window
    X_pred = autoencoder.predict(X_lstm)
    res = X_lstm - X_pred
    residuo = res.reshape(res.shape[0], -1)

    Q = np.einsum('ij,ij->i', residuo, residuo)
    uQ = np.percentile(Q, 99)

    _plot_statistic(Q, uQ, 'Q', nombre)

    return autoencoder, encoder, history, T2, uT2, Q, uQ, hm, hdesv, time_steps

# Call the function to create an autoencoder for each of our groups

import os


# Output locations for the trained models and for the pickled statistics
model_dir = '/Users/pablo/Documents/IEIA/TFG/MODELOS/Distributed1_Models'
data_path = '/Users/pablo/Documents/IEIA/TFG/_Autoencoders/Distributed1_Data.pkl'

# Create both output directories before training. If the data directory were
# missing, the failure would only show up at the final pickle.dump, after
# every model has already been trained.
os.makedirs(model_dir, exist_ok=True)
data_dir = os.path.dirname(data_path)
if data_dir:
    os.makedirs(data_dir, exist_ok=True)

# Normalised data matrix of each subgroup
grupos = {
    'X1n': X1n,
    'X2n': X2n,
    'X3n': X3n,
    'X4n': X4n,
    'X5n': X5n,
    'X6n': X6n,
    'X7n': X7n,
}

# Column indices associated with each subgroup
columnas_grupo = {
    'X1n': cols_X1,
    'X2n': cols_X2,
    'X3n': cols_X3,
    'X4n': cols_X4,
    'X5n': cols_X5,
    'X6n': cols_X6,
    'X7n': cols_X7,
}

# Metrics and statistics of every group, keyed by group name
results = {}

# Train one autoencoder per group and save its models
for nombre, datos in grupos.items():
    print(f"\n📦 Entrenando autoencoder para el grupo: {nombre}")
    autoencoder, encoder, history, T2, uT2, Q, uQ, hm, hdesv, time_steps = train_lstm_autoencoder_group(datos, nombre)

    # Save the full autoencoder and the encoder as separate .keras files
    autoencoder_path = os.path.join(model_dir, f"{nombre}_autoencoder.keras")
    encoder_path = os.path.join(model_dir, f"{nombre}_encoder.keras")
    autoencoder.save(autoencoder_path)
    encoder.save(encoder_path)

    # Keep the statistics of this group
    results[nombre] = {
        'history': history.history,
        'T2': T2,
        'uT2': uT2,
        'Q': Q,
        'uQ': uQ,
        'hm': hm,
        'hdesv': hdesv,
        'time_steps': time_steps,
        'columnas': columnas_grupo[nombre],
    }


#===============================
# BIC COMPUTATION
#===============================

# Collect the per-group statistics in the order the groups were declared.
# The names come from `grupos` so the list is written only once.
nombres_grupos = list(grupos)
T2_list = [results[k]['T2'] for k in nombres_grupos]
uT2_list = [results[k]['uT2'] for k in nombres_grupos]
Q_list = [results[k]['Q'] for k in nombres_grupos]
uQ_list = [results[k]['uQ'] for k in nombres_grupos]

# One column per group for the statistics, one entry per group for the thresholds
T2_array = np.column_stack(T2_list)
Q_array = np.column_stack(Q_list)
uT2_array = np.array(uT2_list)
uQ_array = np.array(uQ_list)

# NOTE(review): calcula_BICs comes from the bic2 module; its exact semantics
# are not visible here - confirm the meaning of alpha_BIC against that module.
bic_t2, bic_q, umbral_bic_t2, umbral_bic_q = calcula_BICs(T2_array, uT2_array, Q_array, uQ_array, alpha_BIC=0.5)

# The BIC outputs are stored under their own key in the same results
# dictionary (and therefore in the same pickle file).
bic_data = {
    'bic_t2': bic_t2,
    'bic_q': bic_q,
    'umbral_bic_t2': umbral_bic_t2,
    'umbral_bic_q': umbral_bic_q,
}

results['BICs'] = bic_data


# Save all the results in a single .pkl file
with open(data_path, 'wb') as f:
    pickle.dump(results, f)

print("\n✅ Modelos y datos guardados correctamente.")
print(f"🔒 Modelos en: {model_dir}")
print(f"📄 Datos en: {data_path}")