# tests/test_data.py
"""Unit tests for the data module."""
import pytest
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

from src.data.processor import DataProcessor


class TestDataProcessor:
    """Tests for DataProcessor."""

    @pytest.fixture
    def sample_ohlcv_data(self):
        """Generate a 100-row hourly OHLCV DataFrame for tests.

        Returns a DataFrame indexed by timestamp with columns
        open/high/low/close/volume plus 'symbol' and 'timeframe'.
        """
        # Lowercase 'h' — uppercase 'H' is deprecated since pandas 2.2.
        dates = pd.date_range(start='2024-01-01', periods=100, freq='1h')
        np.random.seed(42)  # deterministic data so tests are reproducible
        base_price = 50000
        df = pd.DataFrame({
            'open': base_price + np.random.randn(100) * 100,
            'high': base_price + np.random.randn(100) * 100 + 50,
            'low': base_price + np.random.randn(100) * 100 - 50,
            'close': base_price + np.random.randn(100) * 100,
            'volume': np.random.randint(1000, 10000, 100).astype(float),
        }, index=dates)
        # Enforce OHLC invariants: high >= open/close, low <= open/close.
        df['high'] = df[['open', 'close', 'high']].max(axis=1)
        df['low'] = df[['open', 'close', 'low']].min(axis=1)
        df['symbol'] = 'BTC/USDT'
        df['timeframe'] = '1h'
        return df

    def test_validate_ohlcv_valid(self, sample_ohlcv_data):
        """Validation accepts well-formed OHLCV data."""
        processor = DataProcessor()
        # Plain truthiness assert instead of '== True' (ruff E712).
        assert processor.validate_ohlcv(sample_ohlcv_data)

    def test_validate_ohlcv_missing_columns(self, sample_ohlcv_data):
        """Validation rejects data with a required column missing."""
        processor = DataProcessor()
        df_invalid = sample_ohlcv_data.drop(columns=['close'])
        assert not processor.validate_ohlcv(df_invalid)

    def test_clean_data_removes_duplicates(self, sample_ohlcv_data):
        """Cleaning removes duplicated index entries."""
        processor = DataProcessor()
        # Append the first 5 rows again to create index duplicates.
        df_with_dupes = pd.concat([sample_ohlcv_data, sample_ohlcv_data.iloc[:5]])
        df_clean = processor.clean_data(df_with_dupes)
        # No duplicates should remain in the index.
        assert df_clean.index.duplicated().sum() == 0

    def test_clean_data_handles_nan(self, sample_ohlcv_data):
        """Cleaning removes or fills NaN values."""
        processor = DataProcessor()
        # Inject a run of NaNs into 'close'.
        df_with_nan = sample_ohlcv_data.copy()
        df_with_nan.loc[df_with_nan.index[10:15], 'close'] = np.nan
        df_clean = processor.clean_data(df_with_nan)
        # No NaN should remain anywhere in the frame.
        assert df_clean.isnull().sum().sum() == 0

    def test_resample_timeframe(self, sample_ohlcv_data):
        """Resampling to a larger timeframe yields fewer, valid candles."""
        processor = DataProcessor()
        # Resample from 1h to 4h.
        df_4h = processor.resample_timeframe(sample_ohlcv_data, '4h')
        # Fewer candles after aggregation.
        assert len(df_4h) < len(sample_ohlcv_data)
        # Timeframe label must be updated.
        assert df_4h['timeframe'].iloc[0] == '4h'
        # OHLC invariants still hold after aggregation.
        assert (df_4h['high'] >= df_4h['low']).all()
        assert (df_4h['high'] >= df_4h['open']).all()
        assert (df_4h['high'] >= df_4h['close']).all()

    def test_calculate_returns(self, sample_ohlcv_data):
        """Return calculation adds 'returns' and 'log_returns' columns."""
        processor = DataProcessor()
        df_returns = processor.calculate_returns(sample_ohlcv_data)
        assert 'returns' in df_returns.columns
        assert 'log_returns' in df_returns.columns
        # First row has no previous price, so its return must be NaN.
        assert pd.isna(df_returns['returns'].iloc[0])

    def test_detect_gaps(self, sample_ohlcv_data):
        """Gap detection finds a hole introduced in the time index."""
        processor = DataProcessor()
        # Drop rows 50-59 to create an artificial 10-hour gap.
        df_with_gap = sample_ohlcv_data.iloc[:50].copy()
        df_after_gap = sample_ohlcv_data.iloc[60:].copy()
        df_with_gap = pd.concat([df_with_gap, df_after_gap])
        gaps = processor.detect_gaps(df_with_gap, '1h')
        # At least one gap should be reported.
        assert len(gaps) > 0

    def test_normalize_minmax(self, sample_ohlcv_data):
        """Min-max normalization maps numeric columns into [0, 1]."""
        processor = DataProcessor()
        df_norm = processor.normalize_data(sample_ohlcv_data, method='minmax')
        numeric_cols = df_norm.select_dtypes(include=[np.number]).columns
        for col in numeric_cols:
            assert df_norm[col].min() >= 0
            assert df_norm[col].max() <= 1

    def test_normalize_zscore(self, sample_ohlcv_data):
        """Z-score normalization yields mean ~0 and std ~1 per column."""
        processor = DataProcessor()
        df_norm = processor.normalize_data(sample_ohlcv_data, method='zscore')
        numeric_cols = df_norm.select_dtypes(include=[np.number]).columns
        for col in numeric_cols:
            mean = df_norm[col].mean()
            std = df_norm[col].std()
            assert abs(mean) < 0.1   # close to 0
            assert abs(std - 1) < 0.1  # close to 1


# To run tests:
# pytest tests/test_data.py -v