# tests/test_data.py
"""Unit tests for the data module."""
import pytest
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

from src.data.processor import DataProcessor


class TestDataProcessor:
    """Tests for DataProcessor."""

    @pytest.fixture
    def sample_ohlcv_data(self):
        """Generate a 100-row hourly OHLCV DataFrame for tests.

        Returns a DataFrame indexed by timestamp with columns
        open/high/low/close/volume plus 'symbol' and 'timeframe'.
        """
        # Lowercase 'h' — uppercase 'H' is deprecated since pandas 2.2.
        dates = pd.date_range(start='2024-01-01', periods=100, freq='1h')
        np.random.seed(42)  # deterministic data so tests are reproducible
        base_price = 50000
        df = pd.DataFrame({
            'open': base_price + np.random.randn(100) * 100,
            'high': base_price + np.random.randn(100) * 100 + 50,
            'low': base_price + np.random.randn(100) * 100 - 50,
            'close': base_price + np.random.randn(100) * 100,
            'volume': np.random.randint(1000, 10000, 100).astype(float),
        }, index=dates)
        # Enforce OHLC invariants: high >= open/close, low <= open/close.
        df['high'] = df[['open', 'close', 'high']].max(axis=1)
        df['low'] = df[['open', 'close', 'low']].min(axis=1)
        df['symbol'] = 'BTC/USDT'
        df['timeframe'] = '1h'
        return df

    def test_validate_ohlcv_valid(self, sample_ohlcv_data):
        """Validation accepts well-formed OHLCV data."""
        processor = DataProcessor()
        # Plain truthiness assert instead of '== True' (ruff E712).
        assert processor.validate_ohlcv(sample_ohlcv_data)

    def test_validate_ohlcv_missing_columns(self, sample_ohlcv_data):
        """Validation rejects data with a required column missing."""
        processor = DataProcessor()
        df_invalid = sample_ohlcv_data.drop(columns=['close'])
        assert not processor.validate_ohlcv(df_invalid)

    def test_clean_data_removes_duplicates(self, sample_ohlcv_data):
        """Cleaning removes duplicated index entries."""
        processor = DataProcessor()
        # Append the first 5 rows again to create index duplicates.
        df_with_dupes = pd.concat([sample_ohlcv_data, sample_ohlcv_data.iloc[:5]])
        df_clean = processor.clean_data(df_with_dupes)
        # No duplicates should remain in the index.
        assert df_clean.index.duplicated().sum() == 0

    def test_clean_data_handles_nan(self, sample_ohlcv_data):
        """Cleaning removes or fills NaN values."""
        processor = DataProcessor()
        # Inject a run of NaNs into 'close'.
        df_with_nan = sample_ohlcv_data.copy()
        df_with_nan.loc[df_with_nan.index[10:15], 'close'] = np.nan
        df_clean = processor.clean_data(df_with_nan)
        # No NaN should remain anywhere in the frame.
        assert df_clean.isnull().sum().sum() == 0

    def test_resample_timeframe(self, sample_ohlcv_data):
        """Resampling to a larger timeframe yields fewer, valid candles."""
        processor = DataProcessor()
        # Resample from 1h to 4h.
        df_4h = processor.resample_timeframe(sample_ohlcv_data, '4h')
        # Fewer candles after aggregation.
        assert len(df_4h) < len(sample_ohlcv_data)
        # Timeframe label must be updated.
        assert df_4h['timeframe'].iloc[0] == '4h'
        # OHLC invariants still hold after aggregation.
        assert (df_4h['high'] >= df_4h['low']).all()
        assert (df_4h['high'] >= df_4h['open']).all()
        assert (df_4h['high'] >= df_4h['close']).all()

    def test_calculate_returns(self, sample_ohlcv_data):
        """Return calculation adds 'returns' and 'log_returns' columns."""
        processor = DataProcessor()
        df_returns = processor.calculate_returns(sample_ohlcv_data)
        assert 'returns' in df_returns.columns
        assert 'log_returns' in df_returns.columns
        # First row has no previous price, so its return must be NaN.
        assert pd.isna(df_returns['returns'].iloc[0])

    def test_detect_gaps(self, sample_ohlcv_data):
        """Gap detection finds a hole introduced in the time index."""
        processor = DataProcessor()
        # Drop rows 50-59 to create an artificial 10-hour gap.
        df_with_gap = sample_ohlcv_data.iloc[:50].copy()
        df_after_gap = sample_ohlcv_data.iloc[60:].copy()
        df_with_gap = pd.concat([df_with_gap, df_after_gap])
        gaps = processor.detect_gaps(df_with_gap, '1h')
        # At least one gap should be reported.
        assert len(gaps) > 0

    def test_normalize_minmax(self, sample_ohlcv_data):
        """Min-max normalization maps numeric columns into [0, 1]."""
        processor = DataProcessor()
        df_norm = processor.normalize_data(sample_ohlcv_data, method='minmax')
        numeric_cols = df_norm.select_dtypes(include=[np.number]).columns
        for col in numeric_cols:
            assert df_norm[col].min() >= 0
            assert df_norm[col].max() <= 1

    def test_normalize_zscore(self, sample_ohlcv_data):
        """Z-score normalization yields mean ~0 and std ~1 per column."""
        processor = DataProcessor()
        df_norm = processor.normalize_data(sample_ohlcv_data, method='zscore')
        numeric_cols = df_norm.select_dtypes(include=[np.number]).columns
        for col in numeric_cols:
            mean = df_norm[col].mean()
            std = df_norm[col].std()
            assert abs(mean) < 0.1   # close to 0
            assert abs(std - 1) < 0.1  # close to 1


# To run tests:
# pytest tests/test_data.py -v