Initial commit

2026-01-26 18:57:42 +01:00
commit dcffd9dfad
23 changed files with 1639 additions and 0 deletions
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -0,0 +1,168 @@
+# tests/test_data.py
+"""
+Unit tests for the data module
+"""
+import pytest
+import pandas as pd
+import numpy as np
+from datetime import datetime, timedelta
+from src.data.processor import DataProcessor
+
+class TestDataProcessor:
+    """
+    Tests para DataProcessor
+    """
+    
+    @pytest.fixture
+    def sample_ohlcv_data(self):
+        """
+        Genera datos OHLCV de ejemplo para tests
+        """
+        dates = pd.date_range(start='2024-01-01', periods=100, freq='1H')
+        np.random.seed(42)
+        
+        base_price = 50000
+        df = pd.DataFrame({
+            'open': base_price + np.random.randn(100) * 100,
+            'high': base_price + np.random.randn(100) * 100 + 50,
+            'low': base_price + np.random.randn(100) * 100 - 50,
+            'close': base_price + np.random.randn(100) * 100,
+            'volume': np.random.randint(1000, 10000, 100).astype(float),
+        }, index=dates)
+        
+        # Asegurar que high >= low
+        df['high'] = df[['open', 'close', 'high']].max(axis=1)
+        df['low'] = df[['open', 'close', 'low']].min(axis=1)
+        
+        df['symbol'] = 'BTC/USDT'
+        df['timeframe'] = '1h'
+        
+        return df
+    
+    def test_validate_ohlcv_valid(self, sample_ohlcv_data):
+        """
+        Test validación de datos OHLCV correctos
+        """
+        processor = DataProcessor()
+        assert processor.validate_ohlcv(sample_ohlcv_data) == True
+    
+    def test_validate_ohlcv_missing_columns(self, sample_ohlcv_data):
+        """
+        Test validación con columnas faltantes
+        """
+        processor = DataProcessor()
+        df_invalid = sample_ohlcv_data.drop(columns=['close'])
+        assert processor.validate_ohlcv(df_invalid) == False
+    
+    def test_clean_data_removes_duplicates(self, sample_ohlcv_data):
+        """
+        Test limpieza elimina duplicados
+        """
+        processor = DataProcessor()
+        
+        # Añadir duplicados
+        df_with_dupes = pd.concat([sample_ohlcv_data, sample_ohlcv_data.iloc[:5]])
+        
+        df_clean = processor.clean_data(df_with_dupes)
+        
+        # Verificar que no hay duplicados en el índice
+        assert df_clean.index.duplicated().sum() == 0
+    
+    def test_clean_data_handles_nan(self, sample_ohlcv_data):
+        """
+        Test limpieza maneja valores NaN
+        """
+        processor = DataProcessor()
+        
+        # Introducir NaN
+        df_with_nan = sample_ohlcv_data.copy()
+        df_with_nan.loc[df_with_nan.index[10:15], 'close'] = np.nan
+        
+        df_clean = processor.clean_data(df_with_nan)
+        
+        # Verificar que no quedan NaN
+        assert df_clean.isnull().sum().sum() == 0
+    
+    def test_resample_timeframe(self, sample_ohlcv_data):
+        """
+        Test resampleo a timeframe mayor
+        """
+        processor = DataProcessor()
+        
+        # Resamplear de 1h a 4h
+        df_4h = processor.resample_timeframe(sample_ohlcv_data, '4h')
+        
+        # Verificar que hay menos velas
+        assert len(df_4h) < len(sample_ohlcv_data)
+        
+        # Verificar que el timeframe se actualizó
+        assert df_4h['timeframe'].iloc[0] == '4h'
+        
+        # Verificar lógica OHLC
+        assert (df_4h['high'] >= df_4h['low']).all()
+        assert (df_4h['high'] >= df_4h['open']).all()
+        assert (df_4h['high'] >= df_4h['close']).all()
+    
+    def test_calculate_returns(self, sample_ohlcv_data):
+        """
+        Test cálculo de retornos
+        """
+        processor = DataProcessor()
+        
+        df_returns = processor.calculate_returns(sample_ohlcv_data)
+        
+        # Verificar que se añadieron columnas de retornos
+        assert 'returns' in df_returns.columns
+        assert 'log_returns' in df_returns.columns
+        
+        # Verificar que el primer valor es NaN (no hay retorno previo)
+        assert pd.isna(df_returns['returns'].iloc[0])
+    
+    def test_detect_gaps(self, sample_ohlcv_data):
+        """
+        Test detección de gaps
+        """
+        processor = DataProcessor()
+        
+        # Crear datos con gap artificial
+        df_with_gap = sample_ohlcv_data.iloc[:50].copy()
+        df_after_gap = sample_ohlcv_data.iloc[60:].copy()
+        df_with_gap = pd.concat([df_with_gap, df_after_gap])
+        
+        gaps = processor.detect_gaps(df_with_gap, '1h')
+        
+        # Debería detectar al menos un gap
+        assert len(gaps) > 0
+    
+    def test_normalize_minmax(self, sample_ohlcv_data):
+        """
+        Test normalización min-max
+        """
+        processor = DataProcessor()
+        
+        df_norm = processor.normalize_data(sample_ohlcv_data, method='minmax')
+        
+        # Verificar que valores están entre 0 y 1
+        numeric_cols = df_norm.select_dtypes(include=[np.number]).columns
+        for col in numeric_cols:
+            assert df_norm[col].min() >= 0
+            assert df_norm[col].max() <= 1
+    
+    def test_normalize_zscore(self, sample_ohlcv_data):
+        """
+        Test normalización z-score
+        """
+        processor = DataProcessor()
+        
+        df_norm = processor.normalize_data(sample_ohlcv_data, method='zscore')
+        
+        # Verificar que la media es cercana a 0 y std cercana a 1
+        numeric_cols = df_norm.select_dtypes(include=[np.number]).columns
+        for col in numeric_cols:
+            mean = df_norm[col].mean()
+            std = df_norm[col].std()
+            assert abs(mean) < 0.1  # Cercano a 0
+            assert abs(std - 1) < 0.1  # Cercano a 1
+
+# To run the tests:
+# pytest tests/test_data.py -v