Files
Trading-Bot/tests/test_data.py
2026-01-26 18:57:42 +01:00

168 lines
5.5 KiB
Python

# tests/test_data.py
"""
Tests unitarios para el módulo de datos
"""
import pytest
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from src.data.processor import DataProcessor
class TestDataProcessor:
    """Unit tests for ``DataProcessor``.

    Every test receives the ``sample_ohlcv_data`` fixture: 100 hourly candles
    of seeded random data, so the input is reproducible from run to run.
    """

    @pytest.fixture
    def sample_ohlcv_data(self) -> pd.DataFrame:
        """Build a sample OHLCV frame for the tests.

        Returns:
            A DataFrame indexed by 100 hourly timestamps starting at
            2024-01-01, with float ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns plus constant ``symbol`` and ``timeframe``
            columns.
        """
        # Lowercase 'h' on purpose: the uppercase 'H' alias is deprecated
        # since pandas 2.2 and emits a FutureWarning.
        dates = pd.date_range(start='2024-01-01', periods=100, freq='1h')
        # Fixed seed so the generated candles are identical on every run.
        np.random.seed(42)
        base_price = 50000
        df = pd.DataFrame({
            'open': base_price + np.random.randn(100) * 100,
            'high': base_price + np.random.randn(100) * 100 + 50,
            'low': base_price + np.random.randn(100) * 100 - 50,
            'close': base_price + np.random.randn(100) * 100,
            'volume': np.random.randint(1000, 10000, 100).astype(float),
        }, index=dates)
        # Make every candle coherent: high becomes the row-wise maximum of
        # open/close/high and low the row-wise minimum of open/close/low,
        # which guarantees low <= open, close <= high.
        df['high'] = df[['open', 'close', 'high']].max(axis=1)
        df['low'] = df[['open', 'close', 'low']].min(axis=1)
        df['symbol'] = 'BTC/USDT'
        df['timeframe'] = '1h'
        return df

    def test_validate_ohlcv_valid(self, sample_ohlcv_data) -> None:
        """A well-formed OHLCV frame passes validation."""
        processor = DataProcessor()
        assert processor.validate_ohlcv(sample_ohlcv_data)

    def test_validate_ohlcv_missing_columns(self, sample_ohlcv_data) -> None:
        """A frame without the ``close`` column fails validation."""
        processor = DataProcessor()
        df_invalid = sample_ohlcv_data.drop(columns=['close'])
        assert not processor.validate_ohlcv(df_invalid)

    def test_clean_data_removes_duplicates(self, sample_ohlcv_data) -> None:
        """Cleaning leaves no duplicated timestamps in the index."""
        processor = DataProcessor()
        # Re-append the first five rows so their timestamps appear twice.
        df_with_dupes = pd.concat(
            [sample_ohlcv_data, sample_ohlcv_data.iloc[:5]]
        )
        df_clean = processor.clean_data(df_with_dupes)
        # No index label may be repeated after cleaning.
        assert df_clean.index.duplicated().sum() == 0

    def test_clean_data_handles_nan(self, sample_ohlcv_data) -> None:
        """Cleaning leaves no NaN values anywhere in the frame."""
        processor = DataProcessor()
        # Blank out five consecutive close prices (rows 10-14).
        df_with_nan = sample_ohlcv_data.copy()
        df_with_nan.loc[df_with_nan.index[10:15], 'close'] = np.nan
        df_clean = processor.clean_data(df_with_nan)
        # Not a single missing value may remain in any column.
        assert df_clean.isnull().sum().sum() == 0

    def test_resample_timeframe(self, sample_ohlcv_data) -> None:
        """Resampling 1h candles to 4h yields fewer, still coherent candles."""
        processor = DataProcessor()
        # Resample from the 1h sample data to a 4h timeframe.
        df_4h = processor.resample_timeframe(sample_ohlcv_data, '4h')
        # A coarser timeframe must contain fewer candles.
        assert len(df_4h) < len(sample_ohlcv_data)
        # The timeframe label must reflect the new resolution.
        assert df_4h['timeframe'].iloc[0] == '4h'
        # High must remain the upper bound of every resampled candle.
        assert (df_4h['high'] >= df_4h['low']).all()
        assert (df_4h['high'] >= df_4h['open']).all()
        assert (df_4h['high'] >= df_4h['close']).all()

    def test_calculate_returns(self, sample_ohlcv_data) -> None:
        """Return calculation adds ``returns`` and ``log_returns`` columns."""
        processor = DataProcessor()
        df_returns = processor.calculate_returns(sample_ohlcv_data)
        # Both return columns must have been added.
        assert 'returns' in df_returns.columns
        assert 'log_returns' in df_returns.columns
        # The first row has no previous candle, so its return must be NaN.
        assert pd.isna(df_returns['returns'].iloc[0])

    def test_detect_gaps(self, sample_ohlcv_data) -> None:
        """Gap detection reports a hole of missing hourly candles."""
        processor = DataProcessor()
        # Drop rows 50-59 to create an artificial ten-candle hole.
        df_before_gap = sample_ohlcv_data.iloc[:50].copy()
        df_after_gap = sample_ohlcv_data.iloc[60:].copy()
        df_with_gap = pd.concat([df_before_gap, df_after_gap])
        gaps = processor.detect_gaps(df_with_gap, '1h')
        # At least one gap must be detected.
        assert len(gaps) > 0

    def test_normalize_minmax(self, sample_ohlcv_data) -> None:
        """Min-max normalization keeps every numeric column within [0, 1]."""
        processor = DataProcessor()
        df_norm = processor.normalize_data(sample_ohlcv_data, method='minmax')
        # Only numeric columns are checked; symbol/timeframe are strings.
        numeric_cols = df_norm.select_dtypes(include=[np.number]).columns
        for col in numeric_cols:
            assert df_norm[col].min() >= 0
            assert df_norm[col].max() <= 1

    def test_normalize_zscore(self, sample_ohlcv_data) -> None:
        """Z-score normalization gives each numeric column mean ~0, std ~1."""
        processor = DataProcessor()
        df_norm = processor.normalize_data(sample_ohlcv_data, method='zscore')
        # Only numeric columns are checked; symbol/timeframe are strings.
        numeric_cols = df_norm.select_dtypes(include=[np.number]).columns
        for col in numeric_cols:
            mean = df_norm[col].mean()
            std = df_norm[col].std()
            # A 0.1 tolerance absorbs floating-point and estimator differences.
            assert abs(mean) < 0.1  # close to 0
            assert abs(std - 1) < 0.1  # close to 1
# Para ejecutar tests:
# pytest tests/test_data.py -v