docs: update backtesting research, optimizer, ADX and visual analysis

This commit is contained in:
DaM
2026-01-28 16:24:34 +01:00
parent 1add69eb56
commit e15074c0a7
10 changed files with 396 additions and 408 deletions

View File

@@ -1,5 +1,4 @@
# src/data/fetcher.py
# src/data/fetcher.py
"""
Módulo para obtener datos de exchanges usando CCXT
"""
@@ -110,7 +109,9 @@ class DataFetcher:
self,
symbol: str,
timeframe: str = '1h',
days: int = 30,
since: Optional[datetime] = None,
until: Optional[datetime] = None,
days: Optional[int] = None,
max_retries: int = 3
) -> pd.DataFrame:
"""
@@ -126,7 +127,13 @@ class DataFetcher:
DataFrame con todos los datos históricos
"""
all_data = []
since = datetime.now() - timedelta(days=days)
if since is None:
if days is None:
raise ValueError("Debes proporcionar 'since' o 'days'")
since = datetime.utcnow() - timedelta(days=days)
if until is None:
until = datetime.utcnow()
log.info(f"Iniciando descarga histórica: {symbol} desde {since.date()}")
@@ -151,7 +158,8 @@ class DataFetcher:
# Actualizar 'since' al último timestamp + 1
last_timestamp = df.index[-1]
since = last_timestamp + pd.Timedelta(seconds=1)
timeframe_seconds = self.exchange.parse_timeframe(timeframe)
since = last_timestamp + pd.Timedelta(seconds=timeframe_seconds)
# Verificar si ya llegamos al presente
if since >= datetime.now():

View File

@@ -3,6 +3,7 @@
Módulo para limpiar, validar y procesar datos de mercado
"""
import pandas as pd
import pandas_ta as ta
import numpy as np
from typing import Optional
from ..utils.logger import log
@@ -266,4 +267,23 @@ class DataProcessor:
raise ValueError(f"Método {method} no soportado")
log.debug(f"Datos normalizados usando {method}")
return df_norm
return df_norm
@staticmethod
def calculate_indicators(df: pd.DataFrame, length: int = 14) -> pd.DataFrame:
    """
    Compute technical indicators (currently only ADX) on a copy of ``df``.

    Args:
        df: Market data containing 'high', 'low' and 'close' columns.
        length: ADX look-back period. It is also used to select the
            ``ADX_<length>`` column from the pandas_ta result.
            Defaults to 14, the value that was previously hard-coded.

    Returns:
        A copy of ``df`` with an extra 'adx' column. The column is all-NaN
        when the indicator could not be computed.
    """
    result = df.copy()

    adx = ta.adx(
        high=df['high'],
        low=df['low'],
        close=df['close'],
        length=length
    )

    # pandas_ta may return None instead of a DataFrame (e.g. when the input
    # has fewer rows than the period); indexing None would raise TypeError.
    if adx is None:
        log.warning("No se pudo calcular ADX: datos insuficientes")
        result['adx'] = np.nan
        return result

    # The result column is named after the period, so derive it from `length`
    # rather than repeating the literal.
    result['adx'] = adx[f'ADX_{length}']
    log.debug("Indicadores técnicos calculados (ADX)")
    return result

View File

@@ -3,51 +3,47 @@
Módulo para almacenamiento persistente de datos en PostgreSQL y caché en Redis
"""
import pandas as pd
from sqlalchemy import create_engine, Column, String, Float, DateTime, Integer, Index, text
from sqlalchemy import (
create_engine, Column, String, Float,
DateTime, Integer, Index, text
)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from datetime import datetime
from typing import Optional, List
from typing import Optional
import redis
import json
from ..utils.logger import log
Base = declarative_base()
class OHLCV(Base):
"""
Modelo de tabla para datos OHLCV
"""
__tablename__ = 'ohlcv'
id = Column(Integer, primary_key=True, autoincrement=True)
id = Column(Integer, primary_key=True)
timestamp = Column(DateTime, nullable=False)
symbol = Column(String(20), nullable=False)
timeframe = Column(String(10), nullable=False)
open = Column(Float, nullable=False)
high = Column(Float, nullable=False)
low = Column(Float, nullable=False)
close = Column(Float, nullable=False)
volume = Column(Float, nullable=False)
returns = Column(Float, nullable=True) # Retornos simples
log_returns = Column(Float, nullable=True) # Retornos logarítmicos
# Índices compuestos para queries rápidas
returns = Column(Float)
log_returns = Column(Float)
adx = Column(Float)
__table_args__ = (
Index('idx_symbol_timeframe_timestamp', 'symbol', 'timeframe', 'timestamp'),
Index('idx_timestamp', 'timestamp'),
# CONSTRAINT único: no permitir duplicados
# Una combinación de symbol + timeframe + timestamp debe ser única
{'sqlite_autoincrement': True}
Index('idx_symbol_tf_ts', 'symbol', 'timeframe', 'timestamp', unique=True),
)
# Añadir constraint único manualmente en __init__ de StorageManager
class StorageManager:
"""
Gestor de almacenamiento con PostgreSQL y Redis
"""
def __init__(
self,
db_host: str,
@@ -59,47 +55,21 @@ class StorageManager:
redis_port: int = 6379,
redis_db: int = 0
):
"""
Inicializa conexiones a PostgreSQL y Redis
"""
# PostgreSQL connection
db_url = f"postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
try:
self.engine = create_engine(
db_url,
pool_size=10,
max_overflow=20,
echo=False
)
# Crear tablas si no existen
Base.metadata.create_all(self.engine)
# Añadir constraint único si no existe (para evitar duplicados)
try:
with self.engine.connect() as conn:
conn.execute(text("""
ALTER TABLE ohlcv
ADD CONSTRAINT unique_ohlcv
UNIQUE (symbol, timeframe, timestamp)
"""))
conn.commit()
log.info("Constraint único añadido a la tabla ohlcv")
except Exception as e:
# El constraint ya existe o hubo error (no crítico)
log.debug(f"Constraint único ya existe o no se pudo añadir: {e}")
# Crear sesión
Session = sessionmaker(bind=self.engine)
self.session = Session()
log.success("Conectado a PostgreSQL")
except Exception as e:
log.error(f"Error conectando a PostgreSQL: {e}")
raise
# Redis connection (para caché)
# 🔑 Connection string (CLAVE para pandas)
self.db_url = f"postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
# Engine SQLAlchemy (lecturas / queries)
self.engine = create_engine(self.db_url, echo=False)
# Crear tablas si no existen
Base.metadata.create_all(self.engine)
Session = sessionmaker(bind=self.engine)
self.session = Session()
log.success("Conectado a PostgreSQL")
# Redis (opcional)
try:
self.redis_client = redis.Redis(
host=redis_host,
@@ -109,91 +79,49 @@ class StorageManager:
)
self.redis_client.ping()
log.success("Conectado a Redis")
except Exception as e:
log.warning(f"No se pudo conectar a Redis: {e}. Continuando sin caché.")
except Exception:
self.redis_client = None
def save_ohlcv(self, df: pd.DataFrame, batch_size: int = 1000) -> int:
log.warning("Redis no disponible")
# ------------------------------------------------------------------
def save_ohlcv(self, df: pd.DataFrame, batch_size: int = 1000) -> int:
    """
    Persist OHLCV rows into the 'ohlcv' table, skipping rows already stored.

    Rows that violate a unique constraint of the table are silently skipped
    (PostgreSQL ``ON CONFLICT DO NOTHING``), so re-saving an overlapping
    date range does not abort the whole insert.

    Args:
        df: OHLCV data. Its index is written as the 'timestamp' column.
        batch_size: Number of rows sent per INSERT statement. Kept for
            backward compatibility with callers that pass it.

    Returns:
        Number of rows actually inserted (duplicates are not counted).
    """
    if df.empty:
        log.warning("DataFrame vacío, nada que guardar")
        return 0

    # Function-scope import: only this method needs the PostgreSQL-specific
    # INSERT construct that supports ON CONFLICT.
    from sqlalchemy.dialects.postgresql import insert as pg_insert

    df_to_save = df.reset_index()
    # reset_index() puts the former index first; make sure it is named as
    # the table column.
    if df_to_save.columns[0] != 'timestamp':
        df_to_save.rename(columns={df_to_save.columns[0]: 'timestamp'}, inplace=True)

    allowed_columns = [
        'timestamp', 'symbol', 'timeframe',
        'open', 'high', 'low', 'close', 'volume',
        'returns', 'log_returns', 'adx'
    ]
    # Drop any column the table does not have.
    df_to_save = df_to_save[[c for c in allowed_columns if c in df_to_save.columns]]

    total = len(df_to_save)
    inserted = 0

    def _insert_skipping_duplicates(table, conn, keys, data_iter):
        """Insert one chunk with ON CONFLICT DO NOTHING; return rows written."""
        nonlocal inserted
        rows = [dict(zip(keys, values)) for values in data_iter]
        statement = pg_insert(table.table).values(rows).on_conflict_do_nothing()
        affected = conn.execute(statement).rowcount
        inserted += affected
        return affected

    log.info(f"Guardando {total} registros en base de datos")

    # The connection is passed as a URL string, as the original code
    # deliberately did.
    df_to_save.to_sql(
        'ohlcv',
        self.db_url,
        if_exists='append',
        index=False,
        chunksize=batch_size,
        method=_insert_skipping_duplicates
    )

    skipped = total - inserted
    if skipped > 0:
        log.info(f"Saltados {skipped} registros duplicados")

    log.success(f"Guardados {inserted} registros")
    return inserted
# ------------------------------------------------------------------
def load_ohlcv(
self,
symbol: str,
@@ -202,189 +130,78 @@ class StorageManager:
end_date: Optional[datetime] = None,
use_cache: bool = True
) -> pd.DataFrame:
"""
Carga datos OHLCV de la base de datos
Args:
symbol: Símbolo del par
timeframe: Timeframe
start_date: Fecha inicio (opcional)
end_date: Fecha fin (opcional)
use_cache: Si usar caché de Redis
Returns:
DataFrame con datos OHLCV
"""
# Generar cache key
cache_key = f"ohlcv:{symbol}:{timeframe}:{start_date}:{end_date}"
# Intentar obtener de caché
if use_cache and self.redis_client:
try:
cached_data = self.redis_client.get(cache_key)
if cached_data:
log.debug(f"Datos obtenidos de caché: {cache_key}")
df = pd.read_json(cached_data)
df.set_index('timestamp', inplace=True)
return df
except Exception as e:
log.warning(f"Error leyendo caché: {e}")
# Construir query
query = f"""
SELECT timestamp, open, high, low, close, volume, symbol, timeframe
FROM ohlcv
WHERE symbol = '{symbol}' AND timeframe = '{timeframe}'
"""
if start_date:
query += f" AND timestamp >= '{start_date}'"
if end_date:
query += f" AND timestamp <= '{end_date}'"
query += " ORDER BY timestamp ASC"
try:
df = pd.read_sql(query, self.engine)
if df.empty:
log.warning(f"No se encontraron datos para {symbol} {timeframe}")
return pd.DataFrame()
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)
log.info(f"Cargados {len(df)} registros de {symbol} {timeframe}")
# Guardar en caché
if self.redis_client:
try:
df_json = df.reset_index().to_json()
self.redis_client.setex(cache_key, 3600, df_json) # TTL 1 hora
log.debug(f"Datos guardados en caché: {cache_key}")
except Exception as e:
log.warning(f"Error guardando en caché: {e}")
return df
except Exception as e:
log.error(f"Error cargando datos: {e}")
raise
def get_latest_timestamp(self, symbol: str, timeframe: str) -> Optional[datetime]:
"""
Obtiene el último timestamp disponible para un símbolo/timeframe
Args:
symbol: Símbolo del par
timeframe: Timeframe
Returns:
Último timestamp o None si no hay datos
"""
query = f"""
SELECT MAX(timestamp) as last_timestamp
FROM ohlcv
WHERE symbol = '{symbol}' AND timeframe = '{timeframe}'
"""
try:
result = pd.read_sql(query, self.engine)
last_timestamp = result['last_timestamp'].iloc[0]
if pd.isna(last_timestamp):
log.debug(f"No hay datos previos para {symbol} {timeframe}")
return None
return last_timestamp
except Exception as e:
log.error(f"Error obteniendo último timestamp: {e}")
return None
def delete_ohlcv(
self,
symbol: str,
timeframe: str,
start_date: Optional[datetime] = None,
end_date: Optional[datetime] = None
) -> int:
"""
Elimina datos OHLCV
Args:
symbol: Símbolo del par
timeframe: Timeframe
start_date: Fecha inicio (opcional)
end_date: Fecha fin (opcional)
Returns:
Número de registros eliminados
"""
query = f"""
DELETE FROM ohlcv
WHERE symbol = '{symbol}' AND timeframe = '{timeframe}'
"""
if start_date:
query += f" AND timestamp >= '{start_date}'"
if end_date:
query += f" AND timestamp <= '{end_date}'"
try:
result = self.engine.execute(query)
deleted_count = result.rowcount
log.info(f"Eliminados {deleted_count} registros")
# Invalidar caché
if self.redis_client:
cache_pattern = f"ohlcv:{symbol}:{timeframe}:*"
keys = self.redis_client.keys(cache_pattern)
if keys:
self.redis_client.delete(*keys)
return deleted_count
except Exception as e:
log.error(f"Error eliminando datos: {e}")
raise
def get_available_data(self) -> pd.DataFrame:
"""
Obtiene resumen de datos disponibles en la base de datos
Returns:
DataFrame con información de símbolos y timeframes disponibles
"""
query = """
SELECT
SELECT *
FROM ohlcv
WHERE symbol = :symbol AND timeframe = :timeframe
"""
params = {'symbol': symbol, 'timeframe': timeframe}
if start_date:
query += " AND timestamp >= :start_date"
params['start_date'] = start_date
if end_date:
query += " AND timestamp <= :end_date"
params['end_date'] = end_date
query += " ORDER BY timestamp ASC"
with self.engine.connect() as conn:
df = pd.read_sql(text(query), conn, params=params)
if df.empty:
log.warning(f"No se encontraron datos para {symbol} {timeframe}")
return df
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)
log.info(f"Cargados {len(df)} registros de {symbol} {timeframe}")
return df
# ------------------------------------------------------------------
def get_latest_timestamp(self, symbol: str, timeframe: str) -> Optional[datetime]:
    """
    Return the newest stored timestamp for a symbol/timeframe pair.

    Args:
        symbol: Symbol to look up.
        timeframe: Timeframe to look up.

    Returns:
        The MAX(timestamp) value from the 'ohlcv' table, or None when the
        query yields no value.
    """
    statement = text("""
        SELECT MAX(timestamp) AS last_timestamp
        FROM ohlcv
        WHERE symbol = :symbol AND timeframe = :timeframe
        """)
    bind_values = {'symbol': symbol, 'timeframe': timeframe}

    with self.engine.connect() as conn:
        row = conn.execute(statement, bind_values).fetchone()

    if not row:
        return None

    latest = row[0]
    return latest if latest else None
# ------------------------------------------------------------------
def get_available_data(self) -> pd.DataFrame:
query = """
SELECT
symbol,
timeframe,
MIN(timestamp) as first_date,
MAX(timestamp) as last_date,
COUNT(*) as record_count
MIN(timestamp) AS first_date,
MAX(timestamp) AS last_date,
COUNT(*) AS record_count
FROM ohlcv
GROUP BY symbol, timeframe
ORDER BY symbol, timeframe
"""
try:
df = pd.read_sql(query, self.engine)
log.info(f"Información de {len(df)} conjuntos de datos")
return df
except Exception as e:
log.error(f"Error obteniendo información de datos: {e}")
raise
with self.engine.connect() as conn:
return pd.read_sql(text(query), conn)
# ------------------------------------------------------------------
def close(self):
"""
Cierra conexiones
"""
try:
self.session.close()
self.engine.dispose()
if self.redis_client:
self.redis_client.close()
log.info("Conexiones cerradas")
except Exception as e:
log.error(f"Error cerrando conexiones: {e}")
self.session.close()
self.engine.dispose()
if self.redis_client:
self.redis_client.close()
log.info("Conexiones cerradas")

View File

@@ -1,29 +1,42 @@
# src/strategies/moving_average.py
"""
Estrategia de cruce de medias móviles
Estrategia de cruce de medias móviles con filtro ADX opcional
"""
import pandas as pd
from ..backtest.strategy import Strategy, Signal, calculate_sma, calculate_ema
class MovingAverageCrossover(Strategy):
"""
Estrategia simple de cruce de medias móviles
Estrategia de cruce de medias móviles
Señales:
- BUY: Cuando la media rápida cruza por encima de la lenta
- SELL: Cuando la media rápida cruza por debajo de la lenta
- BUY: Cruce alcista de medias + (ADX >= threshold si está activado)
- SELL: Cruce bajista de medias
- HOLD: En cualquier otro caso
Parámetros:
fast_period: Periodo de la media rápida (default: 10)
slow_period: Periodo de la media lenta (default: 30)
ma_type: Tipo de media móvil 'sma' o 'ema' (default: 'sma')
fast_period: Periodo MA rápida
slow_period: Periodo MA lenta
ma_type: 'sma' o 'ema'
use_adx: Activar filtro ADX
adx_threshold: Umbral mínimo de ADX
"""
def __init__(self, fast_period: int = 10, slow_period: int = 30, ma_type: str = 'sma'):
def __init__(
self,
fast_period: int = 10,
slow_period: int = 30,
ma_type: str = 'sma',
use_adx: bool = False,
adx_threshold: float = 20.0
):
params = {
'fast_period': fast_period,
'slow_period': slow_period,
'ma_type': ma_type
'ma_type': ma_type,
'use_adx': use_adx,
'adx_threshold': adx_threshold
}
super().__init__(name="Moving Average Crossover", params=params)
@@ -31,59 +44,71 @@ class MovingAverageCrossover(Strategy):
self.fast_period = fast_period
self.slow_period = slow_period
self.ma_type = ma_type.lower()
self.use_adx = use_adx
self.adx_threshold = adx_threshold
if self.ma_type not in ['sma', 'ema']:
raise ValueError("ma_type debe ser 'sma' o 'ema'")
# ------------------------------------------------------------------
def init_indicators(self, data: pd.DataFrame) -> pd.DataFrame:
"""
Calcula las medias móviles sobre los datos
Calcula indicadores necesarios (medias móviles)
"""
# Usar precio de cierre
close_prices = data['close']
# Calcular medias móviles según el tipo
if self.ma_type == 'sma':
data['ma_fast'] = calculate_sma(close_prices, self.fast_period)
data['ma_slow'] = calculate_sma(close_prices, self.slow_period)
else: # ema
else:
data['ma_fast'] = calculate_ema(close_prices, self.fast_period)
data['ma_slow'] = calculate_ema(close_prices, self.slow_period)
# Calcular cruce (1 = fast > slow, -1 = fast < slow)
# Estado del cruce
data['ma_cross'] = 0
data.loc[data['ma_fast'] > data['ma_slow'], 'ma_cross'] = 1
data.loc[data['ma_fast'] < data['ma_slow'], 'ma_cross'] = -1
# Detectar cambios (cruces)
# Cambio de estado (cruce real)
data['ma_cross_change'] = data['ma_cross'].diff()
return data
# ------------------------------------------------------------------
def generate_signal(self, idx: int) -> Signal:
    """
    Generate the trading signal for the bar at position ``idx``.

    A bearish crossover (``ma_cross_change == -2``) always yields SELL.
    A bullish crossover (``ma_cross_change == 2``) yields BUY, unless the
    ADX filter is enabled and the bar's ADX is missing or below
    ``adx_threshold``. Everything else yields HOLD.

    Args:
        idx: Positional index of the bar inside ``self.data``.

    Returns:
        Signal.BUY, Signal.SELL or Signal.HOLD.

    Raises:
        ValueError: If ``self.data`` has not been set.
    """
    if self.data is None:
        raise ValueError("Data no establecida")

    # A crossover is a change between two consecutive bars, so the first
    # bar can never carry one.
    if idx < 1:
        return Signal.HOLD

    row = self.data.iloc[idx]

    # Skip bars where either moving average is missing.
    if pd.isna(row['ma_fast']) or pd.isna(row['ma_slow']):
        return Signal.HOLD

    cross_change = row['ma_cross_change']

    # Bearish crossover: checked BEFORE the ADX filter so that exits are
    # never blocked by a weak-trend reading.
    if cross_change == -2:
        return Signal.SELL

    # Anything other than a bullish crossover is not an entry.
    if cross_change != 2:
        return Signal.HOLD

    # ADX filter applies to entries only: require a usable ADX value at or
    # above the configured threshold.
    if self.use_adx:
        if 'adx' not in self.data.columns:
            return Signal.HOLD
        adx_value = row['adx']
        if pd.isna(adx_value) or adx_value < self.adx_threshold:
            return Signal.HOLD

    return Signal.BUY