docs: update backtesting research, optimizer, ADX and visual analysis

2026-01-28 16:24:34 +01:00
parent 1add69eb56
commit e15074c0a7
10 changed files with 396 additions and 408 deletions
--- a/src/data/fetcher.py
+++ b/src/data/fetcher.py
@@ -1,5 +1,4 @@
 # src/data/fetcher.py
-# src/data/fetcher.py
 """
 Módulo para obtener datos de exchanges usando CCXT
 """
@@ -110,7 +109,9 @@ class DataFetcher:
        self,
        symbol: str,
        timeframe: str = '1h',
-        days: int = 30,
+        since: Optional[datetime] = None,
+        until: Optional[datetime] = None,
+        days: Optional[int] = None,
        max_retries: int = 3
    ) -> pd.DataFrame:
        """
@@ -126,7 +127,13 @@ class DataFetcher:
            DataFrame con todos los datos históricos
        """
        all_data = []
-        since = datetime.now() - timedelta(days=days)
+        if since is None:
+            if days is None:
+                raise ValueError("Debes proporcionar 'since' o 'days'")
+            since = datetime.utcnow() - timedelta(days=days)
+
+        if until is None:
+            until = datetime.utcnow()
        
        log.info(f"Iniciando descarga histórica: {symbol} desde {since.date()}")
        
@@ -151,7 +158,8 @@ class DataFetcher:
                    
                    # Actualizar 'since' al último timestamp + 1
                    last_timestamp = df.index[-1]
-                    since = last_timestamp + pd.Timedelta(seconds=1)
+                    timeframe_seconds = self.exchange.parse_timeframe(timeframe)
+                    since = last_timestamp + pd.Timedelta(seconds=timeframe_seconds)
                    
                    # Verificar si ya llegamos al presente
                    if since >= datetime.now():
--- a/src/data/processor.py
+++ b/src/data/processor.py
@@ -3,6 +3,7 @@
 Módulo para limpiar, validar y procesar datos de mercado
 """
 import pandas as pd
+import pandas_ta as ta
 import numpy as np
 from typing import Optional
 from ..utils.logger import log
@@ -266,4 +267,23 @@ class DataProcessor:
            raise ValueError(f"Método {method} no soportado")
        
        log.debug(f"Datos normalizados usando {method}")
-        return df_norm
+        return df_norm
+    
+    @staticmethod
+    def calculate_indicators(df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Calcula indicadores técnicos (ADX, etc.)
+        """
+        adx = ta.adx(
+            high=df['high'],
+            low=df['low'],
+            close=df['close'],
+            length=14
+        )
+
+        df = df.copy()
+        df['adx'] = adx['ADX_14']
+
+        log.debug("Indicadores técnicos calculados (ADX)")
+
+        return df
--- a/src/data/storage.py
+++ b/src/data/storage.py
@@ -3,51 +3,47 @@
 Módulo para almacenamiento persistente de datos en PostgreSQL y caché en Redis
 """
 import pandas as pd
-from sqlalchemy import create_engine, Column, String, Float, DateTime, Integer, Index, text
+from sqlalchemy import (
+    create_engine, Column, String, Float,
+    DateTime, Integer, Index, text
+)
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import sessionmaker
 from datetime import datetime
-from typing import Optional, List
+from typing import Optional
 import redis
-import json
 from ..utils.logger import log

 Base = declarative_base()

+
 class OHLCV(Base):
-    """
-    Modelo de tabla para datos OHLCV
-    """
    __tablename__ = 'ohlcv'
-    
-    id = Column(Integer, primary_key=True, autoincrement=True)
+
+    id = Column(Integer, primary_key=True)
    timestamp = Column(DateTime, nullable=False)
    symbol = Column(String(20), nullable=False)
    timeframe = Column(String(10), nullable=False)
+
    open = Column(Float, nullable=False)
    high = Column(Float, nullable=False)
    low = Column(Float, nullable=False)
    close = Column(Float, nullable=False)
    volume = Column(Float, nullable=False)
-    returns = Column(Float, nullable=True)  # Retornos simples
-    log_returns = Column(Float, nullable=True)  # Retornos logarítmicos
-    
-    # Índices compuestos para queries rápidas
+
+    returns = Column(Float)
+    log_returns = Column(Float)
+    adx = Column(Float)
+
    __table_args__ = (
-        Index('idx_symbol_timeframe_timestamp', 'symbol', 'timeframe', 'timestamp'),
-        Index('idx_timestamp', 'timestamp'),
-        # CONSTRAINT único: no permitir duplicados
-        # Una combinación de symbol + timeframe + timestamp debe ser única
-        {'sqlite_autoincrement': True}
+        Index('idx_symbol_tf_ts', 'symbol', 'timeframe', 'timestamp', unique=True),
    )
-    
-    # Añadir constraint único manualmente en __init__ de StorageManager

 class StorageManager:
    """
    Gestor de almacenamiento con PostgreSQL y Redis
    """
-    
+
    def __init__(
        self,
        db_host: str,
@@ -59,47 +55,21 @@ class StorageManager:
        redis_port: int = 6379,
        redis_db: int = 0
    ):
-        """
-        Inicializa conexiones a PostgreSQL y Redis
-        """
-        # PostgreSQL connection
-        db_url = f"postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
-        
-        try:
-            self.engine = create_engine(
-                db_url,
-                pool_size=10,
-                max_overflow=20,
-                echo=False
-            )
-            
-            # Crear tablas si no existen
-            Base.metadata.create_all(self.engine)
-            
-            # Añadir constraint único si no existe (para evitar duplicados)
-            try:
-                with self.engine.connect() as conn:
-                    conn.execute(text("""
-                        ALTER TABLE ohlcv 
-                        ADD CONSTRAINT unique_ohlcv 
-                        UNIQUE (symbol, timeframe, timestamp)
-                    """))
-                    conn.commit()
-                    log.info("Constraint único añadido a la tabla ohlcv")
-            except Exception as e:
-                # El constraint ya existe o hubo error (no crítico)
-                log.debug(f"Constraint único ya existe o no se pudo añadir: {e}")
-            
-            # Crear sesión
-            Session = sessionmaker(bind=self.engine)
-            self.session = Session()
-            
-            log.success("Conectado a PostgreSQL")
-        except Exception as e:
-            log.error(f"Error conectando a PostgreSQL: {e}")
-            raise
-        
-        # Redis connection (para caché)
+        # 🔑 Connection string (CLAVE para pandas)
+        self.db_url = f"postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
+
+        # Engine SQLAlchemy (lecturas / queries)
+        self.engine = create_engine(self.db_url, echo=False)
+
+        # Crear tablas si no existen
+        Base.metadata.create_all(self.engine)
+
+        Session = sessionmaker(bind=self.engine)
+        self.session = Session()
+
+        log.success("Conectado a PostgreSQL")
+
+        # Redis (opcional)
        try:
            self.redis_client = redis.Redis(
                host=redis_host,
@@ -109,91 +79,49 @@ class StorageManager:
            )
            self.redis_client.ping()
            log.success("Conectado a Redis")
-        except Exception as e:
-            log.warning(f"No se pudo conectar a Redis: {e}. Continuando sin caché.")
+        except Exception:
            self.redis_client = None
-    
-    def save_ohlcv(self, df: pd.DataFrame, batch_size: int = 1000) -> int:
+            log.warning("Redis no disponible")
+
+    # ------------------------------------------------------------------
+
+    def save_ohlcv(self, df: pd.DataFrame) -> int:
        """
-        Guarda datos OHLCV en la base de datos
-        
-        Args:
-            df: DataFrame con datos OHLCV
-            batch_size: Tamaño de lote para inserción
-            
-        Returns:
-            Número de registros guardados
+        Guarda datos OHLCV usando pandas.to_sql (modo estable)
        """
        if df.empty:
            log.warning("DataFrame vacío, nada que guardar")
            return 0
-        
-        log.info(f"Guardando {len(df)} registros en base de datos")
-        
-        try:
-            # Preparar datos para inserción
-            df_to_save = df.reset_index()
-            
-            # Renombrar columna de índice a timestamp si es necesario
-            if df_to_save.columns[0] != 'timestamp':
-                df_to_save.rename(columns={df_to_save.columns[0]: 'timestamp'}, inplace=True)
-            
-            # Mantener todas las columnas relevantes
-            allowed_columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume', 'symbol', 'timeframe', 'returns', 'log_returns']
-            df_to_save = df_to_save[[col for col in allowed_columns if col in df_to_save.columns]]
-            
-            # Insertar en lotes para mejor performance
-            records_saved = 0
-            records_skipped = 0
-            
-            for i in range(0, len(df_to_save), batch_size):
-                batch = df_to_save.iloc[i:i+batch_size]
-                
-                try:
-                    # Usar to_sql con if_exists='append' y method='multi'
-                    batch.to_sql(
-                        'ohlcv',
-                        self.engine,
-                        if_exists='append',
-                        index=False,
-                        method='multi'
-                    )
-                    records_saved += len(batch)
-                    log.debug(f"Guardados {records_saved}/{len(df_to_save)} registros")
-                    
-                except Exception as e:
-                    # Si hay error de duplicados, intentar uno por uno
-                    if 'unique' in str(e).lower() or 'duplicate' in str(e).lower():
-                        log.warning(f"Duplicados detectados en batch, insertando uno por uno...")
-                        
-                        for _, row in batch.iterrows():
-                            try:
-                                row.to_frame().T.to_sql(
-                                    'ohlcv',
-                                    self.engine,
-                                    if_exists='append',
-                                    index=False
-                                )
-                                records_saved += 1
-                            except Exception:
-                                # Este registro ya existe, saltarlo
-                                records_skipped += 1
-                                continue
-                    else:
-                        # Otro tipo de error, re-lanzar
-                        raise e
-            
-            if records_skipped > 0:
-                log.info(f"Saltados {records_skipped} registros duplicados")
-            
-            log.success(f"Guardados {records_saved} registros exitosamente")
-            return records_saved
-            
-        except Exception as e:
-            log.error(f"Error guardando datos: {e}")
-            self.session.rollback()
-            raise
-    
+
+        df_to_save = df.reset_index()
+
+        if df_to_save.columns[0] != 'timestamp':
+            df_to_save.rename(columns={df_to_save.columns[0]: 'timestamp'}, inplace=True)
+
+        allowed_columns = [
+            'timestamp', 'symbol', 'timeframe',
+            'open', 'high', 'low', 'close', 'volume',
+            'returns', 'log_returns', 'adx'
+        ]
+
+        df_to_save = df_to_save[[c for c in allowed_columns if c in df_to_save.columns]]
+
+        log.info(f"Guardando {len(df_to_save)} registros en base de datos")
+
+        # 🔥 CLAVE: pasar la URL como string
+        df_to_save.to_sql(
+            'ohlcv',
+            self.db_url,
+            if_exists='append',
+            index=False,
+            method='multi'
+        )
+
+        log.success(f"Guardados {len(df_to_save)} registros")
+        return len(df_to_save)
+
+    # ------------------------------------------------------------------
+
    def load_ohlcv(
        self,
        symbol: str,
@@ -202,189 +130,78 @@ class StorageManager:
        end_date: Optional[datetime] = None,
        use_cache: bool = True
    ) -> pd.DataFrame:
-        """
-        Carga datos OHLCV de la base de datos
-        
-        Args:
-            symbol: Símbolo del par
-            timeframe: Timeframe
-            start_date: Fecha inicio (opcional)
-            end_date: Fecha fin (opcional)
-            use_cache: Si usar caché de Redis
-            
-        Returns:
-            DataFrame con datos OHLCV
-        """
-        # Generar cache key
-        cache_key = f"ohlcv:{symbol}:{timeframe}:{start_date}:{end_date}"
-        
-        # Intentar obtener de caché
-        if use_cache and self.redis_client:
-            try:
-                cached_data = self.redis_client.get(cache_key)
-                if cached_data:
-                    log.debug(f"Datos obtenidos de caché: {cache_key}")
-                    df = pd.read_json(cached_data)
-                    df.set_index('timestamp', inplace=True)
-                    return df
-            except Exception as e:
-                log.warning(f"Error leyendo caché: {e}")
-        
-        # Construir query
-        query = f"""
-            SELECT timestamp, open, high, low, close, volume, symbol, timeframe
-            FROM ohlcv
-            WHERE symbol = '{symbol}' AND timeframe = '{timeframe}'
-        """
-        
-        if start_date:
-            query += f" AND timestamp >= '{start_date}'"
-        if end_date:
-            query += f" AND timestamp <= '{end_date}'"
-        
-        query += " ORDER BY timestamp ASC"
-        
-        try:
-            df = pd.read_sql(query, self.engine)
-            
-            if df.empty:
-                log.warning(f"No se encontraron datos para {symbol} {timeframe}")
-                return pd.DataFrame()
-            
-            df['timestamp'] = pd.to_datetime(df['timestamp'])
-            df.set_index('timestamp', inplace=True)
-            
-            log.info(f"Cargados {len(df)} registros de {symbol} {timeframe}")
-            
-            # Guardar en caché
-            if self.redis_client:
-                try:
-                    df_json = df.reset_index().to_json()
-                    self.redis_client.setex(cache_key, 3600, df_json)  # TTL 1 hora
-                    log.debug(f"Datos guardados en caché: {cache_key}")
-                except Exception as e:
-                    log.warning(f"Error guardando en caché: {e}")
-            
-            return df
-            
-        except Exception as e:
-            log.error(f"Error cargando datos: {e}")
-            raise
-    
-    def get_latest_timestamp(self, symbol: str, timeframe: str) -> Optional[datetime]:
-        """
-        Obtiene el último timestamp disponible para un símbolo/timeframe
-        
-        Args:
-            symbol: Símbolo del par
-            timeframe: Timeframe
-            
-        Returns:
-            Último timestamp o None si no hay datos
-        """
-        query = f"""
-            SELECT MAX(timestamp) as last_timestamp
-            FROM ohlcv
-            WHERE symbol = '{symbol}' AND timeframe = '{timeframe}'
-        """
-        
-        try:
-            result = pd.read_sql(query, self.engine)
-            last_timestamp = result['last_timestamp'].iloc[0]
-            
-            if pd.isna(last_timestamp):
-                log.debug(f"No hay datos previos para {symbol} {timeframe}")
-                return None
-            
-            return last_timestamp
-            
-        except Exception as e:
-            log.error(f"Error obteniendo último timestamp: {e}")
-            return None
-    
-    def delete_ohlcv(
-        self,
-        symbol: str,
-        timeframe: str,
-        start_date: Optional[datetime] = None,
-        end_date: Optional[datetime] = None
-    ) -> int:
-        """
-        Elimina datos OHLCV
-        
-        Args:
-            symbol: Símbolo del par
-            timeframe: Timeframe
-            start_date: Fecha inicio (opcional)
-            end_date: Fecha fin (opcional)
-            
-        Returns:
-            Número de registros eliminados
-        """
-        query = f"""
-            DELETE FROM ohlcv
-            WHERE symbol = '{symbol}' AND timeframe = '{timeframe}'
-        """
-        
-        if start_date:
-            query += f" AND timestamp >= '{start_date}'"
-        if end_date:
-            query += f" AND timestamp <= '{end_date}'"
-        
-        try:
-            result = self.engine.execute(query)
-            deleted_count = result.rowcount
-            log.info(f"Eliminados {deleted_count} registros")
-            
-            # Invalidar caché
-            if self.redis_client:
-                cache_pattern = f"ohlcv:{symbol}:{timeframe}:*"
-                keys = self.redis_client.keys(cache_pattern)
-                if keys:
-                    self.redis_client.delete(*keys)
-            
-            return deleted_count
-            
-        except Exception as e:
-            log.error(f"Error eliminando datos: {e}")
-            raise
-    
-    def get_available_data(self) -> pd.DataFrame:
-        """
-        Obtiene resumen de datos disponibles en la base de datos
-        
-        Returns:
-            DataFrame con información de símbolos y timeframes disponibles
-        """
+
        query = """
-            SELECT 
+            SELECT *
+            FROM ohlcv
+            WHERE symbol = :symbol AND timeframe = :timeframe
+        """
+
+        params = {'symbol': symbol, 'timeframe': timeframe}
+
+        if start_date:
+            query += " AND timestamp >= :start_date"
+            params['start_date'] = start_date
+
+        if end_date:
+            query += " AND timestamp <= :end_date"
+            params['end_date'] = end_date
+
+        query += " ORDER BY timestamp ASC"
+
+        with self.engine.connect() as conn:
+            df = pd.read_sql(text(query), conn, params=params)
+
+        if df.empty:
+            log.warning(f"No se encontraron datos para {symbol} {timeframe}")
+            return df
+
+        df['timestamp'] = pd.to_datetime(df['timestamp'])
+        df.set_index('timestamp', inplace=True)
+
+        log.info(f"Cargados {len(df)} registros de {symbol} {timeframe}")
+        return df
+
+    # ------------------------------------------------------------------
+
+    def get_latest_timestamp(self, symbol: str, timeframe: str) -> Optional[datetime]:
+        query = """
+            SELECT MAX(timestamp) AS last_timestamp
+            FROM ohlcv
+            WHERE symbol = :symbol AND timeframe = :timeframe
+        """
+
+        with self.engine.connect() as conn:
+            result = conn.execute(
+                text(query),
+                {'symbol': symbol, 'timeframe': timeframe}
+            ).fetchone()
+
+        return result[0] if result and result[0] else None
+
+    # ------------------------------------------------------------------
+
+    def get_available_data(self) -> pd.DataFrame:
+        query = """
+            SELECT
                symbol,
                timeframe,
-                MIN(timestamp) as first_date,
-                MAX(timestamp) as last_date,
-                COUNT(*) as record_count
+                MIN(timestamp) AS first_date,
+                MAX(timestamp) AS last_date,
+                COUNT(*) AS record_count
            FROM ohlcv
            GROUP BY symbol, timeframe
            ORDER BY symbol, timeframe
        """
-        
-        try:
-            df = pd.read_sql(query, self.engine)
-            log.info(f"Información de {len(df)} conjuntos de datos")
-            return df
-        except Exception as e:
-            log.error(f"Error obteniendo información de datos: {e}")
-            raise
-    
+
+        with self.engine.connect() as conn:
+            return pd.read_sql(text(query), conn)
+
+    # ------------------------------------------------------------------
+
    def close(self):
-        """
-        Cierra conexiones
-        """
-        try:
-            self.session.close()
-            self.engine.dispose()
-            if self.redis_client:
-                self.redis_client.close()
-            log.info("Conexiones cerradas")
-        except Exception as e:
-            log.error(f"Error cerrando conexiones: {e}")
+        self.session.close()
+        self.engine.dispose()
+        if self.redis_client:
+            self.redis_client.close()
+        log.info("Conexiones cerradas")
--- a/src/strategies/moving_average.py
+++ b/src/strategies/moving_average.py
@@ -1,29 +1,42 @@
 # src/strategies/moving_average.py
 """
-Estrategia de cruce de medias móviles
+Estrategia de cruce de medias móviles con filtro ADX opcional
 """
 import pandas as pd
 from ..backtest.strategy import Strategy, Signal, calculate_sma, calculate_ema

+
 class MovingAverageCrossover(Strategy):
    """
-    Estrategia simple de cruce de medias móviles
-    
+    Estrategia de cruce de medias móviles
+
    Señales:
-    - BUY: Cuando la media rápida cruza por encima de la lenta
-    - SELL: Cuando la media rápida cruza por debajo de la lenta
+    - BUY: Cruce alcista de medias + (ADX >= threshold si está activado)
+    - SELL: Cruce bajista de medias
    - HOLD: En cualquier otro caso
-    
+
    Parámetros:
-        fast_period: Periodo de la media rápida (default: 10)
-        slow_period: Periodo de la media lenta (default: 30)
-        ma_type: Tipo de media móvil 'sma' o 'ema' (default: 'sma')
+        fast_period: Periodo MA rápida
+        slow_period: Periodo MA lenta
+        ma_type: 'sma' o 'ema'
+        use_adx: Activar filtro ADX
+        adx_threshold: Umbral mínimo de ADX
    """
-    def __init__(self, fast_period: int = 10, slow_period: int = 30, ma_type: str = 'sma'):
+
+    def __init__(
+        self,
+        fast_period: int = 10,
+        slow_period: int = 30,
+        ma_type: str = 'sma',
+        use_adx: bool = False,
+        adx_threshold: float = 20.0
+    ):
        params = {
            'fast_period': fast_period,
            'slow_period': slow_period,
-            'ma_type': ma_type
+            'ma_type': ma_type,
+            'use_adx': use_adx,
+            'adx_threshold': adx_threshold
        }

        super().__init__(name="Moving Average Crossover", params=params)
@@ -31,59 +44,71 @@ class MovingAverageCrossover(Strategy):
        self.fast_period = fast_period
        self.slow_period = slow_period
        self.ma_type = ma_type.lower()
+        self.use_adx = use_adx
+        self.adx_threshold = adx_threshold

        if self.ma_type not in ['sma', 'ema']:
            raise ValueError("ma_type debe ser 'sma' o 'ema'")
-        
+
+    # ------------------------------------------------------------------
+
    def init_indicators(self, data: pd.DataFrame) -> pd.DataFrame:
        """
-        Calcula las medias móviles sobre los datos
+        Calcula indicadores necesarios (medias móviles)
        """
-        # Usar precio de cierre
        close_prices = data['close']

-        # Calcular medias móviles según el tipo
        if self.ma_type == 'sma':
            data['ma_fast'] = calculate_sma(close_prices, self.fast_period)
            data['ma_slow'] = calculate_sma(close_prices, self.slow_period)
-        else:  # ema
+        else:
            data['ma_fast'] = calculate_ema(close_prices, self.fast_period)
            data['ma_slow'] = calculate_ema(close_prices, self.slow_period)
-        
-        # Calcular cruce (1 = fast > slow, -1 = fast < slow)
+
+        # Estado del cruce
        data['ma_cross'] = 0
        data.loc[data['ma_fast'] > data['ma_slow'], 'ma_cross'] = 1
        data.loc[data['ma_fast'] < data['ma_slow'], 'ma_cross'] = -1
-        
-        # Detectar cambios (cruces)
+
+        # Cambio de estado (cruce real)
        data['ma_cross_change'] = data['ma_cross'].diff()
-        
+
        return data
-    
+
+    # ------------------------------------------------------------------
+
    def generate_signal(self, idx: int) -> Signal:
        """
-        Genera señal basada en el cruce de medias móviles
+        Genera señal de trading
        """
        if self.data is None:
            raise ValueError("Data no establecida")
-        
-        # Necesitamos al menos 2 puntos para detectar cruce
+
        if idx < 1:
            return Signal.HOLD
-        
-        # Verificar que las MAs están calculadas (no son NaN)
-        if pd.isna(self.data.iloc[idx]['ma_fast']) or pd.isna(self.data.iloc[idx]['ma_slow']):
+
+        row = self.data.iloc[idx]
+
+        # MAs válidas
+        if pd.isna(row['ma_fast']) or pd.isna(row['ma_slow']):
            return Signal.HOLD
-        
-        cross_change = self.data.iloc[idx]['ma_cross_change']
-        
-        # Cruce alcista: fast cruza por encima de slow
-        if cross_change == 2:  # De -1 a 1
+
+        # 🔵 FILTRO ADX (solo para entradas)
+        if self.use_adx:
+            if 'adx' not in self.data.columns or pd.isna(row['adx']):
+                return Signal.HOLD
+
+            if row['adx'] < self.adx_threshold:
+                return Signal.HOLD
+
+        cross_change = row['ma_cross_change']
+
+        # Cruce alcista
+        if cross_change == 2:
            return Signal.BUY
-        
-        # Cruce bajista: fast cruza por debajo de slow
-        elif cross_change == -2:  # De 1 a -1
+
+        # Cruce bajista (salida siempre permitida)
+        elif cross_change == -2:
            return Signal.SELL
-        
-        # Sin cruce
-        return Signal.HOLD
+
+        return Signal.HOLD