Initial commit

2026-01-26 18:57:42 +01:00
commit dcffd9dfad
23 changed files with 1639 additions and 0 deletions
--- a/src/data/fetcher.py
+++ b/src/data/fetcher.py
@@ -0,0 +1,212 @@
+# src/data/fetcher.py
+"""
+Módulo para obtener datos de exchanges usando CCXT
+"""
+import ccxt
+import pandas as pd
+from datetime import datetime, timedelta
+from typing import List, Optional, Dict
+import time
+from ..monitoring.logger import log
+
+class DataFetcher:
+    """
+    Clase para obtener datos históricos y en tiempo real de exchanges
+    """
+
+    def __init__(self, exchange_name: str, api_key: str = None, api_secret: str = None):
+        """
+        Inicializa la conexión con el exchange
+        
+        Args:
+            exchange_name: Nombre del exchange (binance, kraken, etc)
+            api_key: API key (opcional para datos públicos)
+            api_secret: API secret (opcional para datos públicos)
+        """
+        self.exchange_name = exchange_name
+
+        try:
+            exchange_class = getattr(ccxt, exchange_name)
+            self.exchange = exchange_class({
+                'apiKey': api_key,
+                'secret': api_secret,
+                'enableRateLimit': True,  # Importante para evitar bans
+                'options': {
+                    'defaultType': 'spot',  # spot, future, etc
+                }
+            })
+            log.info(f"Conectado al exchange: {exchange_name}")
+        except Exception as e:
+            log.error(f"Error conectando a {exchange_name}: {e}")
+            raise
+
+    def fetch_ohlcv(self, symbol: str, timeframe: str = '1h', since: Optional[datetime] = None,
+                    limit: int = 500) -> pd.DataFrame:
+        """
+        Obtiene datos OHLCV (Open, High, Low, Close, Volume)
+        
+        Args:
+            symbol: Par de trading (ej: 'BTC/USDT')
+            timeframe: Intervalo de tiempo ('1m', '5m', '1h', '1d')
+            since: Fecha desde la que obtener datos
+            limit: Número máximo de velas a obtener
+            
+        Returns:
+            DataFrame con los datos OHLCV
+        """
+        try:
+            # Convertir datetime a timestamp en ms
+            since_ms = None
+            if since:
+                since_ms = int(since.timestamp() * 1000)
+
+            log.info(f"Obteniendo datos OHLCV: {symbol} {timeframe}")
+
+            ohlcv = self.exchange.fetch_ohlcv(
+                symbol,
+                timeframe=timeframe,
+                since=since_ms,
+                limit=limit
+            )
+
+            # Convertir a DataFrame
+            df = pd.DataFrame(
+                ohlcv,
+                columns=['timestamp', 'open', 'high', 'low', 'close', 'volume']
+            )
+            
+            # Convertir timestamp a datetime
+            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
+            df.set_index('timestamp', inplace=True)
+            
+            # Añadir metadata
+            df['symbol'] = symbol
+            df['timeframe'] = timeframe
+            
+            log.success(f"Obtenidos {len(df)} registros de {symbol}")
+            return df
+        
+        except Exception as e:
+            log.error(f"Error obteniendo OHLCV para {symbol}: {e}")
+            raise
+
+    def fetch_historical(self, symbol: str, timeframe: str = '1h', days: int = 30,
+                         max_retries: int = 3) -> pd.DataFrame:
+        """
+        Obtiene datos históricos completos (puede requerir múltiples llamadas)
+        
+        Args:
+            symbol: Par de trading
+            timeframe: Intervalo de tiempo
+            days: Días hacia atrás
+            max_retries: Intentos máximos por request
+            
+        Returns:
+            DataFrame con todos los datos históricos
+        """
+        all_data = []
+        since = datetime.now() - timedelta(days=days)
+        
+        log.info(f"Iniciando descarga histórica: {symbol} desde {since.date()}")
+        
+        while True:
+            retry_count = 0
+            success = False
+            
+            while retry_count < max_retries and not success:
+                try:
+                    df = self.fetch_ohlcv(symbol, timeframe, since, limit=1000)
+                    
+                    if df.empty:
+                        log.warning(f"No hay más datos disponibles para {symbol}")
+                        success = True
+                        break
+                    
+                    all_data.append(df)
+                    
+                    # Actualizar 'since' al último timestamp + 1
+                    last_timestamp = df.index[-1]
+                    since = last_timestamp + pd.Timedelta(seconds=1)
+                    
+                    # Verificar si ya llegamos al presente
+                    if since >= datetime.now():
+                        success = True
+                        break
+                    
+                    success = True
+                    time.sleep(self.exchange.rateLimit / 1000)  # Respetar rate limit
+                    
+                except Exception as e:
+                    retry_count += 1
+                    log.warning(f"Intento {retry_count}/{max_retries} falló: {e}")
+                    time.sleep(5 * retry_count)  # Backoff exponencial
+                    
+            if not success:
+                log.error(f"Falló después de {max_retries} intentos")
+                break
+                
+            if since >= datetime.now():
+                break
+        
+        if not all_data:
+            log.error("No se pudo obtener ningún dato histórico")
+            return pd.DataFrame()
+        
+        # Combinar todos los DataFrames
+        final_df = pd.concat(all_data).drop_duplicates()
+        final_df.sort_index(inplace=True)
+        
+        log.success(f"Descarga completa: {len(final_df)} velas de {symbol}")
+        return final_df
+    
+    def fetch_ticker(self, symbol: str) -> Dict:
+        """
+        Obtiene el precio actual y información del ticker
+        
+        Args:
+            symbol: Par de trading
+            
+        Returns:
+            Diccionario con información del ticker
+        """
+        try:
+            ticker = self.exchange.fetch_ticker(symbol)
+            log.debug(f"Ticker de {symbol}: {ticker['last']}")
+            return ticker
+        except Exception as e:
+            log.error(f"Error obteniendo ticker de {symbol}: {e}")
+            raise
+    
+    def fetch_order_book(self, symbol: str, limit: int = 20) -> Dict:
+        """
+        Obtiene el libro de órdenes (order book)
+        
+        Args:
+            symbol: Par de trading
+            limit: Profundidad del order book
+            
+        Returns:
+            Diccionario con bids y asks
+        """
+        try:
+            order_book = self.exchange.fetch_order_book(symbol, limit)
+            return order_book
+        except Exception as e:
+            log.error(f"Error obteniendo order book de {symbol}: {e}")
+            raise
+    
+    def get_available_symbols(self) -> List[str]:
+        """
+        Obtiene lista de símbolos disponibles en el exchange
+        
+        Returns:
+            Lista de símbolos
+        """
+        try:
+            markets = self.exchange.load_markets()
+            symbols = list(markets.keys())
+            log.info(f"Símbolos disponibles: {len(symbols)}")
+            return symbols
+        except Exception as e:
+            log.error(f"Error obteniendo símbolos: {e}")
+            raise
--- a/src/data/processor.py
+++ b/src/data/processor.py
@@ -0,0 +1,269 @@
+# src/data/processor.py
+"""
+Módulo para limpiar, validar y procesar datos de mercado
+"""
+import pandas as pd
+import numpy as np
+from typing import Optional
+from ..monitoring.logger import log
+
+class DataProcessor:
+    """
+    Clase para procesar y limpiar datos de mercado
+    """
+    
+    @staticmethod
+    def validate_ohlcv(df: pd.DataFrame) -> bool:
+        """
+        Valida que un DataFrame tenga estructura OHLCV válida
+        
+        Args:
+            df: DataFrame a validar
+            
+        Returns:
+            True si es válido, False si no
+        """
+        required_columns = ['open', 'high', 'low', 'close', 'volume']
+        
+        # Verificar columnas requeridas
+        if not all(col in df.columns for col in required_columns):
+            log.error(f"Faltan columnas requeridas. Presentes: {df.columns.tolist()}")
+            return False
+        
+        # Verificar que el índice sea datetime
+        if not isinstance(df.index, pd.DatetimeIndex):
+            log.error("El índice debe ser DatetimeIndex")
+            return False
+        
+        # Verificar valores numéricos
+        for col in required_columns:
+            if not pd.api.types.is_numeric_dtype(df[col]):
+                log.error(f"Columna {col} no es numérica")
+                return False
+        
+        # Verificar relaciones lógicas: high >= low, etc
+        invalid_rows = (df['high'] < df['low']) | (df['high'] < df['open']) | (df['high'] < df['close'])
+        if invalid_rows.any():
+            log.warning(f"Encontradas {invalid_rows.sum()} filas con valores ilógicos")
+            return False
+        
+        log.debug("Validación OHLCV exitosa")
+        return True
+    
+    @staticmethod
+    def clean_data(df: pd.DataFrame, remove_duplicates: bool = True) -> pd.DataFrame:
+        """
+        Limpia datos: duplicados, NaN, outliers
+        
+        Args:
+            df: DataFrame a limpiar
+            remove_duplicates: Si eliminar duplicados
+            
+        Returns:
+            DataFrame limpio
+        """
+        log.info(f"Limpiando datos. Filas iniciales: {len(df)}")
+        df_clean = df.copy()
+        
+        # Eliminar duplicados en el índice
+        if remove_duplicates:
+            before = len(df_clean)
+            df_clean = df_clean[~df_clean.index.duplicated(keep='first')]
+            removed = before - len(df_clean)
+            if removed > 0:
+                log.info(f"Eliminados {removed} duplicados")
+        
+        # Ordenar por índice
+        df_clean.sort_index(inplace=True)
+        
+        # Manejar valores faltantes
+        nan_count = df_clean.isnull().sum().sum()
+        if nan_count > 0:
+            log.warning(f"Encontrados {nan_count} valores NaN")
+            
+            # Forward fill para datos de serie temporal
+            df_clean.fillna(method='ffill', inplace=True)
+            
+            # Si aún quedan NaN al inicio, usar backward fill
+            df_clean.fillna(method='bfill', inplace=True)
+        
+        # Detectar y manejar outliers extremos (más de 100x el precio medio)
+        for col in ['open', 'high', 'low', 'close']:
+            if col in df_clean.columns:
+                median = df_clean[col].median()
+                outliers = (df_clean[col] > median * 100) | (df_clean[col] < median / 100)
+                outlier_count = outliers.sum()
+                
+                if outlier_count > 0:
+                    log.warning(f"Detectados {outlier_count} outliers en {col}")
+                    # Reemplazar outliers con el valor anterior válido
+                    df_clean.loc[outliers, col] = np.nan
+                    df_clean[col].fillna(method='ffill', inplace=True)
+        
+        # Verificar volumen negativo
+        if 'volume' in df_clean.columns:
+            negative_vol = df_clean['volume'] < 0
+            if negative_vol.any():
+                log.warning(f"Encontrados {negative_vol.sum()} volúmenes negativos")
+                df_clean.loc[negative_vol, 'volume'] = 0
+        
+        log.success(f"Limpieza completa. Filas finales: {len(df_clean)}")
+        return df_clean
+    
+    @staticmethod
+    def resample_timeframe(
+        df: pd.DataFrame, 
+        target_timeframe: str,
+        original_timeframe: Optional[str] = None
+    ) -> pd.DataFrame:
+        """
+        Resamplea datos a un timeframe diferente
+        
+        Args:
+            df: DataFrame con datos OHLCV
+            target_timeframe: Timeframe destino ('5m', '15m', '1h', '4h', '1d')
+            original_timeframe: Timeframe original (opcional, para validación)
+            
+        Returns:
+            DataFrame resampled
+        """
+        log.info(f"Resampling a {target_timeframe}")
+        
+        # Diccionario de conversión de timeframe a pandas offset
+        timeframe_map = {
+            '1m': '1T',
+            '5m': '5T',
+            '15m': '15T',
+            '30m': '30T',
+            '1h': '1H',
+            '4h': '4H',
+            '1d': '1D',
+            '1w': '1W'
+        }
+        
+        if target_timeframe not in timeframe_map:
+            raise ValueError(f"Timeframe {target_timeframe} no soportado")
+        
+        offset = timeframe_map[target_timeframe]
+        
+        # Resamplear usando reglas OHLC estándar
+        resampled = df.resample(offset).agg({
+            'open': 'first',
+            'high': 'max',
+            'low': 'min',
+            'close': 'last',
+            'volume': 'sum'
+        })
+        
+        # Eliminar filas con NaN (periodos sin datos)
+        resampled.dropna(inplace=True)
+        
+        # Mantener metadata si existe
+        if 'symbol' in df.columns:
+            resampled['symbol'] = df['symbol'].iloc[0]
+        resampled['timeframe'] = target_timeframe
+        
+        log.success(f"Resampling completo: {len(df)} -> {len(resampled)} velas")
+        return resampled
+    
+    @staticmethod
+    def calculate_returns(df: pd.DataFrame, period: int = 1) -> pd.DataFrame:
+        """
+        Calcula retornos simples y logarítmicos
+        
+        Args:
+            df: DataFrame con datos OHLCV
+            period: Periodo para calcular retornos
+            
+        Returns:
+            DataFrame con columnas de retornos añadidas
+        """
+        df_returns = df.copy()
+        
+        # Retornos simples
+        df_returns['returns'] = df_returns['close'].pct_change(period)
+        
+        # Retornos logarítmicos (mejores para análisis estadístico)
+        df_returns['log_returns'] = np.log(df_returns['close'] / df_returns['close'].shift(period))
+        
+        log.debug(f"Retornos calculados para periodo {period}")
+        return df_returns
+    
+    @staticmethod
+    def detect_gaps(df: pd.DataFrame, timeframe: str) -> pd.DataFrame:
+        """
+        Detecta gaps en los datos (periodos faltantes)
+        
+        Args:
+            df: DataFrame con índice temporal
+            timeframe: Timeframe esperado
+            
+        Returns:
+            DataFrame con información de gaps
+        """
+        timeframe_delta = {
+            '1m': pd.Timedelta(minutes=1),
+            '5m': pd.Timedelta(minutes=5),
+            '15m': pd.Timedelta(minutes=15),
+            '1h': pd.Timedelta(hours=1),
+            '4h': pd.Timedelta(hours=4),
+            '1d': pd.Timedelta(days=1)
+        }
+        
+        if timeframe not in timeframe_delta:
+            log.warning(f"Timeframe {timeframe} no reconocido para detección de gaps")
+            return pd.DataFrame()
+        
+        expected_delta = timeframe_delta[timeframe]
+        time_diffs = df.index.to_series().diff()
+        
+        # Gaps son diferencias mayores al timeframe esperado
+        gaps = time_diffs[time_diffs > expected_delta * 1.5]
+        
+        if len(gaps) > 0:
+            log.warning(f"Detectados {len(gaps)} gaps en los datos")
+            gap_info = pd.DataFrame({
+                'gap_start': gaps.index,
+                'gap_duration': gaps.values
+            })
+            return gap_info
+        
+        log.debug("No se detectaron gaps en los datos")
+        return pd.DataFrame()
+    
+    @staticmethod
+    def normalize_data(df: pd.DataFrame, method: str = 'minmax') -> pd.DataFrame:
+        """
+        Normaliza datos numéricos
+        
+        Args:
+            df: DataFrame a normalizar
+            method: Método de normalización ('minmax', 'zscore')
+            
+        Returns:
+            DataFrame normalizado
+        """
+        df_norm = df.copy()
+        numeric_cols = df_norm.select_dtypes(include=[np.number]).columns
+        
+        if method == 'minmax':
+            # Min-Max normalization [0, 1]
+            for col in numeric_cols:
+                min_val = df_norm[col].min()
+                max_val = df_norm[col].max()
+                if max_val > min_val:
+                    df_norm[col] = (df_norm[col] - min_val) / (max_val - min_val)
+        
+        elif method == 'zscore':
+            # Z-score normalization (mean=0, std=1)
+            for col in numeric_cols:
+                mean = df_norm[col].mean()
+                std = df_norm[col].std()
+                if std > 0:
+                    df_norm[col] = (df_norm[col] - mean) / std
+        
+        else:
+            raise ValueError(f"Método {method} no soportado")
+        
+        log.debug(f"Datos normalizados usando {method}")
+        return df_norm
--- a/src/data/storage.py
+++ b/src/data/storage.py
@@ -0,0 +1,338 @@
+# src/data/storage.py
+"""
+Módulo para almacenamiento persistente de datos en PostgreSQL y caché en Redis
+"""
+import pandas as pd
+from sqlalchemy import create_engine, Column, String, Float, DateTime, Integer, Index
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker
+from datetime import datetime
+from typing import Optional, List
+import redis
+import json
+from ..monitoring.logger import log
+
+Base = declarative_base()
+
+class OHLCV(Base):
+    """
+    Modelo de tabla para datos OHLCV
+    """
+    __tablename__ = 'ohlcv'
+    
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    timestamp = Column(DateTime, nullable=False)
+    symbol = Column(String(20), nullable=False)
+    timeframe = Column(String(10), nullable=False)
+    open = Column(Float, nullable=False)
+    high = Column(Float, nullable=False)
+    low = Column(Float, nullable=False)
+    close = Column(Float, nullable=False)
+    volume = Column(Float, nullable=False)
+    
+    # Índices compuestos para queries rápidas
+    __table_args__ = (
+        Index('idx_symbol_timeframe_timestamp', 'symbol', 'timeframe', 'timestamp'),
+        Index('idx_timestamp', 'timestamp'),
+    )
+
+class StorageManager:
+    """
+    Gestor de almacenamiento con PostgreSQL y Redis
+    """
+    
+    def __init__(
+        self,
+        db_host: str,
+        db_port: int,
+        db_name: str,
+        db_user: str,
+        db_password: str,
+        redis_host: str = 'localhost',
+        redis_port: int = 6379,
+        redis_db: int = 0
+    ):
+        """
+        Inicializa conexiones a PostgreSQL y Redis
+        """
+        # PostgreSQL connection
+        db_url = f"postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
+        
+        try:
+            self.engine = create_engine(
+                db_url,
+                pool_size=10,
+                max_overflow=20,
+                echo=False
+            )
+            
+            # Crear tablas si no existen
+            Base.metadata.create_all(self.engine)
+            
+            # Crear sesión
+            Session = sessionmaker(bind=self.engine)
+            self.session = Session()
+            
+            log.success("Conectado a PostgreSQL")
+        except Exception as e:
+            log.error(f"Error conectando a PostgreSQL: {e}")
+            raise
+        
+        # Redis connection (para caché)
+        try:
+            self.redis_client = redis.Redis(
+                host=redis_host,
+                port=redis_port,
+                db=redis_db,
+                decode_responses=True
+            )
+            self.redis_client.ping()
+            log.success("Conectado a Redis")
+        except Exception as e:
+            log.warning(f"No se pudo conectar a Redis: {e}. Continuando sin caché.")
+            self.redis_client = None
+    
+    def save_ohlcv(self, df: pd.DataFrame, batch_size: int = 1000) -> int:
+        """
+        Guarda datos OHLCV en la base de datos
+        
+        Args:
+            df: DataFrame con datos OHLCV
+            batch_size: Tamaño de lote para inserción
+            
+        Returns:
+            Número de registros guardados
+        """
+        if df.empty:
+            log.warning("DataFrame vacío, nada que guardar")
+            return 0
+        
+        log.info(f"Guardando {len(df)} registros en base de datos")
+        
+        try:
+            # Preparar datos para inserción
+            df_to_save = df.reset_index()
+            
+            # Renombrar columna de índice a timestamp si es necesario
+            if df_to_save.columns[0] != 'timestamp':
+                df_to_save.rename(columns={df_to_save.columns[0]: 'timestamp'}, inplace=True)
+            
+            # Insertar en lotes para mejor performance
+            records_saved = 0
+            for i in range(0, len(df_to_save), batch_size):
+                batch = df_to_save.iloc[i:i+batch_size]
+                
+                # Usar to_sql con if_exists='append' y method='multi'
+                batch.to_sql(
+                    'ohlcv',
+                    self.engine,
+                    if_exists='append',
+                    index=False,
+                    method='multi'
+                )
+                
+                records_saved += len(batch)
+                log.debug(f"Guardados {records_saved}/{len(df_to_save)} registros")
+            
+            log.success(f"Guardados {records_saved} registros exitosamente")
+            return records_saved
+            
+        except Exception as e:
+            log.error(f"Error guardando datos: {e}")
+            self.session.rollback()
+            raise
+    
+    def load_ohlcv(
+        self,
+        symbol: str,
+        timeframe: str,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+        use_cache: bool = True
+    ) -> pd.DataFrame:
+        """
+        Carga datos OHLCV de la base de datos
+        
+        Args:
+            symbol: Símbolo del par
+            timeframe: Timeframe
+            start_date: Fecha inicio (opcional)
+            end_date: Fecha fin (opcional)
+            use_cache: Si usar caché de Redis
+            
+        Returns:
+            DataFrame con datos OHLCV
+        """
+        # Generar cache key
+        cache_key = f"ohlcv:{symbol}:{timeframe}:{start_date}:{end_date}"
+        
+        # Intentar obtener de caché
+        if use_cache and self.redis_client:
+            try:
+                cached_data = self.redis_client.get(cache_key)
+                if cached_data:
+                    log.debug(f"Datos obtenidos de caché: {cache_key}")
+                    df = pd.read_json(cached_data)
+                    df.set_index('timestamp', inplace=True)
+                    return df
+            except Exception as e:
+                log.warning(f"Error leyendo caché: {e}")
+        
+        # Construir query
+        query = f"""
+            SELECT timestamp, open, high, low, close, volume, symbol, timeframe
+            FROM ohlcv
+            WHERE symbol = '{symbol}' AND timeframe = '{timeframe}'
+        """
+        
+        if start_date:
+            query += f" AND timestamp >= '{start_date}'"
+        if end_date:
+            query += f" AND timestamp <= '{end_date}'"
+        
+        query += " ORDER BY timestamp ASC"
+        
+        try:
+            df = pd.read_sql(query, self.engine)
+            
+            if df.empty:
+                log.warning(f"No se encontraron datos para {symbol} {timeframe}")
+                return pd.DataFrame()
+            
+            df['timestamp'] = pd.to_datetime(df['timestamp'])
+            df.set_index('timestamp', inplace=True)
+            
+            log.info(f"Cargados {len(df)} registros de {symbol} {timeframe}")
+            
+            # Guardar en caché
+            if self.redis_client:
+                try:
+                    df_json = df.reset_index().to_json()
+                    self.redis_client.setex(cache_key, 3600, df_json)  # TTL 1 hora
+                    log.debug(f"Datos guardados en caché: {cache_key}")
+                except Exception as e:
+                    log.warning(f"Error guardando en caché: {e}")
+            
+            return df
+            
+        except Exception as e:
+            log.error(f"Error cargando datos: {e}")
+            raise
+    
+    def get_latest_timestamp(self, symbol: str, timeframe: str) -> Optional[datetime]:
+        """
+        Obtiene el último timestamp disponible para un símbolo/timeframe
+        
+        Args:
+            symbol: Símbolo del par
+            timeframe: Timeframe
+            
+        Returns:
+            Último timestamp o None si no hay datos
+        """
+        query = f"""
+            SELECT MAX(timestamp) as last_timestamp
+            FROM ohlcv
+            WHERE symbol = '{symbol}' AND timeframe = '{timeframe}'
+        """
+        
+        try:
+            result = pd.read_sql(query, self.engine)
+            last_timestamp = result['last_timestamp'].iloc[0]
+            
+            if pd.isna(last_timestamp):
+                log.debug(f"No hay datos previos para {symbol} {timeframe}")
+                return None
+            
+            return last_timestamp
+            
+        except Exception as e:
+            log.error(f"Error obteniendo último timestamp: {e}")
+            return None
+    
+    def delete_ohlcv(
+        self,
+        symbol: str,
+        timeframe: str,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None
+    ) -> int:
+        """
+        Elimina datos OHLCV
+        
+        Args:
+            symbol: Símbolo del par
+            timeframe: Timeframe
+            start_date: Fecha inicio (opcional)
+            end_date: Fecha fin (opcional)
+            
+        Returns:
+            Número de registros eliminados
+        """
+        query = f"""
+            DELETE FROM ohlcv
+            WHERE symbol = '{symbol}' AND timeframe = '{timeframe}'
+        """
+        
+        if start_date:
+            query += f" AND timestamp >= '{start_date}'"
+        if end_date:
+            query += f" AND timestamp <= '{end_date}'"
+        
+        try:
+            result = self.engine.execute(query)
+            deleted_count = result.rowcount
+            log.info(f"Eliminados {deleted_count} registros")
+            
+            # Invalidar caché
+            if self.redis_client:
+                cache_pattern = f"ohlcv:{symbol}:{timeframe}:*"
+                keys = self.redis_client.keys(cache_pattern)
+                if keys:
+                    self.redis_client.delete(*keys)
+            
+            return deleted_count
+            
+        except Exception as e:
+            log.error(f"Error eliminando datos: {e}")
+            raise
+    
+    def get_available_data(self) -> pd.DataFrame:
+        """
+        Obtiene resumen de datos disponibles en la base de datos
+        
+        Returns:
+            DataFrame con información de símbolos y timeframes disponibles
+        """
+        query = """
+            SELECT 
+                symbol,
+                timeframe,
+                MIN(timestamp) as first_date,
+                MAX(timestamp) as last_date,
+                COUNT(*) as record_count
+            FROM ohlcv
+            GROUP BY symbol, timeframe
+            ORDER BY symbol, timeframe
+        """
+        
+        try:
+            df = pd.read_sql(query, self.engine)
+            log.info(f"Información de {len(df)} conjuntos de datos")
+            return df
+        except Exception as e:
+            log.error(f"Error obteniendo información de datos: {e}")
+            raise
+    
+    def close(self):
+        """
+        Cierra conexiones
+        """
+        try:
+            self.session.close()
+            self.engine.dispose()
+            if self.redis_client:
+                self.redis_client.close()
+            log.info("Conexiones cerradas")
+        except Exception as e:
+            log.error(f"Error cerrando conexiones: {e}")