pequeños cambios para que el step 1 sea mas robusto

This commit is contained in:
dam
2026-03-03 08:55:43 +01:00
parent 8259e85b68
commit 9a59879988
7 changed files with 184 additions and 79 deletions

View File

@@ -98,9 +98,11 @@ def inspect_strategies_config(
data_quality: Dict[str, Any],
include_series: bool,
progress_callback=None,
df: pd.DataFrame | None = None,
) -> Dict[str, Any]:
df = storage.load_ohlcv(symbol=payload.symbol, timeframe=payload.timeframe)
if df is None:
df = storage.load_ohlcv(symbol=payload.symbol, timeframe=payload.timeframe)
if df is None or df.empty:
return {
"valid": False,
@@ -117,14 +119,24 @@ def inspect_strategies_config(
}
if data_quality.get("status") == "fail":
return {
out = {
"valid": False,
"status": "fail",
"checks": checks,
"message": "Step 1 data quality is FAIL. Strategies cannot be validated.",
"results": [],
"series": {} if include_series else None,
"config": {
"wf": payload.wf.model_dump(),
"risk": payload.risk.model_dump(),
"stop": payload.stop.model_dump(),
"global_rules": payload.global_rules.model_dump(),
"commission": payload.commission,
"slippage": payload.slippage,
},
}
if include_series:
out["series"] = {"strategies": {}}
return out
stop_loss = _build_stop_loss(payload.stop)
base_sizer = _build_position_sizer(payload.risk)
@@ -183,7 +195,7 @@ def inspect_strategies_config(
"message": msg,
"warnings": [],
"series_available": False,
"series_error": "Unknown strategy_id (not in registry)",
"series_error": msg,
"n_windows": 0,
"oos_final_equity": payload.account_equity,
"oos_total_return_pct": 0.0,
@@ -204,9 +216,10 @@ def inspect_strategies_config(
# Wrapper sizer
class _CappedSizer(type(base_sizer)):
def __init__(self, inner):
class _CappedSizer:
def __init__(self, inner, max_position_fraction: float):
self.inner = inner
self.max_position_fraction = max_position_fraction
def calculate_size(self, *, capital, entry_price, stop_price=None, max_capital=None, volatility=None):
u = self.inner.calculate_size(
@@ -220,10 +233,10 @@ def inspect_strategies_config(
units=float(u),
capital=float(capital),
entry_price=float(entry_price),
max_position_fraction=float(payload.risk.max_position_fraction),
max_position_fraction=float(self.max_position_fraction),
)
capped_sizer = _CappedSizer(base_sizer)
capped_sizer = _CappedSizer(base_sizer, payload.risk.max_position_fraction)
log.info(f"🧠 Step3 | WF run | strategy={sid}")
@@ -302,11 +315,6 @@ def inspect_strategies_config(
oos_dd = win_df["max_dd_pct"].tolist()
n_windows = len(win_df)
required_cols = {"return_pct", "max_dd_pct", "trades", "window", "train_start", "train_end", "test_start", "test_end", "sharpe", "params"}
missing = required_cols - set(win_df.columns)
if missing:
raise ValueError(f"WF windows missing required columns: {sorted(missing)}")
trades = win_df["trades"].astype(int).tolist()
too_few = sum(t < int(payload.wf.min_trades_test) for t in trades)
@@ -401,6 +409,14 @@ def inspect_strategies_config(
"checks": checks,
"message": human_msg,
"results": results,
"config": {
"wf": payload.wf.model_dump(),
"risk": payload.risk.model_dump(),
"stop": payload.stop.model_dump(),
"global_rules": payload.global_rules.model_dump(),
"commission": payload.commission,
"slippage": payload.slippage,
},
}
if include_series:

View File

@@ -3,7 +3,7 @@
import logging
import threading
from datetime import datetime, timedelta
from typing import Dict, Optional
from typing import Dict, Optional, Literal
import pandas as pd
@@ -93,7 +93,7 @@ def _db_summary(
"candles": int(row["candles"] or 0),
}
def analyze_data_quality(df: pd.DataFrame, timeframe: str):
def analyze_data_quality(df: pd.DataFrame, timeframe: str, mode: Literal["crypto", "session"] = "crypto"):
# --- timeframe a timedelta ---
tf_map = {
"1m": timedelta(minutes=1),
@@ -105,28 +105,24 @@ def analyze_data_quality(df: pd.DataFrame, timeframe: str):
"1d": timedelta(days=1),
}
tf_delta = tf_map.get(timeframe)
if not tf_delta or df.empty:
if not tf_delta or df is None or df.empty:
return None
# --- continuidad / gaps ---
diffs = df.index.to_series().diff().dropna()
gap_threshold_warn = tf_delta * 1.5
gap_threshold_fail = tf_delta * 5
big_gaps = diffs[diffs > gap_threshold_warn]
max_gap = diffs.max()
continuity = "ok"
if any(diffs > gap_threshold_fail):
continuity = "fail"
elif len(big_gaps) > 0:
continuity = "warning"
# sanity: DatetimeIndex ordenado
if not isinstance(df.index, pd.DatetimeIndex):
return {
"status": "fail",
"checks": {"index": "fail"},
"message": "El dataframe no tiene DatetimeIndex.",
}
if not df.index.is_monotonic_increasing:
df = df.sort_index()
# --- cobertura ---
start, end = df.index.min(), df.index.max()
expected = int((end - start) / tf_delta) + 1
actual = len(df)
ratio = actual / expected if expected > 0 else 0
ratio = actual / expected if expected > 0 else 0.0
coverage_status = "ok"
if ratio < 0.98:
@@ -134,13 +130,53 @@ def analyze_data_quality(df: pd.DataFrame, timeframe: str):
elif ratio < 0.995:
coverage_status = "warning"
# --- continuidad / gaps ---
diffs = df.index.to_series().diff().dropna()
# Umbrales por timeframe (más realista para 1d)
if timeframe == "1d":
warn_gap = tf_delta * 2 # >= 2 días => warning
fail_gap = tf_delta * 10 # >= 10 días => fail
else:
warn_gap = max(tf_delta * 6, timedelta(hours=6)) # intradía
fail_gap = max(tf_delta * 48, timedelta(days=2)) # intradía
# Si es mercado por sesiones (acciones), un gap de fin de semana no es "malo".
# Aproximación simple: para 1d, no consideramos gaps de 2-3 días como warning/fail.
# (Sin calendario real, esto es la mejor opción práctica).
diffs_for_eval = diffs
if mode == "session" and timeframe == "1d":
diffs_for_eval = diffs[diffs > timedelta(days=3)] # ignorar 2-3 días típicos de finde
big_gaps = diffs[diffs > tf_delta * 1.5]
max_gap = diffs.max() if len(diffs) else pd.NaT
gaps_warn = diffs_for_eval[diffs_for_eval >= warn_gap]
gaps_fail = diffs_for_eval[diffs_for_eval >= fail_gap]
gaps_warn_ratio = (len(gaps_warn) / expected) if expected > 0 else 0.0
continuity = "ok"
if pd.notna(max_gap):
if len(gaps_fail) > 0 or gaps_warn_ratio > 0.02:
continuity = "fail"
elif len(gaps_warn) > 0:
continuity = "warning"
# --- volumen ---
zero_vol_ratio = (df["volume"] == 0).mean()
volume_status = "ok"
if zero_vol_ratio > 0.01:
volume_status = "fail"
elif zero_vol_ratio > 0.001:
volume_status = "warning"
zero_vol_ratio = None
nan_vol_ratio = None
volume_status = "warning"
if "volume" in df.columns:
nan_vol_ratio = df["volume"].isna().mean()
zero_vol_ratio = (df["volume"].fillna(0) == 0).mean()
volume_status = "ok"
if nan_vol_ratio > 0.005 or zero_vol_ratio > 0.01:
volume_status = "fail"
elif nan_vol_ratio > 0.0005 or zero_vol_ratio > 0.001:
volume_status = "warning"
# --- estado global ---
statuses = [continuity, coverage_status, volume_status]
@@ -154,6 +190,7 @@ def analyze_data_quality(df: pd.DataFrame, timeframe: str):
status = "ok"
msg = "Datos continuos y completos. Aptos para calibración."
# ⬇️ Mantener shape compatible con tu schema actual
return {
"status": status,
"checks": {
@@ -197,7 +234,7 @@ def inspect_calibration_data(
start_date=payload.start_date,
end_date=payload.end_date,
)
data_quality = analyze_data_quality(df, payload.timeframe)
data_quality = analyze_data_quality(df, payload.timeframe, mode=payload.mode)
return CalibrationDataResponse(
symbol=payload.symbol,

View File

@@ -90,6 +90,7 @@ def inspect_strategies(
payload=payload,
data_quality=data_quality,
include_series=False,
df=df,
)
return CalibrationStrategiesInspectResponse(**result)
@@ -110,6 +111,7 @@ def validate_strategies(
payload=payload,
data_quality=data_quality,
include_series=True,
df=df,
)
return CalibrationStrategiesValidateResponse(**result)
@@ -132,6 +134,7 @@ def report_strategies(
payload=payload,
data_quality=data_quality,
include_series=True,
df=df,
)
# ---------------------------------------------
@@ -187,6 +190,7 @@ def run_strategies_async(
WF_JOBS[job_id] = {
"status": "running",
"error": None,
"progress": 0,
"current_window": 0,
"total_windows": 0,
@@ -199,21 +203,44 @@ def run_strategies_async(
def progress_cb(window_id, total_windows):
WF_JOBS[job_id]["current_window"] = window_id
WF_JOBS[job_id]["total_windows"] = total_windows
WF_JOBS[job_id]["progress"] = int(
window_id / total_windows * 100
if total_windows and total_windows > 0:
WF_JOBS[job_id]["progress"] = int(window_id / total_windows * 100)
else:
WF_JOBS[job_id]["progress"] = 0
try:
# 1) Cargar OHLCV (igual que /inspect y /validate)
df = storage.load_ohlcv(symbol=payload.symbol, timeframe=payload.timeframe)
if df is None or df.empty:
WF_JOBS[job_id]["status"] = "error"
WF_JOBS[job_id]["progress"] = 0
WF_JOBS[job_id]["error"] = "No OHLCV data found. Run Step 1 first."
return
# 2) Calcular data_quality real
from .calibration_data import analyze_data_quality
data_quality = analyze_data_quality(df, payload.timeframe)
# 3) Ejecutar con data_quality real
result = inspect_strategies_config(
storage=storage,
payload=payload,
data_quality=data_quality,
include_series=True,
progress_callback=progress_cb,
df=df,
)
result = inspect_strategies_config(
storage=storage,
payload=payload,
data_quality={"status": "ok"},
include_series=True,
progress_callback=progress_cb, # ← lo pasamos
)
WF_JOBS[job_id]["status"] = "done"
WF_JOBS[job_id]["progress"] = 100
WF_JOBS[job_id]["result"] = result
WF_JOBS[job_id]["status"] = "done"
WF_JOBS[job_id]["progress"] = 100
WF_JOBS[job_id]["result"] = result
except Exception as e:
logger.exception("❌ WF async job failed")
WF_JOBS[job_id]["status"] = "error"
WF_JOBS[job_id]["error"] = str(e)
WF_JOBS[job_id]["progress"] = 0
thread = threading.Thread(target=background_job)
thread.start()

View File

@@ -7,26 +7,6 @@ from typing import Optional, Dict, Literal
from pydantic import BaseModel, Field
# ==================================================
# Inspect (DB)
# ==================================================
class CalibrationDataRequest(BaseModel):
symbol: str = Field(..., examples=["BTC/USDT"])
timeframe: str = Field(..., examples=["1h"])
start_date: Optional[datetime] = None
end_date: Optional[datetime] = None
class CalibrationDataResponse(BaseModel):
symbol: str
timeframe: str
first_available: Optional[datetime]
last_available: Optional[datetime]
candles_count: int
valid: bool
# ==================================================
# Download (Exchange -> DB) (modo "sync" / síncrono)
# ==================================================
@@ -147,7 +127,7 @@ class CalibrationDataRequest(BaseModel):
timeframe: str
start_date: Optional[datetime] = None
end_date: Optional[datetime] = None
mode: Literal["crypto", "session"] = "crypto"
QualityStatus = Literal["ok", "warning", "fail"]

View File

@@ -76,9 +76,10 @@ class CalibrationStrategiesInspectResponse(BaseModel):
status: Literal["ok", "warning", "fail"]
checks: Dict[str, Any]
message: str
results: List[StrategyRunResultSchema]
config: Optional[Dict[str, Any]] = None
class CalibrationStrategiesValidateResponse(CalibrationStrategiesInspectResponse):
series: Dict[str, Any]

View File

@@ -15,6 +15,7 @@ let downloadPollTimer = null;
async function inspectCalibrationData() {
console.log("[calibration_data] inspectCalibrationData() START ✅");
const mode = document.getElementById("market_mode")?.value || "crypto";
const symbol = document.getElementById("symbol")?.value;
const timeframe = document.getElementById("timeframe")?.value;
const start_date = document.getElementById("start_date")?.value || null;
@@ -22,7 +23,7 @@ async function inspectCalibrationData() {
const resultEl = document.getElementById("inspect-output");
const payload = { symbol, timeframe };
const payload = { symbol, timeframe, mode };
if (start_date) payload.start_date = start_date;
if (end_date) payload.end_date = end_date;
@@ -57,8 +58,11 @@ async function inspectCalibrationData() {
const dqStatus = data.data_quality?.status ?? "ok";
if (data.valid && dqStatus !== "fail") {
enableNextStep();
localStorage.setItem("calibration.mode", mode);
localStorage.setItem("calibration.symbol", data.symbol);
localStorage.setItem("calibration.timeframe", data.timeframe);
localStorage.setItem("calibration.start_date", start_date || "");
localStorage.setItem("calibration.end_date", end_date || "");
}
} catch (err) {
@@ -77,17 +81,25 @@ async function inspectCalibrationData() {
async function startDownloadJob() {
console.log("[calibration_data] startDownloadJob()");
const mode = document.getElementById("market_mode")?.value || "crypto";
const symbol = document.getElementById("symbol")?.value;
const timeframe = document.getElementById("timeframe")?.value;
const start_date = document.getElementById("start_date")?.value || null;
const end_date = document.getElementById("end_date")?.value || null;
const payload = { symbol, timeframe };
const payload = { symbol, timeframe, mode };
if (start_date) payload.start_date = start_date;
if (end_date) payload.end_date = end_date;
console.log("[calibration_data] download payload:", payload);
// ✅ Persist UI state (para restore tras recargar)
if (mode) localStorage.setItem("calibration.mode", mode);
if (symbol) localStorage.setItem("calibration.symbol", symbol);
if (timeframe) localStorage.setItem("calibration.timeframe", timeframe);
localStorage.setItem("calibration.start_date", start_date || "");
localStorage.setItem("calibration.end_date", end_date || "");
try {
const res = await fetch("/api/v2/calibration/data/download/job", {
method: "POST",
@@ -290,6 +302,29 @@ function renderDataSummary(data) {
document.addEventListener("DOMContentLoaded", () => {
console.log("[calibration_data] DOMContentLoaded ✅");
// --- Restore persisted calibration settings ---
const savedMode = localStorage.getItem("calibration.mode");
const savedSymbol = localStorage.getItem("calibration.symbol");
const savedTimeframe = localStorage.getItem("calibration.timeframe");
const savedStartDate = localStorage.getItem("calibration.start_date");
const savedEndDate = localStorage.getItem("calibration.end_date");
const modeEl = document.getElementById("market_mode");
if (modeEl && savedMode) modeEl.value = savedMode;
const symbolEl = document.getElementById("symbol");
if (symbolEl && savedSymbol) symbolEl.value = savedSymbol;
const tfEl = document.getElementById("timeframe");
if (tfEl && savedTimeframe) tfEl.value = savedTimeframe;
const startEl = document.getElementById("start_date");
if (startEl && savedStartDate) startEl.value = savedStartDate;
const endEl = document.getElementById("end_date");
if (endEl && savedEndDate) endEl.value = savedEndDate;
document
.getElementById("inspect-data-btn")
?.addEventListener("click", inspectCalibrationData);

View File

@@ -58,12 +58,21 @@
<div class="card-body">
<div class="row g-3">
<div class="col-md-3">
<div class="col-md-2">
<label class="form-label">Modo de mercado</label>
<select id="market_mode" class="form-select">
<option value="crypto" selected>Crypto (24/7)</option>
<option value="session">Acciones / ETFs (sesiones)</option>
</select>
</div>
<div class="col-md-2">
<label class="form-label">Símbolo</label>
<input id="symbol" class="form-control" value="BTC/USDT">
</div>
<div class="col-md-3">
<div class="col-md-2">
<label class="form-label">Timeframe</label>
<select id="timeframe" class="form-select">
<option value="1m">1m</option>
@@ -76,12 +85,12 @@
</select>
</div>
<div class="col-md-3">
<div class="col-md-2">
<label class="form-label">Fecha inicio (opcional)</label>
<input id="start_date" type="date" class="form-control">
</div>
<div class="col-md-3">
<div class="col-md-2">
<label class="form-label">Fecha fin (opcional)</label>
<input id="end_date" type="date" class="form-control">
</div>