pequeños cambios para que el step 1 sea mas robusto

This commit is contained in:
dam
2026-03-03 08:55:43 +01:00
parent 8259e85b68
commit 9a59879988
7 changed files with 184 additions and 79 deletions

View File

@@ -98,8 +98,10 @@ def inspect_strategies_config(
data_quality: Dict[str, Any], data_quality: Dict[str, Any],
include_series: bool, include_series: bool,
progress_callback=None, progress_callback=None,
df: pd.DataFrame | None = None,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
if df is None:
df = storage.load_ohlcv(symbol=payload.symbol, timeframe=payload.timeframe) df = storage.load_ohlcv(symbol=payload.symbol, timeframe=payload.timeframe)
if df is None or df.empty: if df is None or df.empty:
return { return {
@@ -117,14 +119,24 @@ def inspect_strategies_config(
} }
if data_quality.get("status") == "fail": if data_quality.get("status") == "fail":
return { out = {
"valid": False, "valid": False,
"status": "fail", "status": "fail",
"checks": checks, "checks": checks,
"message": "Step 1 data quality is FAIL. Strategies cannot be validated.", "message": "Step 1 data quality is FAIL. Strategies cannot be validated.",
"results": [], "results": [],
"series": {} if include_series else None, "config": {
"wf": payload.wf.model_dump(),
"risk": payload.risk.model_dump(),
"stop": payload.stop.model_dump(),
"global_rules": payload.global_rules.model_dump(),
"commission": payload.commission,
"slippage": payload.slippage,
},
} }
if include_series:
out["series"] = {"strategies": {}}
return out
stop_loss = _build_stop_loss(payload.stop) stop_loss = _build_stop_loss(payload.stop)
base_sizer = _build_position_sizer(payload.risk) base_sizer = _build_position_sizer(payload.risk)
@@ -183,7 +195,7 @@ def inspect_strategies_config(
"message": msg, "message": msg,
"warnings": [], "warnings": [],
"series_available": False, "series_available": False,
"series_error": "Unknown strategy_id (not in registry)", "series_error": msg,
"n_windows": 0, "n_windows": 0,
"oos_final_equity": payload.account_equity, "oos_final_equity": payload.account_equity,
"oos_total_return_pct": 0.0, "oos_total_return_pct": 0.0,
@@ -204,9 +216,10 @@ def inspect_strategies_config(
# Wrapper sizer # Wrapper sizer
class _CappedSizer(type(base_sizer)): class _CappedSizer:
def __init__(self, inner): def __init__(self, inner, max_position_fraction: float):
self.inner = inner self.inner = inner
self.max_position_fraction = max_position_fraction
def calculate_size(self, *, capital, entry_price, stop_price=None, max_capital=None, volatility=None): def calculate_size(self, *, capital, entry_price, stop_price=None, max_capital=None, volatility=None):
u = self.inner.calculate_size( u = self.inner.calculate_size(
@@ -220,10 +233,10 @@ def inspect_strategies_config(
units=float(u), units=float(u),
capital=float(capital), capital=float(capital),
entry_price=float(entry_price), entry_price=float(entry_price),
max_position_fraction=float(payload.risk.max_position_fraction), max_position_fraction=float(self.max_position_fraction),
) )
capped_sizer = _CappedSizer(base_sizer) capped_sizer = _CappedSizer(base_sizer, payload.risk.max_position_fraction)
log.info(f"🧠 Step3 | WF run | strategy={sid}") log.info(f"🧠 Step3 | WF run | strategy={sid}")
@@ -302,11 +315,6 @@ def inspect_strategies_config(
oos_dd = win_df["max_dd_pct"].tolist() oos_dd = win_df["max_dd_pct"].tolist()
n_windows = len(win_df) n_windows = len(win_df)
required_cols = {"return_pct", "max_dd_pct", "trades", "window", "train_start", "train_end", "test_start", "test_end", "sharpe", "params"}
missing = required_cols - set(win_df.columns)
if missing:
raise ValueError(f"WF windows missing required columns: {sorted(missing)}")
trades = win_df["trades"].astype(int).tolist() trades = win_df["trades"].astype(int).tolist()
too_few = sum(t < int(payload.wf.min_trades_test) for t in trades) too_few = sum(t < int(payload.wf.min_trades_test) for t in trades)
@@ -401,6 +409,14 @@ def inspect_strategies_config(
"checks": checks, "checks": checks,
"message": human_msg, "message": human_msg,
"results": results, "results": results,
"config": {
"wf": payload.wf.model_dump(),
"risk": payload.risk.model_dump(),
"stop": payload.stop.model_dump(),
"global_rules": payload.global_rules.model_dump(),
"commission": payload.commission,
"slippage": payload.slippage,
},
} }
if include_series: if include_series:

View File

@@ -3,7 +3,7 @@
import logging import logging
import threading import threading
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Dict, Optional from typing import Dict, Optional, Literal
import pandas as pd import pandas as pd
@@ -93,7 +93,7 @@ def _db_summary(
"candles": int(row["candles"] or 0), "candles": int(row["candles"] or 0),
} }
def analyze_data_quality(df: pd.DataFrame, timeframe: str): def analyze_data_quality(df: pd.DataFrame, timeframe: str, mode: Literal["crypto", "session"] = "crypto"):
# --- timeframe a timedelta --- # --- timeframe a timedelta ---
tf_map = { tf_map = {
"1m": timedelta(minutes=1), "1m": timedelta(minutes=1),
@@ -105,28 +105,24 @@ def analyze_data_quality(df: pd.DataFrame, timeframe: str):
"1d": timedelta(days=1), "1d": timedelta(days=1),
} }
tf_delta = tf_map.get(timeframe) tf_delta = tf_map.get(timeframe)
if not tf_delta or df.empty: if not tf_delta or df is None or df.empty:
return None return None
# --- continuidad / gaps --- # sanity: DatetimeIndex ordenado
diffs = df.index.to_series().diff().dropna() if not isinstance(df.index, pd.DatetimeIndex):
gap_threshold_warn = tf_delta * 1.5 return {
gap_threshold_fail = tf_delta * 5 "status": "fail",
"checks": {"index": "fail"},
big_gaps = diffs[diffs > gap_threshold_warn] "message": "El dataframe no tiene DatetimeIndex.",
max_gap = diffs.max() }
if not df.index.is_monotonic_increasing:
continuity = "ok" df = df.sort_index()
if any(diffs > gap_threshold_fail):
continuity = "fail"
elif len(big_gaps) > 0:
continuity = "warning"
# --- cobertura --- # --- cobertura ---
start, end = df.index.min(), df.index.max() start, end = df.index.min(), df.index.max()
expected = int((end - start) / tf_delta) + 1 expected = int((end - start) / tf_delta) + 1
actual = len(df) actual = len(df)
ratio = actual / expected if expected > 0 else 0 ratio = actual / expected if expected > 0 else 0.0
coverage_status = "ok" coverage_status = "ok"
if ratio < 0.98: if ratio < 0.98:
@@ -134,12 +130,52 @@ def analyze_data_quality(df: pd.DataFrame, timeframe: str):
elif ratio < 0.995: elif ratio < 0.995:
coverage_status = "warning" coverage_status = "warning"
# --- continuidad / gaps ---
diffs = df.index.to_series().diff().dropna()
# Umbrales por timeframe (más realista para 1d)
if timeframe == "1d":
warn_gap = tf_delta * 2 # >= 2 días => warning
fail_gap = tf_delta * 10 # >= 10 días => fail
else:
warn_gap = max(tf_delta * 6, timedelta(hours=6)) # intradía
fail_gap = max(tf_delta * 48, timedelta(days=2)) # intradía
# Si es mercado por sesiones (acciones), un gap de fin de semana no es "malo".
# Aproximación simple: para 1d, no consideramos gaps de 2-3 días como warning/fail.
# (Sin calendario real, esto es la mejor opción práctica).
diffs_for_eval = diffs
if mode == "session" and timeframe == "1d":
diffs_for_eval = diffs[diffs > timedelta(days=3)] # ignorar 2-3 días típicos de finde
big_gaps = diffs[diffs > tf_delta * 1.5]
max_gap = diffs.max() if len(diffs) else pd.NaT
gaps_warn = diffs_for_eval[diffs_for_eval >= warn_gap]
gaps_fail = diffs_for_eval[diffs_for_eval >= fail_gap]
gaps_warn_ratio = (len(gaps_warn) / expected) if expected > 0 else 0.0
continuity = "ok"
if pd.notna(max_gap):
if len(gaps_fail) > 0 or gaps_warn_ratio > 0.02:
continuity = "fail"
elif len(gaps_warn) > 0:
continuity = "warning"
# --- volumen --- # --- volumen ---
zero_vol_ratio = (df["volume"] == 0).mean() zero_vol_ratio = None
nan_vol_ratio = None
volume_status = "warning"
if "volume" in df.columns:
nan_vol_ratio = df["volume"].isna().mean()
zero_vol_ratio = (df["volume"].fillna(0) == 0).mean()
volume_status = "ok" volume_status = "ok"
if zero_vol_ratio > 0.01: if nan_vol_ratio > 0.005 or zero_vol_ratio > 0.01:
volume_status = "fail" volume_status = "fail"
elif zero_vol_ratio > 0.001: elif nan_vol_ratio > 0.0005 or zero_vol_ratio > 0.001:
volume_status = "warning" volume_status = "warning"
# --- estado global --- # --- estado global ---
@@ -154,6 +190,7 @@ def analyze_data_quality(df: pd.DataFrame, timeframe: str):
status = "ok" status = "ok"
msg = "Datos continuos y completos. Aptos para calibración." msg = "Datos continuos y completos. Aptos para calibración."
# ⬇️ Mantener shape compatible con tu schema actual
return { return {
"status": status, "status": status,
"checks": { "checks": {
@@ -197,7 +234,7 @@ def inspect_calibration_data(
start_date=payload.start_date, start_date=payload.start_date,
end_date=payload.end_date, end_date=payload.end_date,
) )
data_quality = analyze_data_quality(df, payload.timeframe) data_quality = analyze_data_quality(df, payload.timeframe, mode=payload.mode)
return CalibrationDataResponse( return CalibrationDataResponse(
symbol=payload.symbol, symbol=payload.symbol,

View File

@@ -90,6 +90,7 @@ def inspect_strategies(
payload=payload, payload=payload,
data_quality=data_quality, data_quality=data_quality,
include_series=False, include_series=False,
df=df,
) )
return CalibrationStrategiesInspectResponse(**result) return CalibrationStrategiesInspectResponse(**result)
@@ -110,6 +111,7 @@ def validate_strategies(
payload=payload, payload=payload,
data_quality=data_quality, data_quality=data_quality,
include_series=True, include_series=True,
df=df,
) )
return CalibrationStrategiesValidateResponse(**result) return CalibrationStrategiesValidateResponse(**result)
@@ -132,6 +134,7 @@ def report_strategies(
payload=payload, payload=payload,
data_quality=data_quality, data_quality=data_quality,
include_series=True, include_series=True,
df=df,
) )
# --------------------------------------------- # ---------------------------------------------
@@ -187,6 +190,7 @@ def run_strategies_async(
WF_JOBS[job_id] = { WF_JOBS[job_id] = {
"status": "running", "status": "running",
"error": None,
"progress": 0, "progress": 0,
"current_window": 0, "current_window": 0,
"total_windows": 0, "total_windows": 0,
@@ -199,22 +203,45 @@ def run_strategies_async(
def progress_cb(window_id, total_windows): def progress_cb(window_id, total_windows):
WF_JOBS[job_id]["current_window"] = window_id WF_JOBS[job_id]["current_window"] = window_id
WF_JOBS[job_id]["total_windows"] = total_windows WF_JOBS[job_id]["total_windows"] = total_windows
WF_JOBS[job_id]["progress"] = int(
window_id / total_windows * 100
)
if total_windows and total_windows > 0:
WF_JOBS[job_id]["progress"] = int(window_id / total_windows * 100)
else:
WF_JOBS[job_id]["progress"] = 0
try:
# 1) Cargar OHLCV (igual que /inspect y /validate)
df = storage.load_ohlcv(symbol=payload.symbol, timeframe=payload.timeframe)
if df is None or df.empty:
WF_JOBS[job_id]["status"] = "error"
WF_JOBS[job_id]["progress"] = 0
WF_JOBS[job_id]["error"] = "No OHLCV data found. Run Step 1 first."
return
# 2) Calcular data_quality real
from .calibration_data import analyze_data_quality
data_quality = analyze_data_quality(df, payload.timeframe)
# 3) Ejecutar con data_quality real
result = inspect_strategies_config( result = inspect_strategies_config(
storage=storage, storage=storage,
payload=payload, payload=payload,
data_quality={"status": "ok"}, data_quality=data_quality,
include_series=True, include_series=True,
progress_callback=progress_cb, # ← lo pasamos progress_callback=progress_cb,
df=df,
) )
WF_JOBS[job_id]["status"] = "done" WF_JOBS[job_id]["status"] = "done"
WF_JOBS[job_id]["progress"] = 100 WF_JOBS[job_id]["progress"] = 100
WF_JOBS[job_id]["result"] = result WF_JOBS[job_id]["result"] = result
except Exception as e:
logger.exception("❌ WF async job failed")
WF_JOBS[job_id]["status"] = "error"
WF_JOBS[job_id]["error"] = str(e)
WF_JOBS[job_id]["progress"] = 0
thread = threading.Thread(target=background_job) thread = threading.Thread(target=background_job)
thread.start() thread.start()

View File

@@ -7,26 +7,6 @@ from typing import Optional, Dict, Literal
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
# ==================================================
# Inspect (DB)
# ==================================================
class CalibrationDataRequest(BaseModel):
symbol: str = Field(..., examples=["BTC/USDT"])
timeframe: str = Field(..., examples=["1h"])
start_date: Optional[datetime] = None
end_date: Optional[datetime] = None
class CalibrationDataResponse(BaseModel):
symbol: str
timeframe: str
first_available: Optional[datetime]
last_available: Optional[datetime]
candles_count: int
valid: bool
# ================================================== # ==================================================
# Download (Exchange -> DB) (modo "sync" / síncrono) # Download (Exchange -> DB) (modo "sync" / síncrono)
# ================================================== # ==================================================
@@ -147,7 +127,7 @@ class CalibrationDataRequest(BaseModel):
timeframe: str timeframe: str
start_date: Optional[datetime] = None start_date: Optional[datetime] = None
end_date: Optional[datetime] = None end_date: Optional[datetime] = None
mode: Literal["crypto", "session"] = "crypto"
QualityStatus = Literal["ok", "warning", "fail"] QualityStatus = Literal["ok", "warning", "fail"]

View File

@@ -76,9 +76,10 @@ class CalibrationStrategiesInspectResponse(BaseModel):
status: Literal["ok", "warning", "fail"] status: Literal["ok", "warning", "fail"]
checks: Dict[str, Any] checks: Dict[str, Any]
message: str message: str
results: List[StrategyRunResultSchema] results: List[StrategyRunResultSchema]
config: Optional[Dict[str, Any]] = None
class CalibrationStrategiesValidateResponse(CalibrationStrategiesInspectResponse): class CalibrationStrategiesValidateResponse(CalibrationStrategiesInspectResponse):
series: Dict[str, Any] series: Dict[str, Any]

View File

@@ -15,6 +15,7 @@ let downloadPollTimer = null;
async function inspectCalibrationData() { async function inspectCalibrationData() {
console.log("[calibration_data] inspectCalibrationData() START ✅"); console.log("[calibration_data] inspectCalibrationData() START ✅");
const mode = document.getElementById("market_mode")?.value || "crypto";
const symbol = document.getElementById("symbol")?.value; const symbol = document.getElementById("symbol")?.value;
const timeframe = document.getElementById("timeframe")?.value; const timeframe = document.getElementById("timeframe")?.value;
const start_date = document.getElementById("start_date")?.value || null; const start_date = document.getElementById("start_date")?.value || null;
@@ -22,7 +23,7 @@ async function inspectCalibrationData() {
const resultEl = document.getElementById("inspect-output"); const resultEl = document.getElementById("inspect-output");
const payload = { symbol, timeframe }; const payload = { symbol, timeframe, mode };
if (start_date) payload.start_date = start_date; if (start_date) payload.start_date = start_date;
if (end_date) payload.end_date = end_date; if (end_date) payload.end_date = end_date;
@@ -57,8 +58,11 @@ async function inspectCalibrationData() {
const dqStatus = data.data_quality?.status ?? "ok"; const dqStatus = data.data_quality?.status ?? "ok";
if (data.valid && dqStatus !== "fail") { if (data.valid && dqStatus !== "fail") {
enableNextStep(); enableNextStep();
localStorage.setItem("calibration.mode", mode);
localStorage.setItem("calibration.symbol", data.symbol); localStorage.setItem("calibration.symbol", data.symbol);
localStorage.setItem("calibration.timeframe", data.timeframe); localStorage.setItem("calibration.timeframe", data.timeframe);
localStorage.setItem("calibration.start_date", start_date || "");
localStorage.setItem("calibration.end_date", end_date || "");
} }
} catch (err) { } catch (err) {
@@ -77,17 +81,25 @@ async function inspectCalibrationData() {
async function startDownloadJob() { async function startDownloadJob() {
console.log("[calibration_data] startDownloadJob()"); console.log("[calibration_data] startDownloadJob()");
const mode = document.getElementById("market_mode")?.value || "crypto";
const symbol = document.getElementById("symbol")?.value; const symbol = document.getElementById("symbol")?.value;
const timeframe = document.getElementById("timeframe")?.value; const timeframe = document.getElementById("timeframe")?.value;
const start_date = document.getElementById("start_date")?.value || null; const start_date = document.getElementById("start_date")?.value || null;
const end_date = document.getElementById("end_date")?.value || null; const end_date = document.getElementById("end_date")?.value || null;
const payload = { symbol, timeframe }; const payload = { symbol, timeframe, mode };
if (start_date) payload.start_date = start_date; if (start_date) payload.start_date = start_date;
if (end_date) payload.end_date = end_date; if (end_date) payload.end_date = end_date;
console.log("[calibration_data] download payload:", payload); console.log("[calibration_data] download payload:", payload);
// ✅ Persist UI state (para restore tras recargar)
if (mode) localStorage.setItem("calibration.mode", mode);
if (symbol) localStorage.setItem("calibration.symbol", symbol);
if (timeframe) localStorage.setItem("calibration.timeframe", timeframe);
localStorage.setItem("calibration.start_date", start_date || "");
localStorage.setItem("calibration.end_date", end_date || "");
try { try {
const res = await fetch("/api/v2/calibration/data/download/job", { const res = await fetch("/api/v2/calibration/data/download/job", {
method: "POST", method: "POST",
@@ -290,6 +302,29 @@ function renderDataSummary(data) {
document.addEventListener("DOMContentLoaded", () => { document.addEventListener("DOMContentLoaded", () => {
console.log("[calibration_data] DOMContentLoaded ✅"); console.log("[calibration_data] DOMContentLoaded ✅");
// --- Restore persisted calibration settings ---
const savedMode = localStorage.getItem("calibration.mode");
const savedSymbol = localStorage.getItem("calibration.symbol");
const savedTimeframe = localStorage.getItem("calibration.timeframe");
const savedStartDate = localStorage.getItem("calibration.start_date");
const savedEndDate = localStorage.getItem("calibration.end_date");
const modeEl = document.getElementById("market_mode");
if (modeEl && savedMode) modeEl.value = savedMode;
const symbolEl = document.getElementById("symbol");
if (symbolEl && savedSymbol) symbolEl.value = savedSymbol;
const tfEl = document.getElementById("timeframe");
if (tfEl && savedTimeframe) tfEl.value = savedTimeframe;
const startEl = document.getElementById("start_date");
if (startEl && savedStartDate) startEl.value = savedStartDate;
const endEl = document.getElementById("end_date");
if (endEl && savedEndDate) endEl.value = savedEndDate;
document document
.getElementById("inspect-data-btn") .getElementById("inspect-data-btn")
?.addEventListener("click", inspectCalibrationData); ?.addEventListener("click", inspectCalibrationData);

View File

@@ -58,12 +58,21 @@
<div class="card-body"> <div class="card-body">
<div class="row g-3"> <div class="row g-3">
<div class="col-md-3">
<div class="col-md-2">
<label class="form-label">Modo de mercado</label>
<select id="market_mode" class="form-select">
<option value="crypto" selected>Crypto (24/7)</option>
<option value="session">Acciones / ETFs (sesiones)</option>
</select>
</div>
<div class="col-md-2">
<label class="form-label">Símbolo</label> <label class="form-label">Símbolo</label>
<input id="symbol" class="form-control" value="BTC/USDT"> <input id="symbol" class="form-control" value="BTC/USDT">
</div> </div>
<div class="col-md-3"> <div class="col-md-2">
<label class="form-label">Timeframe</label> <label class="form-label">Timeframe</label>
<select id="timeframe" class="form-select"> <select id="timeframe" class="form-select">
<option value="1m">1m</option> <option value="1m">1m</option>
@@ -76,12 +85,12 @@
</select> </select>
</div> </div>
<div class="col-md-3"> <div class="col-md-2">
<label class="form-label">Fecha inicio (opcional)</label> <label class="form-label">Fecha inicio (opcional)</label>
<input id="start_date" type="date" class="form-control"> <input id="start_date" type="date" class="form-control">
</div> </div>
<div class="col-md-3"> <div class="col-md-2">
<label class="form-label">Fecha fin (opcional)</label> <label class="form-label">Fecha fin (opcional)</label>
<input id="end_date" type="date" class="form-control"> <input id="end_date" type="date" class="form-control">
</div> </div>