Multi-machine optimization loop:
- VPS orchestrator coordinates training and LLM analysis
- Windows PC (RTX 4070 Ti) runs XGBoost/LightGBM/CatBoost with GPU
- Mac Mini runs qwen3.5:27b via Ollama for strategy analysis

Includes 60+ technical features, walk-forward validation, confidence-scaled position sizing, and automated convergence detection.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
77 lines
2.4 KiB
Python
Executable File
77 lines
2.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Fetch BTC/USDT OHLCV data from Binance using ccxt."""
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import ccxt
|
|
import pandas as pd
|
|
from datetime import datetime, timezone
|
|
|
|
# Output directory: a "data" folder one level above the directory that
# contains this script.
DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data")
# Market symbol passed to the exchange's fetch_ohlcv call.
SYMBOL = "BTC/USDT"
# Exchange name.
# NOTE(review): not referenced in the visible code, which instantiates
# ccxt.binance directly -- confirm whether this is meant to select the exchange.
EXCHANGE_ID = "binance"
# How far back from "now" the download starts, in 365-day years.
YEARS_HISTORY = 2
# Number of candles requested per API call.
LIMIT_PER_REQUEST = 1000 # Binance max
|
|
|
|
|
|
def fetch_ohlcv(
    timeframe: str,
    *,
    max_retries: int = 5,
    retry_delay: float = 5.0,
) -> pd.DataFrame:
    """Fetch OHLCV candles for ``SYMBOL`` from Binance for one timeframe.

    Pages forward from ``YEARS_HISTORY`` (365-day) years before the current
    time up to now, requesting ``LIMIT_PER_REQUEST`` candles per call.

    Args:
        timeframe: Candle interval; only "1h" and "4h" are supported.
        max_retries: How many consecutive transient (network-level) failures
            are retried for a single page before the error is re-raised.
        retry_delay: Seconds to wait before each retry.

    Returns:
        A DataFrame with columns ``timestamp`` (timezone-aware UTC), ``open``,
        ``high``, ``low``, ``close`` and ``volume``, de-duplicated on
        ``timestamp`` and sorted ascending. Empty if the exchange returned no
        candles.

    Raises:
        ValueError: If ``timeframe`` is not supported.
        ccxt.NetworkError: If a page still fails after ``max_retries``
            consecutive retries. Any other exception raised by the exchange
            client propagates immediately instead of being retried.
    """
    candle_ms_by_timeframe = {
        "1h": 3600 * 1000,
        "4h": 4 * 3600 * 1000,
    }
    # Validate before building the exchange client so bad input fails fast.
    if timeframe not in candle_ms_by_timeframe:
        raise ValueError(f"Unsupported timeframe: {timeframe}")
    ms_per_candle = candle_ms_by_timeframe[timeframe]

    exchange = ccxt.binance({"enableRateLimit": True})

    # Calculate start time
    now_ms = int(time.time() * 1000)
    since = now_ms - (YEARS_HISTORY * 365 * 24 * 3600 * 1000)

    all_candles = []
    start_label = datetime.fromtimestamp(since / 1000, tz=timezone.utc).strftime('%Y-%m-%d')
    print(f" Fetching {SYMBOL} {timeframe} from {start_label}...")

    consecutive_failures = 0
    while since < now_ms:
        try:
            candles = exchange.fetch_ohlcv(SYMBOL, timeframe, since=since, limit=LIMIT_PER_REQUEST)
        except ccxt.NetworkError as e:
            # Only transient, network-level problems are worth retrying, and
            # only a bounded number of times: retrying forever would hang the
            # script when the failure is permanent.
            consecutive_failures += 1
            if consecutive_failures > max_retries:
                raise
            print(f" Warning: fetch error, retrying in {retry_delay:g}s — {e}")
            time.sleep(retry_delay)
            continue
        consecutive_failures = 0

        if not candles:
            break

        all_candles.extend(candles)

        # Resume one candle after the last one received.
        next_since = candles[-1][0] + ms_per_candle
        if next_since <= since:
            # The response did not move the cursor forward; stop rather than
            # request the same page forever.
            break
        since = next_since

        sys.stdout.write(f"\r Downloaded {len(all_candles)} candles...")
        sys.stdout.flush()
        time.sleep(exchange.rateLimit / 1000)

    print(f"\r Downloaded {len(all_candles)} candles total.")

    df = pd.DataFrame(all_candles, columns=["timestamp", "open", "high", "low", "close", "volume"])
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms", utc=True)
    df = df.drop_duplicates(subset=["timestamp"]).sort_values("timestamp").reset_index(drop=True)
    return df
|
|
|
|
|
|
def main():
    """Download the 1h and 4h candle history and write each to a CSV.

    Creates ``DATA_DIR`` if needed and writes ``btc_<timeframe>.csv`` there
    for every timeframe that returned data. A timeframe that returns no
    candles is reported and skipped.
    """
    os.makedirs(DATA_DIR, exist_ok=True)

    for tf in ("1h", "4h"):
        print(f"\n[*] Fetching {tf} data...")
        df = fetch_ohlcv(tf)

        if df.empty:
            # Indexing the first/last row of an empty frame raises IndexError,
            # and writing it out would replace any existing CSV with an
            # empty one, so skip this timeframe instead.
            print(f" Warning: no {tf} candles returned; skipping save.")
            continue

        out_path = os.path.join(DATA_DIR, f"btc_{tf}.csv")
        df.to_csv(out_path, index=False)
        print(f" Saved {len(df)} rows to {out_path}")
        print(f" Range: {df['timestamp'].iloc[0]} → {df['timestamp'].iloc[-1]}")

    print("\nData fetch complete!")
|
|
|
|
|
|
# Run the download only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|