fix: use binanceus for data fetch (binance geo-blocked), add multi-exchange fallback
This commit is contained in:
parent
f13e1679cd
commit
c2eab35811
BIN
__pycache__/orchestrator.cpython-313.pyc
Normal file
BIN
__pycache__/orchestrator.cpython-313.pyc
Normal file
Binary file not shown.
17521
data/btc_1h.csv
Normal file
17521
data/btc_1h.csv
Normal file
File diff suppressed because it is too large
Load Diff
4381
data/btc_4h.csv
Normal file
4381
data/btc_4h.csv
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,76 +1,65 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Fetch BTC/USDT OHLCV data from Binance using ccxt."""
|
||||
"""Fetch BTC/USD OHLCV data using ccxt (multiple exchanges for best coverage)."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import ccxt
|
||||
import pandas as pd
|
||||
import os, sys, time, ccxt, pandas as pd
|
||||
from datetime import datetime, timezone
|
||||
|
||||
# Output directory: a "data" folder located one level above this script's
# own directory.
DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data")

# Settings of the earlier single-exchange (Binance) implementation. They are
# kept so that any code still referring to them continues to resolve.
SYMBOL = "BTC/USDT"
EXCHANGE_ID = "binance"
YEARS_HISTORY = 2
LIMIT_PER_REQUEST = 1000  # Binance max

# Settings of the multi-exchange implementation.
YEARS = 2  # length of the requested history window, in 365-day years
# Candles requested per fetch_ohlcv call.
# NOTE(review): presumably sized for the most restrictive exchange in the
# fallback list -- confirm against each exchange's documented page size.
LIMIT = 300
|
||||
|
||||
|
||||
# Candle duration in milliseconds for every timeframe this script supports.
_TIMEFRAME_MS = {"1h": 3_600_000, "4h": 14_400_000}

# (ccxt exchange id, market symbol) pairs, tried in this order.
_EXCHANGES = (
    ("coinbasepro", "BTC/USD"),
    ("binanceus", "BTC/USDT"),
    ("kraken", "BTC/USD"),
    ("bitfinex", "BTC/USD"),
)

# Total number of failed page requests tolerated per exchange.
_MAX_FETCH_ERRORS = 5

_OHLCV_COLUMNS = ["timestamp", "open", "high", "low", "close", "volume"]


def _candles_to_frame(candles):
    """Turn raw OHLCV rows into a de-duplicated, time-sorted DataFrame."""
    df = pd.DataFrame(candles, columns=_OHLCV_COLUMNS)
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms", utc=True)
    return df.drop_duplicates(subset=["timestamp"]).sort_values("timestamp").reset_index(drop=True)


def _download_candles(exchange, symbol, timeframe, since, until, step_ms):
    """Page through one exchange's candles from ``since`` up to ``until`` (ms)."""
    rows = []
    fails = 0
    # ``fails`` is cumulative for this exchange: it is not reset after a
    # successful page, so the budget covers the whole download.
    while since < until and fails < _MAX_FETCH_ERRORS:
        try:
            candles = exchange.fetch_ohlcv(symbol, timeframe, since=since, limit=LIMIT)
        except Exception as e:  # deliberate best-effort: report, back off, retry
            fails += 1
            print(f" Error ({fails}/{_MAX_FETCH_ERRORS}): {e}")
            time.sleep(3)
            continue
        if not candles:
            break
        rows.extend(candles)
        print(f"\r {len(rows)} candles...", end="", flush=True)
        next_since = candles[-1][0] + step_ms
        if next_since <= since:
            # The page did not move the cursor forward; requesting again
            # would repeat the same window forever, so stop here.
            break
        since = next_since
        time.sleep(exchange.rateLimit / 1000 + 0.1)
    return rows


def fetch_ohlcv(timeframe: str, min_candles: int = 2000) -> pd.DataFrame:
    """Fetch BTC OHLCV candles, trying several exchanges in turn.

    Each exchange in ``_EXCHANGES`` is asked for candles starting
    ``YEARS * 365`` days before now, ``LIMIT`` candles per request. The first
    exchange that yields more than ``min_candles`` raw rows wins.

    Args:
        timeframe: Candle size; must be a key of ``_TIMEFRAME_MS``
            ("1h" or "4h").
        min_candles: An exchange's result is accepted only if it returned
            strictly more raw rows than this.

    Returns:
        A DataFrame with columns timestamp, open, high, low, close, volume.
        ``timestamp`` is converted to UTC datetimes, duplicates on it are
        dropped, and rows are sorted by it.

    Raises:
        ValueError: If ``timeframe`` is not supported.
        RuntimeError: If no exchange returned more than ``min_candles`` rows.
    """
    # Validate first so a bad argument fails before any network access.
    try:
        step_ms = _TIMEFRAME_MS[timeframe]
    except KeyError:
        raise ValueError(f"Unsupported timeframe: {timeframe}") from None

    now_ms = int(time.time() * 1000)
    target_since = now_ms - (YEARS * 365 * 24 * 3600 * 1000)

    for exch_id, symbol in _EXCHANGES:
        print(f" Trying {exch_id} for {symbol} {timeframe}...")
        try:
            exchange = getattr(ccxt, exch_id)({"enableRateLimit": True})
            rows = _download_candles(exchange, symbol, timeframe, target_since, now_ms, step_ms)
            print(f"\r {len(rows)} candles from {exch_id}")
            if len(rows) > min_candles:
                return _candles_to_frame(rows)
            print(f" Only {len(rows)} candles, trying next exchange...")
        except Exception as e:
            # Any failure (unknown exchange id, setup error, bad rows) only
            # disqualifies this exchange; the next one is still tried.
            print(f" Exchange {exch_id} failed: {e}")
            continue

    raise RuntimeError("Could not fetch sufficient data from any exchange")


def main():
    """Download the 4h and 1h histories and write each to DATA_DIR as CSV."""
    os.makedirs(DATA_DIR, exist_ok=True)
    for tf in ["4h", "1h"]:
        print(f"[*] Fetching {tf}...")
        df = fetch_ohlcv(tf)
        out = os.path.join(DATA_DIR, f"btc_{tf}.csv")
        df.to_csv(out, index=False)
        print(f" Saved {out} ({len(df)} rows, {df['timestamp'].min()} to {df['timestamp'].max()})")
    print("[OK] Done!")
|
||||
|
||||
# Script entry point: run the download exactly once, and only when this file
# is executed directly rather than imported.
if __name__ == "__main__":
    main()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user