diff --git a/data_platform/daily_all_update.py b/data_platform/daily_all_update.py index 8437c798..78bbf944 100644 --- a/data_platform/daily_all_update.py +++ b/data_platform/daily_all_update.py @@ -207,6 +207,86 @@ class SourceHealthMonitor: # ======================== 日线更新 ======================== +def get_em_secid(code: str) -> str: + """获取东方财富secid: 1.沪市 0.深市""" + if code.startswith(("60", "68", "51")): + return f"1.{code}" + return f"0.{code}" + + +def fetch_eastmoney_daily(code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]: + """东方财富K线API获取日线(主源) + + 特点: + - 一次请求可拿多年数据(实测4年1046条) + - amount有真实值 + - 反爬严格:必须Referer+UA+ut,限频3-5s/请求 + """ + import requests as _requests + import random + + secid = get_em_secid(code) + ts = str(int(time.time() * 1000)) + url = ( + f"https://push2his.eastmoney.com/api/qt/stock/kline/get?" + f"secid={secid}&klt=101&fqt=1&" + f"beg={start_date.replace('-', '')}&end={end_date.replace('-', '')}&" + f"fields1=f1,f2,f3,f4,f5,f6,f7,f8&" + f"fields2=f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61&" + f"ut=b2884a393a59ad64002292a3e90d46a5&" + f"lmt=10000&" + f"cb=jQuery_em_{ts}&_={ts}" + ) + + session = _requests.Session() + session.trust_env = False # 绕过系统代理 + + try: + r = session.get(url, headers=HEADERS_EM, timeout=15, verify=False) + if r.status_code != 200: + return None + # 解析JSONP + text = r.text + start_idx = text.index("(") + 1 + end_idx = text.rindex(")") + data = json.loads(text[start_idx:end_idx]) + except Exception as e: + logger.debug("东方财富日线失败 %s: %s", code, e) + return None + + if data.get("rc") != 0: + return None + klines = data.get("data", {}).get("klines", []) + if not klines: + return None + + # 解析: date,open,close,high,low,volume,amount,amplitude,pct_change,change,turnover_rate + rows = [] + for line in klines: + parts = line.split(",") + if len(parts) < 7: + continue + rows.append({ + "date": parts[0], + "open": float(parts[1]), + "close": float(parts[2]), + "high": float(parts[3]), + "low": float(parts[4]), + "volume": float(parts[5]), + "amount": float(parts[6]), + }) + + if not rows: + return None + df = pd.DataFrame(rows) + # 东方财富日期格式可能含时间,标准化 + df["date"] = pd.to_datetime(df["date"]).dt.strftime("%Y-%m-%d") + df["date"] = df["date"].astype(str) + mask = (df["date"] >= start_date) & (df["date"] <= end_date) + result = df.loc[mask, ["date", "open", "high", "low", "close", "volume", "amount"]] + return result if not result.empty else None + + def fetch_tencent_daily(code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]: """腾讯K线API获取日线增量""" prefix, clean = get_market_prefix(code)