|
|
|
|
|
import akshare as ak
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
|
import random
|
|
|
|
|
|
import time
|
|
|
|
|
|
import logging
|
|
|
|
|
|
import requests
|
|
|
|
|
|
from typing import List, Optional, Dict
|
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
from app.services.datasource.base import DataSourceBase
|
|
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SmartRequester:
|
|
|
|
|
|
"""
|
|
|
|
|
|
反爬综合管理器:集成 Headers 伪装、拟人化延时、重试机制
|
|
|
|
|
|
(IP 代理部分暂按文档要求空缺,后续扩展)
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, max_retries: int = 3):
|
|
|
|
|
|
self.max_retries = max_retries
|
|
|
|
|
|
self.session = requests.Session()
|
|
|
|
|
|
|
|
|
|
|
|
# User-Agent 池
|
|
|
|
|
|
self.user_agents = [
|
|
|
|
|
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
|
|
|
|
|
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
|
|
|
|
|
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0",
|
|
|
|
|
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42",
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
def get_random_headers(self, referer: str = "https://finance.sina.com.cn/") -> Dict[str, str]:
|
|
|
|
|
|
"""生成随机请求头,模拟真实浏览器"""
|
|
|
|
|
|
return {
|
|
|
|
|
|
"User-Agent": random.choice(self.user_agents),
|
|
|
|
|
|
"Referer": referer,
|
|
|
|
|
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
|
|
|
|
|
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
|
|
|
|
|
|
"Accept-Encoding": "gzip, deflate, br",
|
|
|
|
|
|
"Connection": "keep-alive",
|
|
|
|
|
|
"Upgrade-Insecure-Requests": "1",
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def request(self, url, referer="https://finance.sina.com.cn/", method="GET", **kwargs) -> Optional[requests.Response]:
|
|
|
|
|
|
"""执行智能请求"""
|
|
|
|
|
|
last_error = None
|
|
|
|
|
|
|
|
|
|
|
|
for attempt in range(self.max_retries):
|
|
|
|
|
|
try:
|
|
|
|
|
|
# 1. 拟人化延时
|
|
|
|
|
|
if attempt == 0:
|
|
|
|
|
|
time.sleep(random.uniform(0.5, 1.5))
|
|
|
|
|
|
else:
|
|
|
|
|
|
delay = (2 ** attempt) + random.uniform(1, 3)
|
|
|
|
|
|
logging.warning(f"第 {attempt+1} 次重试,等待 {delay:.1f} 秒...")
|
|
|
|
|
|
time.sleep(delay)
|
|
|
|
|
|
|
|
|
|
|
|
# 2. 轮换 Headers
|
|
|
|
|
|
headers = kwargs.pop("headers", {})
|
|
|
|
|
|
random_headers = self.get_random_headers(referer=referer)
|
|
|
|
|
|
headers.update(random_headers)
|
|
|
|
|
|
kwargs["headers"] = headers
|
|
|
|
|
|
|
|
|
|
|
|
# 3. 发送请求
|
|
|
|
|
|
logging.debug(f"请求: {method} {url}")
|
|
|
|
|
|
response = self.session.request(method, url, timeout=10, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
# 4. 检查状态
|
|
|
|
|
|
if response.status_code == 200:
|
|
|
|
|
|
logging.info("✅ 请求成功")
|
|
|
|
|
|
return response
|
|
|
|
|
|
elif response.status_code == 403:
|
|
|
|
|
|
logging.warning("⚠️ 收到 403 Forbidden,将尝试重试")
|
|
|
|
|
|
raise requests.exceptions.HTTPError("403 Forbidden")
|
|
|
|
|
|
else:
|
|
|
|
|
|
response.raise_for_status()
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
last_error = e
|
|
|
|
|
|
logging.warning(f"❌ 请求失败: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
logging.error(f"🚫 所有 {self.max_retries} 次尝试均失败")
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AkshareSource(DataSourceBase):
|
|
|
|
|
|
"""AKShare 数据源适配器"""
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, config: dict):
|
|
|
|
|
|
super().__init__(config)
|
|
|
|
|
|
self.name = "akshare"
|
|
|
|
|
|
self.requester = SmartRequester(max_retries=config.get("max_retries", 3))
|
|
|
|
|
|
|
|
|
|
|
|
def initialize(self) -> bool:
|
|
|
|
|
|
"""初始化检查"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
ak.__version__
|
|
|
|
|
|
self._initialized = True
|
|
|
|
|
|
return True
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
self._initialized = False
|
|
|
|
|
|
logging.error(f"AkshareSource 初始化失败: {e}")
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
def get_contract_list(self, exchange: Optional[str] = None) -> List[dict]:
|
|
|
|
|
|
"""获取期货合约列表"""
|
|
|
|
|
|
if not self._initialized:
|
|
|
|
|
|
self.initialize()
|
|
|
|
|
|
|
|
|
|
|
|
results = []
|
|
|
|
|
|
try:
|
|
|
|
|
|
exchanges_to_fetch = ["CZCE", "DCE", "SHFE", "INE", "CFFEX", "GFEX"]
|
|
|
|
|
|
if exchange:
|
|
|
|
|
|
exchanges_to_fetch = [exchange]
|
|
|
|
|
|
|
|
|
|
|
|
for ex in exchanges_to_fetch:
|
|
|
|
|
|
try:
|
|
|
|
|
|
func_name = f"futures_contract_info_{ex.lower()}"
|
|
|
|
|
|
if hasattr(ak, func_name):
|
|
|
|
|
|
df = getattr(ak, func_name)()
|
|
|
|
|
|
else:
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
if df is not None and not df.empty:
|
|
|
|
|
|
# 统一列名映射
|
|
|
|
|
|
col_map = {}
|
|
|
|
|
|
if '合约代码' in df.columns:
|
|
|
|
|
|
col_map['合约代码'] = 'symbol'
|
|
|
|
|
|
if '产品代码' in df.columns:
|
|
|
|
|
|
col_map['产品代码'] = 'product'
|
|
|
|
|
|
if '品种' in df.columns:
|
|
|
|
|
|
col_map['品种'] = 'product'
|
|
|
|
|
|
if '交易单位' in df.columns:
|
|
|
|
|
|
col_map['交易单位'] = 'multiplier'
|
|
|
|
|
|
if '最小变动价位' in df.columns:
|
|
|
|
|
|
col_map['最小变动价位'] = 'price_tick'
|
|
|
|
|
|
if '上市日' in df.columns:
|
|
|
|
|
|
col_map['上市日'] = 'list_date'
|
|
|
|
|
|
if '到期日' in df.columns:
|
|
|
|
|
|
col_map['到期日'] = 'expire_date'
|
|
|
|
|
|
|
|
|
|
|
|
df = df.rename(columns=col_map)
|
|
|
|
|
|
|
|
|
|
|
|
for _, row in df.iterrows():
|
|
|
|
|
|
symbol = row.get("symbol")
|
|
|
|
|
|
if not symbol:
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
# 提取品种代码 (通常 symbol 最后两位是年月,前面是品种,如 rb2401 -> rb)
|
|
|
|
|
|
product = row.get("product", "")
|
|
|
|
|
|
if not product and len(symbol) > 2:
|
|
|
|
|
|
# 简单提取字母部分
|
|
|
|
|
|
import re
|
|
|
|
|
|
match = re.match(r"([a-zA-Z]+)", symbol)
|
|
|
|
|
|
if match:
|
|
|
|
|
|
product = match.group(1).lower()
|
|
|
|
|
|
|
|
|
|
|
|
multiplier = row.get("multiplier")
|
|
|
|
|
|
if multiplier:
|
|
|
|
|
|
try:
|
|
|
|
|
|
multiplier = int(float(str(multiplier).replace(',', '')))
|
|
|
|
|
|
except:
|
|
|
|
|
|
multiplier = 10
|
|
|
|
|
|
else:
|
|
|
|
|
|
multiplier = 10 # 默认值
|
|
|
|
|
|
|
|
|
|
|
|
price_tick = row.get("price_tick")
|
|
|
|
|
|
if price_tick:
|
|
|
|
|
|
try:
|
|
|
|
|
|
price_tick = float(str(price_tick).replace(',', ''))
|
|
|
|
|
|
except:
|
|
|
|
|
|
price_tick = None
|
|
|
|
|
|
|
|
|
|
|
|
results.append({
|
|
|
|
|
|
"symbol": symbol,
|
|
|
|
|
|
"exchange": ex,
|
|
|
|
|
|
"name": symbol, # AKShare 通常不返回中文名称,用代码代替
|
|
|
|
|
|
"product": product,
|
|
|
|
|
|
"multiplier": multiplier,
|
|
|
|
|
|
"price_tick": price_tick,
|
|
|
|
|
|
"expire_date": self._parse_date(row.get("expire_date")),
|
|
|
|
|
|
"is_active": True,
|
|
|
|
|
|
})
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logging.warning(f"获取 {ex} 合约列表失败: {e}")
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logging.error(f"获取合约列表异常: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
return results
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_date(self, date_str) -> Optional[datetime]:
|
|
|
|
|
|
"""解析日期"""
|
|
|
|
|
|
if not date_str:
|
|
|
|
|
|
return None
|
|
|
|
|
|
try:
|
|
|
|
|
|
if isinstance(date_str, str):
|
|
|
|
|
|
return datetime.strptime(date_str, "%Y-%m-%d")
|
|
|
|
|
|
elif isinstance(date_str, pd.Timestamp):
|
|
|
|
|
|
return date_str.to_pydatetime()
|
|
|
|
|
|
return None
|
|
|
|
|
|
except:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def get_kline_daily(self, symbol: str, start_date: str, end_date: str) -> pd.DataFrame:
|
|
|
|
|
|
"""获取日 K 线数据"""
|
|
|
|
|
|
logger.info(f"[AKShare-日K线] 开始获取 symbol={symbol}, start_date={start_date}, end_date={end_date}")
|
|
|
|
|
|
|
|
|
|
|
|
if not self._initialized:
|
|
|
|
|
|
logger.info(f"[AKShare-日K线] 初始化 AKShare")
|
|
|
|
|
|
self.initialize()
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
# AKShare 期货日 K 线接口:futures_zh_daily_sina
|
|
|
|
|
|
logger.info(f"[AKShare-日K线] 调用 ak.futures_zh_daily_sina(symbol='{symbol}')")
|
|
|
|
|
|
df = ak.futures_zh_daily_sina(symbol=symbol)
|
|
|
|
|
|
|
|
|
|
|
|
if df is None or df.empty:
|
|
|
|
|
|
logger.warning(f"[AKShare-日K线] AKShare 返回空数据,symbol={symbol}")
|
|
|
|
|
|
return pd.DataFrame()
|
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"[AKShare-日K线] AKShare 返回 {len(df)} 条原始数据")
|
|
|
|
|
|
logger.debug(f"[AKShare-日K线] 原始数据列: {df.columns.tolist()}")
|
|
|
|
|
|
logger.debug(f"[AKShare-日K线] 原始数据样例:\n{df.head()}")
|
|
|
|
|
|
|
|
|
|
|
|
# 过滤日期范围
|
|
|
|
|
|
df['date'] = pd.to_datetime(df['date'])
|
|
|
|
|
|
logger.debug(f"[AKShare-日K线] 日期范围: {df['date'].min()} ~ {df['date'].max()}")
|
|
|
|
|
|
|
|
|
|
|
|
mask = (df['date'] >= start_date) & (df['date'] <= end_date)
|
|
|
|
|
|
df_filtered = df.loc[mask].copy()
|
|
|
|
|
|
logger.info(f"[AKShare-日K线] 日期过滤后剩余 {len(df_filtered)} 条记录")
|
|
|
|
|
|
|
|
|
|
|
|
# 统一列名
|
|
|
|
|
|
df_filtered = df_filtered.rename(columns={
|
|
|
|
|
|
"date": "trade_date",
|
|
|
|
|
|
"open": "open",
|
|
|
|
|
|
"high": "high",
|
|
|
|
|
|
"low": "low",
|
|
|
|
|
|
"close": "close",
|
|
|
|
|
|
"volume": "volume",
|
|
|
|
|
|
"hold": "open_interest",
|
|
|
|
|
|
"settle": "settle",
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
# AKShare 日线通常没有 turnover 和 pre_settle,置空
|
|
|
|
|
|
df_filtered["turnover"] = None
|
|
|
|
|
|
df_filtered["pre_settle"] = None
|
|
|
|
|
|
|
|
|
|
|
|
df_filtered["trade_date"] = pd.to_datetime(df_filtered["trade_date"])
|
|
|
|
|
|
logger.info(f"[AKShare-日K线] 最终返回 {len(df_filtered)} 条记录")
|
|
|
|
|
|
|
|
|
|
|
|
return df_filtered[["trade_date", "open", "high", "low", "close", "volume", "turnover", "open_interest", "settle", "pre_settle"]]
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"[AKShare-日K线] 获取 {symbol} 日 K 线失败: {e}", exc_info=True)
|
|
|
|
|
|
return pd.DataFrame()
|
|
|
|
|
|
|
|
|
|
|
|
def get_kline_weekly(self, symbol: str, start_date: str, end_date: str) -> pd.DataFrame:
|
|
|
|
|
|
"""获取周 K 线数据 (通过日 K 聚合)"""
|
|
|
|
|
|
daily_df = self.get_kline_daily(symbol, start_date, end_date)
|
|
|
|
|
|
if daily_df.empty:
|
|
|
|
|
|
return pd.DataFrame()
|
|
|
|
|
|
|
|
|
|
|
|
daily_df = daily_df.set_index("trade_date")
|
|
|
|
|
|
weekly = daily_df.resample("W-FRI").agg({
|
|
|
|
|
|
"open": "first",
|
|
|
|
|
|
"high": "max",
|
|
|
|
|
|
"low": "min",
|
|
|
|
|
|
"close": "last",
|
|
|
|
|
|
"volume": "sum",
|
|
|
|
|
|
"turnover": "sum",
|
|
|
|
|
|
"open_interest": "last",
|
|
|
|
|
|
}).dropna()
|
|
|
|
|
|
|
|
|
|
|
|
weekly = weekly.reset_index()
|
|
|
|
|
|
weekly = weekly.rename(columns={"trade_date": "trade_date"})
|
|
|
|
|
|
return weekly
|
|
|
|
|
|
|
|
|
|
|
|
def get_kline_intraday(self, symbol: str, period: str, start_date: str, end_date: str) -> pd.DataFrame:
|
|
|
|
|
|
"""获取分钟级 K 线数据"""
|
|
|
|
|
|
if not self._initialized:
|
|
|
|
|
|
self.initialize()
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
# AKShare 期货分钟 K 线接口:futures_zh_minute_sina
|
|
|
|
|
|
period_map = {"5m": "5", "15m": "15", "30m": "30", "60m": "60"}
|
|
|
|
|
|
freq = period_map.get(period, "5")
|
|
|
|
|
|
|
|
|
|
|
|
df = ak.futures_zh_minute_sina(symbol=symbol, period=freq)
|
|
|
|
|
|
|
|
|
|
|
|
if df is None or df.empty:
|
|
|
|
|
|
return pd.DataFrame()
|
|
|
|
|
|
|
|
|
|
|
|
# 过滤日期
|
|
|
|
|
|
df['datetime'] = pd.to_datetime(df['datetime'])
|
|
|
|
|
|
mask = (df['datetime'] >= start_date) & (df['datetime'] <= end_date)
|
|
|
|
|
|
df = df.loc[mask].copy()
|
|
|
|
|
|
|
|
|
|
|
|
# 统一列名
|
|
|
|
|
|
df = df.rename(columns={
|
|
|
|
|
|
"datetime": "trade_time",
|
|
|
|
|
|
"open": "open",
|
|
|
|
|
|
"high": "high",
|
|
|
|
|
|
"low": "low",
|
|
|
|
|
|
"close": "close",
|
|
|
|
|
|
"volume": "volume",
|
|
|
|
|
|
"hold": "open_interest",
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
df["turnover"] = None
|
|
|
|
|
|
|
|
|
|
|
|
return df[["trade_time", "open", "high", "low", "close", "volume", "turnover", "open_interest"]]
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logging.error(f"获取 {symbol} 分钟 K 线失败: {e}")
|
|
|
|
|
|
return pd.DataFrame()
|