""" 缓存管理服务 """ import asyncio import logging from typing import List, Dict, Optional from datetime import date, datetime from sqlalchemy.orm import Session from sqlalchemy import and_, func from app.models.cache import CacheTask, CacheTaskDetail from app.models.stock import StockKlineDaily from app.models.future import FutureKlineDaily from app.models.config import SDKConfig from app.services.base_data_service import BaseDataService from app.services.stock_service import StockService from app.services.future_service import FutureService from app.services.sdk_manager import sdk_manager from app.utils.date_utils import parse_date, format_date, get_market_from_code from app.config import settings from app.core.redis_client import redis_client from app.core.progress_manager import progress_manager logger = logging.getLogger(__name__) class CacheService: """缓存服务""" CODE_LIST_CACHE_EXPIRE = 12 * 60 * 60 def __init__(self, db: Session): self.db = db self.base_service = BaseDataService(db) self.stock_service = StockService(db) self.future_service = FutureService(db) def get_all_codes(self, security_type: str, contract_type: str = "all") -> List[str]: """ 获取所有代码列表(带Redis缓存) Args: security_type: 证券类型 (stock, future) contract_type: 合约类型 (all, main) - 仅对期货有效 Returns: 代码列表 """ cache_key = f"code_list:{security_type}:{contract_type}" cached_codes = redis_client.get(cache_key) if cached_codes is not None: logger.info(f"从Redis缓存获取代码列表: {security_type}/{contract_type}, 共{len(cached_codes)}个") return cached_codes adapter = sdk_manager.get_default_connection() if not adapter: raise RuntimeError("SDK连接失败") codes = [] if security_type == "stock": codes = adapter.get_code_list("EXTRA_STOCK_A") elif security_type == "future": if contract_type == "main": main_contracts = adapter.get_all_main_contracts() codes = list(main_contracts.values()) else: codes = adapter.get_code_list("EXTRA_FUTURE") if codes: redis_client.set(cache_key, codes, expire=self.CODE_LIST_CACHE_EXPIRE) logger.info(f"代码列表已缓存到Redis: {security_type}/{contract_type}, 共{len(codes)}个, 有效期12小时") return codes def get_future_varieties(self) -> List[str]: """获取期货品种列表(带Redis缓存)""" cache_key = "future_varieties" cached = redis_client.get(cache_key) if cached is not None: logger.info(f"从Redis缓存获取期货品种列表, 共{len(cached)}个") return cached adapter = sdk_manager.get_default_connection() if not adapter: raise RuntimeError("SDK连接失败") varieties = adapter.get_future_varieties() if varieties: redis_client.set(cache_key, varieties, expire=24 * 60 * 60) logger.info(f"期货品种列表已缓存到Redis, 共{len(varieties)}个, 有效期24小时") return varieties def get_trading_calendar_cached(self, market: str, start_date: date = None, end_date: date = None) -> List[date]: """获取交易日历(带Redis缓存)""" cache_key = f"trading_calendar:{market}" cached = redis_client.get(cache_key) if cached is not None: logger.info(f"从Redis缓存获取交易日历: {market}") from app.utils.date_utils import int_to_date all_dates = [int_to_date(d) for d in cached] if start_date: all_dates = [d for d in all_dates if d >= start_date] if end_date: all_dates = [d for d in all_dates if d <= end_date] return all_dates trading_days = self.base_service.get_trading_calendar(market) from app.utils.date_utils import date_to_int all_dates_int = [date_to_int(d) for d in trading_days] redis_client.set(cache_key, all_dates_int, expire=365 * 24 * 60 * 60) logger.info(f"交易日历已缓存到Redis: {market}, 有效期1年") return trading_days def get_main_contract_cached(self, variety: str) -> Optional[str]: """获取主力合约(带Redis缓存)""" cache_key = f"main_contract:{variety}" cached = redis_client.get(cache_key) if cached is not None: logger.info(f"从Redis缓存获取主力合约: {variety} -> {cached}") return cached adapter = sdk_manager.get_default_connection() if not adapter: raise RuntimeError("SDK连接失败") main_contract = adapter.get_main_contract(variety) if main_contract: redis_client.set(cache_key, main_contract, expire=1 * 60 * 60) logger.info(f"主力合约已缓存到Redis: {variety} -> {main_contract}, 有效期1小时") return main_contract def get_all_main_contracts_cached(self) -> Dict[str, str]: """获取所有主力合约(带Redis缓存)""" cache_key = "all_main_contracts" cached = redis_client.get(cache_key) if cached is not None: logger.info(f"从Redis缓存获取所有主力合约, 共{len(cached)}个") return cached adapter = sdk_manager.get_default_connection() if not adapter: raise RuntimeError("SDK连接失败") main_contracts = adapter.get_all_main_contracts() if main_contracts: redis_client.set(cache_key, main_contracts, expire=1 * 60 * 60) logger.info(f"所有主力合约已缓存到Redis, 共{len(main_contracts)}个, 有效期1小时") return main_contracts def detect_all_missing_data( self, security_type: str, period_type: str, start_date: date, end_date: date, contract_type: str = "all", task_id: str = None ) -> Dict: """ 一键检测所有数据的缺失情况 改进逻辑: 1. 初步检测:统计每个代码的数据条数,找出疑似缺失的代码 2. 二次验证:对疑似缺失的代码,按交易日调用接口验证是否真的缺失 - 如果接口也没有数据,说明该交易日股票未上市或停牌,不算缺失 - 如果接口有数据但本地没有,说明确实缺失 Args: security_type: 证券类型 (stock, future) period_type: 周期类型 (daily, min1, etc.) start_date: 开始日期 end_date: 结束日期 contract_type: 合约类型 (all, main) - 仅对期货有效 task_id: WebSocket任务ID Returns: 检测结果字典 """ code_list = self.get_all_codes(security_type, contract_type) if not code_list: raise ValueError(f"无法获取{security_type}代码列表") logger.info(f"获取到{len(code_list)}个{security_type}代码") ws_task_id = task_id or f"detect_{security_type}_{datetime.utcnow().strftime('%Y%m%d%H%M%S')}" def push_progress(progress, status, **kwargs): try: loop = asyncio.get_event_loop() if loop.is_running(): asyncio.create_task(progress_manager.update_progress(ws_task_id, { "progress": progress, "status": status, "total_count": len(code_list), **kwargs })) except RuntimeError: pass push_progress(0, "starting", message="开始检测...") task = CacheTask( task_name=f"一键检测所有数据 - {security_type} - {contract_type} - {len(code_list)}个代码", task_type="detect_all_missing", security_type=security_type, period_type=period_type, start_date=start_date, end_date=end_date, code_list=",".join(code_list[:100]) + "...", status="running", total_count=len(code_list), started_at=datetime.utcnow() ) self.db.add(task) self.db.commit() self.db.refresh(task) push_progress(5, "running", message="获取交易日历...") try: market = "CFE" if security_type == "future" else "SH" trading_days = self.base_service.get_trading_calendar(market, start_date, end_date) expected_count = len(trading_days) trading_days_set = set(trading_days) preliminary_missing_codes = [] complete_codes = [] error_count = 0 daily_stats = {} for td in trading_days: daily_stats[format_date(td)] = { "expected": len(code_list), "actual": 0, "missing": 0, "verified_missing": 0 } push_progress(10, "running", message="查询数据库统计...") code_existing_dates = {} if security_type == "stock" and period_type == "daily": from sqlalchemy import func code_count_query = self.db.query( StockKlineDaily.code, func.count(StockKlineDaily.id).label('count') ).filter( and_( StockKlineDaily.trade_date >= start_date, StockKlineDaily.trade_date <= end_date ) ).group_by(StockKlineDaily.code).all() code_counts = {r.code: r.count for r in code_count_query} code_dates_query = self.db.query( StockKlineDaily.code, StockKlineDaily.trade_date ).filter( and_( StockKlineDaily.trade_date >= start_date, StockKlineDaily.trade_date <= end_date ) ).all() for r in code_dates_query: if r.code not in code_existing_dates: code_existing_dates[r.code] = set() code_existing_dates[r.code].add(r.trade_date) date_count_query = self.db.query( func.date(StockKlineDaily.trade_date).label('trade_date'), func.count(StockKlineDaily.id).label('count') ).filter( and_( StockKlineDaily.trade_date >= start_date, StockKlineDaily.trade_date <= end_date ) ).group_by(func.date(StockKlineDaily.trade_date)).all() for r in date_count_query: date_key = format_date(r.trade_date) if hasattr(r.trade_date, 'strftime') else str(r.trade_date) if date_key in daily_stats: daily_stats[date_key]["actual"] = r.count push_progress(20, "running", message="初步分析数据完整性...") for i, code in enumerate(code_list): actual_count = code_counts.get(code, 0) is_missing = actual_count < expected_count if is_missing: existing_dates = code_existing_dates.get(code, set()) missing_dates = trading_days_set - existing_dates preliminary_missing_codes.append({ "code": code, "actual_count": actual_count, "expected_count": expected_count, "missing_count": len(missing_dates), "missing_dates": sorted(list(missing_dates)), "missing_ratio": len(missing_dates) / expected_count if expected_count > 0 else 0 }) else: complete_codes.append(code) if (i + 1) % 500 == 0 or i == len(code_list) - 1: task.success_count = len(preliminary_missing_codes) + len(complete_codes) task.error_count = error_count task.progress = min(50, int((i + 1) / len(code_list) * 30)) self.db.commit() push_progress( 20 + int((i + 1) / len(code_list) * 30), "running", processed=len(preliminary_missing_codes) + len(complete_codes), missing=len(preliminary_missing_codes), complete=len(complete_codes) ) elif security_type == "future" and period_type == "daily": from sqlalchemy import func code_count_query = self.db.query( FutureKlineDaily.code, func.count(FutureKlineDaily.id).label('count') ).filter( and_( FutureKlineDaily.trade_date >= start_date, FutureKlineDaily.trade_date <= end_date ) ).group_by(FutureKlineDaily.code).all() code_counts = {r.code: r.count for r in code_count_query} code_dates_query = self.db.query( FutureKlineDaily.code, FutureKlineDaily.trade_date ).filter( and_( FutureKlineDaily.trade_date >= start_date, FutureKlineDaily.trade_date <= end_date ) ).all() for r in code_dates_query: if r.code not in code_existing_dates: code_existing_dates[r.code] = set() code_existing_dates[r.code].add(r.trade_date) date_count_query = self.db.query( func.date(FutureKlineDaily.trade_date).label('trade_date'), func.count(FutureKlineDaily.id).label('count') ).filter( and_( FutureKlineDaily.trade_date >= start_date, FutureKlineDaily.trade_date <= end_date ) ).group_by(func.date(FutureKlineDaily.trade_date)).all() for r in date_count_query: date_key = format_date(r.trade_date) if hasattr(r.trade_date, 'strftime') else str(r.trade_date) if date_key in daily_stats: daily_stats[date_key]["actual"] = r.count push_progress(20, "running", message="初步分析数据完整性...") for i, code in enumerate(code_list): actual_count = code_counts.get(code, 0) is_missing = actual_count < expected_count if is_missing: existing_dates = code_existing_dates.get(code, set()) missing_dates = trading_days_set - existing_dates preliminary_missing_codes.append({ "code": code, "actual_count": actual_count, "expected_count": expected_count, "missing_count": len(missing_dates), "missing_dates": sorted(list(missing_dates)), "missing_ratio": len(missing_dates) / expected_count if expected_count > 0 else 0 }) else: complete_codes.append(code) if (i + 1) % 500 == 0 or i == len(code_list) - 1: task.success_count = len(preliminary_missing_codes) + len(complete_codes) task.error_count = error_count task.progress = min(50, int((i + 1) / len(code_list) * 30)) self.db.commit() push_progress( 20 + int((i + 1) / len(code_list) * 30), "running", processed=len(preliminary_missing_codes) + len(complete_codes), missing=len(preliminary_missing_codes), complete=len(complete_codes) ) else: for i, code in enumerate(code_list): preliminary_missing_codes.append({ "code": code, "actual_count": 0, "expected_count": expected_count, "missing_count": expected_count, "missing_dates": trading_days, "missing_ratio": 1.0 }) if (i + 1) % 500 == 0 or i == len(code_list) - 1: task.success_count = len(preliminary_missing_codes) task.error_count = error_count task.progress = min(50, int((i + 1) / len(code_list) * 30)) self.db.commit() push_progress( 20 + int((i + 1) / len(code_list) * 30), "running", processed=len(preliminary_missing_codes), missing=len(preliminary_missing_codes) ) logger.info(f"初步检测完成: 完整{len(complete_codes)}个, 疑似缺失{len(preliminary_missing_codes)}个") push_progress(50, "running", message="开始二次验证疑似缺失数据...") verified_missing_codes = [] verified_complete_codes = [] adapter = sdk_manager.get_default_connection() if not adapter: logger.warning("SDK连接失败,跳过二次验证,使用初步检测结果") verified_missing_codes = preliminary_missing_codes else: total_preliminary = len(preliminary_missing_codes) for i, item in enumerate(preliminary_missing_codes): code = item["code"] missing_dates = item["missing_dates"] truly_missing_dates = [] unavailable_dates = [] for trade_date in missing_dates: try: if security_type == "stock": kline_data = adapter.get_stock_kline( code, format_date(trade_date), format_date(trade_date), period_type ) elif security_type == "future": kline_data = adapter.get_future_kline( code, format_date(trade_date), format_date(trade_date), period_type ) else: kline_data = [] if kline_data and len(kline_data) > 0: truly_missing_dates.append(trade_date) else: unavailable_dates.append(trade_date) except Exception as e: logger.debug(f"验证{code} {format_date(trade_date)}时出错: {str(e)}") unavailable_dates.append(trade_date) truly_missing_count = len(truly_missing_dates) if truly_missing_count > 0: verified_missing_codes.append({ "code": code, "actual_count": item["actual_count"], "expected_count": item["expected_count"], "missing_count": truly_missing_count, "missing_dates": truly_missing_dates, "unavailable_dates": unavailable_dates, "missing_ratio": truly_missing_count / expected_count if expected_count > 0 else 0 }) detail = CacheTaskDetail( task_id=task.id, code=code, trade_date=start_date, expected_count=expected_count, actual_count=item["actual_count"], is_missing=True, status="pending" ) self.db.add(detail) else: verified_complete_codes.append(code) complete_codes.append(code) if (i + 1) % 50 == 0 or i == total_preliminary - 1: task.success_count = len(complete_codes) task.error_count = error_count task.progress = min(100, 50 + int((i + 1) / total_preliminary * 50)) self.db.commit() push_progress( 50 + int((i + 1) / total_preliminary * 50), "running", message=f"二次验证进度: {i + 1}/{total_preliminary}", processed=len(complete_codes), missing=len(verified_missing_codes), complete=len(complete_codes) ) for date_key in daily_stats: daily_stats[date_key]["missing"] = daily_stats[date_key]["expected"] - daily_stats[date_key]["actual"] verified_missing_code_list = [m["code"] for m in verified_missing_codes] task.code_list = ",".join(verified_missing_code_list[:500]) if verified_missing_code_list else "" task.status = "completed" task.success_count = len(complete_codes) task.error_count = error_count task.completed_at = datetime.utcnow() self.db.commit() push_progress(100, "completed", message="检测完成(含二次验证)", complete_count=len(complete_codes), missing_count=len(verified_missing_codes), error_count=error_count ) logger.info(f"检测完成(含二次验证): 完整{len(complete_codes)}个, 真实缺失{len(verified_missing_codes)}个, 错误{error_count}个") missing_codes_display = [] for m in verified_missing_codes[:100]: missing_dates_str = [format_date(d) for d in m.get("missing_dates", [])] missing_codes_display.append({ "code": m["code"], "actual_count": m["actual_count"], "expected_count": m["expected_count"], "missing_count": m["missing_count"], "missing_ratio": m["missing_ratio"], "missing_dates_display": missing_dates_str[:10] }) return { "task_id": task.id, "ws_task_id": ws_task_id, "task_name": task.task_name, "status": task.status, "progress": float(task.progress), "total_count": task.total_count, "complete_count": len(complete_codes), "missing_count": len(verified_missing_codes), "error_count": error_count, "expected_days": expected_count, "start_date": format_date(start_date), "end_date": format_date(end_date), "security_type": security_type, "period_type": period_type, "daily_stats": daily_stats, "missing_codes": missing_codes_display, "missing_code_list": verified_missing_code_list, "verification_enabled": True } except Exception as e: task.status = "failed" task.error_message = str(e) task.completed_at = datetime.utcnow() self.db.commit() logger.error(f"一键检测缺失数据任务失败: {str(e)}") push_progress(100, "failed", error=str(e)) return { "task_id": task.id, "ws_task_id": ws_task_id, "task_name": task.task_name, "status": task.status, "error_message": str(e) } def _create_cache_task( self, security_type: str, period_type: str, start_date: date, end_date: date, contract_type: str = "all" ) -> CacheTask: """创建缓存任务记录""" code_list = self.get_all_codes(security_type, contract_type) if not code_list: raise ValueError(f"无法获取{security_type}代码列表") task = CacheTask( task_name=f"一键缓存所有数据 - {security_type} - {contract_type} - {len(code_list)}个代码", task_type="cache_all_data", security_type=security_type, period_type=period_type, start_date=start_date, end_date=end_date, code_list=",".join(code_list[:100]) + "...", status="pending", total_count=len(code_list), started_at=datetime.utcnow() ) self.db.add(task) self.db.commit() self.db.refresh(task) return task def _create_fill_missing_task( self, security_type: str, period_type: str, start_date: date, end_date: date, missing_codes: List[str] ) -> CacheTask: """创建补齐缺失数据任务记录""" if not missing_codes: raise ValueError("没有缺失代码需要补齐") task = CacheTask( task_name=f"一键补齐缺失数据 - {security_type} - {len(missing_codes)}个代码", task_type="fill_missing_data", security_type=security_type, period_type=period_type, start_date=start_date, end_date=end_date, code_list=",".join(missing_codes[:500]), status="pending", total_count=len(missing_codes), started_at=datetime.utcnow() ) self.db.add(task) self.db.commit() self.db.refresh(task) return task def _execute_fill_missing_task( self, task_id: int, security_type: str, period_type: str, start_date: date, end_date: date, missing_codes: List[str] ): """执行补齐缺失数据任务""" task = self.db.query(CacheTask).filter(CacheTask.id == task_id).first() if not task: return task.status = "running" self.db.commit() try: success_count = 0 error_count = 0 for i, code in enumerate(missing_codes): try: if security_type == "stock": self.stock_service.get_kline([code], start_date, end_date, period_type) elif security_type == "future": self.future_service.get_kline([code], start_date, end_date, period_type) success_count += 1 except Exception as e: logger.error(f"补齐{code}数据失败: {str(e)}") error_count += 1 if (i + 1) % 10 == 0 or i == len(missing_codes) - 1: task.success_count = success_count task.error_count = error_count task.progress = min(100, int((i + 1) / len(missing_codes) * 100)) self.db.commit() task.status = "completed" task.success_count = success_count task.error_count = error_count task.completed_at = datetime.utcnow() self.db.commit() except Exception as e: task.status = "failed" task.error_message = str(e) task.completed_at = datetime.utcnow() self.db.commit() def _execute_cache_task( self, task_id: int, security_type: str, period_type: str, start_date: date, end_date: date, contract_type: str = "all" ): """执行缓存任务""" task = self.db.query(CacheTask).filter(CacheTask.id == task_id).first() if not task: return task.status = "running" self.db.commit() code_list = self.get_all_codes(security_type, contract_type) try: market = "CFE" if security_type == "future" else "SH" trading_days = self.base_service.get_trading_calendar(market, start_date, end_date) expected_count = len(trading_days) success_count = 0 skipped_count = 0 error_count = 0 for i, code in enumerate(code_list): try: if security_type == "stock" and period_type == "daily": actual_count = self.db.query(StockKlineDaily).filter( and_( StockKlineDaily.code == code, StockKlineDaily.trade_date >= start_date, StockKlineDaily.trade_date <= end_date ) ).count() elif security_type == "future" and period_type == "daily": actual_count = self.db.query(FutureKlineDaily).filter( and_( FutureKlineDaily.code == code, FutureKlineDaily.trade_date >= start_date, FutureKlineDaily.trade_date <= end_date ) ).count() else: actual_count = 0 if actual_count >= expected_count: skipped_count += 1 continue if security_type == "stock": self.stock_service.get_kline([code], start_date, end_date, period_type) elif security_type == "future": self.future_service.get_kline([code], start_date, end_date, period_type) success_count += 1 except Exception as e: logger.error(f"缓存{code}数据失败: {str(e)}") error_count += 1 if (i + 1) % 10 == 0 or i == len(code_list) - 1: task.success_count = success_count task.error_count = error_count task.progress = min(100, int((i + 1) / len(code_list) * 100)) self.db.commit() task.status = "completed" task.success_count = success_count task.error_count = error_count task.completed_at = datetime.utcnow() self.db.commit() except Exception as e: task.status = "failed" task.error_message = str(e) task.completed_at = datetime.utcnow() self.db.commit() def cache_all_missing_data( self, security_type: str, period_type: str, start_date: date, end_date: date, contract_type: str = "all" ) -> CacheTask: """ 一键缓存所有缺失数据 Args: security_type: 证券类型 period_type: 周期类型 start_date: 开始日期 end_date: 结束日期 contract_type: 合约类型 (all, main) - 仅对期货有效 Returns: 缓存任务对象 """ # 获取所有代码 code_list = self.get_all_codes(security_type, contract_type) if not code_list: raise ValueError(f"无法获取{security_type}代码列表") logger.info(f"获取到{len(code_list)}个{security_type}代码,开始缓存") # 创建缓存任务 task = CacheTask( task_name=f"一键缓存所有数据 - {security_type} - {len(code_list)}个代码", task_type="cache_all_data", security_type=security_type, period_type=period_type, start_date=start_date, end_date=end_date, code_list=",".join(code_list[:100]) + "...", status="running", total_count=len(code_list), started_at=datetime.utcnow() ) self.db.add(task) self.db.commit() self.db.refresh(task) try: # 获取交易日历 market = "CFE" if security_type == "future" else "SH" trading_days = self.base_service.get_trading_calendar(market, start_date, end_date) expected_count = len(trading_days) success_count = 0 skipped_count = 0 error_count = 0 for i, code in enumerate(code_list): try: # 先检查是否已有完整数据 if security_type == "stock" and period_type == "daily": actual_count = self.db.query(StockKlineDaily).filter( and_( StockKlineDaily.code == code, StockKlineDaily.trade_date >= start_date, StockKlineDaily.trade_date <= end_date ) ).count() elif security_type == "future" and period_type == "daily": actual_count = self.db.query(FutureKlineDaily).filter( and_( FutureKlineDaily.code == code, FutureKlineDaily.trade_date >= start_date, FutureKlineDaily.trade_date <= end_date ) ).count() else: actual_count = 0 # 如果数据完整,跳过 if actual_count >= expected_count: skipped_count += 1 detail = CacheTaskDetail( task_id=task.id, code=code, trade_date=start_date, expected_count=expected_count, actual_count=actual_count, is_missing=False, status="skipped" ) self.db.add(detail) continue # 获取数据(会自动缓存) if security_type == "stock": self.stock_service.get_kline([code], start_date, end_date, period_type) elif security_type == "future": self.future_service.get_kline([code], start_date, end_date, period_type) success_count += 1 detail = CacheTaskDetail( task_id=task.id, code=code, trade_date=start_date, expected_count=expected_count, actual_count=actual_count, is_missing=True, status="success", processed_at=datetime.utcnow() ) self.db.add(detail) except Exception as e: logger.error(f"缓存{code}数据失败: {str(e)}") error_count += 1 detail = CacheTaskDetail( task_id=task.id, code=code, trade_date=start_date, status="failed", error_message=str(e) ) self.db.add(detail) # 每50个代码更新一次进度 if (i + 1) % 50 == 0 or i == len(code_list) - 1: task.success_count = success_count task.error_count = error_count task.progress = min(100, int((i + 1) / len(code_list) * 100)) self.db.commit() logger.info(f"进度: {i + 1}/{len(code_list)}, 成功: {success_count}, 跳过: {skipped_count}, 错误: {error_count}") task.status = "completed" task.success_count = success_count task.error_count = error_count task.completed_at = datetime.utcnow() self.db.commit() logger.info(f"缓存完成: 成功{success_count}个, 跳过{skipped_count}个, 错误{error_count}个") except Exception as e: task.status = "failed" task.error_message = str(e) task.completed_at = datetime.utcnow() self.db.commit() logger.error(f"一键缓存数据任务失败: {str(e)}") return task def detect_missing_data( self, security_type: str, period_type: str, start_date: date, end_date: date, code_list: List[str] ) -> CacheTask: """ 检测缺失数据 Args: security_type: 证券类型 (stock, future) period_type: 周期类型 (daily, min1, etc.) start_date: 开始日期 end_date: 结束日期 code_list: 代码列表 Returns: 缓存任务对象 """ # 创建检测任务 task = CacheTask( task_name=f"检测缺失数据 - {security_type} - {len(code_list)}个代码", task_type="detect_missing", security_type=security_type, period_type=period_type, start_date=start_date, end_date=end_date, code_list=",".join(code_list), status="running", total_count=len(code_list), started_at=datetime.utcnow() ) self.db.add(task) self.db.commit() self.db.refresh(task) try: # 获取交易日历 market = "CFE" if security_type == "future" else "SH" trading_days = self.base_service.get_trading_calendar(market, start_date, end_date) expected_count = len(trading_days) success_count = 0 error_count = 0 for code in code_list: try: # 查询实际数据量 if security_type == "stock" and period_type == "daily": actual_count = self.db.query(StockKlineDaily).filter( and_( StockKlineDaily.code == code, StockKlineDaily.trade_date >= start_date, StockKlineDaily.trade_date <= end_date ) ).count() elif security_type == "future" and period_type == "daily": actual_count = self.db.query(FutureKlineDaily).filter( and_( FutureKlineDaily.code == code, FutureKlineDaily.trade_date >= start_date, FutureKlineDaily.trade_date <= end_date ) ).count() else: actual_count = 0 # 计算缺失率 missing_ratio = 0 if expected_count > 0: missing_ratio = (expected_count - actual_count) / expected_count is_missing = missing_ratio > settings.CACHE_MISSING_THRESHOLD # 创建任务详情 detail = CacheTaskDetail( task_id=task.id, code=code, trade_date=start_date, expected_count=expected_count, actual_count=actual_count, is_missing=is_missing, status="pending" if is_missing else "skipped" ) self.db.add(detail) if is_missing: success_count += 1 except Exception as e: logger.error(f"检测{code}缺失数据失败: {str(e)}") error_count += 1 detail = CacheTaskDetail( task_id=task.id, code=code, trade_date=start_date, status="failed", error_message=str(e) ) self.db.add(detail) # 更新进度 task.success_count = success_count task.error_count = error_count task.progress = min(100, int((success_count + error_count) / len(code_list) * 100)) self.db.commit() task.status = "completed" task.completed_at = datetime.utcnow() self.db.commit() except Exception as e: task.status = "failed" task.error_message = str(e) task.completed_at = datetime.utcnow() self.db.commit() logger.error(f"检测缺失数据任务失败: {str(e)}") return task def batch_cache_data( self, security_type: str, period_type: str, start_date: date, end_date: date, code_list: List[str] ) -> CacheTask: """ 批量缓存数据 Args: security_type: 证券类型 period_type: 周期类型 start_date: 开始日期 end_date: 结束日期 code_list: 代码列表 Returns: 缓存任务对象 """ # 创建缓存任务 task = CacheTask( task_name=f"批量缓存数据 - {security_type} - {len(code_list)}个代码", task_type="cache_data", security_type=security_type, period_type=period_type, start_date=start_date, end_date=end_date, code_list=",".join(code_list), status="running", total_count=len(code_list), started_at=datetime.utcnow() ) self.db.add(task) self.db.commit() self.db.refresh(task) try: success_count = 0 error_count = 0 for code in code_list: try: # 获取数据(会自动缓存) if security_type == "stock": self.stock_service.get_kline([code], start_date, end_date, period_type) elif security_type == "future": self.future_service.get_kline([code], start_date, end_date, period_type) success_count += 1 # 创建任务详情 detail = CacheTaskDetail( task_id=task.id, code=code, trade_date=start_date, status="success", processed_at=datetime.utcnow() ) self.db.add(detail) except Exception as e: logger.error(f"缓存{code}数据失败: {str(e)}") error_count += 1 detail = CacheTaskDetail( task_id=task.id, code=code, trade_date=start_date, status="failed", error_message=str(e) ) self.db.add(detail) # 更新进度 task.success_count = success_count task.error_count = error_count task.progress = min(100, int((success_count + error_count) / len(code_list) * 100)) self.db.commit() task.status = "completed" task.completed_at = datetime.utcnow() self.db.commit() except Exception as e: task.status = "failed" task.error_message = str(e) task.completed_at = datetime.utcnow() self.db.commit() logger.error(f"批量缓存数据任务失败: {str(e)}") return task def get_tasks( self, page: int = 1, page_size: int = 20 ) -> Dict: """获取缓存任务列表""" query = self.db.query(CacheTask).order_by(CacheTask.created_at.desc()) total = query.count() tasks = query.offset((page - 1) * page_size).limit(page_size).all() return { "items": tasks, "total": total, "page": page, "page_size": page_size, "total_pages": (total + page_size - 1) // page_size } def get_task(self, task_id: int) -> Optional[CacheTask]: """获取任务详情""" return self.db.query(CacheTask).filter(CacheTask.id == task_id).first() def get_task_details(self, task_id: int) -> List[CacheTaskDetail]: """获取任务详情列表""" return self.db.query(CacheTaskDetail).filter( CacheTaskDetail.task_id == task_id ).all() def cancel_task(self, task_id: int) -> bool: """取消任务""" task = self.db.query(CacheTask).filter(CacheTask.id == task_id).first() if task and task.status == "running": task.status = "cancelled" task.completed_at = datetime.utcnow() self.db.commit() return True return False def get_cache_status(self, code: str, security_type: str, period_type: str) -> Dict: """获取代码缓存状态""" if security_type == "stock": return self.stock_service.get_cache_status(code, period_type) elif security_type == "future": return self.future_service.get_cache_status(code, period_type) else: return { "code": code, "security_type": security_type, "period_type": period_type, "record_count": 0, "min_date": None, "max_date": None } def get_missing_dates_for_code( self, code: str, security_type: str, period_type: str, start_date: date, end_date: date ) -> Dict: """ 获取单个代码的缺失交易日详情 Args: code: 证券代码 security_type: 证券类型 (stock, future) period_type: 周期类型 (daily, min1, etc.) start_date: 开始日期 end_date: 结束日期 Returns: 缺失交易日详情 """ market = get_market_from_code(code) trading_days = self.base_service.get_trading_calendar(market, start_date, end_date) expected_dates = set(trading_days) actual_dates = set() if security_type == "stock" and period_type == "daily": records = self.db.query(StockKlineDaily.trade_date).filter( and_( StockKlineDaily.code == code, StockKlineDaily.trade_date >= start_date, StockKlineDaily.trade_date <= end_date ) ).all() actual_dates = set(r.trade_date for r in records) elif security_type == "future" and period_type == "daily": records = self.db.query(FutureKlineDaily.trade_date).filter( and_( FutureKlineDaily.code == code, FutureKlineDaily.trade_date >= start_date, FutureKlineDaily.trade_date <= end_date ) ).all() actual_dates = set(r.trade_date for r in records) missing_dates = sorted(list(expected_dates - actual_dates)) missing_dates_list = [] for d in missing_dates: missing_dates_list.append({ "date": format_date(d), "date_obj": d.isoformat() }) return { "code": code, "security_type": security_type, "period_type": period_type, "start_date": format_date(start_date), "end_date": format_date(end_date), "expected_count": len(expected_dates), "actual_count": len(actual_dates), "missing_count": len(missing_dates), "missing_dates": missing_dates_list } def get_missing_codes_by_date( self, trade_date: date, security_type: str, period_type: str, contract_type: str = "all" ) -> Dict: """ 获取某个交易日缺失的代码列表 Args: trade_date: 交易日 security_type: 证券类型 (stock, future) period_type: 周期类型 (daily, min1, etc.) contract_type: 合约类型 (all, main) Returns: 缺失代码列表详情 """ code_list = self.get_all_codes(security_type, contract_type) if not code_list: raise ValueError(f"无法获取{security_type}代码列表") logger.info(f"获取{trade_date}交易日缺失代码,共{len(code_list)}个代码") existing_codes = set() if security_type == "stock" and period_type == "daily": records = self.db.query(StockKlineDaily.code).filter( StockKlineDaily.trade_date == trade_date ).all() existing_codes = set(r.code for r in records) elif security_type == "future" and period_type == "daily": records = self.db.query(FutureKlineDaily.code).filter( FutureKlineDaily.trade_date == trade_date ).all() existing_codes = set(r.code for r in records) preliminary_missing_codes = [code for code in code_list if code not in existing_codes] adapter = sdk_manager.get_default_connection() truly_missing_codes = [] unavailable_codes = [] unverified_codes = [] sdk_connection_error = None if adapter: logger.info(f"SDK连接成功,开始二次验证 {len(preliminary_missing_codes)} 个疑似缺失代码") for code in preliminary_missing_codes: try: if security_type == "stock": kline_data = adapter.get_stock_kline( code, format_date(trade_date), format_date(trade_date), period_type ) elif security_type == "future": kline_data = adapter.get_future_kline( code, format_date(trade_date), format_date(trade_date), period_type ) else: kline_data = [] if kline_data and len(kline_data) > 0: truly_missing_codes.append(code) logger.debug(f"验证结果: {code} {format_date(trade_date)} - 接口有数据,确认为缺失") else: unavailable_codes.append(code) logger.debug(f"验证结果: {code} {format_date(trade_date)} - 接口无数据,可能未上市或停牌") except Exception as e: error_msg = str(e) logger.warning(f"验证{code} {format_date(trade_date)}时出错: {error_msg}") unavailable_codes.append(code) else: sdk_connection_error = "SDK连接失败,无法进行二次验证" from app.services.config_service import ConfigService sdk_configs = self.db.query(SDKConfig).filter(SDKConfig.is_active == True).all() if not sdk_configs: sdk_connection_error = "SDK连接失败:未配置任何SDK连接,请前往配置管理添加SDK配置" logger.error(sdk_connection_error) else: default_config = self.db.query(SDKConfig).filter(SDKConfig.is_default == True).first() if not default_config: sdk_connection_error = "SDK连接失败:未设置默认SDK配置,请前往配置管理设置默认配置" logger.error(sdk_connection_error) else: sdk_connection_error = f"SDK连接失败:默认配置 '{default_config.name}' 连接失败,请检查配置或测试连接" logger.error(sdk_connection_error) unverified_codes = preliminary_missing_codes logger.warning(f"由于SDK连接失败,{len(unverified_codes)} 个疑似缺失代码未能验证,暂不标记为真实缺失") return { "trade_date": format_date(trade_date), "security_type": security_type, "period_type": period_type, "contract_type": contract_type, "total_codes": len(code_list), "existing_codes": len(existing_codes), "missing_count": len(truly_missing_codes), "unavailable_count": len(unavailable_codes), "unverified_count": len(unverified_codes), "missing_codes": truly_missing_codes[:100], "missing_code_list": truly_missing_codes, "unavailable_codes": unavailable_codes[:50], "unverified_codes": unverified_codes[:50], "verification_status": "verified" if adapter else "skipped", "sdk_connection_error": sdk_connection_error } def fill_single_date_data( self, code: str, security_type: str, period_type: str, trade_date: date ): """ 补齐单个代码的单个交易日数据 Args: code: 证券代码 security_type: 证券类型 period_type: 周期类型 trade_date: 交易日 """ logger.info(f"补齐单日数据: {code} - {format_date(trade_date)}") try: if security_type == "stock": self.stock_service.get_kline([code], trade_date, trade_date, period_type) elif security_type == "future": self.future_service.get_kline([code], trade_date, trade_date, period_type) logger.info(f"补齐单日数据成功: {code} - {format_date(trade_date)}") except Exception as e: logger.error(f"补齐单日数据失败: {code} - {format_date(trade_date)}, 错误: {str(e)}") raise def fill_all_dates_for_code( self, code: str, security_type: str, period_type: str, start_date: date, end_date: date, missing_dates: List[str] = None ): """ 补齐单个代码的所有缺失交易日数据 Args: code: 证券代码 security_type: 证券类型 period_type: 周期类型 start_date: 开始日期 end_date: 结束日期 missing_dates: 缺失日期列表(可选,如果不提供则自动检测) """ logger.info(f"补齐所有数据: {code} - {format_date(start_date)} 到 {format_date(end_date)}") if missing_dates: from app.utils.date_utils import parse_date dates_to_fill = [parse_date(d) for d in missing_dates] else: result = self.get_missing_dates_for_code(code, security_type, period_type, start_date, end_date) dates_to_fill = [parse_date(d["date"]) for d in result["missing_dates"]] if not dates_to_fill: logger.info(f"没有缺失数据需要补齐: {code}") return success_count = 0 error_count = 0 for trade_date in dates_to_fill: try: if security_type == "stock": self.stock_service.get_kline([code], trade_date, trade_date, period_type) elif security_type == "future": self.future_service.get_kline([code], trade_date, trade_date, period_type) success_count += 1 except Exception as e: logger.error(f"补齐{code} - {format_date(trade_date)}失败: {str(e)}") error_count += 1 logger.info(f"补齐完成: {code}, 成功{success_count}个, 失败{error_count}个") def _create_batch_fill_task( self, trade_date: date, security_type: str, period_type: str, missing_codes: List[str] ) -> CacheTask: """创建批量补齐交易日数据任务记录""" if not missing_codes: raise ValueError("没有缺失代码需要补齐") task = CacheTask( task_name=f"批量补齐交易日数据 - {format_date(trade_date)} - {len(missing_codes)}个代码", task_type="batch_fill_date", security_type=security_type, period_type=period_type, start_date=trade_date, end_date=trade_date, code_list=",".join(missing_codes[:500]), status="pending", total_count=len(missing_codes), started_at=datetime.utcnow() ) self.db.add(task) self.db.commit() self.db.refresh(task) return task def _execute_batch_fill_task( self, task_id: int, trade_date: date, security_type: str, period_type: str, missing_codes: List[str] ): """执行批量补齐交易日数据任务""" task = self.db.query(CacheTask).filter(CacheTask.id == task_id).first() if not task: return task.status = "running" self.db.commit() adapter = sdk_manager.get_default_connection() success_count = 0 error_count = 0 for i, code in enumerate(missing_codes): try: if adapter: if security_type == "stock": kline_data = adapter.get_stock_kline( code, format_date(trade_date), format_date(trade_date), period_type ) elif security_type == "future": kline_data = adapter.get_future_kline( code, format_date(trade_date), format_date(trade_date), period_type ) else: kline_data = [] if kline_data and len(kline_data) > 0: if security_type == "stock": self.stock_service._save_kline_data(code, kline_data, period_type) elif security_type == "future": self.future_service._save_kline_data(code, kline_data, period_type) success_count += 1 else: error_count += 1 logger.debug(f"{code} {format_date(trade_date)} 无数据") else: error_count += 1 logger.warning(f"SDK连接失败,无法补齐 {code}") except Exception as e: logger.error(f"补齐{code} {format_date(trade_date)}失败: {str(e)}") error_count += 1 if (i + 1) % 20 == 0 or i == len(missing_codes) - 1: task.success_count = success_count task.error_count = error_count task.progress = min(100, int((i + 1) / len(missing_codes) * 100)) self.db.commit() task.status = "completed" task.success_count = success_count task.error_count = error_count task.completed_at = datetime.utcnow() self.db.commit() logger.info(f"批量补齐完成: {format_date(trade_date)}, 成功{success_count}个, 失败{error_count}个")