# Backtesting/calculteLimit.py

import pandas as pd
import pymysql
from sqlalchemy import create_engine
import matplotlib.pyplot as plt

# # 创建数据库连接
# engine = create_engine('mysql+pymysql://root:1qazse42W3@192.168.0.222:3306/ry')

# # 执行SQL查询并将结果存储在pandas DataFrame中
# df = pd.read_sql_query('select code,trade_day as date ,open as open_price,close as close_price from stocks where trade_day between "2023-06-01" and "2023-06-30" ORDER BY trade_day', engine)

# # 将date字段转换为日期类型，并设置为索引
# df['date'] = pd.to_datetime(df['date'])
# df.set_index('date', inplace=True)

# # 对每个股票计算涨跌幅
# df['change'] = df.groupby('code')['open_price'].shift(-1) - df['close_price']

# # 计算涨跌幅分布
# distribution = df['change'].value_counts(bins=10, normalize=True)

# # 显示涨跌幅分布的图表
# distribution.plot(kind='bar')
# plt.title('Distribution of Price Changes')
# plt.xlabel('Price Change')
# plt.ylabel('Frequency')
# plt.show()

# Engine for the MySQL database queried throughout this script.
# NOTE(review): the user name and password are embedded in this URL;
# consider loading them from configuration instead of source control.
engine = create_engine('mysql+pymysql://root:1qazse42W3@192.168.0.222:3306/ry')

# Rows of trade_dates marked "trading" between 2023-06-01 and 2023-06-30,
# ordered by date.
date_df = pd.read_sql_query(
    'select date from trade_dates where trade = "trading" and date between "2023-06-01" and "2023-06-30" order by date',
    engine,
)

# Parse the date column so each entry supports strftime(), then keep the
# values as a plain list for pairwise iteration.
date_df['date'] = pd.to_datetime(date_df['date'])
date_list = list(date_df['date'])

# Number of (flagged stock, following trading day) pairs examined.
total_count = 0

# Next-day open change buckets: below -3%, between -3% and 3%, above 3%.
low_open_count = normal_open_count = high_open_count = 0

# Next-day `differrange` buckets: below -3, between -3 and 3, above 3.
low_close_count = normal_close_count = high_close_count = 0

# Joint buckets, named <open bucket>_open_<close bucket>_close_count:
# the first word describes the next-day open change, the second the
# next-day `differrange` value.
low_open_low_close_count = low_open_normal_close_count = low_open_high_close_count = 0
normal_open_low_close_count = normal_open_normal_close_count = normal_open_high_close_count = 0
high_open_low_close_count = high_open_normal_close_count = high_open_high_close_count = 0


import os


def _classify_change(pct, threshold=3):
    """Bucket a percentage change as 'low', 'normal' or 'high'.

    Args:
        pct: Percentage change to classify.
        threshold: Symmetric cut-off; defaults to 3.

    Returns:
        'low' when pct < -threshold, 'high' when pct > threshold and
        'normal' otherwise (both boundaries count as 'normal').
    """
    if pct < -threshold:
        return 'low'
    if pct > threshold:
        return 'high'
    return 'normal'


_BUCKETS = ('low', 'normal', 'high')
_NEXT_DAY_COLUMNS = ('next_open', 'next_open_diff', 'next_close', 'next_close_differrange')

# Tallies keyed by bucket; the joint tally is keyed by (open bucket, close bucket).
# Using one classification for both keeps the joint counts consistent with the
# marginal counts, including values that sit exactly on -3 or 3.
open_tally = dict.fromkeys(_BUCKETS, 0)
close_tally = dict.fromkeys(_BUCKETS, 0)
combo_tally = {(open_key, close_key): 0 for open_key in _BUCKETS for close_key in _BUCKETS}

# DataFrame.to_csv does not create missing directories.
os.makedirs('./data', exist_ok=True)

# Walk consecutive trading days: stocks flagged on `date` are evaluated
# against their quote on `next_date`. The last date has no successor and is
# therefore never used as a flag day.
for date, next_date in zip(date_list, date_list[1:]):
    # Dates are formatted as strings so they can be embedded in the SQL text.
    date_str = date.strftime('%Y-%m-%d')
    nextdate_str = next_date.strftime('%Y-%m-%d')

    print(f'正在处理{date_str}的数据')

    # Stocks whose islimit column is "是" on this trading day.
    query = f'select code,open,close,differrange from stocks where islimit = "是" and trade_day = "{date_str}"'
    df = pd.read_sql_query(query, engine)

    # Per-stock next-day values, collected in row order so every row of df
    # receives its own figures when the columns are attached below.
    next_day = {column: [] for column in _NEXT_DAY_COLUMNS}

    for code, today_open in zip(df['code'], df['open']):
        print(f'正在处理{date_str} - {code}的数据')
        query = f'select code,open,close,differrange from stocks where trade_day = "{nextdate_str}" and code = "{code}"'
        dfnext = pd.read_sql_query(query, engine)

        first = dfnext.iloc[0] if not dfnext.empty else None
        if (first is None or pd.isna(today_open) or today_open == 0
                or pd.isna(first['open']) or pd.isna(first['differrange'])):
            # No usable next-day quote: leave the row blank and keep the
            # stock out of every counter instead of counting it as 'normal'.
            print(f'{nextdate_str}没有{code}的有效数据，已跳过')
            for values in next_day.values():
                values.append(float('nan'))
            continue

        # Use this stock's own open as the baseline (scalar arithmetic avoids
        # pandas index alignment pairing it with the first row of df).
        # NOTE(review): the gap is measured against the flag day's open, as
        # before; confirm whether the flag day's close was intended.
        base_open = float(today_open)
        next_open_diff = 100 * (float(first['open']) - base_open) / base_open
        next_close_differrange = float(first['differrange'])
        print(next_open_diff)
        print(next_close_differrange)

        next_day['next_open'].append(first['open'])
        next_day['next_open_diff'].append(next_open_diff)
        next_day['next_close'].append(first['close'])
        next_day['next_close_differrange'].append(next_close_differrange)

        open_bucket = _classify_change(next_open_diff)
        close_bucket = _classify_change(next_close_differrange)
        total_count += 1
        open_tally[open_bucket] += 1
        close_tally[close_bucket] += 1
        combo_tally[open_bucket, close_bucket] += 1

    for column, values in next_day.items():
        df[column] = values

    # Persist the day's flagged stocks together with their next-day figures.
    df.to_csv(f'./data/{date_str}_stock.csv', index=False)

# Fold the tallies into the module-level counters read by the reporting code.
low_open_count += open_tally['low']
normal_open_count += open_tally['normal']
high_open_count += open_tally['high']
low_close_count += close_tally['low']
normal_close_count += close_tally['normal']
high_close_count += close_tally['high']
low_open_low_close_count += combo_tally['low', 'low']
low_open_normal_close_count += combo_tally['low', 'normal']
low_open_high_close_count += combo_tally['low', 'high']
normal_open_low_close_count += combo_tally['normal', 'low']
normal_open_normal_close_count += combo_tally['normal', 'normal']
normal_open_high_close_count += combo_tally['normal', 'high']
high_open_low_close_count += combo_tally['high', 'low']
high_open_normal_close_count += combo_tally['high', 'normal']
high_open_high_close_count += combo_tally['high', 'high']

# Console summary of every counter, one line per label.
#
# The joint counters are named <open bucket>_open_<close bucket>_close_count
# and the loop increments them accordingly, whereas the labels are worded
# "close ... and open ...". Each label is therefore paired with the counter
# whose open/close buckets match its wording; previously six of the joint
# labels displayed the counter for the mirrored combination.
summary = [
    ('总统计数', total_count),
    ('低于-3%的开盘价计数', low_open_count),
    ('正常开盘，介于-3%和3%之间的计数', normal_open_count),
    ('高于3%的开盘价计数', high_open_count),
    ('低于-3%的收盘价计数', low_close_count),
    ('正常收盘，介于-3%和3%之间的计数', normal_close_count),
    ('高于3%的收盘价计数', high_close_count),
    # close low / open low
    ('收盘低于-3% 且开盘低于-3%的计数', low_open_low_close_count),
    # close low / open normal
    ('收盘价低于-3% 且开盘价介于-3%和3%之间的计数', normal_open_low_close_count),
    # close low / open high
    ('收盘低于-3% 且开盘高于3%的计数', high_open_low_close_count),
    # close normal / open low
    ('收盘价介于-3%和3%之间 且开盘低于-3%的计数', low_open_normal_close_count),
    # close normal / open normal
    ('收盘价介于-3%和3%之间 且开盘介于-3%和3%之间的计数', normal_open_normal_close_count),
    # close normal / open high
    ('收盘价介于-3%和3%之间 且开盘高于3%的计数', high_open_normal_close_count),
    # close high / open low
    ('收盘高于3% 且开盘低于-3%的计数', low_open_high_close_count),
    # close high / open normal
    ('收盘高于3% 且开盘介于-3%和3%之间的计数', normal_open_high_close_count),
    # close high / open high
    ('收盘高于3% 且开盘高于3%的计数', high_open_high_close_count),
]
for label, value in summary:
    print(f'{label}：{value}')

# Gather every counter under its own name, in the order the columns should
# appear in the chart.
count_columns = {
    'total_count': total_count,
    'low_open_count': low_open_count,
    'normal_open_count': normal_open_count,
    'high_open_count': high_open_count,
    'low_close_count': low_close_count,
    'normal_close_count': normal_close_count,
    'high_close_count': high_close_count,
    'low_open_low_close_count': low_open_low_close_count,
    'low_open_normal_close_count': low_open_normal_close_count,
    'low_open_high_close_count': low_open_high_close_count,
    'normal_open_low_close_count': normal_open_low_close_count,
    'normal_open_normal_close_count': normal_open_normal_close_count,
    'normal_open_high_close_count': normal_open_high_close_count,
    'high_open_low_close_count': high_open_low_close_count,
    'high_open_normal_close_count': high_open_normal_close_count,
    'high_open_high_close_count': high_open_high_close_count,
}

# Single-row frame (index 0) with one column per counter. Only raw counts
# are stored; per-category rates (count / total_count) are not computed.
df_distribution = pd.DataFrame({name: [value] for name, value in count_columns.items()})

# Draw the counters as a bar chart on a fresh figure: the single row of
# df_distribution yields one group of bars, one bar per column.
fig, ax = plt.subplots()
df_distribution.plot(ax=ax, kind='bar')

# Title and axis labels, applied in one call.
ax.set(title='Distribution', xlabel='Category', ylabel='Count')

# Open the figure window.
plt.show()