|
|
import pandas as pd
|
|
|
import pymysql
|
|
|
from sqlalchemy import create_engine
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
# # 创建数据库连接
|
|
|
# engine = create_engine('mysql+pymysql://root:1qazse42W3@192.168.0.222:3306/ry')
|
|
|
|
|
|
# # 执行SQL查询并将结果存储在pandas DataFrame中
|
|
|
# df = pd.read_sql_query('select code,trade_day as date ,open as open_price,close as close_price from stocks where trade_day between "2023-06-01" and "2023-06-30" ORDER BY trade_day', engine)
|
|
|
|
|
|
# # 将date字段转换为日期类型,并设置为索引
|
|
|
# df['date'] = pd.to_datetime(df['date'])
|
|
|
# df.set_index('date', inplace=True)
|
|
|
|
|
|
# # 对每个股票计算涨跌幅
|
|
|
# df['change'] = df.groupby('code')['open_price'].shift(-1) - df['close_price']
|
|
|
|
|
|
# # 计算涨跌幅分布
|
|
|
# distribution = df['change'].value_counts(bins=10, normalize=True)
|
|
|
|
|
|
# # 显示涨跌幅分布的图表
|
|
|
# distribution.plot(kind='bar')
|
|
|
# plt.title('Distribution of Price Changes')
|
|
|
# plt.xlabel('Price Change')
|
|
|
# plt.ylabel('Frequency')
|
|
|
# plt.show()
|
|
|
|
|
|
# 创建数据库连接
|
|
|
engine = create_engine('mysql+pymysql://root:1qazse42W3@192.168.0.222:3306/ry')
|
|
|
# 执行SQL查询来获取日期列表
|
|
|
# 假设你的数据表中有一个date字段表示日期
|
|
|
date_df = pd.read_sql_query('select date from trade_dates where trade = "trading" and date between "2023-06-01" and "2023-06-30" order by date', engine)
|
|
|
|
|
|
# 将date字段转换为日期类型
|
|
|
date_df['date'] = pd.to_datetime(date_df['date'])
|
|
|
|
|
|
# 获取日期列表
|
|
|
date_list = date_df['date'].tolist()
|
|
|
|
|
|
total_count = 0
|
|
|
# 低于-3%的开盘价计数
|
|
|
low_open_count = 0
|
|
|
# 正常开盘,介于-3%和3%之间的计数
|
|
|
normal_open_count = 0
|
|
|
# 高于3%的开盘价计数
|
|
|
high_open_count = 0
|
|
|
# 低于-3%的收盘价计数
|
|
|
low_close_count = 0
|
|
|
# 正常收盘,介于-3%和3%之间的计数
|
|
|
normal_close_count = 0
|
|
|
# 高于3%的收盘价计数
|
|
|
high_close_count = 0
|
|
|
|
|
|
# 收盘低于-3% 且开盘低于-3%的计数
|
|
|
low_open_low_close_count = 0
|
|
|
# 收盘价低于-3% 且开盘价介于-3%和3%之间的计数
|
|
|
low_open_normal_close_count = 0
|
|
|
# 收盘低于-3% 且开盘高于3%的计数
|
|
|
low_open_high_close_count = 0
|
|
|
# 收盘价介于-3%和3%之间 且开盘低于-3%的计数
|
|
|
normal_open_low_close_count = 0
|
|
|
# 收盘价介于-3%和3%之间 且开盘介于-3%和3%之间的计数
|
|
|
normal_open_normal_close_count = 0
|
|
|
# 收盘价介于-3%和3%之间 且开盘高于3%的计数
|
|
|
normal_open_high_close_count = 0
|
|
|
# 收盘高于3% 且开盘低于-3%的计数
|
|
|
high_open_low_close_count = 0
|
|
|
# 收盘高于3% 且开盘介于-3%和3%之间的计数
|
|
|
high_open_normal_close_count = 0
|
|
|
# 收盘高于3% 且开盘高于3%的计数
|
|
|
high_open_high_close_count = 0
|
|
|
|
|
|
|
|
|
|
|
|
# 遍历日期列表
|
|
|
for i, date in enumerate(date_list):
|
|
|
if i+1 < len(date_list):
|
|
|
# 将日期转换为字符串格式,以便在SQL查询中使用
|
|
|
date_str = date.strftime('%Y-%m-%d')
|
|
|
nextdate_str = date_list[i+1].strftime('%Y-%m-%d')
|
|
|
|
|
|
print(f'正在处理{date_str}的数据')
|
|
|
|
|
|
# 执行SQL查询(这只是一个示例,你需要根据你的实际需求和数据库结构进行修改)
|
|
|
query = f'select code,open,close,differrange from stocks where islimit = "是" and trade_day = "{date_str}"'
|
|
|
df = pd.read_sql_query(query, engine)
|
|
|
limit_list = df['code'].tolist()
|
|
|
for code in limit_list:
|
|
|
# 总统计数
|
|
|
total_count += 1
|
|
|
print(f'正在处理{date_str} - {code}的数据')
|
|
|
query = f'select code,open,close,differrange from stocks where trade_day = "{nextdate_str}" and code = "{code}"'
|
|
|
dfnext = pd.read_sql_query(query,engine)
|
|
|
# 处理数据...
|
|
|
df['next_open'] = dfnext['open']
|
|
|
df['next_open_diff'] = 100 * (dfnext['open'] - df['open'])/df['open']
|
|
|
print(df['next_open_diff'][0])
|
|
|
df['next_close'] = dfnext['close']
|
|
|
df['next_close_differrange'] = dfnext['differrange']
|
|
|
print(df['next_close_differrange'][0])
|
|
|
|
|
|
if df['next_open_diff'][0] < -3:
|
|
|
low_open_count += 1
|
|
|
elif df['next_open_diff'][0] > 3:
|
|
|
high_open_count += 1
|
|
|
else:
|
|
|
normal_open_count += 1
|
|
|
|
|
|
if df['next_close_differrange'][0] < -3:
|
|
|
low_close_count += 1
|
|
|
elif df['next_close_differrange'][0] > 3:
|
|
|
high_close_count += 1
|
|
|
else:
|
|
|
normal_close_count += 1
|
|
|
|
|
|
if df['next_open_diff'][0] < -3 and df['next_close_differrange'][0] < -3:
|
|
|
low_open_low_close_count += 1
|
|
|
elif df['next_open_diff'][0] < -3 and df['next_close_differrange'][0] > -3 and df['next_close_differrange'][0] < 3:
|
|
|
low_open_normal_close_count += 1
|
|
|
elif df['next_open_diff'][0] < -3 and df['next_close_differrange'][0] > 3:
|
|
|
low_open_high_close_count += 1
|
|
|
elif df['next_open_diff'][0] > -3 and df['next_open_diff'][0] < 3 and df['next_close_differrange'][0] < -3:
|
|
|
normal_open_low_close_count += 1
|
|
|
elif df['next_open_diff'][0] > -3 and df['next_open_diff'][0] < 3 and df['next_close_differrange'][0] > -3 and df['next_close_differrange'][0] < 3:
|
|
|
normal_open_normal_close_count += 1
|
|
|
elif df['next_open_diff'][0] > -3 and df['next_open_diff'][0] < 3 and df['next_close_differrange'][0] > 3:
|
|
|
normal_open_high_close_count += 1
|
|
|
elif df['next_open_diff'][0] > 3 and df['next_close_differrange'][0] < -3:
|
|
|
high_open_low_close_count += 1
|
|
|
elif df['next_open_diff'][0] > 3 and df['next_close_differrange'][0] > -3 and df['next_close_differrange'][0] < 3:
|
|
|
high_open_normal_close_count += 1
|
|
|
elif df['next_open_diff'][0] > 3 and df['next_close_differrange'][0] > 3:
|
|
|
high_open_high_close_count += 1
|
|
|
# 将数据框保存到磁盘
|
|
|
df.to_csv(f'./data/{date_str}_stock.csv', index=False)
|
|
|
# # 从磁盘加载数据框
|
|
|
# df = pd.read_csv('df_distribution.csv')
|
|
|
|
|
|
print(f'总统计数:{total_count}')
|
|
|
print(f'低于-3%的开盘价计数:{low_open_count}')
|
|
|
print(f'正常开盘,介于-3%和3%之间的计数:{normal_open_count}')
|
|
|
print(f'高于3%的开盘价计数:{high_open_count}')
|
|
|
print(f'低于-3%的收盘价计数:{low_close_count}')
|
|
|
print(f'正常收盘,介于-3%和3%之间的计数:{normal_close_count}')
|
|
|
print(f'高于3%的收盘价计数:{high_close_count}')
|
|
|
print(f'收盘低于-3% 且开盘低于-3%的计数:{low_open_low_close_count}')
|
|
|
print(f'收盘价低于-3% 且开盘价介于-3%和3%之间的计数:{low_open_normal_close_count}')
|
|
|
print(f'收盘低于-3% 且开盘高于3%的计数:{low_open_high_close_count}')
|
|
|
print(f'收盘价介于-3%和3%之间 且开盘低于-3%的计数:{normal_open_low_close_count}')
|
|
|
print(f'收盘价介于-3%和3%之间 且开盘介于-3%和3%之间的计数:{normal_open_normal_close_count}')
|
|
|
print(f'收盘价介于-3%和3%之间 且开盘高于3%的计数:{normal_open_high_close_count}')
|
|
|
print(f'收盘高于3% 且开盘低于-3%的计数:{high_open_low_close_count}')
|
|
|
print(f'收盘高于3% 且开盘介于-3%和3%之间的计数:{high_open_normal_close_count}')
|
|
|
print(f'收盘高于3% 且开盘高于3%的计数:{high_open_high_close_count}')
|
|
|
|
|
|
df_distribution = pd.DataFrame({'test':[1]})
|
|
|
df_distribution['total_count'] = total_count
|
|
|
df_distribution['low_open_count'] = low_open_count
|
|
|
df_distribution['normal_open_count'] = normal_open_count
|
|
|
df_distribution['high_open_count'] = high_open_count
|
|
|
df_distribution['low_close_count'] = low_close_count
|
|
|
df_distribution['normal_close_count'] = normal_close_count
|
|
|
df_distribution['high_close_count'] = high_close_count
|
|
|
df_distribution['low_open_low_close_count'] = low_open_low_close_count
|
|
|
df_distribution['low_open_normal_close_count'] = low_open_normal_close_count
|
|
|
df_distribution['low_open_high_close_count'] = low_open_high_close_count
|
|
|
df_distribution['normal_open_low_close_count'] = normal_open_low_close_count
|
|
|
df_distribution['normal_open_normal_close_count'] = normal_open_normal_close_count
|
|
|
df_distribution['normal_open_high_close_count'] = normal_open_high_close_count
|
|
|
df_distribution['high_open_low_close_count'] = high_open_low_close_count
|
|
|
df_distribution['high_open_normal_close_count'] = high_open_normal_close_count
|
|
|
df_distribution['high_open_high_close_count'] = high_open_high_close_count
|
|
|
# df_distribution["low_open_low_close_rate"] = low_open_low_close_count / total_count
|
|
|
# df_distribution["low_open_normal_close_rate"] = low_open_normal_close_count / total_count
|
|
|
# df_distribution["low_open_high_close_rate"] = low_open_high_close_count / total_count
|
|
|
# df_distribution["normal_open_low_close_rate"] = normal_open_low_close_count / total_count
|
|
|
# df_distribution["normal_open_normal_close_rate"] = normal_open_normal_close_count / total_count
|
|
|
# df_distribution["normal_open_high_close_rate"] = normal_open_high_close_count / total_count
|
|
|
# df_distribution["high_open_low_close_rate"] = high_open_low_close_count / total_count
|
|
|
# df_distribution["high_open_normal_close_rate"] = high_open_normal_close_count / total_count
|
|
|
# df_distribution["high_open_high_close_rate"] = high_open_high_close_count / total_count
|
|
|
# df_distribution["low_open_rate"] = low_open_count / total_count
|
|
|
# df_distribution["normal_open_rate"] = normal_open_count / total_count
|
|
|
# df_distribution["high_open_rate"] = high_open_count / total_count
|
|
|
# df_distribution["low_close_rate"] = low_close_count / total_count
|
|
|
# df_distribution["normal_close_rate"] = normal_close_count / total_count
|
|
|
# df_distribution["high_close_rate"] = high_close_count / total_count
|
|
|
|
|
|
|
|
|
# 删除'test'列
|
|
|
df_distribution = df_distribution.drop(columns=['test'])
|
|
|
|
|
|
# 创建一个新的图形
|
|
|
fig, ax = plt.subplots()
|
|
|
|
|
|
# 创建一个条形图
|
|
|
df_distribution.plot(kind='bar', ax=ax)
|
|
|
|
|
|
# 设置图形的标题和轴标签
|
|
|
ax.set_title('Distribution')
|
|
|
ax.set_xlabel('Category')
|
|
|
ax.set_ylabel('Count')
|
|
|
|
|
|
# 显示图形
|
|
|
plt.show() |