You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

206 lines
9.8 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import pandas as pd
import pymysql
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
# # 创建数据库连接
# engine = create_engine('mysql+pymysql://root:1qazse42W3@192.168.0.222:3306/ry')
# # 执行SQL查询并将结果存储在pandas DataFrame中
# df = pd.read_sql_query('select code,trade_day as date ,open as open_price,close as close_price from stocks where trade_day between "2023-06-01" and "2023-06-30" ORDER BY trade_day', engine)
# # 将date字段转换为日期类型并设置为索引
# df['date'] = pd.to_datetime(df['date'])
# df.set_index('date', inplace=True)
# # 对每个股票计算涨跌幅
# df['change'] = df.groupby('code')['open_price'].shift(-1) - df['close_price']
# # 计算涨跌幅分布
# distribution = df['change'].value_counts(bins=10, normalize=True)
# # 显示涨跌幅分布的图表
# distribution.plot(kind='bar')
# plt.title('Distribution of Price Changes')
# plt.xlabel('Price Change')
# plt.ylabel('Frequency')
# plt.show()
# 创建数据库连接
engine = create_engine('mysql+pymysql://root:1qazse42W3@192.168.0.222:3306/ry')
# 执行SQL查询来获取日期列表
# 假设你的数据表中有一个date字段表示日期
date_df = pd.read_sql_query('select date from trade_dates where trade = "trading" and date between "2023-06-01" and "2023-06-30" order by date', engine)
# 将date字段转换为日期类型
date_df['date'] = pd.to_datetime(date_df['date'])
# 获取日期列表
date_list = date_df['date'].tolist()
total_count = 0
# 低于-3%的开盘价计数
low_open_count = 0
# 正常开盘,介于-3%和3%之间的计数
normal_open_count = 0
# 高于3%的开盘价计数
high_open_count = 0
# 低于-3%的收盘价计数
low_close_count = 0
# 正常收盘,介于-3%和3%之间的计数
normal_close_count = 0
# 高于3%的收盘价计数
high_close_count = 0
# 收盘低于-3% 且开盘低于-3%的计数
low_open_low_close_count = 0
# 收盘价低于-3% 且开盘价介于-3%和3%之间的计数
low_open_normal_close_count = 0
# 收盘低于-3% 且开盘高于3%的计数
low_open_high_close_count = 0
# 收盘价介于-3%和3%之间 且开盘低于-3%的计数
normal_open_low_close_count = 0
# 收盘价介于-3%和3%之间 且开盘介于-3%和3%之间的计数
normal_open_normal_close_count = 0
# 收盘价介于-3%和3%之间 且开盘高于3%的计数
normal_open_high_close_count = 0
# 收盘高于3% 且开盘低于-3%的计数
high_open_low_close_count = 0
# 收盘高于3% 且开盘介于-3%和3%之间的计数
high_open_normal_close_count = 0
# 收盘高于3% 且开盘高于3%的计数
high_open_high_close_count = 0
# 遍历日期列表
for i, date in enumerate(date_list):
if i+1 < len(date_list):
# 将日期转换为字符串格式以便在SQL查询中使用
date_str = date.strftime('%Y-%m-%d')
nextdate_str = date_list[i+1].strftime('%Y-%m-%d')
print(f'正在处理{date_str}的数据')
# 执行SQL查询这只是一个示例你需要根据你的实际需求和数据库结构进行修改
query = f'select code,open,close,differrange from stocks where islimit = "" and trade_day = "{date_str}"'
df = pd.read_sql_query(query, engine)
limit_list = df['code'].tolist()
for code in limit_list:
# 总统计数
total_count += 1
print(f'正在处理{date_str} - {code}的数据')
query = f'select code,open,close,differrange from stocks where trade_day = "{nextdate_str}" and code = "{code}"'
dfnext = pd.read_sql_query(query,engine)
# 处理数据...
df['next_open'] = dfnext['open']
df['next_open_diff'] = 100 * (dfnext['open'] - df['open'])/df['open']
print(df['next_open_diff'][0])
df['next_close'] = dfnext['close']
df['next_close_differrange'] = dfnext['differrange']
print(df['next_close_differrange'][0])
if df['next_open_diff'][0] < -3:
low_open_count += 1
elif df['next_open_diff'][0] > 3:
high_open_count += 1
else:
normal_open_count += 1
if df['next_close_differrange'][0] < -3:
low_close_count += 1
elif df['next_close_differrange'][0] > 3:
high_close_count += 1
else:
normal_close_count += 1
if df['next_open_diff'][0] < -3 and df['next_close_differrange'][0] < -3:
low_open_low_close_count += 1
elif df['next_open_diff'][0] < -3 and df['next_close_differrange'][0] > -3 and df['next_close_differrange'][0] < 3:
low_open_normal_close_count += 1
elif df['next_open_diff'][0] < -3 and df['next_close_differrange'][0] > 3:
low_open_high_close_count += 1
elif df['next_open_diff'][0] > -3 and df['next_open_diff'][0] < 3 and df['next_close_differrange'][0] < -3:
normal_open_low_close_count += 1
elif df['next_open_diff'][0] > -3 and df['next_open_diff'][0] < 3 and df['next_close_differrange'][0] > -3 and df['next_close_differrange'][0] < 3:
normal_open_normal_close_count += 1
elif df['next_open_diff'][0] > -3 and df['next_open_diff'][0] < 3 and df['next_close_differrange'][0] > 3:
normal_open_high_close_count += 1
elif df['next_open_diff'][0] > 3 and df['next_close_differrange'][0] < -3:
high_open_low_close_count += 1
elif df['next_open_diff'][0] > 3 and df['next_close_differrange'][0] > -3 and df['next_close_differrange'][0] < 3:
high_open_normal_close_count += 1
elif df['next_open_diff'][0] > 3 and df['next_close_differrange'][0] > 3:
high_open_high_close_count += 1
# 将数据框保存到磁盘
df.to_csv(f'./data/{date_str}_stock.csv', index=False)
# # 从磁盘加载数据框
# df = pd.read_csv('df_distribution.csv')
print(f'总统计数:{total_count}')
print(f'低于-3%的开盘价计数:{low_open_count}')
print(f'正常开盘,介于-3%和3%之间的计数:{normal_open_count}')
print(f'高于3%的开盘价计数:{high_open_count}')
print(f'低于-3%的收盘价计数:{low_close_count}')
print(f'正常收盘,介于-3%和3%之间的计数:{normal_close_count}')
print(f'高于3%的收盘价计数:{high_close_count}')
print(f'收盘低于-3% 且开盘低于-3%的计数:{low_open_low_close_count}')
print(f'收盘价低于-3% 且开盘价介于-3%和3%之间的计数:{low_open_normal_close_count}')
print(f'收盘低于-3% 且开盘高于3%的计数:{low_open_high_close_count}')
print(f'收盘价介于-3%和3%之间 且开盘低于-3%的计数:{normal_open_low_close_count}')
print(f'收盘价介于-3%和3%之间 且开盘介于-3%和3%之间的计数:{normal_open_normal_close_count}')
print(f'收盘价介于-3%和3%之间 且开盘高于3%的计数:{normal_open_high_close_count}')
print(f'收盘高于3% 且开盘低于-3%的计数:{high_open_low_close_count}')
print(f'收盘高于3% 且开盘介于-3%和3%之间的计数:{high_open_normal_close_count}')
print(f'收盘高于3% 且开盘高于3%的计数:{high_open_high_close_count}')
df_distribution = pd.DataFrame({'test':[1]})
df_distribution['total_count'] = total_count
df_distribution['low_open_count'] = low_open_count
df_distribution['normal_open_count'] = normal_open_count
df_distribution['high_open_count'] = high_open_count
df_distribution['low_close_count'] = low_close_count
df_distribution['normal_close_count'] = normal_close_count
df_distribution['high_close_count'] = high_close_count
df_distribution['low_open_low_close_count'] = low_open_low_close_count
df_distribution['low_open_normal_close_count'] = low_open_normal_close_count
df_distribution['low_open_high_close_count'] = low_open_high_close_count
df_distribution['normal_open_low_close_count'] = normal_open_low_close_count
df_distribution['normal_open_normal_close_count'] = normal_open_normal_close_count
df_distribution['normal_open_high_close_count'] = normal_open_high_close_count
df_distribution['high_open_low_close_count'] = high_open_low_close_count
df_distribution['high_open_normal_close_count'] = high_open_normal_close_count
df_distribution['high_open_high_close_count'] = high_open_high_close_count
# df_distribution["low_open_low_close_rate"] = low_open_low_close_count / total_count
# df_distribution["low_open_normal_close_rate"] = low_open_normal_close_count / total_count
# df_distribution["low_open_high_close_rate"] = low_open_high_close_count / total_count
# df_distribution["normal_open_low_close_rate"] = normal_open_low_close_count / total_count
# df_distribution["normal_open_normal_close_rate"] = normal_open_normal_close_count / total_count
# df_distribution["normal_open_high_close_rate"] = normal_open_high_close_count / total_count
# df_distribution["high_open_low_close_rate"] = high_open_low_close_count / total_count
# df_distribution["high_open_normal_close_rate"] = high_open_normal_close_count / total_count
# df_distribution["high_open_high_close_rate"] = high_open_high_close_count / total_count
# df_distribution["low_open_rate"] = low_open_count / total_count
# df_distribution["normal_open_rate"] = normal_open_count / total_count
# df_distribution["high_open_rate"] = high_open_count / total_count
# df_distribution["low_close_rate"] = low_close_count / total_count
# df_distribution["normal_close_rate"] = normal_close_count / total_count
# df_distribution["high_close_rate"] = high_close_count / total_count
# 删除'test'列
df_distribution = df_distribution.drop(columns=['test'])
# 创建一个新的图形
fig, ax = plt.subplots()
# 创建一个条形图
df_distribution.plot(kind='bar', ax=ax)
# 设置图形的标题和轴标签
ax.set_title('Distribution')
ax.set_xlabel('Category')
ax.set_ylabel('Count')
# 显示图形
plt.show()