import pandas as pd import pymysql from sqlalchemy import create_engine import matplotlib.pyplot as plt # # 创建数据库连接 # engine = create_engine('mysql+pymysql://root:1qazse42W3@192.168.0.222:3306/ry') # # 执行SQL查询并将结果存储在pandas DataFrame中 # df = pd.read_sql_query('select code,trade_day as date ,open as open_price,close as close_price from stocks where trade_day between "2023-06-01" and "2023-06-30" ORDER BY trade_day', engine) # # 将date字段转换为日期类型,并设置为索引 # df['date'] = pd.to_datetime(df['date']) # df.set_index('date', inplace=True) # # 对每个股票计算涨跌幅 # df['change'] = df.groupby('code')['open_price'].shift(-1) - df['close_price'] # # 计算涨跌幅分布 # distribution = df['change'].value_counts(bins=10, normalize=True) # # 显示涨跌幅分布的图表 # distribution.plot(kind='bar') # plt.title('Distribution of Price Changes') # plt.xlabel('Price Change') # plt.ylabel('Frequency') # plt.show() # 创建数据库连接 engine = create_engine('mysql+pymysql://root:1qazse42W3@192.168.0.222:3306/ry') # 执行SQL查询来获取日期列表 # 假设你的数据表中有一个date字段表示日期 date_df = pd.read_sql_query('select date from trade_dates where trade = "trading" and date between "2023-06-01" and "2023-06-30" order by date', engine) # 将date字段转换为日期类型 date_df['date'] = pd.to_datetime(date_df['date']) # 获取日期列表 date_list = date_df['date'].tolist() total_count = 0 # 低于-3%的开盘价计数 low_open_count = 0 # 正常开盘,介于-3%和3%之间的计数 normal_open_count = 0 # 高于3%的开盘价计数 high_open_count = 0 # 低于-3%的收盘价计数 low_close_count = 0 # 正常收盘,介于-3%和3%之间的计数 normal_close_count = 0 # 高于3%的收盘价计数 high_close_count = 0 # 收盘低于-3% 且开盘低于-3%的计数 low_open_low_close_count = 0 # 收盘价低于-3% 且开盘价介于-3%和3%之间的计数 low_open_normal_close_count = 0 # 收盘低于-3% 且开盘高于3%的计数 low_open_high_close_count = 0 # 收盘价介于-3%和3%之间 且开盘低于-3%的计数 normal_open_low_close_count = 0 # 收盘价介于-3%和3%之间 且开盘介于-3%和3%之间的计数 normal_open_normal_close_count = 0 # 收盘价介于-3%和3%之间 且开盘高于3%的计数 normal_open_high_close_count = 0 # 收盘高于3% 且开盘低于-3%的计数 high_open_low_close_count = 0 # 收盘高于3% 且开盘介于-3%和3%之间的计数 high_open_normal_close_count = 0 # 收盘高于3% 且开盘高于3%的计数 high_open_high_close_count = 0 # 遍历日期列表 for i, date in enumerate(date_list): if i+1 < len(date_list): # 将日期转换为字符串格式,以便在SQL查询中使用 date_str = date.strftime('%Y-%m-%d') nextdate_str = date_list[i+1].strftime('%Y-%m-%d') print(f'正在处理{date_str}的数据') # 执行SQL查询(这只是一个示例,你需要根据你的实际需求和数据库结构进行修改) query = f'select code,open,close,differrange from stocks where islimit = "是" and trade_day = "{date_str}"' df = pd.read_sql_query(query, engine) limit_list = df['code'].tolist() for code in limit_list: # 总统计数 total_count += 1 print(f'正在处理{date_str} - {code}的数据') query = f'select code,open,close,differrange from stocks where trade_day = "{nextdate_str}" and code = "{code}"' dfnext = pd.read_sql_query(query,engine) # 处理数据... df['next_open'] = dfnext['open'] df['next_open_diff'] = 100 * (dfnext['open'] - df['open'])/df['open'] print(df['next_open_diff'][0]) df['next_close'] = dfnext['close'] df['next_close_differrange'] = dfnext['differrange'] print(df['next_close_differrange'][0]) if df['next_open_diff'][0] < -3: low_open_count += 1 elif df['next_open_diff'][0] > 3: high_open_count += 1 else: normal_open_count += 1 if df['next_close_differrange'][0] < -3: low_close_count += 1 elif df['next_close_differrange'][0] > 3: high_close_count += 1 else: normal_close_count += 1 if df['next_open_diff'][0] < -3 and df['next_close_differrange'][0] < -3: low_open_low_close_count += 1 elif df['next_open_diff'][0] < -3 and df['next_close_differrange'][0] > -3 and df['next_close_differrange'][0] < 3: low_open_normal_close_count += 1 elif df['next_open_diff'][0] < -3 and df['next_close_differrange'][0] > 3: low_open_high_close_count += 1 elif df['next_open_diff'][0] > -3 and df['next_open_diff'][0] < 3 and df['next_close_differrange'][0] < -3: normal_open_low_close_count += 1 elif df['next_open_diff'][0] > -3 and df['next_open_diff'][0] < 3 and df['next_close_differrange'][0] > -3 and df['next_close_differrange'][0] < 3: normal_open_normal_close_count += 1 elif df['next_open_diff'][0] > -3 and df['next_open_diff'][0] < 3 and df['next_close_differrange'][0] > 3: normal_open_high_close_count += 1 elif df['next_open_diff'][0] > 3 and df['next_close_differrange'][0] < -3: high_open_low_close_count += 1 elif df['next_open_diff'][0] > 3 and df['next_close_differrange'][0] > -3 and df['next_close_differrange'][0] < 3: high_open_normal_close_count += 1 elif df['next_open_diff'][0] > 3 and df['next_close_differrange'][0] > 3: high_open_high_close_count += 1 # 将数据框保存到磁盘 df.to_csv(f'./data/{date_str}_stock.csv', index=False) # # 从磁盘加载数据框 # df = pd.read_csv('df_distribution.csv') print(f'总统计数:{total_count}') print(f'低于-3%的开盘价计数:{low_open_count}') print(f'正常开盘,介于-3%和3%之间的计数:{normal_open_count}') print(f'高于3%的开盘价计数:{high_open_count}') print(f'低于-3%的收盘价计数:{low_close_count}') print(f'正常收盘,介于-3%和3%之间的计数:{normal_close_count}') print(f'高于3%的收盘价计数:{high_close_count}') print(f'收盘低于-3% 且开盘低于-3%的计数:{low_open_low_close_count}') print(f'收盘价低于-3% 且开盘价介于-3%和3%之间的计数:{low_open_normal_close_count}') print(f'收盘低于-3% 且开盘高于3%的计数:{low_open_high_close_count}') print(f'收盘价介于-3%和3%之间 且开盘低于-3%的计数:{normal_open_low_close_count}') print(f'收盘价介于-3%和3%之间 且开盘介于-3%和3%之间的计数:{normal_open_normal_close_count}') print(f'收盘价介于-3%和3%之间 且开盘高于3%的计数:{normal_open_high_close_count}') print(f'收盘高于3% 且开盘低于-3%的计数:{high_open_low_close_count}') print(f'收盘高于3% 且开盘介于-3%和3%之间的计数:{high_open_normal_close_count}') print(f'收盘高于3% 且开盘高于3%的计数:{high_open_high_close_count}') df_distribution = pd.DataFrame({'test':[1]}) df_distribution['total_count'] = total_count df_distribution['low_open_count'] = low_open_count df_distribution['normal_open_count'] = normal_open_count df_distribution['high_open_count'] = high_open_count df_distribution['low_close_count'] = low_close_count df_distribution['normal_close_count'] = normal_close_count df_distribution['high_close_count'] = high_close_count df_distribution['low_open_low_close_count'] = low_open_low_close_count df_distribution['low_open_normal_close_count'] = low_open_normal_close_count df_distribution['low_open_high_close_count'] = low_open_high_close_count df_distribution['normal_open_low_close_count'] = normal_open_low_close_count df_distribution['normal_open_normal_close_count'] = normal_open_normal_close_count df_distribution['normal_open_high_close_count'] = normal_open_high_close_count df_distribution['high_open_low_close_count'] = high_open_low_close_count df_distribution['high_open_normal_close_count'] = high_open_normal_close_count df_distribution['high_open_high_close_count'] = high_open_high_close_count # df_distribution["low_open_low_close_rate"] = low_open_low_close_count / total_count # df_distribution["low_open_normal_close_rate"] = low_open_normal_close_count / total_count # df_distribution["low_open_high_close_rate"] = low_open_high_close_count / total_count # df_distribution["normal_open_low_close_rate"] = normal_open_low_close_count / total_count # df_distribution["normal_open_normal_close_rate"] = normal_open_normal_close_count / total_count # df_distribution["normal_open_high_close_rate"] = normal_open_high_close_count / total_count # df_distribution["high_open_low_close_rate"] = high_open_low_close_count / total_count # df_distribution["high_open_normal_close_rate"] = high_open_normal_close_count / total_count # df_distribution["high_open_high_close_rate"] = high_open_high_close_count / total_count # df_distribution["low_open_rate"] = low_open_count / total_count # df_distribution["normal_open_rate"] = normal_open_count / total_count # df_distribution["high_open_rate"] = high_open_count / total_count # df_distribution["low_close_rate"] = low_close_count / total_count # df_distribution["normal_close_rate"] = normal_close_count / total_count # df_distribution["high_close_rate"] = high_close_count / total_count # 删除'test'列 df_distribution = df_distribution.drop(columns=['test']) # 创建一个新的图形 fig, ax = plt.subplots() # 创建一个条形图 df_distribution.plot(kind='bar', ax=ax) # 设置图形的标题和轴标签 ax.set_title('Distribution') ax.set_xlabel('Category') ax.set_ylabel('Count') # 显示图形 plt.show()