"""创建一个新表,将symbol编码为symbol_int,保留前导零"""defcreate_table_with_symbol_int_preserve_zeros(clickhouse_client):try:# 创建一个新表,包含symbol_int列print("创建新表...")clickhouse_client.execute(""" CREATE TABLE IF NOT EXISTS stock_data_with_int ( symbol String, symbol_int Int64, # 使用Int64以确保能存储足够大的数字 frame Date, open Float64, high Float64, low Float64, close Float64, vol Float64, amount Float64, adjust Float64, st UInt8, buy_limit Float64, sell_limit Float64, is_index UInt8 MATERIALIZED if(match(symbol, '^(000001|000016|000300|000905|000852|000688|399001|399006|399300|399905|399673)\\.(SH|SZ)$'), 1, 0) ) ENGINE = MergeTree() PARTITION BY toYYYYMM(frame) ORDER BY (symbol, frame) """)# 从原表插入数据,同时计算symbol_intprint("从原表复制数据并计算symbol_int(保留前导零)...")# 使用正确的SQL语法计算symbol_int,保留前导零clickhouse_client.execute(""" INSERT INTO stock_data_with_int SELECT symbol, -- 使用条件判断交易所,然后拼接前缀和完整的股票代码(包括前导零) CASE WHEN endsWith(symbol, '.SH') THEN toInt64(concat('1', splitByChar('.', symbol)[1])) WHEN endsWith(symbol, '.SZ') THEN toInt64(concat('2', splitByChar('.', symbol)[1])) ELSE 0 END AS symbol_int, frame, open, high, low, close, vol, amount, adjust, st, buy_limit, sell_limit FROM stock_data """)print("新表创建并填充完成")# 创建索引print("在symbol_int列上创建索引...")clickhouse_client.execute(""" ALTER TABLE stock_data_with_int ADD INDEX idx_symbol_int (symbol_int) TYPE minmax GRANULARITY 1 """)# 验证数据result=clickhouse_client.execute(""" SELECT symbol, symbol_int, count() FROM stock_data_with_int GROUP BY symbol, symbol_int ORDER BY count() DESC LIMIT 10 """)print("\n验证数据(前10条):")forrowinresult:print(f"Symbol: {row[0]}, Symbol Int: {row[1]}, Count: {row[2]}")exceptExceptionase:print(f"创建新表时出错: {str(e)}")create_table_with_symbol_int_preserve_zeros(clickhouse_client)
defcompare_sort_keys():"""对比两个表的排序键"""try:# 获取两个表的排序键stock_data_sort_keys=clickhouse_client.execute(""" SELECT name, type FROM system.columns WHERE table = 'stock_data' AND database = currentDatabase() AND is_in_sorting_key = 1 ORDER BY name """)stock_data_with_int_sort_keys=clickhouse_client.execute(""" SELECT name, type FROM system.columns WHERE table = 'stock_data_with_int' AND database = currentDatabase() AND is_in_sorting_key = 1 ORDER BY name """)print("stock_data表排序键:")forkeyinstock_data_sort_keys:print(f"列名: {key[0]}, 类型: {key[1]}")print("\nstock_data_with_int表排序键:")forkeyinstock_data_with_int_sort_keys:print(f"列名: {key[0]}, 类型: {key[1]}")# 检查排序键数量iflen(stock_data_sort_keys)!=len(stock_data_with_int_sort_keys):print(f"\n警告: 两个表的排序键数量不同: stock_data有{len(stock_data_sort_keys)}个排序键,stock_data_with_int有{len(stock_data_with_int_sort_keys)}个排序键")else:print(f"\n两个表的排序键数量相同: 各有{len(stock_data_sort_keys)}个排序键")exceptExceptionase:print(f"对比排序键时出错: {str(e)}")# 执行排序键对比compare_sort_keys()
defcompare_partitions_fixed():"""对比两个表的分区(修复版)"""try:# 从system.parts表获取分区信息stock_data_partitions=clickhouse_client.execute(""" SELECT partition, sum(rows) as row_count FROM system.parts WHERE table = 'stock_data' AND database = currentDatabase() AND active = 1 GROUP BY partition ORDER BY partition """)stock_data_with_int_partitions=clickhouse_client.execute(""" SELECT partition, sum(rows) as row_count FROM system.parts WHERE table = 'stock_data_with_int' AND database = currentDatabase() AND active = 1 GROUP BY partition ORDER BY partition """)print("stock_data表分区:")forpartinstock_data_partitions:print(f"分区: {part[0]}, 记录数: {part[1]:,}")print("\nstock_data_with_int表分区:")forpartinstock_data_with_int_partitions:print(f"分区: {part[0]}, 记录数: {part[1]:,}")# 检查分区数量iflen(stock_data_partitions)!=len(stock_data_with_int_partitions):print(f"\n警告: 两个表的分区数量不同: stock_data有{len(stock_data_partitions)}个分区,stock_data_with_int有{len(stock_data_with_int_partitions)}个分区")else:print(f"\n两个表的分区数量相同: 各有{len(stock_data_partitions)}个分区")# 检查分区键try:# 获取表的分区键stock_data_partition_key=clickhouse_client.execute(""" SELECT partition_key FROM system.tables WHERE name = 'stock_data' AND database = currentDatabase() """)[0][0]stock_data_with_int_partition_key=clickhouse_client.execute(""" SELECT partition_key FROM system.tables WHERE name = 'stock_data_with_int' AND database = currentDatabase() """)[0][0]print(f"\nstock_data表分区键: {stock_data_partition_key}")print(f"stock_data_with_int表分区键: {stock_data_with_int_partition_key}")ifstock_data_partition_key!=stock_data_with_int_partition_key:print(f"\n警告: 两个表的分区键不同")else:print(f"\n两个表的分区键相同: {stock_data_partition_key}")exceptExceptionase:print(f"\n获取分区键信息时出错: {str(e)}")print("尝试使用SHOW CREATE TABLE获取表定义...")try:stock_data_create=clickhouse_client.execute("SHOW CREATE TABLE stock_data")[0][0]stock_data_with_int_create=clickhouse_client.execute("SHOW CREATE TABLE stock_data_with_int")[0][0]# 提取PARTITION BY子句importrestock_data_partition_match=re.search(r'PARTITION BY\s+([^\n]+)',stock_data_create)stock_data_with_int_partition_match=re.search(r'PARTITION BY\s+([^\n]+)',stock_data_with_int_create)ifstock_data_partition_matchandstock_data_with_int_partition_match:stock_data_partition=stock_data_partition_match.group(1)stock_data_with_int_partition=stock_data_with_int_partition_match.group(1)print(f"\nstock_data表分区表达式: {stock_data_partition}")print(f"stock_data_with_int表分区表达式: {stock_data_with_int_partition}")ifstock_data_partition!=stock_data_with_int_partition:print(f"\n警告: 两个表的分区表达式不同")else:print(f"\n两个表的分区表达式相同: {stock_data_partition}")else:print("\n无法从表定义中提取分区表达式")exceptExceptionase:print(f"\n获取表定义时出错: {str(e)}")exceptExceptionase:print(f"对比分区时出错: {str(e)}")# 执行修复后的分区对比compare_partitions_fixed()
defcompare_engines():"""对比两个表的引擎"""try:# 获取两个表的引擎stock_data_engine=clickhouse_client.execute(""" SELECT engine FROM system.tables WHERE name = 'stock_data' AND database = currentDatabase() """)[0][0]stock_data_with_int_engine=clickhouse_client.execute(""" SELECT engine FROM system.tables WHERE name = 'stock_data_with_int' AND database = currentDatabase() """)[0][0]print(f"stock_data表引擎: {stock_data_engine}")print(f"stock_data_with_int表引擎: {stock_data_with_int_engine}")ifstock_data_engine!=stock_data_with_int_engine:print(f"警告: 两个表的引擎不同")else:print("两个表的引擎相同")exceptExceptionase:print(f"对比引擎时出错: {str(e)}")# 执行引擎对比compare_engines()
defassess_fairness():"""综合评估测试公平性"""try:# 收集所有对比结果issues=[]# 检查表结构stock_data_col_count=len([cforcinstock_data_columnsifc[0]!='symbol'])stock_data_with_int_col_count=len([cforcinstock_data_with_int_columnsifc[0]!='symbol_int'])ifstock_data_col_count!=stock_data_with_int_col_count:issues.append(f"两个表的列数不同 (不考虑symbol/symbol_int): stock_data有{stock_data_col_count}列,stock_data_with_int有{stock_data_with_int_col_count}列")# 检查数据量stock_data_count=clickhouse_client.execute("SELECT count() FROM stock_data")[0][0]stock_data_with_int_count=clickhouse_client.execute("SELECT count() FROM stock_data_with_int")[0][0]ifstock_data_count!=stock_data_with_int_count:issues.append(f"两个表的记录数不同: stock_data有{stock_data_count:,}条记录,stock_data_with_int有{stock_data_with_int_count:,}条记录")# 检查索引stock_data_indexes=clickhouse_client.execute(""" SELECT count(*) FROM system.data_skipping_indices WHERE table = 'stock_data' AND database = currentDatabase() """)[0][0]stock_data_with_int_indexes=clickhouse_client.execute(""" SELECT count(*) FROM system.data_skipping_indices WHERE table = 'stock_data_with_int' AND database = currentDatabase() """)[0][0]ifstock_data_indexes!=stock_data_with_int_indexes:issues.append(f"两个表的索引数量不同: stock_data有{stock_data_indexes}个索引,stock_data_with_int有{stock_data_with_int_indexes}个索引")# 检查排序键stock_data_sort_keys=clickhouse_client.execute(""" SELECT count(*) FROM system.columns WHERE table = 'stock_data' AND database = currentDatabase() AND is_in_sorting_key = 1 """)[0][0]stock_data_with_int_sort_keys=clickhouse_client.execute(""" SELECT count(*) FROM system.columns WHERE table = 'stock_data_with_int' AND database = currentDatabase() AND is_in_sorting_key = 1 """)[0][0]ifstock_data_sort_keys!=stock_data_with_int_sort_keys:issues.append(f"两个表的排序键数量不同: stock_data有{stock_data_sort_keys}个排序键,stock_data_with_int有{stock_data_with_int_sort_keys}个排序键")# 检查引擎stock_data_engine=clickhouse_client.execute(""" SELECT engine FROM system.tables WHERE name = 'stock_data' AND database = currentDatabase() """)[0][0]stock_data_with_int_engine=clickhouse_client.execute(""" SELECT engine FROM system.tables WHERE name = 'stock_data_with_int' AND database = currentDatabase() """)[0][0]ifstock_data_engine!=stock_data_with_int_engine:issues.append(f"两个表的引擎不同: stock_data使用{stock_data_engine},stock_data_with_int使用{stock_data_with_int_engine}")# 给出综合评估print("\n综合评估:")ifissues:print("发现以下可能影响测试公平性的问题:")fori,issueinenumerate(issues):print(f"{i+1}. {issue}")print("\n建议: 在进行性能测试前,先解决这些问题,确保两个表除了symbol/symbol_int列的类型外,其他方面尽可能相同。")else:print("两个表在结构、数据量、索引等方面基本一致,适合进行公平的性能测试。")exceptExceptionase:print(f"评估公平性时出错: {str(e)}")# 执行公平性评估assess_fairness()
deftest_query_performance_fixed(num_tests=50):# 获取随机股票代码test_symbols=clickhouse_client.execute(f""" SELECT DISTINCT symbol FROM stock_data WHERE is_index = 0 ORDER BY rand() LIMIT {num_tests} """)string_query_times=[]int_query_times=[]print(f"开始测试 {len(test_symbols)} 个股票的查询性能...")forsymbol_tupleintqdm(test_symbols,desc="测试查询性能"):symbol=symbol_tuple[0]# 获取对应的整数编码code,exchange=symbol.split('.')ifexchange.upper()=='SH':prefix='1'elifexchange.upper()=='SZ':prefix='2'else:continuesymbol_int=int(prefix+code)# 清理缓冲print("清理缓冲")clickhouse_client.execute(""" SYSTEM DROP MARK CACHE; """)clickhouse_client.execute(""" SYSTEM DROP UNCOMPRESSED CACHE; """)# 测试字符串查询start_time=time.time()clickhouse_client.execute(f""" SELECT symbol, frame, open, high, low, close, vol, amount FROM stock_data WHERE symbol = '{symbol}' ORDER BY frame LIMIT 1000 """)string_time=time.time()-start_timestring_query_times.append(string_time)# 清理缓冲print("清理缓冲")clickhouse_client.execute(""" SYSTEM DROP MARK CACHE; """)clickhouse_client.execute(""" SYSTEM DROP UNCOMPRESSED CACHE; """)# 测试整数查询start_time=time.time()clickhouse_client.execute(f""" SELECT symbol_int, frame, open, high, low, close, vol, amount FROM stock_data_with_int WHERE symbol_int = {symbol_int} ORDER BY frame LIMIT 1000 """)int_time=time.time()-start_timeint_query_times.append(int_time)# 计算平均查询时间avg_string_time=sum(string_query_times)/len(string_query_times)avg_int_time=sum(int_query_times)/len(int_query_times)print(f"字符串查询平均时间: {avg_string_time:.6f} 秒")print(f"整数查询平均时间: {avg_int_time:.6f} 秒")print(f"性能提升: {(avg_string_time-avg_int_time)/avg_string_time*100:.2f}%")return{'string_times':string_query_times,'int_times':int_query_times,'avg_string_time':avg_string_time,'avg_int_time':avg_int_time,'improvement':(avg_string_time-avg_int_time)/avg_string_time*100}"""绘制性能对比图表"""defplot_performance_comparison(results):plt.figure(figsize=(15,12))# 1. 单条记录查询性能对比if'single'inresults:plt.subplot(2,2,1)data=[results['single']['string_times'],results['single']['int_times']]plt.boxplot(data,labels=['字符串查询','整数查询'])plt.title('单条记录查询性能对比')plt.ylabel('查询时间 (秒)')plt.grid(True,linestyle='--',alpha=0.7)# 2. 日期范围查询性能对比if'range'inresults:plt.subplot(2,2,2)data=[results['range']['string_times'],results['range']['int_times']]plt.boxplot(data,labels=['字符串查询','整数查询'])plt.title('日期范围查询性能对比')plt.ylabel('查询时间 (秒)')plt.grid(True,linestyle='--',alpha=0.7)# 3. 批量查询性能对比if'batch'inresults:plt.subplot(2,2,3)batch_sizes=[r['batch_size']forrinresults['batch']]string_times=[r['string_time']forrinresults['batch']]int_times=[r['int_time']forrinresults['batch']]x=np.arange(len(batch_sizes))width=0.35plt.bar(x-width/2,string_times,width,label='字符串查询')plt.bar(x+width/2,int_times,width,label='整数查询')plt.xlabel('批量大小')plt.ylabel('查询时间 (秒)')plt.title('批量查询性能对比')plt.xticks(x,batch_sizes)plt.legend()plt.grid(True,linestyle='--',alpha=0.7)# 4. 性能提升百分比plt.subplot(2,2,4)categories=[]improvements=[]if'single'inresults:categories.append('单条查询')improvements.append(results['single']['improvement'])if'range'inresults:categories.append('范围查询')improvements.append(results['range']['improvement'])if'batch'inresults:categories.append('批量查询')# 使用平均提升avg_batch_improvement=sum(r['improvement']forrinresults['batch'])/len(results['batch'])improvements.append(avg_batch_improvement)if'aggregate'inresults:categories.append('聚合查询')improvements.append(results['aggregate']['improvement'])plt.bar(categories,improvements)plt.title('整数编码性能提升百分比')plt.ylabel('性能提升 (%)')plt.grid(True,linestyle='--',alpha=0.7)plt.tight_layout()plt.savefig('symbol_encoding_performance_comparison.png')plt.close()print("\n性能对比图已保存为 symbol_encoding_performance_comparison.png")print("开始symbol编码性能测试...")# 1. 清理缓冲print("清理缓冲")clickhouse_client.execute("""SYSTEM DROP MARK CACHE;""")clickhouse_client.execute("""SYSTEM DROP UNCOMPRESSED CACHE;""")# 2. 执行测试results=test_query_performance_fixed(num_tests=50)
# 查询性能可视化importmatplotlib.pyplotaspltimportnumpyasnpimportpandasaspdimporttimefromtqdmimporttqdmimportseabornassnsfrommatplotlib.tickerimportPercentFormatterfrommatplotlibimportfont_managerfont_path='/Volumes/share/data/WBQ/note/4_性能测试/SimHei.ttf'# 替换为SimHei.ttf的实际路径font_manager.fontManager.addfont(font_path)plt.rcParams['font.family']='SimHei'defvisualize_performance_results(results):"""将性能测试结果可视化为图表"""ifnotresults:print("没有测试结果可供可视化")return# 设置图表风格sns.set(style="whitegrid")plt.figure(figsize=(20,15))# 1. 查询时间对比 - 条形图plt.subplot(2,2,1)test_names=[r['name']forrinresults]str_times=[r['avg_str_time']forrinresults]int_times=[r['avg_int_time']forrinresults]x=np.arange(len(test_names))width=0.35plt.bar(x-width/2,str_times,width,label='字符串查询 (symbol)',color='#3498db',yerr=[r['std_str_time']forrinresults],capsize=5)plt.bar(x+width/2,int_times,width,label='整数查询 (symbol_int)',color='#e74c3c',yerr=[r['std_int_time']forrinresults],capsize=5)plt.xlabel('查询类型',fontsize=12)plt.ylabel('平均查询时间 (秒)',fontsize=12)plt.title('不同查询类型的平执行时间对比',fontsize=14,fontweight='bold')plt.xticks(x,[nameiflen(name)<15elsename[:12]+'...'fornameintest_names],rotation=45,ha='right')plt.legend(fontsize=10)plt.grid(True,linestyle='--',alpha=0.7)# 2. 性能提升百分比 - 水平条形图plt.subplot(2,2,2)improvements=[r['improvement']forrinresults]colors=['#2ecc71'ifimp>0else'#e74c3c'forimpinimprovements]y_pos=np.arange(len(test_names))plt.barh(y_pos,improvements,color=colors)plt.axvline(x=0,color='black',linestyle='-',alpha=0.7)plt.yticks(y_pos,[nameiflen(name)<15elsename[:12]+'...'fornameintest_names])plt.xlabel('性能提升 (%)',fontsize=12)plt.title('整数编码相对于字符串的性能提升',fontsize=14,fontweight='bold')plt.grid(True,linestyle='--',alpha=0.7)# 添加数值标签fori,vinenumerate(improvements):plt.text(v+(1ifv>=0else-1),i,f"{v:.1f}%",va='center',fontweight='bold',color='black')# 3. 查询时间分布 - 箱线图plt.subplot(2,2,3)# 准备数据data_to_plot=[]labels=[]forrinresults:data_to_plot.append(r['str_times'])data_to_plot.append(r['int_times'])labels.append(f"{r['name']} (str)")labels.append(f"{r['name']} (int)")# 绘制箱线图box=plt.boxplot(data_to_plot,patch_artist=True,labels=labels)# 设置颜色colors=[]foriinrange(len(data_to_plot)):ifi%2==0:# 字符串查询colors.append('#3498db')else:# 整数查询colors.append('#e74c3c')forpatch,colorinzip(box['boxes'],colors):patch.set_facecolor(color)plt.xticks(rotation=90)plt.ylabel('查询时间 (秒)',fontsize=12)plt.title('查询时间分布',fontsize=14,fontweight='bold')plt.grid(True,linestyle='--',alpha=0.7)# 4. 平均性能对比 - 饼图plt.subplot(2,2,4)# 计算平均性能提升avg_improvement=sum(improvements)/len(improvements)# 创建饼图数据ifavg_improvement>0:# 整数查询更快labels=['整数查询更快','字符串查询']sizes=[avg_improvement,100-avg_improvement]colors=['#2ecc71','#3498db']title=f'平均而言,整数查询比字符串查询快 {avg_improvement:.1f}%'else:# 字符串查询更快labels=['字符串查询更快','整数查询']sizes=[-avg_improvement,100+avg_improvement]colors=['#3498db','#e74c3c']title=f'平均而言,字符串查询比整数查询快 {-avg_improvement:.1f}%'plt.pie(sizes,labels=labels,colors=colors,autopct='%1.1f%%',startangle=90,explode=(0.1,0),shadow=True)plt.axis('equal')plt.title(title,fontsize=14,fontweight='bold')# 调整布局并保存plt.tight_layout()plt.savefig('query_performance_comparison.png',dpi=300,bbox_inches='tight')plt.show()print("性能测试结果图表已保存为 'query_performance_comparison.png'")# 创建详细的性能报告create_performance_report(results)defcreate_performance_report(results):"""创建详细的性能测试报告"""# 创建DataFramereport_data=[]forrinresults:report_data.append({'查询类型':r['name'],'字符串查询平均时间(秒)':r['avg_str_time'],'整数查询平均时间(秒)':r['avg_int_time'],'字符串查询标准差':r['std_str_time'],'整数查询标准差':r['std_int_time'],'性能提升(%)':r['improvement'],'字符串查询最小时间':min(r['str_times']),'字符串查询最大时间':max(r['str_times']),'整数查询最小时间':min(r['int_times']),'整数查询最大时间':max(r['int_times']),})df=pd.DataFrame(report_data)# 计算总体统计avg_str_time=df['字符串查询平均时间(秒)'].mean()avg_int_time=df['整数查询平均时间(秒)'].mean()avg_improvement=df['性能提升(%)'].mean()# 打印报告print("\n===== 性能测试详细报告 =====")print(f"测试场景数量: {len(results)}")print(f"总体平均字符串查询时间: {avg_str_time:.6f} 秒")print(f"总体平均整数查询时间: {avg_int_time:.6f} 秒")print(f"总体平均性能提升: {avg_improvement:.2f}%")# 打印每个场景的详细信息print("\n各场景详细数据:")print(df.to_string(index=False))# 保存报告到CSVdf.to_csv('performance_test_report.csv',index=False)print("\n详细报告已保存到 'performance_test_report.csv'")defrun_performance_test(test_cases,num_iterations=5):""" 运行性能测试 Args: test_cases: 测试用例列表,每个测试用例是一个字典,包含name, str_query和int_query num_iterations: 每个测试用例重复执行的次数 Returns: 测试结果列表 """results=[]fortest_idx,test_caseinenumerate(test_cases):str_query=test_case['str_query']int_query=test_case['int_query']test_name=test_case['name']print(f"\n测试 {test_idx+1}/{len(test_cases)}: {test_name}")str_times=[]int_times=[]foriinrange(num_iterations):try:# 清除缓存ifi==0:# 只在第一次迭代时清除缓存try:clickhouse_client.execute("SYSTEM DROP MARK CACHE")clickhouse_client.execute("SYSTEM DROP UNCOMPRESSED CACHE")except:pass# 忽略清除缓存的错误# 测试字符串查询start_time=time.time()clickhouse_client.execute(str_query)str_time=time.time()-start_timestr_times.append(str_time)# 测试整数查询start_time=time.time()clickhouse_client.execute(int_query)int_time=time.time()-start_timeint_times.append(int_time)print(f" 迭代 {i+1}/{num_iterations}: 字符串 {str_time:.6f}秒, 整数 {int_time:.6f}秒")exceptExceptionase:print(f" 迭代 {i+1}/{num_iterations} 出错: {str(e)}")print(f" 字符串查询: {str_query}")print(f" 整数查询: {int_query}")continueifnotstr_timesornotint_times:print(f" 测试 {test_name} 失败,跳过")continue# 计算平均时间和标准差avg_str_time=sum(str_times)/len(str_times)avg_int_time=sum(int_times)/len(int_times)std_str_time=(sum((t-avg_str_time)**2fortinstr_times)/len(str_times))**0.5std_int_time=(sum((t-avg_int_time)**2fortinint_times)/len(int_times))**0.5improvement=(avg_str_time-avg_int_time)/avg_str_time*100print(f" 平均: 字符串 {avg_str_time:.6f}±{std_str_time:.6f}秒, 整数 {avg_int_time:.6f}±{std_int_time:.6f}秒")print(f" 性能提升: {improvement:.2f}%")results.append({'name':test_name,'str_times':str_times,'int_times':int_times,'avg_str_time':avg_str_time,'avg_int_time':avg_int_time,'std_str_time':std_str_time,'std_int_time':std_int_time,'improvement':improvement})returnresults# 定义多种查询场景的测试用例defcreate_test_cases():"""创建多种查询场景的测试用例"""# 获取一些随机的股票代码用于测试symbols=clickhouse_client.execute(""" SELECT DISTINCT symbol FROM stock_data ORDER BY rand() LIMIT 20 """)symbol_ints=[]forsymbol_tupleinsymbols:symbol=symbol_tuple[0]code,exchange=symbol.split('.')ifexchange.upper()=='SH':prefix='1'elifexchange.upper()=='SZ':prefix='2'else:continuesymbol_ints.append(int(prefix+code))# 确保我们有足够的股票代码iflen(symbols)<10orlen(symbol_ints)<10:print("警告: 没有足够的股票代码用于测试")return[]# 创建测试用例test_cases=[# 1. 单条记录精确查询{'name':'单条记录精确查询','str_query':f"SELECT * FROM stock_data WHERE symbol = '{symbols[0][0]}' LIMIT 1000",'int_query':f"SELECT * FROM stock_data_with_int WHERE symbol_int = {symbol_ints[0]} LIMIT 1000"},# 2. 日期范围查询{'name':'日期范围查询','str_query':f"SELECT * FROM stock_data WHERE symbol = '{symbols[1][0]}' AND frame BETWEEN '2016-01-01' AND '2016-12-31'",'int_query':f"SELECT * FROM stock_data_with_int WHERE symbol_int = {symbol_ints[1]} AND frame BETWEEN '2016-01-01' AND '2016-12-31'"},# 3. 批量查询 (IN条件){'name':'批量查询 (5个股票)','str_query':f"SELECT * FROM stock_data WHERE symbol IN ('{symbols[0][0]}', '{symbols[1][0]}', '{symbols[2][0]}', '{symbols[3][0]}', '{symbols[4][0]}') LIMIT 1000",'int_query':f"SELECT * FROM stock_data_with_int WHERE symbol_int IN ({symbol_ints[0]}, {symbol_ints[1]}, {symbol_ints[2]}, {symbol_ints[3]}, {symbol_ints[4]}) LIMIT 1000"},# 4. 聚合查询 (AVG){'name':'聚合查询 (AVG)','str_query':f"SELECT AVG(close) FROM stock_data WHERE symbol = '{symbols[2][0]}' GROUP BY toYYYYMM(frame)",'int_query':f"SELECT AVG(close) FROM stock_data_with_int WHERE symbol_int = {symbol_ints[2]} GROUP BY toYYYYMM(frame)"},# 5. 排序查询{'name':'排序查询','str_query':f"SELECT * FROM stock_data WHERE symbol = '{symbols[3][0]}' ORDER BY frame DESC LIMIT 1000",'int_query':f"SELECT * FROM stock_data_with_int WHERE symbol_int = {symbol_ints[3]} ORDER BY frame DESC LIMIT 1000"},# 6. 复杂条件查询{'name':'复杂条件查询','str_query':f"SELECT * FROM stock_data WHERE symbol = '{symbols[4][0]}' AND close > open AND vol > 1000000 LIMIT 1000",'int_query':f"SELECT * FROM stock_data_with_int WHERE symbol_int = {symbol_ints[4]} AND close > open AND vol > 1000000 LIMIT 1000"},# 7. JOIN查询{'name':'JOIN查询','str_query':f""" SELECT a.symbol, a.frame, a.close, b.close as prev_close FROM stock_data a LEFT JOIN stock_data b ON a.symbol = b.symbol AND b.frame = addDays(a.frame, -1) WHERE a.symbol = '{symbols[5][0]}' LIMIT 1000 """,'int_query':f""" SELECT a.symbol_int, a.frame, a.close, b.close as prev_close FROM stock_data_with_int a LEFT JOIN stock_data_with_int b ON a.symbol_int = b.symbol_int AND b.frame = addDays(a.frame, -1) WHERE a.symbol_int = {symbol_ints[5]} LIMIT 1000 """},# 8. 大批量查询 (更多股票){'name':'大批量查询 (10个股票)','str_query':"SELECT * FROM stock_data WHERE symbol IN ("+", ".join([f"'{s[0]}'"forsinsymbols[:10]])+") LIMIT 5000",'int_query':"SELECT * FROM stock_data_with_int WHERE symbol_int IN ("+", ".join([str(s)forsinsymbol_ints[:10]])+") LIMIT 5000"},# 9. 聚合查询 (COUNT){'name':'聚合查询 (COUNT)','str_query':f"SELECT COUNT(*) FROM stock_data WHERE symbol = '{symbols[6][0]}' GROUP BY toYear(frame)",'int_query':f"SELECT COUNT(*) FROM stock_data_with_int WHERE symbol_int = {symbol_ints[6]} GROUP BY toYear(frame)"},# 10. 复杂聚合查询{'name':'复杂聚合查询','str_query':f""" SELECT toYear(frame) AS year, AVG(close) AS avg_close, MAX(high) AS max_high, MIN(low) AS min_low, SUM(vol) AS total_vol FROM stock_data WHERE symbol = '{symbols[7][0]}' GROUP BY year ORDER BY year """,'int_query':f""" SELECT toYear(frame) AS year, AVG(close) AS avg_close, MAX(high) AS max_high, MIN(low) AS min_low, SUM(vol) AS total_vol FROM stock_data_with_int WHERE symbol_int = {symbol_ints[7]} GROUP BY year ORDER BY year """}]returntest_cases# 创建测试用例test_cases=create_test_cases()ifnottest_cases:print("无法创建测试用例,请检查数据库连接和表结构")exit()# 运行性能测试print(f"开始运行 {len(test_cases)} 个测试用例,每个用例重复 5 次...")results=run_performance_test(test_cases,num_iterations=5)ifnotresults:print("测试失败,没有结果可供分析")exit()# 可视化结果visualize_performance_results(results)