"""Plot many-vs-many benchmark results for ``compare`` and ``multisearch``.

Reads one CSV of benchmark results per tool and writes four PDF line plots
into the ``plots`` directory: wall-clock time and peak memory usage, first
with the query list size held at 50000 (reference size on the x-axis), then
with the reference list size held at 50000 (query size on the x-axis).

Expected CSV columns:
query_filesize,ref_filesize,cpu_time,wall_clock_time,peak_memory_usage
"""
import os

import matplotlib.pyplot as plt
import pandas as pd

# Size at which one side of the comparison is held fixed while the other varies.
FIXED_SIZE = 50000

# Directory that receives the generated PDF files.
OUTPUT_DIR = 'plots'


def _plot_metric(compare_rows, multisearch_rows, x_col, y_col,
                 xlabel, ylabel, title, output_name):
    """Draw one compare-vs-multisearch line plot and save it as a PDF.

    Args:
        compare_rows: Filtered rows of the ``compare`` benchmark results.
        multisearch_rows: Filtered rows of the ``multisearch`` results.
        x_col: Name of the column plotted on the x-axis.
        y_col: Name of the column plotted on the y-axis.
        xlabel: Label for the x-axis.
        ylabel: Label for the y-axis.
        title: Plot title.
        output_name: File name of the PDF written inside ``OUTPUT_DIR``.
    """
    # All plots share pyplot's current figure, so start from a clean one.
    plt.clf()
    plt.plot(compare_rows[x_col], compare_rows[y_col],
             label='compare (cpp)', marker='o')
    plt.plot(multisearch_rows[x_col], multisearch_rows[y_col],
             label='multisearch (sourmash)', marker='o')

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    # Put a tick at every size measured for the compare tool.
    plt.xticks(compare_rows[x_col])
    plt.legend()
    plt.grid(linestyle='--', alpha=0.3)
    plt.savefig(os.path.join(OUTPUT_DIR, output_name))


# set arial as the default font
plt.rcParams['font.family'] = 'Arial'

# Load the data
df_compare = pd.read_csv('benchmark_results_compare_many_v_many.csv')
df_multisearch = pd.read_csv('benchmark_results_multisearch_many_v_many.csv')

# savefig does not create missing directories, so make sure the target exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# NOTE(review): the axis labels state seconds and GB; confirm that these are
# the units the benchmark harness writes into the CSV files.

# Query list size fixed, reference list size on the x-axis.
compare_fixed_query = df_compare[df_compare['query_filesize'] == FIXED_SIZE]
multisearch_fixed_query = df_multisearch[
    df_multisearch['query_filesize'] == FIXED_SIZE
]

_plot_metric(
    compare_fixed_query, multisearch_fixed_query,
    x_col='ref_filesize', y_col='wall_clock_time',
    xlabel='Reference list size', ylabel='Wall-clock time (s)',
    title='Wall-clock time for 50k queries',
    output_name='benchmark_results_compare_many_v_many_wall_clock_time.pdf',
)
_plot_metric(
    compare_fixed_query, multisearch_fixed_query,
    x_col='ref_filesize', y_col='peak_memory_usage',
    xlabel='Reference list size', ylabel='Peak memory usage (GB)',
    title='Peak memory usage for 50k queries',
    output_name='benchmark_results_compare_many_v_many_peak_memory_usage.pdf',
)

# Reference list size fixed, query list size on the x-axis.
compare_fixed_ref = df_compare[df_compare['ref_filesize'] == FIXED_SIZE]
multisearch_fixed_ref = df_multisearch[
    df_multisearch['ref_filesize'] == FIXED_SIZE
]

_plot_metric(
    compare_fixed_ref, multisearch_fixed_ref,
    x_col='query_filesize', y_col='wall_clock_time',
    xlabel='Query list size', ylabel='Wall-clock time (s)',
    title='Wall-clock time for 50k references',
    output_name='benchmark_results_compare_many_v_many_wall_clock_time_query.pdf',
)
_plot_metric(
    compare_fixed_ref, multisearch_fixed_ref,
    x_col='query_filesize', y_col='peak_memory_usage',
    xlabel='Query list size', ylabel='Peak memory usage (GB)',
    title='Peak memory usage for 50k references',
    output_name='benchmark_results_compare_many_v_many_peak_memory_usage_query.pdf',
)