|
6 | 6 |
|
7 | 7 | # Load the data
|
8 | 8 | df_compare = pd.read_csv('benchmark_results_compare_many_v_many.csv')
|
| 9 | +df_compare_without_index = pd.read_csv('benchmark_results_compare_many_v_many_no_index.csv') |
9 | 10 | df_multisearch = pd.read_csv('benchmark_results_multisearch_many_v_many.csv')
|
10 | 11 |
|
| 12 | +# drop rows with query_filesize > 30000, ref_filesize > 30000 |
| 13 | +df_compare = df_compare[(df_compare['query_filesize'] <= 30000) & (df_compare['ref_filesize'] <= 30000)] |
| 14 | +df_compare_without_index = df_compare_without_index[(df_compare_without_index['query_filesize'] <= 30000) & (df_compare_without_index['ref_filesize'] <= 30000)] |
| 15 | +df_multisearch = df_multisearch[(df_multisearch['query_filesize'] <= 30000) & (df_multisearch['ref_filesize'] <= 30000)] |
| 16 | + |
11 | 17 | # col names: query_filesize,ref_filesize,cpu_time,wall_clock_time,peak_memory_usage
|
12 | 18 |
|
13 |
| -# plot wall-clock time for query file size = 50000, use different reference file sizes in x-axis |
14 |
| -df_compare_50000 = df_compare[df_compare['query_filesize'] == 50000] |
15 |
| -df_multisearch_50000 = df_multisearch[df_multisearch['query_filesize'] == 50000] |
| 19 | +# plot wall-clock time for query file size = 30000, use different reference file sizes in x-axis |
| 20 | +df_compare_30000 = df_compare[df_compare['query_filesize'] == 30000] |
| 21 | +df_compare_without_index_30000 = df_compare_without_index[df_compare_without_index['query_filesize'] == 30000] |
| 22 | +df_multisearch_30000 = df_multisearch[df_multisearch['query_filesize'] == 30000] |
16 | 23 |
|
17 |
| -plt.plot(df_compare_50000['ref_filesize'], df_compare_50000['wall_clock_time'], label='compare (cpp)', marker='o') |
18 |
| -plt.plot(df_multisearch_50000['ref_filesize'], df_multisearch_50000['wall_clock_time'], label='multisearch (sourmash)', marker='o') |
| 24 | +plt.plot(df_compare_30000['ref_filesize'], df_compare_30000['wall_clock_time'], label='compare (cpp)', marker='o') |
| 25 | +plt.plot(df_compare_without_index_30000['ref_filesize'], df_compare_without_index_30000['wall_clock_time'], label='compare (cpp) without index', marker='o') |
| 26 | +plt.plot(df_multisearch_30000['ref_filesize'], df_multisearch_30000['wall_clock_time'], label='multisearch (sourmash)', marker='o') |
19 | 27 |
|
20 | 28 | plt.xlabel('Reference list size')
|
21 | 29 | plt.ylabel('Wall-clock time (s)')
|
22 |
| -plt.title('Wall-clock time for 50k queries') |
23 |
| -plt.xticks(df_compare_50000['ref_filesize']) |
| 30 | +plt.title('Wall-clock time for 30k queries') |
| 31 | +plt.xticks(df_compare_30000['ref_filesize']) |
24 | 32 | plt.legend()
|
25 | 33 | plt.grid(linestyle='--', alpha=0.3)
|
26 | 34 | plt.savefig('plots/benchmark_results_compare_many_v_many_wall_clock_time.pdf')
|
27 | 35 |
|
28 | 36 |
|
29 |
| -# plot peak memory usage for query file size = 50000, use different reference file sizes in x-axis |
| 37 | +# plot CPU time for query file size = 30000, use different reference file sizes in x-axis |
| 38 | +plt.clf() |
| 39 | +plt.plot(df_compare_30000['ref_filesize'], df_compare_30000['cpu_time'], label='compare (cpp)', marker='o') |
| 40 | +plt.plot(df_compare_without_index_30000['ref_filesize'], df_compare_without_index_30000['cpu_time'], label='compare (cpp) without index', marker='o') |
| 41 | +plt.plot(df_multisearch_30000['ref_filesize'], df_multisearch_30000['cpu_time'], label='multisearch (sourmash)', marker='o') |
| 42 | + |
| 43 | +plt.xlabel('Reference list size') |
| 44 | +plt.ylabel('CPU time (s)') |
| 45 | +plt.title('CPU time for 30k queries') |
| 46 | +plt.xticks(df_compare_30000['ref_filesize']) |
| 47 | +plt.legend() |
| 48 | +plt.grid(linestyle='--', alpha=0.3) |
| 49 | +plt.savefig('plots/benchmark_results_compare_many_v_many_cpu_time.pdf') |
| 50 | + |
| 51 | + |
| 52 | +# plot peak memory usage for query file size = 30000, use different reference file sizes in x-axis |
30 | 53 | plt.clf()
|
31 |
| -plt.plot(df_compare_50000['ref_filesize'], df_compare_50000['peak_memory_usage'], label='compare (cpp)', marker='o') |
32 |
| -plt.plot(df_multisearch_50000['ref_filesize'], df_multisearch_50000['peak_memory_usage'], label='multisearch (sourmash)', marker='o') |
| 54 | +plt.plot(df_compare_30000['ref_filesize'], df_compare_30000['peak_memory_usage'], label='compare (cpp)', marker='o') |
| 55 | +plt.plot(df_compare_without_index_30000['ref_filesize'], df_compare_without_index_30000['peak_memory_usage'], label='compare (cpp) without index', marker='o') |
| 56 | +plt.plot(df_multisearch_30000['ref_filesize'], df_multisearch_30000['peak_memory_usage'], label='multisearch (sourmash)', marker='o') |
33 | 57 |
|
34 | 58 | plt.xlabel('Reference list size')
|
35 | 59 | plt.ylabel('Peak memory usage (GB)')
|
36 |
| -plt.title('Peak memory usage for 50k queries') |
37 |
| -plt.xticks(df_compare_50000['ref_filesize']) |
| 60 | +plt.title('Peak memory usage for 30k queries') |
| 61 | +plt.xticks(df_compare_30000['ref_filesize']) |
38 | 62 | plt.legend()
|
39 | 63 | plt.grid(linestyle='--', alpha=0.3)
|
40 | 64 | plt.savefig('plots/benchmark_results_compare_many_v_many_peak_memory_usage.pdf')
|
41 | 65 |
|
42 | 66 |
|
43 |
| -# plot wall-clock time for ref file size = 50000, use different query file sizes in x-axis |
| 67 | +# plot wall-clock time for ref file size = 30000, use different query file sizes in x-axis |
44 | 68 | plt.clf()
|
45 |
| -df_compare_50000 = df_compare[df_compare['ref_filesize'] == 50000] |
46 |
| -df_multisearch_50000 = df_multisearch[df_multisearch['ref_filesize'] == 50000] |
| 69 | +df_compare_30000 = df_compare[df_compare['ref_filesize'] == 30000] |
| 70 | +df_compare_without_index_30000 = df_compare_without_index[df_compare_without_index['ref_filesize'] == 30000] |
| 71 | +df_multisearch_30000 = df_multisearch[df_multisearch['ref_filesize'] == 30000] |
47 | 72 |
|
48 |
| -plt.plot(df_compare_50000['query_filesize'], df_compare_50000['wall_clock_time'], label='compare (cpp)', marker='o') |
49 |
| -plt.plot(df_multisearch_50000['query_filesize'], df_multisearch_50000['wall_clock_time'], label='multisearch (sourmash)', marker='o') |
| 73 | +plt.plot(df_compare_30000['query_filesize'], df_compare_30000['wall_clock_time'], label='compare (cpp)', marker='o') |
| 74 | +plt.plot(df_compare_without_index_30000['query_filesize'], df_compare_without_index_30000['wall_clock_time'], label='compare (cpp) without index', marker='o') |
| 75 | +plt.plot(df_multisearch_30000['query_filesize'], df_multisearch_30000['wall_clock_time'], label='multisearch (sourmash)', marker='o') |
50 | 76 |
|
51 | 77 | plt.xlabel('Query list size')
|
52 | 78 | plt.ylabel('Wall-clock time (s)')
|
53 |
| -plt.title('Wall-clock time for 50k references') |
54 |
| -plt.xticks(df_compare_50000['query_filesize']) |
| 79 | +plt.title('Wall-clock time for 30k references') |
| 80 | +plt.xticks(df_compare_30000['query_filesize']) |
55 | 81 | plt.legend()
|
56 | 82 | plt.grid(linestyle='--', alpha=0.3)
|
57 | 83 | plt.savefig('plots/benchmark_results_compare_many_v_many_wall_clock_time_query.pdf')
|
58 | 84 |
|
59 | 85 |
|
60 |
| -# plot peak memory usage for ref file size = 50000, use different query file sizes in x-axis |
| 86 | +# plot peak memory usage for ref file size = 30000, use different query file sizes in x-axis |
61 | 87 | plt.clf()
|
62 |
| -plt.plot(df_compare_50000['query_filesize'], df_compare_50000['peak_memory_usage'], label='compare (cpp)', marker='o') |
63 |
| -plt.plot(df_multisearch_50000['query_filesize'], df_multisearch_50000['peak_memory_usage'], label='multisearch (sourmash)', marker='o') |
| 88 | +plt.plot(df_compare_30000['query_filesize'], df_compare_30000['peak_memory_usage'], label='compare (cpp)', marker='o') |
| 89 | +plt.plot(df_compare_without_index_30000['query_filesize'], df_compare_without_index_30000['peak_memory_usage'], label='compare (cpp) without index', marker='o') |
| 90 | +plt.plot(df_multisearch_30000['query_filesize'], df_multisearch_30000['peak_memory_usage'], label='multisearch (sourmash)', marker='o') |
64 | 91 |
|
65 | 92 | plt.xlabel('Query list size')
|
66 | 93 | plt.ylabel('Peak memory usage (GB)')
|
67 |
| -plt.title('Peak memory usage for 50k references') |
68 |
| -plt.xticks(df_compare_50000['query_filesize']) |
| 94 | +plt.title('Peak memory usage for 30k references') |
| 95 | +plt.xticks(df_compare_30000['query_filesize']) |
69 | 96 | plt.legend()
|
70 | 97 | plt.grid(linestyle='--', alpha=0.3)
|
71 | 98 | plt.savefig('plots/benchmark_results_compare_many_v_many_peak_memory_usage_query.pdf')
|
| 99 | + |
| 100 | + |
| 101 | +# plot CPU time for ref file size = 30000, use different query file sizes in x-axis |
| 102 | +plt.clf() |
| 103 | +plt.plot(df_compare_30000['query_filesize'], df_compare_30000['cpu_time'], label='compare (cpp)', marker='o') |
| 104 | +plt.plot(df_compare_without_index_30000['query_filesize'], df_compare_without_index_30000['cpu_time'], label='compare (cpp) without index', marker='o') |
| 105 | +plt.plot(df_multisearch_30000['query_filesize'], df_multisearch_30000['cpu_time'], label='multisearch (sourmash)', marker='o') |
| 106 | + |
| 107 | +plt.xlabel('Query list size') |
| 108 | +plt.ylabel('CPU time (s)') |
| 109 | +plt.title('CPU time for 30k references') |
| 110 | +plt.xticks(df_compare_30000['query_filesize']) |
| 111 | +plt.legend() |
| 112 | +plt.grid(linestyle='--', alpha=0.3) |
| 113 | +plt.savefig('plots/benchmark_results_compare_many_v_many_cpu_time_query.pdf') |
0 commit comments