Skip to content

Commit 647be2b

Browse files
authored
Merge pull request #24 from openworm/development
Improved testing
2 parents f6ed1bc + 211c203 commit 647be2b

20 files changed

+5374
-81
lines changed

corpus/papers/test/Wang2024_NeurotransmitterAtlas.pdf.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

openworm_ai/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# Version of the Python module.
2-
__version__ = "0.2.6"
2+
__version__ = "0.2.7"
33

44

55
def print_(msg, print_it=True):

openworm_ai/graphrag/GraphRAG_test.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -267,6 +267,7 @@ def process_query(query, model, verbose=False):
267267
"What are the main differences between NeuroML versions 1 and 2?",
268268
"What are the main types of cell in the C. elegans pharynx?",
269269
"Give me 3 facts about the coelomocyte system in C. elegans",
270+
"Tell me about the neurotransmitter betaine in C. elegans",
270271
]
271272

272273
print_("Processing %i queries" % len(queries))

openworm_ai/parser/ParseLlamaIndexJson.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,10 @@ def convert_to_json(paper_ref, paper_info, output_dir):
8383
"corpus/papers/test/SinhaEtAl2025.pdf.json",
8484
"https://elifesciences.org/articles/95135",
8585
],
86+
"Wang_et_al_2024": [
87+
"corpus/papers/test/Wang2024_NeurotransmitterAtlas.pdf.json",
88+
"https://elifesciences.org/articles/95402",
89+
],
8690
}
8791

8892
# Loop through papers and process markdown sections
34.4 KB
Loading
36.1 KB
Loading
358 Bytes
Loading
34.2 KB
Loading
5 Bytes
Loading

openworm_ai/quiz/figures/quizplot_grid.py

Lines changed: 62 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,45 @@
11
import json
22
import os
3+
import sys
34
import matplotlib.pyplot as plt
45
import pandas as pd
56

7+
# ruff: noqa: F401
8+
from openworm_ai.utils.llms import (
9+
LLM_OLLAMA_LLAMA32_1B,
10+
LLM_OLLAMA_LLAMA32_3B,
11+
LLM_GPT4o,
12+
LLM_GEMINI_2F,
13+
LLM_CLAUDE37,
14+
LLM_GPT35,
15+
LLM_OLLAMA_PHI4,
16+
LLM_OLLAMA_GEMMA2,
17+
LLM_OLLAMA_GEMMA,
18+
LLM_OLLAMA_QWEN,
19+
LLM_OLLAMA_TINYLLAMA,
20+
ask_question_get_response,
21+
)
22+
623
# Define model parameters (LLM parameter sizes in billions)
724
llm_parameters = {
25+
LLM_GPT4o: 1760,
26+
LLM_GPT35: 175,
827
"GPT3.5": 20,
928
"Phi4": 14,
1029
"Gemma2": 9,
1130
"Gemma": 7,
1231
"Qwen": 4,
1332
"Llama3.2": 1,
14-
"TinyLlama":1.1,
33+
"TinyLlama": 1.1,
1534
"GPT4o": 1760,
1635
"Gemini": 500,
17-
"Claude 3.5 Sonnet": 175
36+
"Claude 3.5 Sonnet": 175,
1837
}
1938

2039
# Define model distributors for coloring
2140
model_distributors = {
41+
LLM_GPT4o: "OpenAI",
42+
LLM_GPT35: "OpenAI",
2243
"GPT3.5": "OpenAI",
2344
"GPT4o": "OpenAI",
2445
"Phi4": "Microsoft",
@@ -28,15 +49,15 @@
2849
"Claude 3.5 Sonnet": "Anthropic",
2950
"Qwen": "Alibaba",
3051
"Llama3.2": "Meta",
31-
"TinyLlama":"Open Source"
52+
"TinyLlama": "Open Source",
3253
}
3354

3455
# Define quiz categories and corresponding file paths
3556
file_paths = {
36-
#"General Knowledge": "openworm_ai/quiz/scores/general/llm_scores_general_24-02-25.json",
37-
#"Science": "openworm_ai/quiz/scores/science/llm_scores_science_24-02-25.json",
38-
#"C. Elegans": "openworm_ai/quiz/scores/celegans/llm_scores_celegans_24-02-25.json",
39-
"RAG":"openworm_ai/quiz/scores/rag/llm_scores_rag_16-03-25_2.json"
57+
# "General Knowledge": "openworm_ai/quiz/scores/general/llm_scores_general_24-02-25.json",
58+
# "Science": "openworm_ai/quiz/scores/science/llm_scores_science_24-02-25.json",
59+
# "C. Elegans": "openworm_ai/quiz/scores/celegans/llm_scores_celegans_24-02-25.json",
60+
"RAG": "openworm_ai/quiz/scores/rag/llm_scores_rag_16-03-25_2.json"
4061
}
4162

4263
# Folder to save figures
@@ -51,16 +72,19 @@
5172
"Microsoft": "purple",
5273
"Alibaba": "orange",
5374
"Meta": "cyan",
54-
"Open Source":"yellow"
75+
"Open Source": "yellow",
5576
}
5677

5778
# Process each quiz category
5879
for category, file_path in file_paths.items():
59-
save_path = os.path.join(figures_folder, f"llm_accuracy_vs_parameters_{category.replace(' ', '_').lower()}.png")
80+
save_path = os.path.join(
81+
figures_folder,
82+
f"llm_accuracy_vs_parameters_{category.replace(' ', '_').lower()}.png",
83+
)
6084

6185
# Check if the file exists
6286
if not os.path.exists(file_path):
63-
print(f"⚠️ Warning: File not found - {file_path}. Skipping this category.")
87+
print(f"Warning: File not found - {file_path}. Skipping this category.")
6488
continue
6589

6690
# Load JSON data
@@ -72,17 +96,19 @@
7296
for result in data.get("Results", []): # Use .get() to avoid KeyError
7397
for key in llm_parameters:
7498
if key.lower() in result["LLM"].lower():
75-
category_results.append({
76-
"Model": key,
77-
"Accuracy (%)": result["Accuracy (%)"],
78-
"Parameters (B)": llm_parameters[key],
79-
"Distributor": model_distributors.get(key, "Unknown")
80-
})
99+
category_results.append(
100+
{
101+
"Model": key,
102+
"Accuracy (%)": result["Accuracy (%)"],
103+
"Parameters (B)": llm_parameters[key],
104+
"Distributor": model_distributors.get(key, "Unknown"),
105+
}
106+
)
81107
break
82108

83109
# Skip if no data
84110
if not category_results:
85-
print(f"⚠️ No valid results found in {file_path}. Skipping...")
111+
print(f"No valid results found in {file_path}. Skipping...")
86112
continue
87113

88114
# Convert to DataFrame
@@ -94,11 +120,25 @@
94120
# Scatter plot with model labels, colored by distributor
95121
for distributor, color in distributor_colors.items():
96122
subset = df[df["Distributor"] == distributor]
97-
plt.scatter(subset["Parameters (B)"], subset["Accuracy (%)"], s=100, color=color, label=distributor, edgecolor="black")
123+
plt.scatter(
124+
subset["Parameters (B)"],
125+
subset["Accuracy (%)"],
126+
s=100,
127+
color=color,
128+
label=distributor,
129+
edgecolor="black",
130+
)
98131

99132
# Add model labels to each point
100133
for i, row in df.iterrows():
101-
plt.text(row["Parameters (B)"], row["Accuracy (%)"], row["Model"], fontsize=10, ha="right", va="bottom")
134+
plt.text(
135+
row["Parameters (B)"],
136+
row["Accuracy (%)"],
137+
row["Model"],
138+
fontsize=10,
139+
ha="right",
140+
va="bottom",
141+
)
102142

103143
# Log scale for x-axis (model parameters)
104144
plt.xscale("log")
@@ -113,5 +153,6 @@
113153
# Save figure
114154
plt.legend()
115155
plt.savefig(save_path)
116-
print(f"✅ Saved plot: {save_path}")
117-
plt.show()
156+
print(f"Saved plot: {save_path}")
157+
if "-nogui" not in sys.argv:
158+
plt.show()

0 commit comments

Comments
 (0)