Skip to content

Commit 1609c84

Browse files
authored
Merge pull request #20 from openworm/development
Consolidating LLM definitions, etc.
2 parents f92b172 + 8794d17 commit 1609c84

File tree

7 files changed

+70
-139
lines changed

7 files changed

+70
-139
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,5 @@ venv/
1414
/openworm_ai/quiz/samples/Ollama_phi3_latest_10questions_celegans.json
1515
/store
1616
/openworm_ai/corpus/wormatlas/ignore
17+
/openworm_ai/quiz/samples/GPT4o_10questions_general.json
18+
/openworm_ai/quiz/samples/GPT4o_10questions_science.json

openworm_ai/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Version of the Python module.
2-
__version__ = "0.2.0"
2+
__version__ = "0.2.1"
33

44

55
def print_(msg, print_it=True):

openworm_ai/quiz/QuizMaster.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,35 @@
11
from openworm_ai.quiz.QuizModel import MultipleChoiceQuiz, Question, Answer
22

3-
from openworm_ai.quiz.TemplatesCelegans import GENERATE_Q, TEXT_ANSWER_EXAMPLE
4-
# from openworm_ai.quiz.Templates import GENERATE_Q, TEXT_ANSWER_EXAMPLE
53

64
from openworm_ai.utils.llms import ask_question_get_response
75
from openworm_ai.utils.llms import get_llm_from_argv
86

97
import random
8+
from enum import Enum
109

1110
indexing = ["A", "B", "C", "D"]
1211

12+
QuizScope = Enum(
13+
"QuizScope", [("GeneralKnowledge", 1), ("Science", 2), ("CElegans", 3)]
14+
)
15+
16+
17+
def save_quiz(num_questions, num_answers, llm_ver, quiz_scope, temperature=0):
18+
suffix = None
19+
20+
if quiz_scope == QuizScope.GeneralKnowledge:
21+
from openworm_ai.quiz.Templates import GENERATE_Q, TEXT_ANSWER_EXAMPLE
22+
23+
suffix = "_general"
24+
elif quiz_scope == QuizScope.Science:
25+
from openworm_ai.quiz.TemplatesScience import GENERATE_Q, TEXT_ANSWER_EXAMPLE
26+
27+
suffix = "_science"
28+
elif quiz_scope == QuizScope.CElegans:
29+
from openworm_ai.quiz.TemplatesCelegans import GENERATE_Q, TEXT_ANSWER_EXAMPLE
30+
31+
suffix = "_celegans"
1332

14-
def save_quiz(num_questions, num_answers, llm_ver, temperature=0):
1533
question = (
1634
GENERATE_Q.replace("<QUESTION_NUMBER>", str(num_questions)).replace(
1735
"<ANSWER_NUMBER>", str(num_answers)
@@ -22,13 +40,14 @@ def save_quiz(num_questions, num_answers, llm_ver, temperature=0):
2240
response = ask_question_get_response(question, llm_ver, temperature)
2341

2442
quiz = MultipleChoiceQuiz(
25-
title="GPT4o_%iquestions_celegans" % num_questions,
43+
title="GPT4o_%iquestions%s" % (num_questions, suffix),
2644
source="Generated by %s, temperature: %s" % (llm_ver, temperature),
2745
)
2846

2947
last_question = None
3048

3149
indexing = ["1", "2", "3", "4"]
50+
3251
for line in response.split("\n"):
3352
if len(line.strip()) > 0:
3453
if "QUESTION" in line or line[-1] == "?":
@@ -51,8 +70,8 @@ def save_quiz(num_questions, num_answers, llm_ver, temperature=0):
5170
print(quiz.to_yaml())
5271

5372
quiz.to_json_file(
54-
"openworm_ai/quiz/samples/%s_%iquestions_celegans.json"
55-
% (llm_ver.replace(":", "_"), num_questions)
73+
"openworm_ai/quiz/samples/%s_%iquestions%s.json"
74+
% (llm_ver.replace(":", "_"), num_questions, suffix)
5675
)
5776

5877

@@ -140,6 +159,7 @@ def save_quiz(num_questions, num_answers, llm_ver, temperature=0):
140159
print(
141160
f"\n The LLM {llm_ver} got {total_correct} out of {total_qs} questions correct ({'%.2f %%' % (100 * total_correct / total_qs)})!\n"
142161
)
162+
143163
# make this into a method which returns a dictionary of all the "stats" that lists the llm, correct/incorrect answers
144164
# this can be used to plot comparison of variety of llms on general knowledge
145165
else:
@@ -148,4 +168,4 @@ def save_quiz(num_questions, num_answers, llm_ver, temperature=0):
148168
if a.isnumeric():
149169
num = int(a)
150170
print(f"Using LLM {llm_ver} for saving quiz with {num} questions")
151-
save_quiz(num, 4, llm_ver, temperature=0.2)
171+
save_quiz(num, 4, llm_ver, quiz_scope=QuizScope.CElegans, temperature=0.2)

openworm_ai/quiz/QuizMasterCorpus.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,5 +196,4 @@ def save_quiz(num_questions=100, num_answers=4, llm_ver=LLM_GPT4o, temperature=0
196196
)
197197

198198
else:
199-
print(f"Debug: Using LLM {llm_ver} for saving quiz")
200199
save_quiz(100, 4, llm_ver, temperature=0.2)

openworm_ai/quiz/Templates.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
import sys
5252

5353
question = (
54-
GENERATE_Q.replace("<QUESTION_NUMBER>", "5").replace("<ANSWER_NUMBER>", "4")
54+
GENERATE_Q.replace("<QUESTION_NUMBER>", "100").replace("<ANSWER_NUMBER>", "4")
5555
+ TEXT_ANSWER_EXAMPLE
5656
)
5757

openworm_ai/quiz/quiz_all.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ def load_llms():
4848
# LLM_OLLAMA_FALCON2 - 'only an assistant with no access to external resources',
4949
# LLM_OLLAMA_CODELLAMA - understands only a fraction of questions, doesn't understand prompts
5050
] # Defined constants
51-
print(f"Debug: Loaded LLMs -> {llms}")
5251
return llms
5352

5453

openworm_ai/utils/llms.py

Lines changed: 39 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,45 @@
44
from langchain.prompts import PromptTemplate
55
from langchain_core.output_parsers import StrOutputParser
66

7+
LLM_CMD_LINE_ARGS = {}
8+
79
LLM_GPT35 = "GPT3.5"
810
LLM_GPT4 = "GPT4"
911
LLM_GPT4o = "GPT4o"
12+
1013
LLM_LLAMA2 = "LLAMA2"
14+
LLM_CMD_LINE_ARGS["-l"] = LLM_LLAMA2
1115
LLM_GEMINI = "gemini-2.0-flash"
16+
LLM_CMD_LINE_ARGS["-g"] = LLM_GEMINI
1217
LLM_AI21 = "AI21"
18+
LLM_CMD_LINE_ARGS["-a"] = LLM_AI21
1319
LLM_CLAUDE37 = "claude-3-7-sonnet-20250219"
20+
LLM_CMD_LINE_ARGS["-c"] = LLM_CLAUDE37
1421
LLM_COHERE = "Cohere"
22+
LLM_CMD_LINE_ARGS["-co"] = LLM_COHERE
23+
1524
LLM_OLLAMA_LLAMA32 = "Ollama:llama3.2"
25+
LLM_CMD_LINE_ARGS["-o-l32"] = LLM_OLLAMA_LLAMA32
1626
LLM_OLLAMA_LLAMA32_1B = "Ollama:llama3.2:1b"
27+
LLM_CMD_LINE_ARGS["-o-l321b"] = LLM_OLLAMA_LLAMA32_1B
1728
LLM_OLLAMA_MISTRAL = "Ollama:mistral"
29+
LLM_CMD_LINE_ARGS["-o-m"] = LLM_OLLAMA_MISTRAL
1830
LLM_OLLAMA_TINYLLAMA = "Ollama:tinyllama"
19-
31+
LLM_CMD_LINE_ARGS["-o-t"] = LLM_OLLAMA_TINYLLAMA
2032
LLM_OLLAMA_PHI3 = "Ollama:phi3:latest"
33+
LLM_CMD_LINE_ARGS["-o-phi3"] = LLM_OLLAMA_PHI3
2134
LLM_OLLAMA_PHI4 = "Ollama:phi4:latest"
35+
LLM_CMD_LINE_ARGS["-o-phi4"] = LLM_OLLAMA_PHI4
36+
LLM_OLLAMA_GEMMA = "Ollama:gemma:7b"
37+
LLM_CMD_LINE_ARGS["-ge"] = LLM_OLLAMA_GEMMA
2238
LLM_OLLAMA_GEMMA2 = "Ollama:gemma2:latest"
39+
LLM_CMD_LINE_ARGS["-ge2"] = LLM_OLLAMA_GEMMA2
2340
LLM_OLLAMA_GEMMA3 = "Ollama:gemma3:4b"
41+
LLM_CMD_LINE_ARGS["-ge3"] = LLM_OLLAMA_GEMMA3
2442
LLM_OLLAMA_DEEPSEEK = "Ollama:deepseek-r1:7b"
25-
LLM_OLLAMA_GEMMA = "Ollama:gemma:7b"
43+
LLM_CMD_LINE_ARGS["-o-dsr1"] = LLM_OLLAMA_DEEPSEEK
2644
LLM_OLLAMA_QWEN = "Ollama:qwen:4b"
45+
LLM_CMD_LINE_ARGS["-qw"] = LLM_OLLAMA_QWEN
2746
LLM_OLLAMA_CODELLAMA = "Ollama:codellama:latest"
2847
LLM_OLLAMA_FALCON2 = "Ollama:falcon2:latest"
2948

@@ -99,17 +118,14 @@ def get_cohere_key():
99118

100119

101120
def get_llm(llm_ver, temperature):
102-
print(f"Debug: get_llm received {llm_ver}")
103121
if llm_ver == LLM_GPT35:
104-
print("Debug: Using GPT-3.5")
105122
from langchain_openai import OpenAI
106123

107124
return OpenAI(temperature=temperature, openai_api_key=get_openai_api_key())
108125

109126
elif llm_ver == LLM_GPT4:
110127
from langchain_openai import ChatOpenAI
111128

112-
print("Debug: Using GPT-4")
113129
return ChatOpenAI(
114130
model_name="gpt-4",
115131
openai_api_key=get_openai_api_key(),
@@ -118,7 +134,6 @@ def get_llm(llm_ver, temperature):
118134
elif llm_ver == LLM_GPT4o:
119135
from langchain_openai import ChatOpenAI
120136

121-
print("Debug: Using GPT-4o")
122137
return ChatOpenAI(
123138
model_name="gpt-4o",
124139
openai_api_key=get_openai_api_key(),
@@ -144,7 +159,6 @@ def get_llm(llm_ver, temperature):
144159
elif llm_ver == LLM_GEMINI:
145160
from langchain_google_genai import ChatGoogleGenerativeAI
146161

147-
print("Debug: Using Gemini Flash")
148162
return ChatGoogleGenerativeAI(
149163
model="gemini-2.0-flash",
150164
google_api_key=get_gemini_api_key(), # Retrieve API key
@@ -159,7 +173,6 @@ def get_llm(llm_ver, temperature):
159173
elif llm_ver == LLM_CLAUDE37:
160174
from langchain_anthropic import ChatAnthropic
161175

162-
print("Debug: Using Claude 3.7 Sonnet")
163176
return ChatAnthropic(
164177
model_name="claude-3-7-sonnet-20250219",
165178
anthropic_api_key=get_anthropic_key(), # Retrieve API key
@@ -171,75 +184,23 @@ def get_llm(llm_ver, temperature):
171184

172185
llm = ChatCohere()
173186

174-
elif llm_ver == LLM_OLLAMA_LLAMA32_1B:
175-
from langchain_ollama.llms import OllamaLLM
176-
177-
llm = OllamaLLM(model="llama3.2:1b")
178-
179-
elif llm_ver == LLM_OLLAMA_MISTRAL:
180-
from langchain_ollama.llms import OllamaLLM
181-
182-
llm = OllamaLLM(model="mistral")
183-
184-
elif llm_ver == LLM_OLLAMA_TINYLLAMA:
185-
from langchain_ollama.llms import OllamaLLM
186-
187-
llm = OllamaLLM(model="tinyllama")
188-
189-
elif llm_ver == LLM_OLLAMA_PHI3:
190-
from langchain_ollama.llms import OllamaLLM
191-
192-
print("Debug: Using Phi-3")
193-
llm = OllamaLLM(model="phi3:latest", temperature=temperature)
194-
195-
elif llm_ver == LLM_OLLAMA_PHI4:
196-
from langchain_ollama.llms import OllamaLLM
197-
198-
print("Debug: Using Phi-4")
199-
llm = OllamaLLM(model="phi4:latest", temperature=temperature)
200-
201-
elif llm_ver == LLM_OLLAMA_GEMMA2:
202-
from langchain_ollama.llms import OllamaLLM
203-
204-
print("Debug: Using Gemma-2")
205-
llm = OllamaLLM(model="gemma2:latest", temperature=temperature)
206-
207-
elif llm_ver == LLM_OLLAMA_GEMMA3:
208-
from langchain_ollama.llms import OllamaLLM
209-
210-
print("Debug: Using Gemma-3")
211-
llm = OllamaLLM(model="gemma3:4b", temperature=temperature)
212-
213-
elif llm_ver == LLM_OLLAMA_DEEPSEEK:
214-
from langchain_ollama.llms import OllamaLLM
215-
216-
print("Debug: Using DeepSeek")
217-
return OllamaLLM(model="deepseek-r1:7b", temperature=temperature)
218-
219-
elif llm_ver == LLM_OLLAMA_GEMMA:
220-
from langchain_ollama.llms import OllamaLLM
221-
222-
print("Debug: Using Gemma")
223-
return OllamaLLM(model="gemma:7b", temperature=temperature)
224-
225-
elif llm_ver == LLM_OLLAMA_QWEN:
226-
from langchain_ollama.llms import OllamaLLM
227-
228-
print("Debug: Using Qwen")
229-
return OllamaLLM(model="qwen:4b", temperature=temperature)
230-
231-
elif llm_ver == LLM_OLLAMA_CODELLAMA:
232-
from langchain_ollama.llms import OllamaLLM
233-
234-
print("Debug: Using CodeLlama")
235-
return OllamaLLM(model="codellama:latest", temperature=temperature)
236-
237-
elif llm_ver == LLM_OLLAMA_FALCON2:
187+
elif llm_ver in [
188+
LLM_OLLAMA_LLAMA32_1B,
189+
LLM_OLLAMA_MISTRAL,
190+
LLM_OLLAMA_TINYLLAMA,
191+
LLM_OLLAMA_PHI3,
192+
LLM_OLLAMA_PHI4,
193+
LLM_OLLAMA_GEMMA,
194+
LLM_OLLAMA_GEMMA2,
195+
LLM_OLLAMA_GEMMA3,
196+
LLM_OLLAMA_DEEPSEEK,
197+
LLM_OLLAMA_QWEN,
198+
LLM_OLLAMA_CODELLAMA,
199+
LLM_OLLAMA_FALCON2,
200+
]:
238201
from langchain_ollama.llms import OllamaLLM
239202

240-
print("Debug: Using Falcon2")
241-
return OllamaLLM(model="falcon2:latest", temperature=temperature)
242-
203+
llm = OllamaLLM(model=llm_ver.split(":", 1)[1])
243204
return llm
244205

245206

@@ -340,66 +301,16 @@ def generate_panel_response(input_text, llm_panelists, llm_panel_chair, temperat
340301
def get_llm_from_argv(argv):
341302
llm_ver = LLM_GPT4o
342303

343-
if "-g" in argv:
344-
llm_ver = LLM_GEMINI
345-
346-
if "-ge" in argv:
347-
llm_ver = LLM_OLLAMA_GEMMA
348-
349-
if "-ge2" in argv:
350-
llm_ver = LLM_OLLAMA_GEMMA2
351-
352-
if "-ge3" in argv:
353-
llm_ver = LLM_OLLAMA_GEMMA3
354-
355-
if "-qw" in argv:
356-
llm_ver = LLM_OLLAMA_QWEN
357-
358-
if "-l" in argv:
359-
llm_ver = LLM_LLAMA2
360-
361-
if "-a" in argv:
362-
llm_ver = LLM_AI21
363-
364-
if "-cl" in argv:
365-
llm_ver = LLM_CLAUDE37
366-
367-
if "-co" in argv:
368-
llm_ver = LLM_COHERE
369-
370-
if "-o-l32" in argv:
371-
llm_ver = LLM_OLLAMA_LLAMA32
372-
373-
if "-o-l321b" in argv:
374-
llm_ver = LLM_OLLAMA_LLAMA32_1B
375-
print(f"DEBUG: Selected LLM Version = {llm_ver}")
376-
return llm_ver
377-
378-
if "-o-m" in argv:
379-
llm_ver = LLM_OLLAMA_MISTRAL
380-
381-
if "-o-t" in argv:
382-
llm_ver = LLM_OLLAMA_TINYLLAMA
383-
384-
if "-o-phi3" in argv:
385-
llm_ver = LLM_OLLAMA_PHI3
386-
print(f"DEBUG: Selected LLM Version = {llm_ver}")
387-
388-
if "-o-phi4" in argv:
389-
llm_ver = LLM_OLLAMA_PHI4
390-
391-
if "-o-dsr1" in argv:
392-
llm_ver = LLM_OLLAMA_DEEPSEEK
393-
394-
print(f"Debug: get_llm_from_argv selected {llm_ver}")
304+
for arg in LLM_CMD_LINE_ARGS:
305+
if arg in argv:
306+
llm_ver = LLM_CMD_LINE_ARGS[arg]
395307

396308
return llm_ver
397309

398310

399311
def ask_question_get_response(
400312
question, llm_ver, temperature=0, only_celegans=False, print_question=True
401313
):
402-
print(f"Debug: ask_question_get_response received llm_ver={llm_ver}")
403314
print("--------------------------------------------------------")
404315
if print_question:
405316
print("Asking question:\n %s" % question)

0 commit comments

Comments (0)