diff --git a/README.md b/README.md index c442d62..a84cab3 100644 --- a/README.md +++ b/README.md @@ -26,24 +26,24 @@ For the image scope, the program takes up to two files, depending on the prompt - Saves response output in Markdown format with a predefined template or prints to stdout. ## Argument Details -| Argument | Description | Required | -|----------------------|-------------------------------------------------------------------|----------| -| `--submission_type` | Type of submission (from `arg_options.FileType`) | ❌ | -| `--prompt` | Pre-defined prompt name or file path to custom prompt file | ❌ **| -| `--prompt_text` | Additional string text prompt that can be fed to model. | ❌ ** | -| `--scope` | Processing scope (`image` or `code` or `text`) | ✅ | -| `--submission` | Submission file path | ✅ | -| `--question` | Specific question to evaluate | ❌ | -| `--model` | Model type (from `arg_options.Models`) | ✅ | -| `--output` | File path for where to record the output | ❌ | -| `--solution` | File path for the solution file | ❌ | -| `--test_output` | File path for the file containing the results from tests | ❌ | -| `--submission_image` | File path for the submission image file | ❌ | -| `--solution_image` | File path for the solution image file | ❌ | +| Argument | Description | Required | +|----------------------|---------------------------------------------------------------------|----------| +| `--submission_type` | Type of submission (from `arg_options.FileType`) | ❌ | +| `--prompt` | Pre-defined prompt name or file path to custom prompt file | ❌ **| +| `--prompt_text` | String prompt | ❌ ** | +| `--scope` | Processing scope (`image` or `code` or `text`) | ✅ | +| `--submission` | Submission file path | ✅ | +| `--question` | Specific question to evaluate | ❌ | +| `--model` | Model type (from `arg_options.Models`) | ✅ | +| `--output` | File path for where to record the output | ❌ | +| `--solution` | File path for the solution file | ❌ | +| `--test_output` | File 
path for the file containing the results from tests | ❌ | +| `--submission_image` | File path for the submission image file | ❌ | +| `--solution_image` | File path for the solution image file | ❌ | | `--system_prompt` | Pre-defined system prompt name or file path to custom system prompt | ❌ | -| `--llama_mode` | How to invoke deepSeek-v3 (choices in `arg_options.LlamaMode`) | ❌ | -| `--output_template` | Output template file (from `arg_options.OutputTemplate) | ❌ | -** One of either `--prompt` or `--prompt_text` must be selected. +| `--llama_mode` | How to invoke deepSeek-v3 (choices in `arg_options.LlamaMode`) | ❌ | +| `--output_template` | Output template file (from `arg_options.OutputTemplate`) | ❌ | +** At least one of `--prompt` or `--prompt_text` must be provided. If both are provided, `--prompt_text` will be appended to the contents of the file specified by `--prompt`. ## Scope The program supports three scopes: code or text or image. Depending on which is selected, the program supports different models and prompts tailored for each option. 
diff --git a/ai_feedback/__main__.py b/ai_feedback/__main__.py index 33f57c2..ae0ca8f 100644 --- a/ai_feedback/__main__.py +++ b/ai_feedback/__main__.py @@ -215,7 +215,6 @@ def main() -> int: args.submission_type = detect_submission_type(args.submission) prompt_content = "" - system_instructions = load_system_prompt_content(args.system_prompt) if args.prompt: diff --git a/ai_feedback/helpers/constants.py b/ai_feedback/helpers/constants.py index d0162fc..592e16b 100644 --- a/ai_feedback/helpers/constants.py +++ b/ai_feedback/helpers/constants.py @@ -2,7 +2,7 @@ HELP_MESSAGES = { "submission_type": "The format of the submission file (e.g., Jupyter notebook, Python script).", "prompt": "Pre-defined prompt name (from ai_feedback/data/prompts/user/) or file path to custom prompt file.", - "prompt_text": "Additional messages to concatenate to the prompt.", + "prompt_text": "The string prompt that is sent to the model", "scope": "The section of the assignment the model should analyze (e.g., code or image).", "submission": "The file path for the submission file.", "solution": "The file path for the solution file.", diff --git a/promptfoo/promptfoo_test_runner.py b/promptfoo/promptfoo_test_runner.py index 160daae..4847d17 100644 --- a/promptfoo/promptfoo_test_runner.py +++ b/promptfoo/promptfoo_test_runner.py @@ -34,8 +34,8 @@ def call_api(prompt: str, context: dict, metadata: dict) -> dict: options["scope"], "--model", options["model"], - "--prompt", - options['prompt'], + "--prompt_text", + prompt, "--llama_mode", "server", "--output_template", diff --git a/promptfoo/tests/codellama_tests/codellama_code_tests.yaml b/promptfoo/tests/codellama_tests/codellama_code_tests.yaml index c6a2cd1..01df32a 100644 --- a/promptfoo/tests/codellama_tests/codellama_code_tests.yaml +++ b/promptfoo/tests/codellama_tests/codellama_code_tests.yaml @@ -6,59 +6,59 @@ defaultTest: model: codellama:latest scope: code -scenarios: - - config: - - vars: { prompt: code_overall } - tests: - - vars: - 
submission_file: test_submissions/csc263_opt_connected/correct_submission/correct_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py - - - vars: - submission_file: test_submissions/csc263_opt_connected/fail_submission/fail_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py - - - - vars: - submission_file: test_submissions/csc263_opt_connected/incorrect_algo_submission/incorrect_algo_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py - - - vars: - submission_file: test_submissions/csc263_opt_connected/style_issues_submission/style_issues_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py - - - vars: - submission_file: test_submissions/csc108/correct_submission/correct_submission.py - solution_file: test_submissions/csc108/solution.py - - - vars: - submission_file: test_submissions/csc108/correctness_1_submission/correctness_1_submission.py - solution_file: test_submissions/csc108/solution.py - - - vars: - submission_file: test_submissions/csc108/correctness_2_submission/correctness_2_submission.py - solution_file: test_submissions/csc108/solution.py - - - vars: - submission_file: test_submissions/csc108/efficiency_submission/efficiency_submission.py - solution_file: test_submissions/csc108/solution.py - - - vars: - submission_file: test_submissions/csc108/style_submission/style_submission.py - solution_file: test_submissions/csc108/solution.py - - - vars: - submission_file: test_submissions/gac_example/correct_submission/correct_submission.py - solution_file: test_submissions/gac_example/solution.py - - - vars: - submission_file: test_submissions/gac_example/fail_submission/fail_submission.py - solution_file: test_submissions/gac_example/solution.py - - - vars: - submission_file: test_submissions/gac_example/inefficient_submission/inefficient_submission.py - solution_file: test_submissions/gac_example/solution.py - - - vars: - submission_file: 
test_submissions/gac_example/partial_correct_submission/partial_correct_submission.py - solution_file: test_submissions/gac_example/solution.py +prompts: + - file://../../../ai_feedback/data/prompts/user/code_overall.md + +tests: + - vars: + submission_file: test_submissions/csc263_opt_connected/correct_submission/correct_submission.py + solution_file: test_submissions/csc263_opt_connected/solution.py + + - vars: + submission_file: test_submissions/csc263_opt_connected/fail_submission/fail_submission.py + solution_file: test_submissions/csc263_opt_connected/solution.py + + + - vars: + submission_file: test_submissions/csc263_opt_connected/incorrect_algo_submission/incorrect_algo_submission.py + solution_file: test_submissions/csc263_opt_connected/solution.py + + - vars: + submission_file: test_submissions/csc263_opt_connected/style_issues_submission/style_issues_submission.py + solution_file: test_submissions/csc263_opt_connected/solution.py + + - vars: + submission_file: test_submissions/csc108/correct_submission/correct_submission.py + solution_file: test_submissions/csc108/solution.py + + - vars: + submission_file: test_submissions/csc108/correctness_1_submission/correctness_1_submission.py + solution_file: test_submissions/csc108/solution.py + + - vars: + submission_file: test_submissions/csc108/correctness_2_submission/correctness_2_submission.py + solution_file: test_submissions/csc108/solution.py + + - vars: + submission_file: test_submissions/csc108/efficiency_submission/efficiency_submission.py + solution_file: test_submissions/csc108/solution.py + + - vars: + submission_file: test_submissions/csc108/style_submission/style_submission.py + solution_file: test_submissions/csc108/solution.py + + - vars: + submission_file: test_submissions/gac_example/correct_submission/correct_submission.py + solution_file: test_submissions/gac_example/solution.py + + - vars: + submission_file: test_submissions/gac_example/fail_submission/fail_submission.py + solution_file: 
test_submissions/gac_example/solution.py + + - vars: + submission_file: test_submissions/gac_example/inefficient_submission/inefficient_submission.py + solution_file: test_submissions/gac_example/solution.py + + - vars: + submission_file: test_submissions/gac_example/partial_correct_submission/partial_correct_submission.py + solution_file: test_submissions/gac_example/solution.py diff --git a/promptfoo/tests/deepseek_r1_tests/deepseek_r1_code_tests.yaml b/promptfoo/tests/deepseek_r1_tests/deepseek_r1_code_tests.yaml index 4420f74..55fe5cb 100644 --- a/promptfoo/tests/deepseek_r1_tests/deepseek_r1_code_tests.yaml +++ b/promptfoo/tests/deepseek_r1_tests/deepseek_r1_code_tests.yaml @@ -6,58 +6,58 @@ defaultTest: model: deepSeek-R1:70B scope: code -scenarios: - - config: - - vars: { prompt: code_feedback_r1 } - tests: - - vars: - submission_file: test_submissions/csc263_opt_connected/correct_submission/correct_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py - - - vars: - submission_file: test_submissions/csc263_opt_connected/fail_submission/fail_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py - - - vars: - submission_file: test_submissions/csc263_opt_connected/incorrect_algo_submission/incorrect_algo_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py - - - vars: - submission_file: test_submissions/csc263_opt_connected/style_issues_submission/style_issues_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py - - - vars: - submission_file: test_submissions/csc108/correct_submission/correct_submission.py - solution_file: test_submissions/csc108/solution.py - - - vars: - submission_file: test_submissions/csc108/correctness_1_submission/correctness_1_submission.py - solution_file: test_submissions/csc108/solution.py - - - vars: - submission_file: test_submissions/csc108/correctness_2_submission/correctness_2_submission.py - solution_file: 
test_submissions/csc108/solution.py - - - vars: - submission_file: test_submissions/csc108/efficiency_submission/efficiency_submission.py - solution_file: test_submissions/csc108/solution.py - - - vars: - submission_file: test_submissions/csc108/style_submission/style_submission.py - solution_file: test_submissions/csc108/solution.py - - - vars: - submission_file: test_submissions/gac_example/correct_submission/correct_submission.py - solution_file: test_submissions/gac_example/solution.py - - - vars: - submission_file: test_submissions/gac_example/fail_submission/fail_submission.py - solution_file: test_submissions/gac_example/solution.py - - - vars: - submission_file: test_submissions/gac_example/inefficient_submission/inefficient_submission.py - solution_file: test_submissions/gac_example/solution.py - - - vars: - submission_file: test_submissions/gac_example/partial_correct_submission/partial_correct_submission.py - solution_file: test_submissions/gac_example/solution.py +prompts: + - file://../../../ai_feedback/data/prompts/user/code_feedback_r1.md + +tests: + - vars: + submission_file: test_submissions/csc263_opt_connected/correct_submission/correct_submission.py + solution_file: test_submissions/csc263_opt_connected/solution.py + + - vars: + submission_file: test_submissions/csc263_opt_connected/fail_submission/fail_submission.py + solution_file: test_submissions/csc263_opt_connected/solution.py + + - vars: + submission_file: test_submissions/csc263_opt_connected/incorrect_algo_submission/incorrect_algo_submission.py + solution_file: test_submissions/csc263_opt_connected/solution.py + + - vars: + submission_file: test_submissions/csc263_opt_connected/style_issues_submission/style_issues_submission.py + solution_file: test_submissions/csc263_opt_connected/solution.py + + - vars: + submission_file: test_submissions/csc108/correct_submission/correct_submission.py + solution_file: test_submissions/csc108/solution.py + + - vars: + submission_file: 
test_submissions/csc108/correctness_1_submission/correctness_1_submission.py + solution_file: test_submissions/csc108/solution.py + + - vars: + submission_file: test_submissions/csc108/correctness_2_submission/correctness_2_submission.py + solution_file: test_submissions/csc108/solution.py + + - vars: + submission_file: test_submissions/csc108/efficiency_submission/efficiency_submission.py + solution_file: test_submissions/csc108/solution.py + + - vars: + submission_file: test_submissions/csc108/style_submission/style_submission.py + solution_file: test_submissions/csc108/solution.py + + - vars: + submission_file: test_submissions/gac_example/correct_submission/correct_submission.py + solution_file: test_submissions/gac_example/solution.py + + - vars: + submission_file: test_submissions/gac_example/fail_submission/fail_submission.py + solution_file: test_submissions/gac_example/solution.py + + - vars: + submission_file: test_submissions/gac_example/inefficient_submission/inefficient_submission.py + solution_file: test_submissions/gac_example/solution.py + + - vars: + submission_file: test_submissions/gac_example/partial_correct_submission/partial_correct_submission.py + solution_file: test_submissions/gac_example/solution.py diff --git a/promptfoo/tests/deepseek_r1_tests/deepseek_r1_text_tests.yaml b/promptfoo/tests/deepseek_r1_tests/deepseek_r1_text_tests.yaml index bb5625f..cb51f3f 100644 --- a/promptfoo/tests/deepseek_r1_tests/deepseek_r1_text_tests.yaml +++ b/promptfoo/tests/deepseek_r1_tests/deepseek_r1_text_tests.yaml @@ -6,34 +6,34 @@ defaultTest: model: deepSeek-R1:70B scope: text -scenarios: - - config: - - vars: { prompt: text_analyze_r1 } - tests: - - vars: - submission_file: test_submissions/data_collection_ethics_module/average_submission/average_submission.txt - solution_file: test_submissions/data_collection_ethics_module/solution.txt - - - vars: - submission_file: 
test_submissions/data_collection_ethics_module/excellent_submission/excellent_submission.txt - solution_file: test_submissions/data_collection_ethics_module/solution.txt - - - vars: - submission_file: test_submissions/data_collection_ethics_module/off_topic_submission/off_topic_submission.txt - solution_file: test_submissions/data_collection_ethics_module/solution.txt - - - vars: - submission_file: test_submissions/data_collection_ethics_module/weak_submission/weak_submission.txt - solution_file: test_submissions/data_collection_ethics_module/solution.txt - - - vars: - submission_file: test_submissions/csc373_eft_optimality_proof/fail_submission/fail_submission.pdf - solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf - - - vars: - submission_file: test_submissions/csc373_eft_optimality_proof/incomplete_submission/incomplete_submission.pdf - solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf - - - vars: - submission_file: test_submissions/csc373_eft_optimality_proof/induction_submission/induction_submission.pdf - solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf +prompts: + - file://../../../ai_feedback/data/prompts/user/text_analyze_r1.md + +tests: + - vars: + submission_file: test_submissions/data_collection_ethics_module/average_submission/average_submission.txt + solution_file: test_submissions/data_collection_ethics_module/solution.txt + + - vars: + submission_file: test_submissions/data_collection_ethics_module/excellent_submission/excellent_submission.txt + solution_file: test_submissions/data_collection_ethics_module/solution.txt + + - vars: + submission_file: test_submissions/data_collection_ethics_module/off_topic_submission/off_topic_submission.txt + solution_file: test_submissions/data_collection_ethics_module/solution.txt + + - vars: + submission_file: test_submissions/data_collection_ethics_module/weak_submission/weak_submission.txt + solution_file: 
test_submissions/data_collection_ethics_module/solution.txt + + - vars: + submission_file: test_submissions/csc373_eft_optimality_proof/fail_submission/fail_submission.pdf + solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf + + - vars: + submission_file: test_submissions/csc373_eft_optimality_proof/incomplete_submission/incomplete_submission.pdf + solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf + + - vars: + submission_file: test_submissions/csc373_eft_optimality_proof/induction_submission/induction_submission.pdf + solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf diff --git a/promptfoo/tests/deepseek_v3_tests/deepseek_v3_code_tests.yaml b/promptfoo/tests/deepseek_v3_tests/deepseek_v3_code_tests.yaml index 5d9a7a2..dd39efb 100644 --- a/promptfoo/tests/deepseek_v3_tests/deepseek_v3_code_tests.yaml +++ b/promptfoo/tests/deepseek_v3_tests/deepseek_v3_code_tests.yaml @@ -5,60 +5,60 @@ defaultTest: vars: model: deepSeek-v3 scope: code + system_prompt: code_feedback_v3 -scenarios: - - config: - - vars: { prompt: code_feedback_v3, system_prompt: code_feedback_v3} +prompts: + - file://../../../ai_feedback/data/prompts/user/code_feedback_v3.md - tests: - - vars: - submission_file: test_submissions/csc263_opt_connected/correct_submission/correct_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py +tests: + - vars: + submission_file: test_submissions/csc263_opt_connected/correct_submission/correct_submission.py + solution_file: test_submissions/csc263_opt_connected/solution.py - - vars: - submission_file: test_submissions/csc263_opt_connected/fail_submission/fail_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py + - vars: + submission_file: test_submissions/csc263_opt_connected/fail_submission/fail_submission.py + solution_file: test_submissions/csc263_opt_connected/solution.py - - vars: - submission_file: 
test_submissions/csc263_opt_connected/incorrect_algo_submission/incorrect_algo_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py + - vars: + submission_file: test_submissions/csc263_opt_connected/incorrect_algo_submission/incorrect_algo_submission.py + solution_file: test_submissions/csc263_opt_connected/solution.py - - vars: - submission_file: test_submissions/csc263_opt_connected/style_issues_submission/style_issues_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py + - vars: + submission_file: test_submissions/csc263_opt_connected/style_issues_submission/style_issues_submission.py + solution_file: test_submissions/csc263_opt_connected/solution.py - - vars: - submission_file: test_submissions/csc108/correct_submission/correct_submission.py - solution_file: test_submissions/csc108/solution.py + - vars: + submission_file: test_submissions/csc108/correct_submission/correct_submission.py + solution_file: test_submissions/csc108/solution.py - - vars: - submission_file: test_submissions/csc108/correctness_1_submission/correctness_1_submission.py - solution_file: test_submissions/csc108/solution.py + - vars: + submission_file: test_submissions/csc108/correctness_1_submission/correctness_1_submission.py + solution_file: test_submissions/csc108/solution.py - - vars: - submission_file: test_submissions/csc108/correctness_2_submission/correctness_2_submission.py - solution_file: test_submissions/csc108/solution.py + - vars: + submission_file: test_submissions/csc108/correctness_2_submission/correctness_2_submission.py + solution_file: test_submissions/csc108/solution.py - - vars: - submission_file: test_submissions/csc108/efficiency_submission/efficiency_submission.py - solution_file: test_submissions/csc108/solution.py + - vars: + submission_file: test_submissions/csc108/efficiency_submission/efficiency_submission.py + solution_file: test_submissions/csc108/solution.py - - vars: - submission_file: 
test_submissions/csc108/style_submission/style_submission.py - solution_file: test_submissions/csc108/solution.py + - vars: + submission_file: test_submissions/csc108/style_submission/style_submission.py + solution_file: test_submissions/csc108/solution.py - - vars: - submission_file: test_submissions/gac_example/correct_submission/correct_submission.py - solution_file: test_submissions/gac_example/solution.py + - vars: + submission_file: test_submissions/gac_example/correct_submission/correct_submission.py + solution_file: test_submissions/gac_example/solution.py - - vars: - submission_file: test_submissions/gac_example/fail_submission/fail_submission.py - solution_file: test_submissions/gac_example/solution.py + - vars: + submission_file: test_submissions/gac_example/fail_submission/fail_submission.py + solution_file: test_submissions/gac_example/solution.py - - vars: - submission_file: test_submissions/gac_example/inefficient_submission/inefficient_submission.py - solution_file: test_submissions/gac_example/solution.py + - vars: + submission_file: test_submissions/gac_example/inefficient_submission/inefficient_submission.py + solution_file: test_submissions/gac_example/solution.py - - vars: - submission_file: test_submissions/gac_example/partial_correct_submission/partial_correct_submission.py - solution_file: test_submissions/gac_example/solution.py + - vars: + submission_file: test_submissions/gac_example/partial_correct_submission/partial_correct_submission.py + solution_file: test_submissions/gac_example/solution.py diff --git a/promptfoo/tests/deepseek_v3_tests/deepseek_v3_text_tests.yaml b/promptfoo/tests/deepseek_v3_tests/deepseek_v3_text_tests.yaml index 4af1d62..e8c4b91 100644 --- a/promptfoo/tests/deepseek_v3_tests/deepseek_v3_text_tests.yaml +++ b/promptfoo/tests/deepseek_v3_tests/deepseek_v3_text_tests.yaml @@ -5,38 +5,39 @@ defaultTest: vars: model: deepSeek-v3 scope: text - -scenarios: - - config: - - vars: { prompt: text_analyze_v3, system_prompt: 
text_feedback_v3 } - tests: - - vars: - submission_file: test_submissions/data_collection_ethics_module/average_submission/average_submission.txt - solution_file: test_submissions/data_collection_ethics_module/solution.txt - - - vars: - submission_file: test_submissions/data_collection_ethics_module/excellent_submission/excellent_submission.txt - solution_file: test_submissions/data_collection_ethics_module/solution.txt - - - vars: - submission_file: test_submissions/data_collection_ethics_module/off_topic_submission/off_topic_submission.txt - solution_file: test_submissions/data_collection_ethics_module/solution.txt - - - vars: - submission_file: test_submissions/data_collection_ethics_module/weak_submission/weak_submission.txt - solution_file: test_submissions/data_collection_ethics_module/solution.txt - - - vars: - submission_file: test_submissions/csc373_eft_optimality_proof/fail_submission/fail_submission.pdf - solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf - prompt: text_eft_proof - - - vars: - submission_file: test_submissions/csc373_eft_optimality_proof/incomplete_submission/incomplete_submission.pdf - solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf - prompt: text_eft_proof - - - vars: - submission_file: test_submissions/csc373_eft_optimality_proof/induction_submission/induction_submission.pdf - solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf - prompt: text_eft_proof + system_prompt: text_feedback_v3 + +prompts: + - file://../../../ai_feedback/data/prompts/user/text_analyze_v3.md + +tests: + - vars: + submission_file: test_submissions/data_collection_ethics_module/average_submission/average_submission.txt + solution_file: test_submissions/data_collection_ethics_module/solution.txt + + - vars: + submission_file: test_submissions/data_collection_ethics_module/excellent_submission/excellent_submission.txt + solution_file: test_submissions/data_collection_ethics_module/solution.txt + + 
- vars: + submission_file: test_submissions/data_collection_ethics_module/off_topic_submission/off_topic_submission.txt + solution_file: test_submissions/data_collection_ethics_module/solution.txt + + - vars: + submission_file: test_submissions/data_collection_ethics_module/weak_submission/weak_submission.txt + solution_file: test_submissions/data_collection_ethics_module/solution.txt + + - vars: + submission_file: test_submissions/csc373_eft_optimality_proof/fail_submission/fail_submission.pdf + solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf + prompt: text_eft_proof + + - vars: + submission_file: test_submissions/csc373_eft_optimality_proof/incomplete_submission/incomplete_submission.pdf + solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf + prompt: text_eft_proof + + - vars: + submission_file: test_submissions/csc373_eft_optimality_proof/induction_submission/induction_submission.pdf + solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf + prompt: text_eft_proof diff --git a/promptfoo/tests/llama_3.2_vision_tests/llama_3.2_vision_image_tests.yaml b/promptfoo/tests/llama_3.2_vision_tests/llama_3.2_vision_image_tests.yaml index 644c1df..906aa96 100644 --- a/promptfoo/tests/llama_3.2_vision_tests/llama_3.2_vision_image_tests.yaml +++ b/promptfoo/tests/llama_3.2_vision_tests/llama_3.2_vision_image_tests.yaml @@ -6,38 +6,38 @@ defaultTest: model: llama3.2-vision:90b scope: image -scenarios: - - config: - - vars: { prompt: image_overall } - tests: - - description: "JSC270 correct submission" - vars: - submission_file: test_submissions/jsc270/correct_submission/correct_submission.ipynb - - - description: "JSC270 correctness issue" - vars: - submission_file: test_submissions/jsc270/correctness_submission/correctness_submission.ipynb - - - description: "JSC270 efficiency issue" - vars: - submission_file: test_submissions/jsc270/efficiency_submission/efficiency_submission.ipynb - - - description: "JSC270 
style issue" - vars: - submission_file: test_submissions/jsc270/style_submission/style_submission.ipynb - - - description: "STA130 correct submission" - vars: - submission_file: test_submissions/sta130/correct_submission/correct_submission.ipynb - - - description: "STA130 correctness issue" - vars: - submission_file: test_submissions/sta130/correctness_submission/correctness_submission.ipynb - - - description: "STA130 efficiency issue" - vars: - submission_file: test_submissions/sta130/efficiency_submission/efficiency_submission.ipynb - - - description: "STA130 style issue" - vars: - submission_file: test_submissions/sta130/style_submission/style_submission.ipynb +prompts: + - file://../../../ai_feedback/data/prompts/user/image_overall.md + +tests: + - description: "JSC270 correct submission" + vars: + submission_file: test_submissions/jsc270/correct_submission/correct_submission.ipynb + + - description: "JSC270 correctness issue" + vars: + submission_file: test_submissions/jsc270/correctness_submission/correctness_submission.ipynb + + - description: "JSC270 efficiency issue" + vars: + submission_file: test_submissions/jsc270/efficiency_submission/efficiency_submission.ipynb + + - description: "JSC270 style issue" + vars: + submission_file: test_submissions/jsc270/style_submission/style_submission.ipynb + + - description: "STA130 correct submission" + vars: + submission_file: test_submissions/sta130/correct_submission/correct_submission.ipynb + + - description: "STA130 correctness issue" + vars: + submission_file: test_submissions/sta130/correctness_submission/correctness_submission.ipynb + + - description: "STA130 efficiency issue" + vars: + submission_file: test_submissions/sta130/efficiency_submission/efficiency_submission.ipynb + + - description: "STA130 style issue" + vars: + submission_file: test_submissions/sta130/style_submission/style_submission.ipynb diff --git a/promptfoo/tests/llama_3.2_vision_tests/llama_3.2_vision_image_tests_with_system_prompt.yaml 
b/promptfoo/tests/llama_3.2_vision_tests/llama_3.2_vision_image_tests_with_system_prompt.yaml index 4408e4d..b86f375 100644 --- a/promptfoo/tests/llama_3.2_vision_tests/llama_3.2_vision_image_tests_with_system_prompt.yaml +++ b/promptfoo/tests/llama_3.2_vision_tests/llama_3.2_vision_image_tests_with_system_prompt.yaml @@ -7,38 +7,38 @@ defaultTest: scope: image system_prompt: image_style_grader -scenarios: - - config: - - vars: { prompt: image_overall } - tests: - - description: "JSC270 correct submission" - vars: - submission_file: test_submissions/jsc270/correct_submission/correct_submission.ipynb - - - description: "JSC270 correctness issue" - vars: - submission_file: test_submissions/jsc270/correctness_submission/correctness_submission.ipynb - - - description: "JSC270 efficiency issue" - vars: - submission_file: test_submissions/jsc270/efficiency_submission/efficiency_submission.ipynb - - - description: "JSC270 style issue" - vars: - submission_file: test_submissions/jsc270/style_submission/style_submission.ipynb - - - description: "STA130 correct submission" - vars: - submission_file: test_submissions/sta130/correct_submission/correct_submission.ipynb - - - description: "STA130 correctness issue" - vars: - submission_file: test_submissions/sta130/correctness_submission/correctness_submission.ipynb - - - description: "STA130 efficiency issue" - vars: - submission_file: test_submissions/sta130/efficiency_submission/efficiency_submission.ipynb - - - description: "STA130 style issue" - vars: - submission_file: test_submissions/sta130/style_submission/style_submission.ipynb +prompts: + - file://../../../ai_feedback/data/prompts/user/image_overall.md + +tests: + - description: "JSC270 correct submission" + vars: + submission_file: test_submissions/jsc270/correct_submission/correct_submission.ipynb + + - description: "JSC270 correctness issue" + vars: + submission_file: test_submissions/jsc270/correctness_submission/correctness_submission.ipynb + + - description: 
"JSC270 efficiency issue" + vars: + submission_file: test_submissions/jsc270/efficiency_submission/efficiency_submission.ipynb + + - description: "JSC270 style issue" + vars: + submission_file: test_submissions/jsc270/style_submission/style_submission.ipynb + + - description: "STA130 correct submission" + vars: + submission_file: test_submissions/sta130/correct_submission/correct_submission.ipynb + + - description: "STA130 correctness issue" + vars: + submission_file: test_submissions/sta130/correctness_submission/correctness_submission.ipynb + + - description: "STA130 efficiency issue" + vars: + submission_file: test_submissions/sta130/efficiency_submission/efficiency_submission.ipynb + + - description: "STA130 style issue" + vars: + submission_file: test_submissions/sta130/style_submission/style_submission.ipynb diff --git a/promptfoo/tests/llava_tests/llava_34b_image_tests.yaml b/promptfoo/tests/llava_tests/llava_34b_image_tests.yaml index 95c07bb..b1a512d 100644 --- a/promptfoo/tests/llava_tests/llava_34b_image_tests.yaml +++ b/promptfoo/tests/llava_tests/llava_34b_image_tests.yaml @@ -6,38 +6,38 @@ defaultTest: model: llava:34b scope: image -scenarios: - - config: - - vars: { prompt: image_overall } - tests: - - description: "JSC270 correct submission" - vars: - submission_file: test_submissions/jsc270/correct_submission/correct_submission.ipynb - - - description: "JSC270 correctness issue" - vars: - submission_file: test_submissions/jsc270/correctness_submission/correctness_submission.ipynb - - - description: "JSC270 efficiency issue" - vars: - submission_file: test_submissions/jsc270/efficiency_submission/efficiency_submission.ipynb - - - description: "JSC270 style issue" - vars: - submission_file: test_submissions/jsc270/style_submission/style_submission.ipynb - - - description: "STA130 correct submission" - vars: - submission_file: test_submissions/sta130/correct_submission/correct_submission.ipynb - - - description: "STA130 correctness issue" - vars: - 
submission_file: test_submissions/sta130/correctness_submission/correctness_submission.ipynb - - - description: "STA130 efficiency issue" - vars: - submission_file: test_submissions/sta130/efficiency_submission/efficiency_submission.ipynb - - - description: "STA130 style issue" - vars: - submission_file: test_submissions/sta130/style_submission/style_submission.ipynb +prompts: + - file://../../../ai_feedback/data/prompts/user/image_overall.md + +tests: + - description: "JSC270 correct submission" + vars: + submission_file: test_submissions/jsc270/correct_submission/correct_submission.ipynb + + - description: "JSC270 correctness issue" + vars: + submission_file: test_submissions/jsc270/correctness_submission/correctness_submission.ipynb + + - description: "JSC270 efficiency issue" + vars: + submission_file: test_submissions/jsc270/efficiency_submission/efficiency_submission.ipynb + + - description: "JSC270 style issue" + vars: + submission_file: test_submissions/jsc270/style_submission/style_submission.ipynb + + - description: "STA130 correct submission" + vars: + submission_file: test_submissions/sta130/correct_submission/correct_submission.ipynb + + - description: "STA130 correctness issue" + vars: + submission_file: test_submissions/sta130/correctness_submission/correctness_submission.ipynb + + - description: "STA130 efficiency issue" + vars: + submission_file: test_submissions/sta130/efficiency_submission/efficiency_submission.ipynb + + - description: "STA130 style issue" + vars: + submission_file: test_submissions/sta130/style_submission/style_submission.ipynb diff --git a/promptfoo/tests/llava_tests/llava_34b_image_tests_with_system_prompt.yaml b/promptfoo/tests/llava_tests/llava_34b_image_tests_with_system_prompt.yaml index a4763b9..177c915 100644 --- a/promptfoo/tests/llava_tests/llava_34b_image_tests_with_system_prompt.yaml +++ b/promptfoo/tests/llava_tests/llava_34b_image_tests_with_system_prompt.yaml @@ -7,38 +7,38 @@ defaultTest: scope: image 
system_prompt: "You are a teaching-assistant for an undergraduate Data-Visualization course. A student submits one figure (PNG/JPG/PDF screenshot) with no accompanying text." -scenarios: - - config: - - vars: { prompt: image_overall } - tests: - - description: "JSC270 correct submission" - vars: - submission_file: test_submissions/jsc270/correct_submission/correct_submission.ipynb - - - description: "JSC270 correctness issue" - vars: - submission_file: test_submissions/jsc270/correctness_submission/correctness_submission.ipynb - - - description: "JSC270 efficiency issue" - vars: - submission_file: test_submissions/jsc270/efficiency_submission/efficiency_submission.ipynb - - - description: "JSC270 style issue" - vars: - submission_file: test_submissions/jsc270/style_submission/style_submission.ipynb - - - description: "STA130 correct submission" - vars: - submission_file: test_submissions/sta130/correct_submission/correct_submission.ipynb - - - description: "STA130 correctness issue" - vars: - submission_file: test_submissions/sta130/correctness_submission/correctness_submission.ipynb - - - description: "STA130 efficiency issue" - vars: - submission_file: test_submissions/sta130/efficiency_submission/efficiency_submission.ipynb - - - description: "STA130 style issue" - vars: - submission_file: test_submissions/sta130/style_submission/style_submission.ipynb +prompts: + - file://../../../ai_feedback/data/prompts/user/image_overall.md + +tests: + - description: "JSC270 correct submission" + vars: + submission_file: test_submissions/jsc270/correct_submission/correct_submission.ipynb + + - description: "JSC270 correctness issue" + vars: + submission_file: test_submissions/jsc270/correctness_submission/correctness_submission.ipynb + + - description: "JSC270 efficiency issue" + vars: + submission_file: test_submissions/jsc270/efficiency_submission/efficiency_submission.ipynb + + - description: "JSC270 style issue" + vars: + submission_file: 
test_submissions/jsc270/style_submission/style_submission.ipynb + + - description: "STA130 correct submission" + vars: + submission_file: test_submissions/sta130/correct_submission/correct_submission.ipynb + + - description: "STA130 correctness issue" + vars: + submission_file: test_submissions/sta130/correctness_submission/correctness_submission.ipynb + + - description: "STA130 efficiency issue" + vars: + submission_file: test_submissions/sta130/efficiency_submission/efficiency_submission.ipynb + + - description: "STA130 style issue" + vars: + submission_file: test_submissions/sta130/style_submission/style_submission.ipynb diff --git a/promptfoo/tests/remote_tests/remote_code_tests.yaml b/promptfoo/tests/remote_tests/remote_code_tests.yaml index 61030c7..192832e 100644 --- a/promptfoo/tests/remote_tests/remote_code_tests.yaml +++ b/promptfoo/tests/remote_tests/remote_code_tests.yaml @@ -5,64 +5,63 @@ defaultTest: vars: model: remote scope: code - submission_type: python -scenarios: - - config: - - vars: { prompt: code_table } - - vars: { prompt: code_explanation } - - vars: { prompt: code_hint } - - vars: { prompt: code_lines } - - vars: { prompt: code_template } - tests: - - vars: - submission_file: test_submissions/csc263_opt_connected/correct_submission/correct_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py +prompts: + - file://../../../ai_feedback/data/prompts/user/code_table.md + - file://../../../ai_feedback/data/prompts/user/code_explanation.md + - file://../../../ai_feedback/data/prompts/user/code_hint.md + - file://../../../ai_feedback/data/prompts/user/code_lines.md + - file://../../../ai_feedback/data/prompts/user/code_template.md - - vars: - submission_file: test_submissions/csc263_opt_connected/fail_submission/fail_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py +tests: + - vars: + submission_file: test_submissions/csc263_opt_connected/correct_submission/correct_submission.py + 
solution_file: test_submissions/csc263_opt_connected/solution.py - - vars: - submission_file: test_submissions/csc263_opt_connected/incorrect_algo_submission/incorrect_algo_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py + - vars: + submission_file: test_submissions/csc263_opt_connected/fail_submission/fail_submission.py + solution_file: test_submissions/csc263_opt_connected/solution.py - - vars: - submission_file: test_submissions/csc263_opt_connected/style_issues_submission/style_issues_submission.py - solution_file: test_submissions/csc263_opt_connected/solution.py + - vars: + submission_file: test_submissions/csc263_opt_connected/incorrect_algo_submission/incorrect_algo_submission.py + solution_file: test_submissions/csc263_opt_connected/solution.py - - vars: - submission_file: test_submissions/csc108/correct_submission/correct_submission.py - solution_file: test_submissions/csc108/solution.py + - vars: + submission_file: test_submissions/csc263_opt_connected/style_issues_submission/style_issues_submission.py + solution_file: test_submissions/csc263_opt_connected/solution.py - - vars: - submission_file: test_submissions/csc108/correctness_1_submission/correctness_1_submission.py - solution_file: test_submissions/csc108/solution.py + - vars: + submission_file: test_submissions/csc108/correct_submission/correct_submission.py + solution_file: test_submissions/csc108/solution.py - - vars: - submission_file: test_submissions/csc108/correctness_2_submission/correctness_2_submission.py - solution_file: test_submissions/csc108/solution.py + - vars: + submission_file: test_submissions/csc108/correctness_1_submission/correctness_1_submission.py + solution_file: test_submissions/csc108/solution.py - - vars: - submission_file: test_submissions/csc108/efficiency_submission/efficiency_submission.py - solution_file: test_submissions/csc108/solution.py + - vars: + submission_file: 
test_submissions/csc108/correctness_2_submission/correctness_2_submission.py + solution_file: test_submissions/csc108/solution.py - - vars: - submission_file: test_submissions/csc108/style_submission/style_submission.py - solution_file: test_submissions/csc108/solution.py + - vars: + submission_file: test_submissions/csc108/efficiency_submission/efficiency_submission.py + solution_file: test_submissions/csc108/solution.py - - vars: - submission_file: test_submissions/gac_example/correct_submission/correct_submission.py - solution_file: test_submissions/gac_example/solution.py + - vars: + submission_file: test_submissions/csc108/style_submission/style_submission.py + solution_file: test_submissions/csc108/solution.py - - vars: - submission_file: test_submissions/gac_example/fail_submission/fail_submission.py - solution_file: test_submissions/gac_example/solution.py + - vars: + submission_file: test_submissions/gac_example/correct_submission/correct_submission.py + solution_file: test_submissions/gac_example/solution.py - - vars: - submission_file: test_submissions/gac_example/inefficient_submission/inefficient_submission.py - solution_file: test_submissions/gac_example/solution.py + - vars: + submission_file: test_submissions/gac_example/fail_submission/fail_submission.py + solution_file: test_submissions/gac_example/solution.py - - vars: - submission_file: test_submissions/gac_example/partial_correct_submission/partial_correct_submission.py - solution_file: test_submissions/gac_example/solution.py + - vars: + submission_file: test_submissions/gac_example/inefficient_submission/inefficient_submission.py + solution_file: test_submissions/gac_example/solution.py + + - vars: + submission_file: test_submissions/gac_example/partial_correct_submission/partial_correct_submission.py + solution_file: test_submissions/gac_example/solution.py diff --git a/promptfoo/tests/remote_tests/remote_text_tests.yaml b/promptfoo/tests/remote_tests/remote_text_tests.yaml index 
d7e8a0c..4660e7c 100644 --- a/promptfoo/tests/remote_tests/remote_text_tests.yaml +++ b/promptfoo/tests/remote_tests/remote_text_tests.yaml @@ -5,36 +5,35 @@ defaultTest: vars: model: remote scope: text - submission_type: pdf - -scenarios: - - config: - - vars: { prompt: text_pdf_analyze } - tests: - - vars: - submission_file: test_submissions/data_collection_ethics_module/average_submission/average_submission.txt - solution_file: test_submissions/data_collection_ethics_module/solution.txt - - - vars: - submission_file: test_submissions/data_collection_ethics_module/excellent_submission/excellent_submission.txt - solution_file: test_submissions/data_collection_ethics_module/solution.txt - - - vars: - submission_file: test_submissions/data_collection_ethics_module/off_topic_submission/off_topic_submission.txt - solution_file: test_submissions/data_collection_ethics_module/solution.txt - - - vars: - submission_file: test_submissions/data_collection_ethics_module/weak_submission/weak_submission.txt - solution_file: test_submissions/data_collection_ethics_module/solution.txt - - - vars: - submission_file: test_submissions/csc373_eft_optimality_proof/fail_submission/fail_submission.pdf - solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf - - - vars: - submission_file: test_submissions/csc373_eft_optimality_proof/incomplete_submission/incomplete_submission.pdf - solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf - - - vars: - submission_file: test_submissions/csc373_eft_optimality_proof/induction_submission/induction_submission.pdf - solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf + +prompts: + - file://../../../ai_feedback/data/prompts/user/text_pdf_analyze.md + +tests: + - vars: + submission_file: test_submissions/data_collection_ethics_module/average_submission/average_submission.txt + solution_file: test_submissions/data_collection_ethics_module/solution.txt + + - vars: + submission_file: 
test_submissions/data_collection_ethics_module/excellent_submission/excellent_submission.txt + solution_file: test_submissions/data_collection_ethics_module/solution.txt + + - vars: + submission_file: test_submissions/data_collection_ethics_module/off_topic_submission/off_topic_submission.txt + solution_file: test_submissions/data_collection_ethics_module/solution.txt + + - vars: + submission_file: test_submissions/data_collection_ethics_module/weak_submission/weak_submission.txt + solution_file: test_submissions/data_collection_ethics_module/solution.txt + + - vars: + submission_file: test_submissions/csc373_eft_optimality_proof/fail_submission/fail_submission.pdf + solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf + + - vars: + submission_file: test_submissions/csc373_eft_optimality_proof/incomplete_submission/incomplete_submission.pdf + solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf + + - vars: + submission_file: test_submissions/csc373_eft_optimality_proof/induction_submission/induction_submission.pdf + solution_file: test_submissions/csc373_eft_optimality_proof/solution.pdf