add a script that only runs in naive_test mode to supplement data #3

Open · wants to merge 3 commits into master
4 changes: 3 additions & 1 deletion .gitignore
@@ -2,4 +2,6 @@
.idea/
logs/
hoareprompt-env/
src/__pycache__
src/__pycache__
src/postprocessing_neww.py
helper_runners/test_verify_yh.sh
13 changes: 13 additions & 0 deletions configs/naive_test_qwen7.json
@@ -0,0 +1,13 @@
{
"model": "qwen2.5-7b-instruct",
"temperature": 1.0,
"assessment-mode": "naive-test",
"postcondition-mode": "hoarecot",
"postcondition-cot-prompt": "node-based-style",
"annotated_type" : "simple",
"loop-unrolling-count": 3,
"entailment-mode": "verify-answer",
"cex-mode": "with-postcondition",
"annotated" : false,
"fsl": false
}
13 changes: 13 additions & 0 deletions configs/naive_test_qwen72.json
@@ -0,0 +1,13 @@
{
"model": "qwen2.5-72b-instruct",
"temperature": 1.0,
"assessment-mode": "naive-test",
"postcondition-mode": "hoarecot",
"postcondition-cot-prompt": "node-based-style",
"annotated_type" : "simple",
"loop-unrolling-count": 3,
"entailment-mode": "verify-answer",
"cex-mode": "with-postcondition",
"annotated" : false,
"fsl": false
}
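Note: the two configs above are identical apart from the "model" field (the 7B vs. 72B Qwen instruct models). Below is a minimal sketch of how such a config might be consumed; the load_config helper and its mode check are illustrative assumptions, not code from this PR:

```
import json

def load_config(path):
    # Hypothetical loader; keys follow the naive_test_qwen*.json files above
    with open(path, "r", encoding="utf-8") as f:
        cfg = json.load(f)
    # The script added in this PR is meant to run only in naive-test mode
    if cfg.get("assessment-mode") != "naive-test":
        raise ValueError(f"{path} is not a naive-test config")
    return cfg

cfg = load_config("configs/naive_test_qwen7.json")
print(cfg["model"], cfg["loop-unrolling-count"])
```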
7 files renamed without changes.
56 changes: 56 additions & 0 deletions new_data/add_json.py
@@ -0,0 +1,56 @@
import json
import os

def add_to_jsonl(num: int, alp: str, input_data: str, output_data: str) -> None:
    """
    Append data to a specified JSONL file.

    Args:
        num (int): Numeric part, used to generate the filename.
        alp (str): Alphabetic part, used to generate the filename.
        input_data (str): Value for the 'input' key in the JSON object.
        output_data (str): Value for the 'output' key in the JSON object.
    """
    filename = f"./test/{num}_{alp}.jsonl"

    # Ensure the target directory exists before appending
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    # Create the data object to add
    data = {
        "input": input_data,
        "output": output_data
    }

    try:
        # Write data to the file in append mode
        with open(filename, 'a', encoding='utf-8') as f:
            # Ensure non-ASCII characters (like Chinese) are written correctly
            f.write(json.dumps(data, ensure_ascii=False) + '\n')
        print(f"Successfully added data to {filename}")
    except Exception as e:
        print(f"Error while adding data: {e}")

def get_multiline_input(prompt: str) -> str:
    """Get multiline input from the user until an empty line is entered."""
    print(prompt)
    lines = []
    while True:
        line = input()
        if line == '':  # Empty line indicates end of input
            break
        lines.append(line)
    # Join all lines with newline characters
    return '\n'.join(lines)

if __name__ == "__main__":
    # Get input from keyboard
    try:
        num = int(input("Enter numeric part (num): "))
        alp = input("Enter alphabetic part (alp): ")

        input_data = get_multiline_input("Enter value for the 'input' field (finish with an empty line):")
        output_data = get_multiline_input("Enter value for the 'output' field (finish with an empty line):")

        add_to_jsonl(num, alp, input_data, output_data)
    except ValueError:
        print("Error: Numeric part must be an integer.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
849 changes: 849 additions & 0 deletions new_data/filtered_merged_cor.json

Large diffs are not rendered by default.

855 changes: 855 additions & 0 deletions new_data/filtered_merged_incor.json

Large diffs are not rendered by default.

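Neither large file is rendered here, but ge_new_test.py below reads filtered_merged_cor.json as a JSON array of task records and accesses task_name, description, and generated_code on each. A sanity-check sketch under that inferred schema (the field types are assumptions drawn from how the script uses them):

```
import json

with open("new_data/filtered_merged_cor.json", "r", encoding="utf-8") as f:
    problems = json.load(f)

# Schema inferred from ge_new_test.py: generated_code is iterated as a list of solutions
for p in problems:
    assert isinstance(p["task_name"], str) and "_" in p["task_name"]  # e.g. "1948_A"
    assert isinstance(p["description"], str)
    assert isinstance(p["generated_code"], list)
```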
255 changes: 255 additions & 0 deletions new_data/ge_new_test.py
@@ -0,0 +1,255 @@
import os
import re
import subprocess

import requests
import json

import add_json

other = 'You are a professional program tester. I will provide you with the problem description of a certain competition problem, which will explain the content of the problem. In addition, it will also explain the scope and requirements that valid inputs need to meet. You need to help me generate 10 different test inputs for edge cases (note: only provide the inputs, no need to provide the outputs). '

txt2 = """
You are a professional program tester. I will provide you with the problem description of a certain competition problem, which will explain the problem content. In addition, it will specify the range and constraints that valid inputs must meet.

Your task is to generate 10 diverse and well-considered test inputs focusing on the most important edge cases involved in the problem. You only need to provide the inputs, not the outputs. The inputs you provide don't necessarily have to be large in size, but please choose them carefully to ensure diversity and comprehensive coverage.

You need to use the following format.
# **Input 1**
```

```
# **Input 2**
```

```
# **Input 3**
```

```
# **Input 4**
```

```
# **Input 5**
```

```
# **Input 6**
```

```
# **Input 7**
```

```
# **Input 8**
```

```
# **Input 9**
```

```
# **Input 10**
```

```
Now I will give you the problem description:
{description}
"""

txt = """
You are a professional program tester. I will provide you with the problem description of a certain competition problem, which will explain the problem content as well as the input constraints.

Your task is to generate 20 diverse and well-considered test inputs. These test cases should focus on:

- Covering the most important edge cases based on the constraints and problem details.
- Ensuring that all key functionalities and aspects of the problem description are thoroughly tested.

The inputs do not necessarily need to be large, but they should be carefully chosen to maximize coverage of edge behaviors, special cases, and typical scenarios that validate the correctness and robustness of the implementation.

You only need to provide the inputs, not the expected outputs.

Please use the following format, continuing the same pattern through Input 20.
# **Input 1**
```

```
# **Input 2**
```

```
# **Input 3**
```

```
# **Input 4**
```

```
# **Input 5**
```

```
# **Input 6**
```

```
# **Input 7**
```

```
# **Input 8**
```

```
# **Input 9**
```

```
# **Input 10**
```

```
Now I will give you the problem description:
{description}
"""

url = "https://api.302.ai/v1/chat/completions"

def run_python_code(code, input_data):
    temp_file_path = "temp_program.py"
    with open(temp_file_path, "w") as f:
        f.write(code)

    try:
        result = subprocess.run(
            ["python3", temp_file_path],
            input=input_data.encode(),
            capture_output=True,
            timeout=10  # Prevent infinite loops
        )
        output = result.stdout.decode().strip()
        error = result.stderr.decode().strip()
        return output, error
    except subprocess.TimeoutExpired:
        return "", "Timeout"
    finally:
        os.remove(temp_file_path)

def run_all_solutions_on_inputs(solutions, input_list, ignore_case):
    all_results = []

    for idx, input_data in enumerate(input_list, 1):
        print(f"\n--- Running Test Input {idx} ---\nInput:\n{input_data}\n")
        input_results = []

        outputs_set = set()
        has_error = False
        outputs_list = []

        for sol_idx, code in enumerate(solutions, 1):
            print(f"Running Solution {sol_idx}...")
            output, error = run_python_code(code, input_data)

            outputs_list.append(output)
            input_results.append({
                "solution_index": sol_idx,
                "output": output,
                "error": error
            })

            if error:
                has_error = True
            else:
                if ignore_case:
                    output = output.lower()
                print(output)
                outputs_set.add(output)

        # An input is valid only if every solution ran cleanly and agreed on the output
        is_valid = (not has_error) and (len(outputs_set) == 1)

        all_results.append({
            "input_index": idx,
            "input": input_data,
            "results": input_results,
            "is_valid": is_valid
        })

    return all_results

def api_call_for_test(name, prob_des, solutions, anycase):
    name_num = name.split("_")[0]
    name_alp = name.split("_")[1]
    payload = json.dumps({
        "model": "o1",  # gpt-4o or o1
        "messages": [
            {
                "role": "user",
                "content": txt.format(description=prob_des),
            }
        ]
    })
    headers = {
        'Accept': 'application/json',
        'Authorization': 'sk-g9aImgg6dDoeoHLvrxUA3kQWtvblpOOZiO6mhad7rfWSjmSB',
        'Content-Type': 'application/json'
    }

    response = requests.post(url, headers=headers, data=payload)
    content = json.loads(response.text)
    print("Response received!")

    # Extract test inputs
    test_inputs_text = content['choices'][0]['message']['content']
    pattern = r"# \*\*Input \d+\*\*\n```(.*?)```"
    matches = re.findall(pattern, test_inputs_text, re.DOTALL)

    input_list = [block.strip() for block in matches]

    print(f"\nExtracted {len(input_list)} test inputs\n")

    # Run all solutions
    test_results = run_all_solutions_on_inputs(solutions, input_list, anycase)

    # Filter valid inputs
    valid_inputs = [res for res in test_results if res['is_valid']]

    print(f"\n===== Number of Valid Inputs: {len(valid_inputs)} / {len(input_list)} =====\n")

    # Print valid inputs info
    for v in valid_inputs:
        print(f"Input {v['input_index']} is considered valid")
        print(f"Input Content:\n{v['input']}\n")
        print(f"Outputs from all solutions:\n{v['results'][0]['output']}\n")
        add_json.add_to_jsonl(name_num, name_alp, v['input'], v['results'][0]['output'])

    # If you want to save to file
    # with open("valid_inputs.json", "w", encoding="utf-8") as f:
    #     json.dump(valid_inputs, f, ensure_ascii=False, indent=4)

if __name__ == "__main__":
file_path = "./filtered_merged_cor.json"
if not os.path.exists(file_path):
print(f"Error: File {file_path} does not exist.")
try:
with open(file_path, 'r', encoding='utf-8') as f:
problems = json.load(f)
for idx, problem in enumerate(problems, 1):
name = problem.get('task_name')
# if int(name.split("_")[0]) < 1948:
# continue
description = problem.get('description')
solutions = problem.get('generated_code')
print(f"\n================== Running Task: {name} ==================\n")
if 'any case' in description:
anycase = True
else:
anycase = False
print(anycase)
api_call_for_test(name, description, solutions, anycase)

# break # Remove this if you want to process all tasks
except Exception as e:
print(e)
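As a quick check of the extraction step, here is a sketch that runs the same pattern regex over a fabricated model response; the sample text is invented for illustration, and the backtick fences are built programmatically so the example stays self-contained:

```
import re

FENCE = "`" * 3  # avoids literal backtick fences inside this example
pattern = r"# \*\*Input \d+\*\*\n" + FENCE + r"(.*?)" + FENCE
sample = (
    "# **Input 1**\n" + FENCE + "\n3\n1 2 3\n" + FENCE + "\n"
    "# **Input 2**\n" + FENCE + "\n1\n5\n" + FENCE + "\n"
)
inputs = [m.strip() for m in re.findall(pattern, sample, re.DOTALL)]
print(inputs)  # ['3\n1 2 3', '1\n5']
```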