Merge pull request #46 from iansan5653/feature/mcta-support

iansan5653 · web-flow · commit c1587061f4b1 · 2021-11-02T23:53:37.000-04:00
Add support for outputting files for MCTA
diff --git a/code/data_exporting.py b/code/data_exporting.py
@@ -43,6 +43,10 @@ def validate_order_map(order_map: tp.Dict[str, tp.List[int]],
                 f"Arrangement file entry for '{form_code}' is invalid. All arrangement file entries must contain one of each index from 1 to the number of questions."
             )
 
+def save_csv(data: tp.List[tp.List[str]], path: pathlib.PurePath):
+    """Write the rows in `data` to a CSV file at `path`, replacing any existing file."""
+    with open(path, "w", newline="") as csv_file:
+        csv.writer(csv_file).writerows(data)
 
 class OutputSheet():
     """A lightweight matrix of data to be exported. Faster than a dataframe but
@@ -53,13 +57,15 @@ class OutputSheet():
     num_questions: int
     row_count: int
     first_question_column_index: int
+    form_code_column_index: int
 
     def __init__(self, columns: tp.List[RealOrVirtualField], num_questions: int):
         self.field_columns = columns
         self.num_questions = num_questions
         field_column_names = [COLUMN_NAMES[column] for column in columns]
         answer_columns = [f"Q{i + 1}" for i in range(self.num_questions)]
         self.first_question_column_index = len(field_column_names)
+        self.form_code_column_index = self.field_columns.index(Field.TEST_FORM_CODE)
         self.data = [field_column_names + answer_columns]
         self.row_count = 0
 
@@ -71,9 +77,7 @@ def save(self, path: pathlib.PurePath, filebasename: str, sort: bool,
         data = self.data
         if(transpose):
             data = list_utils.transpose(data)
-        with open(str(output_path), 'w+', newline='') as output_file:
-            writer = csv.writer(output_file)
-            writer.writerows(data)
+        save_csv(data, output_path)
         return output_path
 
     def delete_field_column(self, column: RealOrVirtualField):
diff --git a/code/main.py b/code/main.py
@@ -8,6 +8,7 @@
 import grid_info as grid_i
 import grid_reading as grid_r
 import image_utils
+from mcta_processing import transform_and_save_mcta_output
 import scoring
 import user_interface
 
@@ -21,6 +22,7 @@
 keys_file = user_input.keys_file
 arrangement_file = user_input.arrangement_map
 sort_results = user_input.sort_results
+output_mcta = user_input.output_mcta
 debug_mode_on = user_input.debug_mode
 form_variant = grid_i.form_150q if user_input.form_variant == user_interface.FormVariantSelection.VARIANT_150_Q else grid_i.form_75q
 
@@ -105,6 +107,7 @@
                 form_code_field, grid, threshold, form_variant,
                 field_fill_percents[form_code_field]) or ""
             keys_results.add(field_data, answers)
+
         else:
             for field in form_variant.fields.keys():
                 field_value = grid_r.read_field_as_string(
@@ -146,6 +149,7 @@
                           sort_results,
                           timestamp=files_timestamp,
                           transpose=True)
+
         success_string += "✔️ Key processed and saved.\n"
 
         scores = scoring.score_results(answers_results, keys_results,
@@ -171,6 +175,9 @@
                     timestamp=files_timestamp)
         success_string += "✔️ All scored results processed and saved."
 
+    if (output_mcta):
+        transform_and_save_mcta_output(answers_results, keys_results, files_timestamp, output_folder)
+
     progress.set_status(success_string, False)
 except (RuntimeError, ValueError) as e:
     wrapped_err = "\n".join(textwrap.wrap(str(e), 70))
diff --git a/code/mcta_processing.py b/code/mcta_processing.py
@@ -0,0 +1,96 @@
+import typing as tp
+import pathlib
+import datetime
+import itertools
+
+from data_exporting import format_timestamp_for_file, save_csv, OutputSheet
+
+"""Support for additional outputs used by the Multiple Choice Test Analysis software."""
+
+def transform_and_save_mcta_output(answers_results: OutputSheet,
+                                   keys_results: OutputSheet,
+                                   files_timestamp: datetime.datetime,
+                                   output_folder: pathlib.Path) -> None:
+    """Generate and save the extra files consumed by the downstream Multiple Choice Test Analysis
+    software: one key file per key row, then one results file per form code. The format is
+    dictated by that software, so these files are not consistent with the rest of the output."""
+    create_keys_files(keys_results, output_folder, files_timestamp)
+    create_answers_files(answers_results, output_folder, files_timestamp)
+
+
+def create_keys_files(keys_results: OutputSheet, output_folder: pathlib.Path, files_timestamp: datetime.datetime):
+    """Create one key file per key row for the Multiple Choice Test Analysis software.
+
+    Params:
+        keys_results: The results of the keys file. The first row is the header and is skipped.
+        output_folder: The folder to save the files to.
+        files_timestamp: The timestamp to use for the files.
+    """
+    form_code_col = keys_results.form_code_column_index
+    for row in keys_results.data[1:]:
+        # The code can read as "[A|B]" when both A and B are marked; the [|] are not safe for filenames.
+        file_safe_code = row[form_code_col].replace("[", "").replace("]", "").replace("|", "")
+        csv_data = build_key_csv(row[keys_results.first_question_column_index:])
+        save_mcta_csv(csv_data, output_folder, f"{file_safe_code}_key", files_timestamp)
+
+
+def create_answers_files(answers_results: OutputSheet,
+                         output_folder: pathlib.Path,
+                         files_timestamp: datetime.datetime):
+    """Create the answer files for the Multiple Choice Test Analysis software.
+
+    Writes one results file per distinct form code found in the answers data. Each row keeps its
+    zero-based position in the original sheet so that students can be named anonymously.
+
+    Params:
+        answers_results: The results of the answers file.
+        output_folder: The folder to save the files to.
+        files_timestamp: The timestamp to use for the files.
+    """
+    form_code_col = answers_results.form_code_column_index
+    first_question_col = answers_results.first_question_column_index
+
+    # Preserve the original index for naming students anonymously.
+    # List of tuples of (form code, original index, answers).
+    answers_with_form_code = [(row[form_code_col], i, row[first_question_col:])
+                              for (i, row) in enumerate(answers_results.data[1:])]
+
+    # groupby only merges adjacent items, so the rows must be sorted by form code first.
+    sorted_by_code = sorted(answers_with_form_code, key=lambda entry: entry[0])
+    for code, group in itertools.groupby(sorted_by_code, key=lambda entry: entry[0]):
+        csv_data = build_answers_csv([(original_index, answers) for (_, original_index, answers) in group])
+        # Test form code can be in [A|B] form if student selects A and B. The [|] are not safe for filename.
+        file_safe_code = code.replace("[", "").replace("]", "").replace("|", "")
+        save_mcta_csv(csv_data, output_folder, f"{file_safe_code}_results", files_timestamp)
+
+
+def build_key_csv(answers: tp.List[str]) -> tp.List[tp.List[str]]:
+    """Return the rows of a key file: a header row followed by one row per question.
+
+    Params:
+        answers: All of the answers for this form code, in order.
+    """
+    rows = [["", "Answer", "Title", "Concept"]]
+    rows.extend([f"Q{number}", answer, f"Q{number}", "unknown"] for number, answer in enumerate(answers, 1))
+    return rows
+
+
+def build_answers_csv(data: tp.List[tp.Tuple[int, tp.List[str]]]) -> tp.List[tp.List[str]]:
+    """Build the CSV data for an answers file. Should be called once for each form code.
+
+    Params:
+        data: The data to save into the file. A list of rows, where each row represents a student.
+              Each row is a tuple of the student's original index (for naming) and the list of
+              answers. The first row sets the number of question columns; the list may be empty.
+    """
+    question_count = len(data[0][1]) if data else 0
+    header = [""] + [f"Q{number}" for number in range(1, question_count + 1)]
+    return [header] + [[f"Student{index}"] + answers for (index, answers) in data]
+
+
+def save_mcta_csv(data: tp.List[tp.List[str]], path: pathlib.PurePath, basefilename: str,
+                  timestamp: datetime.datetime):
+    """Save `data` as a CSV in the folder `path`, naming the file from the formatted `timestamp`
+    followed by "__mcta_" and `basefilename`."""
+    filename = path / f"{format_timestamp_for_file(timestamp)}__mcta_{basefilename}.csv"
+    save_csv(data, filename)
diff --git a/code/user_interface.py b/code/user_interface.py
@@ -283,6 +283,7 @@ def disable(self):
 class OutputFolderPickerWidget():
     folder: tp.Optional[Path]
     sort_results: bool
+    output_mcta: bool
     sort_toggle_count: int
 
     def __init__(self,
@@ -301,11 +302,15 @@ def __init__(self,
         self.__sort_results_checkbox = CheckboxWidget(
             container, "Sort results by students' names.",
             self.__on_sort_update)
+        self.__output_mcta_checkbox = CheckboxWidget(
+            container, "Output additional files for MCTA.",
+            self.__on_update, reduce_padding_above=True)
 
         pack(container, fill=tk.X)
 
         self.folder = None
         self.sort_results = False
+        self.output_mcta = False
         self.sort_toggle_count = 0
 
     def __on_sort_update(self):
@@ -315,13 +320,15 @@ def __on_sort_update(self):
     def __on_update(self):
         self.folder = self.__output_folder_picker.value
         self.sort_results = self.__sort_results_checkbox.value
+        self.output_mcta = self.__output_mcta_checkbox.value
 
         if self.__on_change is not None:
             self.__on_change()
 
     def disable(self):
         self.__output_folder_picker.disable()
         self.__sort_results_checkbox.disable()
+        self.__output_mcta_checkbox.disable()
 
 
 class AnswerKeyPickerWidget():
@@ -443,6 +450,7 @@ class MainWindow:
     keys_file: tp.Optional[Path]
     arrangement_map: tp.Optional[Path]
     sort_results: bool
+    output_mcta: bool
     debug_mode: bool = False
     form_variant: FormVariantSelection
 
@@ -566,6 +574,11 @@ def __on_update(self):
         else:
             new_status += f"Input sort order will be maintained.\n"
 
+
+        self.output_mcta = self.__output_folder_picker.output_mcta
+        if self.output_mcta:
+            new_status += "Additional files will be output for use with analysis software."
+
         if self.__output_folder_picker.sort_toggle_count > 15:
             new_status += "WARNING: Debug mode enabled. Restart to disable."
             self.debug_mode = True
diff --git a/examples/batch-B/11.jpg b/examples/batch-B/11.jpg