33import os
44
55import dacite
6+ import openpyxl
67import pandas as pd
78from openpyxl import load_workbook
89
910from fast_form .config .configuration_dataclasses import PathsForProcessingConfig
1011from fast_form .config .configuration_loading import get_processing_config
1112from fast_form .outputting .process_document import process_document_and_add_to_validation_excel
13+ from fast_form .structure_parser .form_structure_dataclasses import FieldType
1214
1315SHEET_WITH_RESULTS = "automatic_results"
1416VALIDATION_EXCEL_NAME = "validation_excel.xlsx"
@@ -25,7 +27,7 @@ def process_to_validation_excel(paths_for_processing_config: PathsForProcessingC
2527 document_names = [file for file in os .listdir (paths_for_processing_config .folder_with_documents_path ) if
2628 file .endswith ('.pdf' ) or file .endswith (".jpg" ) or file .endswith (".png" )]
2729 logger .info (f"Processing to validation excel from { paths_for_processing_config .folder_with_documents_path } . Number"
28- f" of documents is { len (document_names )} " )
30+ f" of documents is { len (document_names )} " )
2931 for document_name in document_names :
3032 logging .debug (f"Processing document { document_name } " )
3133 process_document_and_add_to_validation_excel (
@@ -48,6 +50,7 @@ def process_to_final_excel(paths_for_processing_config: PathsForProcessingConfig
4850 one_patient_per_row_df = (
4951 validation_df
5052 .set_index (['patient_id' , 'name' ])
53+ .loc [lambda df : df .field_type == FieldType .SINGLE_CHOICE ]
5154 .data
5255 .apply (lambda response_or_error : response_or_error if response_or_error >= 0 else "" )
5356 .unstack (level = 1 )
@@ -65,8 +68,10 @@ def process_to_final_excel(paths_for_processing_config: PathsForProcessingConfig
6568 sheet_name = SHEET_WITH_RESULTS )
6669
6770 combined_one_patient_per_row_df = one_patient_per_row_df .combine_first (old_one_patient_per_row_df )
71+ book = load_workbook (paths_for_processing_config .final_excel_path )
72+ else :
73+ book = openpyxl .Workbook ()
6874
69- book = load_workbook (paths_for_processing_config .final_excel_path )
7075 writer = pd .ExcelWriter (paths_for_processing_config .final_excel_path , engine = 'openpyxl' )
7176 if SHEET_WITH_RESULTS in book .sheetnames :
7277 book .remove (book [SHEET_WITH_RESULTS ])
0 commit comments