Skip to content

Commit a3a4b61

Browse files
committed
[IMP] account_statement_import_sheet_file_bg: Filter empty rows by date column
Stop processing Excel rows when the timestamp column is empty.

Previously, the import would process all rows in the Excel file until the end, including thousands of empty rows. This caused performance issues and unnecessary data processing.

Now the system:
- Identifies the date/timestamp column from the sheet_mapping_id configuration
- Only processes rows where the timestamp column has a value
- Stops processing at the first empty timestamp row

This prevents importing empty rows and improves performance when handling Excel files with many empty rows at the end.

closes #356
X-original-commit: ded1456
Signed-off-by: Filoquin adhoc <maq@adhoc.com.ar>
Signed-off-by: rov-adhoc <rov@adhoc.com.ar>
1 parent 8003a58 commit a3a4b61

1 file changed

Lines changed: 29 additions & 1 deletion

File tree

account_statement_import_sheet_file_bg/models/account_statement_import.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,8 @@ def import_file_button(self, wizard_data=None):
126126

127127
def split_base64_excel(self, header_rows_count, rows_per_file_limit):
128128
"""Split Excel file into multiple parts to avoid overloading the system.
129-
Returns empty list if file is not a valid Excel or if split is not needed."""
129+
Returns empty list if file is not a valid Excel or if split is not needed.
130+
Only processes rows where the date column is not empty."""
130131
if not self.statement_file:
131132
return []
132133

@@ -145,6 +146,16 @@ def split_base64_excel(self, header_rows_count, rows_per_file_limit):
145146

146147
header_rows = all_rows[:header_rows_count]
147148
data_rows = all_rows[header_rows_count:]
149+
150+
# Get the date column index from the sheet mapping using the parser's method
151+
parser = self.env["account.statement.import.sheet.parser"]
152+
header = parser.parse_header((input_workbook, input_worksheet), self.sheet_mapping_id)
153+
date_column_indexes = parser._get_column_indexes(header, "timestamp_column", self.sheet_mapping_id)
154+
date_column_index = date_column_indexes[0] if date_column_indexes else None
155+
156+
# Filter out rows where the date column is empty
157+
data_rows = self._filter_rows_with_date(data_rows, date_column_index)
158+
148159
start_row_index = 0
149160
total_data_rows = len(data_rows)
150161

@@ -171,3 +182,20 @@ def split_base64_excel(self, header_rows_count, rows_per_file_limit):
171182

172183
start_row_index = end_row_index
173184
return output_base64_list
185+
186+
def _filter_rows_with_date(self, data_rows, date_column_index):
    """Return the leading data rows whose date cell holds a value.

    Rows are scanned in order. The scan ends at the first row whose cell at
    ``date_column_index`` has a falsy ``.value``; that row and every row
    after it are dropped. Rows too short to contain the date column are
    left out of the result but do not end the scan.

    :param data_rows: sequence of rows, each a sequence of cell objects
        exposing a ``.value`` attribute.
    :param date_column_index: position of the date cell within a row, or
        None when no date column is known.
    :return: ``data_rows`` itself when ``date_column_index`` is None,
        otherwise a new list with the retained rows.
    """
    if date_column_index is None:
        return data_rows

    kept = []
    for cells in data_rows:
        if len(cells) <= date_column_index:
            # No cell at the date position: skip the row, keep scanning.
            continue
        if not cells[date_column_index].value:
            # The first empty date marks the end of the usable data.
            break
        kept.append(cells)
    return kept

0 commit comments

Comments
 (0)