Skip to content

Commit 8bd4979

Browse files
committed
[FIX] account_statement_import: Enhance file import handling for Excel and CSV formats
closes #383
X-original-commit: 1e6ea97
Signed-off-by: Filoquin adhoc <maq@adhoc.com.ar>
Signed-off-by: rov-adhoc <rov@adhoc.com.ar>
1 parent 995a497 commit 8bd4979

1 file changed

Lines changed: 75 additions & 16 deletions

File tree

account_statement_import_sheet_file_bg/models/account_statement_import.py

Lines changed: 75 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,27 @@
33

44

55
import base64
6-
from io import BytesIO
6+
import logging
7+
from io import BytesIO, StringIO
78

89
from markupsafe import Markup
910
from odoo import _, models
1011
from odoo.exceptions import UserError
1112
from openpyxl import Workbook, load_workbook
1213

14+
_logger = logging.getLogger(__name__)
15+
try:
16+
from csv import reader
17+
18+
import xlrd
19+
except (OSError, ImportError) as err: # pragma: no cover
20+
_logger.error(err)
21+
22+
try:
23+
import chardet
24+
except ImportError:
25+
_logger.warning("chardet library not found, please install it from http://pypi.python.org/pypi/chardet")
26+
1327

1428
class AccountStatementImport(models.TransientModel):
1529
_name = "account.statement.import"
@@ -151,39 +165,84 @@ def import_file_button(self, wizard_data=None):
151165
return result
152166

153167
def split_base64_excel(self, header_rows_count, rows_per_file_limit):
154-
"""Split Excel file into multiple parts to avoid overloading the system.
155-
Returns empty list if file is not a valid Excel or if split is not needed.
156-
Only processes rows where the date column is not empty."""
168+
"""Split Excel/CSV file into multiple parts."""
157169
if not self.statement_file:
158170
return []
159171

160172
output_base64_list = []
173+
mapping = self.sheet_mapping_id
174+
journal = self.env["account.journal"].browse(self.env.context.get("journal_id"))
175+
currency_code = (journal.currency_id or journal.company_id.currency_id).name
176+
161177
try:
162178
file_bytes = base64.b64decode(self.statement_file)
163179
read_buffer = BytesIO(file_bytes)
180+
181+
# Try openpyxl (xlsx)
164182
input_workbook = load_workbook(read_buffer)
165183
input_worksheet = input_workbook.active
184+
all_rows = list(input_worksheet.rows)
185+
csv_or_xlsx = (input_workbook, input_worksheet)
186+
166187
except Exception:
167-
return [self.statement_file]
188+
try:
189+
# Try xlrd (xls)
190+
workbook = xlrd.open_workbook(
191+
file_contents=file_bytes,
192+
encoding_override=(mapping.file_encoding if mapping.file_encoding else None),
193+
)
194+
sheet = workbook.sheet_by_index(0)
195+
csv_or_xlsx = (workbook, sheet)
196+
197+
except Exception:
198+
# Try CSV
199+
csv_options = {}
200+
csv_delimiter = mapping._get_column_delimiter_character()
201+
if csv_delimiter:
202+
csv_options["delimiter"] = csv_delimiter
203+
if mapping.quotechar:
204+
csv_options["quotechar"] = mapping.quotechar
205+
206+
try:
207+
decoded = file_bytes.decode(mapping.file_encoding or "utf-8")
208+
except UnicodeDecodeError:
209+
detected_encoding = chardet.detect(file_bytes).get("encoding", False)
210+
if not detected_encoding:
211+
raise UserError(self.env._("No valid encoding was found for the attached file")) from None
212+
decoded = file_bytes.decode(detected_encoding)
213+
214+
csv_reader = reader(StringIO(decoded), **csv_options)
215+
csv_or_xlsx = csv_reader
216+
all_rows = [row for row in list(csv_or_xlsx) if any(cell for cell in row)]
217+
parser = self.env["account.statement.import.sheet.parser"]
218+
219+
# Only parse header and rows for Excel files (when all_rows is not yet populated)
220+
if not all_rows:
221+
header = parser.parse_header(csv_or_xlsx, mapping)
222+
columns = dict()
223+
for column_name in parser._get_column_names():
224+
columns[column_name] = parser._get_column_indexes(header, column_name, mapping)
225+
data = csv_or_xlsx, self.statement_file
226+
all_rows = parser._parse_rows(mapping, currency_code, data, columns)
227+
else:
228+
# For CSV files, we already have all_rows, convert list to iterator for parse_header
229+
header = parser.parse_header(iter(all_rows), mapping)
168230

169-
all_rows = list(input_worksheet.rows)
170231
if not all_rows:
171232
return []
172233

173234
header_rows = all_rows[:header_rows_count]
174235
data_rows = all_rows[header_rows_count:]
175236

176-
# Get the date column index from the sheet mapping using the parser's method
177-
parser = self.env["account.statement.import.sheet.parser"]
178-
header = parser.parse_header((input_workbook, input_worksheet), self.sheet_mapping_id)
179237
try:
180-
date_column_indexes = parser._get_column_indexes(header, "timestamp_column", self.sheet_mapping_id)
238+
date_column_indexes = parser._get_column_indexes(header, "timestamp_column", mapping)
181239
date_column_index = date_column_indexes[0] if date_column_indexes else None
182240
except Exception as e:
183241
raise UserError(_("Error importing bank statement: %s") % str(e))
184242

185-
# Filter out rows where the date column is empty
186-
data_rows = self._filter_rows_with_date(data_rows, date_column_index)
243+
# Filter rows with empty date
244+
if date_column_index is not None:
245+
data_rows = [r for r in data_rows if len(r) > date_column_index and r[date_column_index]]
187246

188247
start_row_index = 0
189248
total_data_rows = len(data_rows)
@@ -196,20 +255,20 @@ def split_base64_excel(self, header_rows_count, rows_per_file_limit):
196255
output_worksheet = output_workbook.active
197256

198257
for header_row in header_rows:
199-
row_values = [cell.value for cell in header_row]
200-
output_worksheet.append(row_values)
258+
output_worksheet.append(header_row)
201259

202260
for data_row in rows_for_current_part:
203-
row_values = [cell.value for cell in data_row]
204-
output_worksheet.append(row_values)
261+
output_worksheet.append(data_row)
205262

206263
write_buffer = BytesIO()
207264
output_workbook.save(write_buffer)
208265
output_bytes = write_buffer.getvalue()
266+
209267
base64_content = base64.b64encode(output_bytes).decode("utf-8")
210268
output_base64_list.append(base64_content)
211269

212270
start_row_index = end_row_index
271+
213272
return output_base64_list
214273

215274
def _filter_rows_with_date(self, data_rows, date_column_index):

0 commit comments

Comments
 (0)