33
44
55import base64
6- from io import BytesIO
6+ import logging
7+ from io import BytesIO , StringIO
78
89from markupsafe import Markup
910from odoo import _ , models
1011from odoo .exceptions import UserError
1112from openpyxl import Workbook , load_workbook
1213
_logger = logging.getLogger(__name__)

# Optional dependencies for the additional statement formats. The module must
# still import cleanly when they are absent, so failures are logged instead of
# raised. csv.reader is grouped here because it is only used by the same
# fallback parsing paths as xlrd.
try:
    from csv import reader

    import xlrd
except (OSError, ImportError) as err:  # pragma: no cover
    _logger.error(err)

# chardet is used as a last-resort encoding sniffer for CSV payloads; a
# missing install only degrades behaviour, so warn rather than fail.
try:
    import chardet
except ImportError:
    _logger.warning("chardet library not found, please install it from http://pypi.python.org/pypi/chardet")
26+
1327
1428class AccountStatementImport (models .TransientModel ):
1529 _name = "account.statement.import"
@@ -151,39 +165,84 @@ def import_file_button(self, wizard_data=None):
151165 return result
152166
153167 def split_base64_excel (self , header_rows_count , rows_per_file_limit ):
154- """Split Excel file into multiple parts to avoid overloading the system.
155- Returns empty list if file is not a valid Excel or if split is not needed.
156- Only processes rows where the date column is not empty."""
168+ """Split Excel/CSV file into multiple parts."""
157169 if not self .statement_file :
158170 return []
159171
160172 output_base64_list = []
173+ mapping = self .sheet_mapping_id
174+ journal = self .env ["account.journal" ].browse (self .env .context .get ("journal_id" ))
175+ currency_code = (journal .currency_id or journal .company_id .currency_id ).name
176+
161177 try :
162178 file_bytes = base64 .b64decode (self .statement_file )
163179 read_buffer = BytesIO (file_bytes )
180+
181+ # Try openpyxl (xlsx)
164182 input_workbook = load_workbook (read_buffer )
165183 input_worksheet = input_workbook .active
184+ all_rows = list (input_worksheet .rows )
185+ csv_or_xlsx = (input_workbook , input_worksheet )
186+
166187 except Exception :
167- return [self .statement_file ]
188+ try :
189+ # Try xlrd (xls)
190+ workbook = xlrd .open_workbook (
191+ file_contents = file_bytes ,
192+ encoding_override = (mapping .file_encoding if mapping .file_encoding else None ),
193+ )
194+ sheet = workbook .sheet_by_index (0 )
195+ csv_or_xlsx = (workbook , sheet )
196+
197+ except Exception :
198+ # Try CSV
199+ csv_options = {}
200+ csv_delimiter = mapping ._get_column_delimiter_character ()
201+ if csv_delimiter :
202+ csv_options ["delimiter" ] = csv_delimiter
203+ if mapping .quotechar :
204+ csv_options ["quotechar" ] = mapping .quotechar
205+
206+ try :
207+ decoded = file_bytes .decode (mapping .file_encoding or "utf-8" )
208+ except UnicodeDecodeError :
209+ detected_encoding = chardet .detect (file_bytes ).get ("encoding" , False )
210+ if not detected_encoding :
211+ raise UserError (self .env ._ ("No valid encoding was found for the attached file" )) from None
212+ decoded = file_bytes .decode (detected_encoding )
213+
214+ csv_reader = reader (StringIO (decoded ), ** csv_options )
215+ csv_or_xlsx = csv_reader
216+ all_rows = [row for row in list (csv_or_xlsx ) if any (cell for cell in row )]
217+ parser = self .env ["account.statement.import.sheet.parser" ]
218+
219+ # Only parse header and rows for Excel files (when all_rows is not yet populated)
220+ if not all_rows :
221+ header = parser .parse_header (csv_or_xlsx , mapping )
222+ columns = dict ()
223+ for column_name in parser ._get_column_names ():
224+ columns [column_name ] = parser ._get_column_indexes (header , column_name , mapping )
225+ data = csv_or_xlsx , self .statement_file
226+ all_rows = parser ._parse_rows (mapping , currency_code , data , columns )
227+ else :
228+ # For CSV files, we already have all_rows, convert list to iterator for parse_header
229+ header = parser .parse_header (iter (all_rows ), mapping )
168230
169- all_rows = list (input_worksheet .rows )
170231 if not all_rows :
171232 return []
172233
173234 header_rows = all_rows [:header_rows_count ]
174235 data_rows = all_rows [header_rows_count :]
175236
176- # Get the date column index from the sheet mapping using the parser's method
177- parser = self .env ["account.statement.import.sheet.parser" ]
178- header = parser .parse_header ((input_workbook , input_worksheet ), self .sheet_mapping_id )
179237 try :
180- date_column_indexes = parser ._get_column_indexes (header , "timestamp_column" , self . sheet_mapping_id )
238+ date_column_indexes = parser ._get_column_indexes (header , "timestamp_column" , mapping )
181239 date_column_index = date_column_indexes [0 ] if date_column_indexes else None
182240 except Exception as e :
183241 raise UserError (_ ("Error importing bank statement: %s" ) % str (e ))
184242
185- # Filter out rows where the date column is empty
186- data_rows = self ._filter_rows_with_date (data_rows , date_column_index )
243+ # Filter rows with empty date
244+ if date_column_index is not None :
245+ data_rows = [r for r in data_rows if len (r ) > date_column_index and r [date_column_index ]]
187246
188247 start_row_index = 0
189248 total_data_rows = len (data_rows )
@@ -196,20 +255,20 @@ def split_base64_excel(self, header_rows_count, rows_per_file_limit):
196255 output_worksheet = output_workbook .active
197256
198257 for header_row in header_rows :
199- row_values = [cell .value for cell in header_row ]
200- output_worksheet .append (row_values )
258+ output_worksheet .append (header_row )
201259
202260 for data_row in rows_for_current_part :
203- row_values = [cell .value for cell in data_row ]
204- output_worksheet .append (row_values )
261+ output_worksheet .append (data_row )
205262
206263 write_buffer = BytesIO ()
207264 output_workbook .save (write_buffer )
208265 output_bytes = write_buffer .getvalue ()
266+
209267 base64_content = base64 .b64encode (output_bytes ).decode ("utf-8" )
210268 output_base64_list .append (base64_content )
211269
212270 start_row_index = end_row_index
271+
213272 return output_base64_list
214273
215274 def _filter_rows_with_date (self , data_rows , date_column_index ):
0 commit comments