Skip to content

Commit 6e3b26d

Browse files
authored
next version of scrapers
2 parents 0b0a4b6 + 74f9123 commit 6e3b26d

3 files changed

Lines changed: 44 additions & 11 deletions

File tree

il_supermarket_parsers/raw_parsing_pipeline.py

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import os
2+
from typing import List
23
from tqdm import tqdm
34
from .parser_factory import ParserFactory
4-
from .utils import DataLoader
5+
from .utils import DataLoader, DumpFile
56

67

78
class RawParsingPipeline:
@@ -23,7 +24,7 @@ def process(self, limit=None):
2324
self.file_type.lower() + "_" + self.store_name.lower() + ".csv",
2425
)
2526

26-
files_to_process = DataLoader(
27+
files_to_process: List[DumpFile] = DataLoader(
2728
self.folder,
2829
store_names=[self.store_name],
2930
files_types=[self.file_type],
@@ -34,18 +35,38 @@ def process(self, limit=None):
3435
total=len(files_to_process),
3536
desc=f"Processing {self.file_type}@{self.store_name}",
3637
):
37-
parser = parser_class()
38-
df = parser.read(file)
3938

40-
if not os.path.exists(create_csv):
41-
df.to_csv(create_csv, index=False, mode="w", header=True)
42-
else:
43-
df.to_csv(create_csv, index=False, mode="a", header=False)
44-
45-
del df
39+
execution_log = []
40+
try:
41+
parser = parser_class()
42+
df = parser.read(file)
43+
44+
if not os.path.exists(create_csv):
45+
df.to_csv(create_csv, index=False, mode="w", header=True)
46+
else:
47+
df.to_csv(create_csv, index=False, mode="a", header=False)
48+
49+
del df
50+
51+
execution_log.append(
52+
{
53+
"status": True,
54+
**file.to_log_dict(),
55+
}
56+
)
57+
58+
except Exception as error: # pylint: disable=broad-exception-caught
59+
execution_log.append(
60+
{
61+
"status": False,
62+
"error": error,
63+
**file.to_log_dict(),
64+
}
65+
)
4666

4767
return {
4868
"status": True,
69+
"execution_log": execution_log,
4970
"file_was_created": len(files_to_process) > 0,
5071
"file_created_path": create_csv,
5172
"files_to_process": [dumpfile.file_name for dumpfile in files_to_process],

il_supermarket_parsers/utils/data_loader.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,18 @@ def get_full_path(self):
2525
"""get full file path"""
2626
return os.path.join(self.store_folder, self.file_name)
2727

28+
def to_log_dict(self):
29+
"""return the object as dict"""
30+
return {
31+
"store_folder": self.store_folder,
32+
"file_name": self.file_name,
33+
"prefix_file_name": self.prefix_file_name,
34+
"extracted_store_number": self.extracted_store_number,
35+
"extracted_chain_id": self.extracted_chain_id,
36+
"extracted_date": self.extracted_date.strftime("%Y-%m-%d %H:%M:%S"),
37+
"detected_filetype": self.detected_filetype.name,
38+
}
39+
2840

2941
class DataLoader:
3042
"""class for loading dump files from the folder"""

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
pandas==2.2.2
22
lxml==5.2.1
33
pymongo==4.6.3
4-
il-supermarket-scraper>=0.5.0
4+
il-supermarket-scraper>=0.5.1
55
tqdm==4.66

0 commit comments

Comments
 (0)