Skip to content

Commit 27ef0ef

Browse files
committed
order
1 parent eaa945d commit 27ef0ef

11 files changed

Lines changed: 114 additions & 90 deletions

File tree

il_supermarket_parsers/documents/base.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,24 @@
11
from abc import ABC, abstractmethod
22
from typing import List
33
import os
4-
from il_supermarket_parsers.utils import build_value, get_root
4+
from il_supermarket_parsers.utils import build_value, get_root_and_search
55

66

77
class XmlBaseConverter(ABC):
88
"""parse the xml document"""
99

10+
@abstractmethod
1011
def convert(self, found_store, file_name, **kwarg):
1112
"""parse file to data frame"""
12-
source_file = os.path.join(found_store, file_name)
13-
root, root_store = get_root(source_file, self.list_key, self.roots)
1413

15-
data = self._phrse(
16-
root,
17-
found_store,
18-
file_name,
19-
root_store,
20-
**kwarg,
21-
)
22-
return self.reduce_size(data)
14+
@abstractmethod
15+
def validate_succussful_extraction(
16+
self, data, source_file, ignore_missing_columns=None
17+
):
18+
"""validate column requested"""
2319

2420

25-
class BaseXMLParser(XmlBaseConverter):
21+
class BaseXMLParser(XmlBaseConverter, ABC):
2622
"""parse the xml document"""
2723

2824
def __init__(
@@ -43,10 +39,25 @@ def build_value(self, name, no_content):
4339
"""get the value"""
4440
return build_value(name, self.additional_constant, no_content=no_content)
4541

42+
def convert(self, found_store, file_name, **kwarg):
43+
"""parse file to data frame"""
44+
source_file = os.path.join(found_store, file_name)
45+
root, root_store = get_root_and_search(source_file, self.list_key, self.roots)
46+
47+
data = self._phrse(
48+
root,
49+
found_store,
50+
file_name,
51+
root_store,
52+
**kwarg,
53+
)
54+
return self.reduce_size(data)
55+
4656
def reduce_size(self, data):
4757
"""reduce the size"""
4858
return data
4959

60+
@abstractmethod
5061
def _phrse(
5162
self,
5263
root,
@@ -55,4 +66,4 @@ def _phrse(
5566
root_store,
5667
**kwarg,
5768
):
58-
pass
69+
"""parse file to response"""
Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,33 @@
1-
import pandas as pd
2-
from il_supermarket_parsers.utils import (
3-
count_tag_in_xml,
4-
collect_unique_keys_from_xml,
5-
collect_unique_columns_from_nested_json,
6-
)
1+
import os
72
from .base import XmlBaseConverter
3+
from ..utils import get_root
84

95

106
class ConditionalXmlDataFrameConverter(XmlBaseConverter):
117
"""parse the xml document"""
128

13-
def __init__(self, try_parser, catch_parser):
14-
self.try_parser = try_parser
15-
self.catch_parser = catch_parser
9+
def __init__(self, option_a, option_b, root_value):
10+
self.option_a = option_a
11+
self.option_b = option_b
12+
self.root_value = root_value
1613

1714
def convert(self, found_store, file_name, **kwarg):
1815
"""convert with option_a when the xml root tag equals root_value, otherwise with option_b"""
19-
try:
20-
return self.try_parser.convert(found_store, file_name, **kwarg)
21-
except:
22-
return self.catch_parser.convert(found_store, file_name, **kwarg)
16+
root = get_root(os.path.join(found_store, file_name))
17+
if root.tag == self.root_value:
18+
return self.option_a.convert(found_store, file_name, **kwarg)
19+
return self.option_b.convert(found_store, file_name, **kwarg)
2320

2421
def validate_succussful_extraction(
2522
self, data, source_file, ignore_missing_columns=None
2623
):
2724
"""validate column requested"""
28-
try:
29-
self.try_parser.validate_succussful_extraction(
25+
root = get_root(source_file)
26+
if root.tag == self.root_value:
27+
self.option_a.validate_succussful_extraction(
3028
data, source_file, ignore_missing_columns
3129
)
32-
except ValueError:
33-
self.catch_parser.validate_succussful_extraction(
30+
else:
31+
self.option_b.validate_succussful_extraction(
3432
data, source_file, ignore_missing_columns
3533
)

il_supermarket_parsers/documents/xml_dataframe_parser.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,15 @@ class XmlDataFrameConverter(BaseXMLParser):
1111
"""parse the xml document"""
1212

1313
def reduce_size(self, data):
14+
"""reduce the size by masking values that repeat the previous row in each column"""
1415
for col in data.columns:
1516
data[col] = data[col].mask(data[col] == data[col].shift())
1617
return data
1718

1819
def validate_succussful_extraction(
1920
self, data, source_file, ignore_missing_columns=None
2021
):
22+
"""validate column requested"""
2123
# if there is an empty file
2224
# we expected it to return none
2325
tag_count = count_tag_in_xml(source_file, self.id_field)

il_supermarket_parsers/documents/xml_dataframe_subroot_praser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def __init__(
1313
sub_roots=None,
1414
list_sub_key="",
1515
ignore_column=None,
16-
last_mile=[],
16+
last_mile=None,
1717
**additional_constant,
1818
):
1919
super().__init__(
@@ -24,7 +24,7 @@ def __init__(
2424
additional_constant=additional_constant,
2525
)
2626
self.sub_roots = sub_roots if sub_roots else []
27-
self.last_mile = last_mile
27+
self.last_mile = last_mile if last_mile else []
2828
self.list_sub_key = list_sub_key
2929

3030
def validate_succussful_extraction(

il_supermarket_parsers/parsers/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from il_supermarket_parsers.engines.base import BaseFileConverter
22
from .bareket import BareketFileConverter
3+
from .city_market import CityMarketGivatayim, CityMarketKiryatGat, CityMarketShops
34
from .confix import CofixFileConverter
45
from .mahsani_a_shuk import MahsaniAShukPromoFileConverter
56
from .salach_dabach import SalachDabachFileConverter
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
from il_supermarket_parsers.engines.base import BaseFileConverter
2+
from il_supermarket_parsers.documents import (
3+
XmlDataFrameConverter,
4+
SubRootedXmlDataFrameConverter,
5+
ConditionalXmlDataFrameConverter,
6+
)
7+
8+
9+
class CityMarketGivatayim(BaseFileConverter):
10+
"""
11+
File converter for City Market Givatayim supermarket chain.
12+
Extends: BaseFileConverter
13+
"""
14+
15+
16+
class CityMarketKiryatGat(BaseFileConverter):
17+
"""
18+
File converter for City Market Kiryat Gat supermarket chain.
19+
Extends: BaseFileConverter
20+
"""
21+
22+
23+
class CityMarketShops(BaseFileConverter):
24+
"""
25+
File converter for City Market Shops supermarket chain.
26+
Extends: BaseFileConverter
27+
"""
28+
29+
def __init__(self):
30+
super().__init__(
31+
promofull_parser=ConditionalXmlDataFrameConverter(
32+
option_a=XmlDataFrameConverter(
33+
list_key="Promotions",
34+
id_field="PromotionId",
35+
roots=["StoreId", "SubChainId", "ChainId"],
36+
date_columns=["PromotionUpdateDate"],
37+
ignore_column=["DllVerNo", "BikoretNo"],
38+
),
39+
option_b=XmlDataFrameConverter(
40+
list_key="Promotions",
41+
id_field="PromotionId",
42+
roots=[],
43+
date_columns=["PromotionUpdateDate"],
44+
ignore_column=["DllVerNo", "BikoretNo"],
45+
),
46+
root_value="Root",
47+
),
48+
stores_parser=SubRootedXmlDataFrameConverter(
49+
list_key="SubChainsXMLObject",
50+
sub_roots=["SubChainId", "SubChainName"],
51+
id_field="StoreId",
52+
list_sub_key="Store",
53+
roots=["ChainId", "ChainName", "LastUpdateDate", "LastUpdateTime"],
54+
ignore_column=["XmlDocVersion", "DllVerNo"],
55+
last_mile=["Stores", "SubChainStoresXMLObject"],
56+
),
57+
)

il_supermarket_parsers/parsers/other.py

Lines changed: 0 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,5 @@
11
from il_supermarket_parsers.engines.base import BaseFileConverter
22
from .confix import CofixFileConverter
3-
from il_supermarket_parsers.documents import (
4-
XmlDataFrameConverter,
5-
SubRootedXmlDataFrameConverter,
6-
ConditionalXmlDataFrameConverter,
7-
)
83

94

105
class YaynoBitanFileConverter(BaseFileConverter):
@@ -173,53 +168,3 @@ class ZolVebegadolFileConverter(BaseFileConverter):
173168
File converter for Zol Vebegadol supermarket chain.
174169
Extends: BaseFileConverter
175170
"""
176-
177-
178-
class CityMarketGivatayim(BaseFileConverter):
179-
"""
180-
File converter for Dor Alon supermarket chain.
181-
Extends: CofixFileConverter
182-
"""
183-
184-
185-
class CityMarketKiryatGat(BaseFileConverter):
186-
"""
187-
File converter for Dor Alon supermarket chain.
188-
Extends: CofixFileConverter
189-
"""
190-
191-
192-
class CityMarketShops(BaseFileConverter):
193-
"""
194-
File converter for Dor Alon supermarket chain.
195-
Extends: CofixFileConverter
196-
"""
197-
198-
def __init__(self):
199-
super().__init__(
200-
promofull_parser=ConditionalXmlDataFrameConverter(
201-
try_parser=XmlDataFrameConverter(
202-
list_key="Promotions",
203-
id_field="PromotionId",
204-
roots=["StoreId", "SubChainId", "ChainId"],
205-
date_columns=["PromotionUpdateDate"],
206-
ignore_column=["DllVerNo", "BikoretNo"],
207-
),
208-
catch_parser=XmlDataFrameConverter(
209-
list_key="Promotions",
210-
id_field="PromotionId",
211-
roots=[],
212-
date_columns=["PromotionUpdateDate"],
213-
ignore_column=["DllVerNo", "BikoretNo"],
214-
),
215-
),
216-
stores_parser=SubRootedXmlDataFrameConverter(
217-
list_key="SubChainsXMLObject",
218-
sub_roots=["SubChainId", "SubChainName"],
219-
id_field="StoreId",
220-
list_sub_key="Store",
221-
roots=["ChainId", "ChainName", "LastUpdateDate", "LastUpdateTime"],
222-
ignore_column=["XmlDocVersion", "DllVerNo"],
223-
last_mile=["Stores", "SubChainStoresXMLObject"],
224-
),
225-
)

il_supermarket_parsers/tests/test_parser_factory.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
from il_supermarket_parsers.parser_factory import ParserFactory
21
from il_supermarket_scarper.scrappers_factory import ScraperFactory
2+
from il_supermarket_parsers.parser_factory import ParserFactory
33

44

55
def test_enum_are_aligned():
6+
"""make sure that the parser and scraper enums are aligned"""
67
assert len(ParserFactory) == len(ScraperFactory)
78
assert sorted(ParserFactory.__members__.keys()) == sorted(
89
ScraperFactory.__members__.keys()

il_supermarket_parsers/utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from .xml_utils import (
1414
get_root,
1515
build_value,
16+
get_root_and_search,
1617
count_tag_in_xml,
1718
collect_unique_keys_from_xml,
1819
)

il_supermarket_parsers/utils/data_loader.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ def __init__(
5353

5454
def _format_datetime(self, date):
5555
"""format the datetime"""
56+
if len(date) == 8:
57+
# if it includes only the date (no time part)
58+
return datetime.datetime.strptime(date, "%Y%m%d")
5659
if len(date) == 12:
5760
# if doesn't include seconds
5861
return datetime.datetime.strptime(date, "%Y%m%d%H%M")

0 commit comments

Comments
 (0)