Skip to content

Commit 7723260

Browse files
authored
fix: Separate 'changed' and 'due' ad selectors (#442)
This commit implements a new 'changed' selector for the --ads option that publishes only ads that have been modified since their last publication. The 'due' selector now only republishes ads based on the time interval, without considering content changes. The implementation allows combining selectors with commas (e.g., --ads=changed,due) to publish both changed and due ads. Documentation and translations have been updated accordingly. Fixes #411
1 parent 6b3da5b commit 7723260

File tree

4 files changed

+198
-43
lines changed

4 files changed

+198
-43
lines changed

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,12 +192,14 @@ Commands:
192192
version - displays the application version
193193
194194
Options:
195-
--ads=all|due|new|<id(s)> (publish) - specifies which ads to (re-)publish (DEFAULT: due)
195+
--ads=all|due|new|changed|<id(s)> (publish) - specifies which ads to (re-)publish (DEFAULT: due)
196196
Possible values:
197197
* all: (re-)publish all ads ignoring republication_interval
198198
* due: publish all new ads and republish ads according the republication_interval
199199
* new: only publish new ads (i.e. ads that have no id in the config file)
200+
* changed: only publish ads that have been modified since last publication
200201
* <id(s)>: provide one or several ads by ID to (re-)publish, like e.g. "--ads=1,2,3" ignoring republication_interval
202+
* Combinations: You can combine multiple selectors with commas, e.g. "--ads=changed,due" to publish both changed and due ads
201203
--ads=all|new|<id(s)> (download) - specifies which ads to download (DEFAULT: new)
202204
Possible values:
203205
* all: downloads all ads from your profile

src/kleinanzeigen_bot/__init__.py

Lines changed: 74 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,9 @@ async def run(self, args:list[str]) -> None:
8686
self.configure_file_logging()
8787
self.load_config()
8888

89-
if not (self.ads_selector in {'all', 'new', 'due'} or re.compile(r'\d+[,\d+]*').search(self.ads_selector)):
89+
if not (self.ads_selector in {'all', 'new', 'due', 'changed'} or
90+
any(selector in self.ads_selector.split(',') for selector in ('all', 'new', 'due', 'changed')) or
91+
re.compile(r'\d+[,\d+]*').search(self.ads_selector)):
9092
LOG.warning('You provided no ads selector. Defaulting to "due".')
9193
self.ads_selector = 'due'
9294

@@ -148,12 +150,14 @@ def show_help(self) -> None:
148150
version - Zeigt die Version der Anwendung an
149151
150152
Optionen:
151-
--ads=all|due|new|<id(s)> (publish) - Gibt an, welche Anzeigen (erneut) veröffentlicht werden sollen (STANDARD: due)
153+
--ads=all|due|new|changed|<id(s)> (publish) - Gibt an, welche Anzeigen (erneut) veröffentlicht werden sollen (STANDARD: due)
152154
Mögliche Werte:
153155
* all: Veröffentlicht alle Anzeigen erneut, ignoriert republication_interval
154156
* due: Veröffentlicht alle neuen Anzeigen und erneut entsprechend dem republication_interval
155157
* new: Veröffentlicht nur neue Anzeigen (d.h. Anzeigen ohne ID in der Konfigurationsdatei)
158+
* changed: Veröffentlicht nur Anzeigen, die seit der letzten Veröffentlichung geändert wurden
156159
* <id(s)>: Gibt eine oder mehrere Anzeigen-IDs an, die veröffentlicht werden sollen, z. B. "--ads=1,2,3", ignoriert republication_interval
160+
* Kombinationen: Sie können mehrere Selektoren mit Kommas kombinieren, z. B. "--ads=changed,due" um sowohl geänderte als auch fällige Anzeigen zu veröffentlichen
157161
--ads=all|new|<id(s)> (download) - Gibt an, welche Anzeigen heruntergeladen werden sollen (STANDARD: new)
158162
Mögliche Werte:
159163
* all: Lädt alle Anzeigen aus Ihrem Profil herunter
@@ -180,12 +184,14 @@ def show_help(self) -> None:
180184
version - displays the application version
181185
182186
Options:
183-
--ads=all|due|new|<id(s)> (publish) - specifies which ads to (re-)publish (DEFAULT: due)
187+
--ads=all|due|new|changed|<id(s)> (publish) - specifies which ads to (re-)publish (DEFAULT: due)
184188
Possible values:
185189
* all: (re-)publish all ads ignoring republication_interval
186190
* due: publish all new ads and republish ads according the republication_interval
187191
* new: only publish new ads (i.e. ads that have no id in the config file)
192+
* changed: only publish ads that have been modified since last publication
188193
* <id(s)>: provide one or several ads by ID to (re-)publish, like e.g. "--ads=1,2,3" ignoring republication_interval
194+
* Combinations: You can combine multiple selectors with commas, e.g. "--ads=changed,due" to publish both changed and due ads
189195
--ads=all|new|<id(s)> (download) - specifies which ads to download (DEFAULT: new)
190196
Possible values:
191197
* all: downloads all ads from your profile
@@ -261,10 +267,12 @@ def configure_file_logging(self) -> None:
261267
LOG.info("App version: %s", self.get_version())
262268
LOG.info("Python version: %s", sys.version)
263269

264-
def __check_ad_republication(self, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any], ad_file_relative: str) -> bool:
270+
def __check_ad_republication(self, ad_cfg: dict[str, Any], ad_file_relative: str) -> bool:
265271
"""
266-
Check if an ad needs to be republished based on changes and republication interval.
267-
Returns True if the ad should be republished.
272+
Check if an ad needs to be republished based on republication interval.
273+
Returns True if the ad should be republished based on the interval.
274+
275+
Note: This method no longer checks for content changes. Use __check_ad_changed for that.
268276
"""
269277
if ad_cfg["updated_on"]:
270278
last_updated_on = parse_datetime(ad_cfg["updated_on"])
@@ -276,35 +284,44 @@ def __check_ad_republication(self, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str
276284
if not last_updated_on:
277285
return True
278286

279-
# Check for changes first
280-
if ad_cfg["id"]:
281-
# Calculate hash on original config to match what was stored
282-
current_hash = calculate_content_hash(ad_cfg_orig)
283-
stored_hash = ad_cfg_orig.get("content_hash")
284-
285-
LOG.debug("Hash comparison for [%s]:", ad_file_relative)
286-
LOG.debug(" Stored hash: %s", stored_hash)
287-
LOG.debug(" Current hash: %s", current_hash)
288-
289-
if stored_hash and current_hash == stored_hash:
290-
# No changes - check republication interval
291-
ad_age = datetime.utcnow() - last_updated_on
292-
if ad_age.days <= ad_cfg["republication_interval"]:
293-
LOG.info(
294-
" -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days",
295-
ad_file_relative,
296-
ad_age.days,
297-
ad_cfg["republication_interval"]
298-
)
299-
return False
300-
else:
301-
LOG.info("Changes detected in ad [%s], will republish", ad_file_relative)
302-
# Update hash in original configuration
303-
ad_cfg_orig["content_hash"] = current_hash
304-
return True
287+
# Check republication interval
288+
ad_age = datetime.utcnow() - last_updated_on
289+
if ad_age.days <= ad_cfg["republication_interval"]:
290+
LOG.info(
291+
" -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days",
292+
ad_file_relative,
293+
ad_age.days,
294+
ad_cfg["republication_interval"]
295+
)
296+
return False
305297

306298
return True
307299

300+
def __check_ad_changed(self, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any], ad_file_relative: str) -> bool:
301+
"""
302+
Check if an ad has been changed since last publication.
303+
Returns True if the ad has been changed.
304+
"""
305+
if not ad_cfg["id"]:
306+
# New ads are not considered "changed"
307+
return False
308+
309+
# Calculate hash on original config to match what was stored
310+
current_hash = calculate_content_hash(ad_cfg_orig)
311+
stored_hash = ad_cfg_orig.get("content_hash")
312+
313+
LOG.debug("Hash comparison for [%s]:", ad_file_relative)
314+
LOG.debug(" Stored hash: %s", stored_hash)
315+
LOG.debug(" Current hash: %s", current_hash)
316+
317+
if stored_hash and current_hash != stored_hash:
318+
LOG.info("Changes detected in ad [%s], will republish", ad_file_relative)
319+
# Update hash in original configuration
320+
ad_cfg_orig["content_hash"] = current_hash
321+
return True
322+
323+
return False
324+
308325
def load_ads(self, *, ignore_inactive:bool = True, check_id:bool = True) -> list[tuple[str, dict[str, Any], dict[str, Any]]]:
309326
LOG.info("Searching for ad config files...")
310327

@@ -320,6 +337,8 @@ def load_ads(self, *, ignore_inactive:bool = True, check_id:bool = True) -> list
320337

321338
ids = []
322339
use_specific_ads = False
340+
selectors = self.ads_selector.split(',')
341+
323342
if re.compile(r'\d+[,\d+]*').search(self.ads_selector):
324343
ids = [int(n) for n in self.ads_selector.split(',')]
325344
use_specific_ads = True
@@ -343,13 +362,31 @@ def load_ads(self, *, ignore_inactive:bool = True, check_id:bool = True) -> list
343362
LOG.info(" -> SKIPPED: ad [%s] is not in list of given ids.", ad_file_relative)
344363
continue
345364
else:
346-
if self.ads_selector == "new" and ad_cfg["id"] and check_id:
365+
# Check if ad should be included based on selectors
366+
should_include = False
367+
368+
# Check for 'changed' selector
369+
if "changed" in selectors and self.__check_ad_changed(ad_cfg, ad_cfg_orig, ad_file_relative):
370+
should_include = True
371+
372+
# Check for 'new' selector
373+
if "new" in selectors and (not ad_cfg["id"] or not check_id):
374+
should_include = True
375+
elif "new" in selectors and ad_cfg["id"] and check_id:
347376
LOG.info(" -> SKIPPED: ad [%s] is not new. already has an id assigned.", ad_file_relative)
348-
continue
349377

350-
if self.ads_selector == "due":
351-
if not self.__check_ad_republication(ad_cfg, ad_cfg_orig, ad_file_relative):
352-
continue
378+
# Check for 'due' selector
379+
if "due" in selectors:
380+
# For 'due' selector, check if the ad is due for republication based on interval
381+
if self.__check_ad_republication(ad_cfg, ad_file_relative):
382+
should_include = True
383+
384+
# Check for 'all' selector (always include)
385+
if "all" in selectors:
386+
should_include = True
387+
388+
if not should_include:
389+
continue
353390

354391
# Get description with prefix/suffix from ad config if present, otherwise use defaults
355392
ad_cfg["description"] = self.__get_description_with_affixes(ad_cfg)

src/kleinanzeigen_bot/resources/translations.de.yaml

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@ kleinanzeigen_bot/__init__.py:
2424
"App version: %s": "App Version: %s"
2525
"Python version: %s": "Python Version: %s"
2626

27+
__check_ad_changed:
28+
"Hash comparison for [%s]:": "Hash-Vergleich für [%s]:"
29+
" Stored hash: %s": " Gespeicherter Hash: %s"
30+
" Current hash: %s": " Aktueller Hash: %s"
31+
"Changes detected in ad [%s], will republish": "Änderungen in Anzeige [%s] erkannt, wird erneut veröffentlicht"
32+
2733
load_ads:
2834
"Searching for ad config files...": "Suche nach Anzeigendateien..."
2935
" -> found %s": "-> %s gefunden"
@@ -90,11 +96,8 @@ kleinanzeigen_bot/__init__.py:
9096
" -> uploading image [%s]": " -> Lade Bild [%s] hoch"
9197

9298
__check_ad_republication:
93-
"Hash comparison for [%s]:": "Hash-Vergleich für [%s]:"
94-
" Stored hash: %s": " Gespeicherter Hash: %s"
95-
" Current hash: %s": " Aktueller Hash: %s"
96-
"Changes detected in ad [%s], will republish": "Änderungen in Anzeige [%s] erkannt, wird neu veröffentlicht"
97-
" -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days": " -> ÜBERSPRUNGEN: Anzeige [%s] wurde zuletzt vor %d Tagen veröffentlicht. Eine erneute Veröffentlichung ist nur alle %s Tage erforderlich"
99+
"Changes detected in ad [%s], will republish": "Änderungen in Anzeige [%s] erkannt, wird erneut veröffentlicht"
100+
" -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days": " -> ÜBERSPRUNGEN: Anzeige [%s] wurde zuletzt vor %d Tagen veröffentlicht. Erneute Veröffentlichung ist erst nach %s Tagen erforderlich"
98101

99102
__set_special_attributes:
100103
"Found %i special attributes": "%i spezielle Attribute gefunden"

tests/unit/test_init.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ class TestKleinanzeigenBotCommandLine:
199199
(["publish", "--keep-old"], "publish", "due", True),
200200
(["publish", "--ads=all", "--keep-old"], "publish", "all", True),
201201
(["download", "--ads=new"], "download", "new", False),
202+
(["publish", "--ads=changed"], "publish", "changed", False),
203+
(["publish", "--ads=changed,due"], "publish", "changed,due", False),
204+
(["publish", "--ads=changed,new"], "publish", "changed,new", False),
202205
(["version"], "version", "due", False),
203206
])
204207
def test_parse_args_handles_valid_arguments(
@@ -1180,3 +1183,113 @@ def test_description_with_email_replacement(self, test_bot: KleinanzeigenBot) ->
11801183

11811184
description = getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")(ad_cfg)
11821185
assert description == "Contact: test(at)example.com"
1186+
1187+
1188+
class TestKleinanzeigenBotChangedAds:
1189+
"""Tests for the 'changed' ads selector functionality."""
1190+
1191+
def test_load_ads_with_changed_selector(self, test_bot: KleinanzeigenBot, base_ad_config: dict[str, Any]) -> None:
1192+
"""Test that only changed ads are loaded when using the 'changed' selector."""
1193+
# Set up the bot with the 'changed' selector
1194+
test_bot.ads_selector = "changed"
1195+
test_bot.config["ad_defaults"] = {
1196+
"description": {
1197+
"prefix": "",
1198+
"suffix": ""
1199+
}
1200+
}
1201+
1202+
# Create a changed ad
1203+
changed_ad = create_ad_config(
1204+
base_ad_config,
1205+
id="12345",
1206+
title="Changed Ad",
1207+
updated_on="2024-01-01T00:00:00",
1208+
created_on="2024-01-01T00:00:00",
1209+
active=True
1210+
)
1211+
1212+
# Calculate hash for changed_ad and add it to the config
1213+
# Then modify the ad to simulate a change
1214+
changed_hash = calculate_content_hash(changed_ad)
1215+
changed_ad["content_hash"] = changed_hash
1216+
# Now modify the ad to make it "changed"
1217+
changed_ad["title"] = "Changed Ad - Modified"
1218+
1219+
# Create temporary directory and file
1220+
with tempfile.TemporaryDirectory() as temp_dir:
1221+
temp_path = Path(temp_dir)
1222+
ad_dir = temp_path / "ads"
1223+
ad_dir.mkdir()
1224+
1225+
# Write the ad file
1226+
yaml = YAML()
1227+
changed_file = ad_dir / "changed_ad.yaml"
1228+
with open(changed_file, "w", encoding="utf-8") as f:
1229+
yaml.dump(changed_ad, f)
1230+
1231+
# Set config file path and use relative path for ad_files
1232+
test_bot.config_file_path = str(temp_path / "config.yaml")
1233+
test_bot.config['ad_files'] = ["ads/*.yaml"]
1234+
1235+
# Mock the loading of the ad configuration
1236+
with patch('kleinanzeigen_bot.utils.dicts.load_dict', side_effect=[
1237+
changed_ad, # First call returns the changed ad
1238+
{} # Second call for ad_fields.yaml
1239+
]):
1240+
ads_to_publish = test_bot.load_ads()
1241+
1242+
# The changed ad should be loaded
1243+
assert len(ads_to_publish) == 1
1244+
assert ads_to_publish[0][1]["title"] == "Changed Ad - Modified"
1245+
1246+
def test_load_ads_with_due_selector_includes_all_due_ads(self, test_bot: KleinanzeigenBot, base_ad_config: dict[str, Any]) -> None:
1247+
"""Test that 'due' selector includes all ads that are due for republication, regardless of changes."""
1248+
# Set up the bot with the 'due' selector
1249+
test_bot.ads_selector = "due"
1250+
test_bot.config["ad_defaults"] = {
1251+
"description": {
1252+
"prefix": "",
1253+
"suffix": ""
1254+
}
1255+
}
1256+
1257+
# Create a changed ad that is also due for republication
1258+
current_time = datetime.utcnow()
1259+
old_date = (current_time - timedelta(days=10)).isoformat() # Past republication interval
1260+
1261+
changed_ad = create_ad_config(
1262+
base_ad_config,
1263+
id="12345",
1264+
title="Changed Ad",
1265+
updated_on=old_date,
1266+
created_on=old_date,
1267+
republication_interval=7, # Due for republication after 7 days
1268+
active=True
1269+
)
1270+
1271+
# Create temporary directory and file
1272+
with tempfile.TemporaryDirectory() as temp_dir:
1273+
temp_path = Path(temp_dir)
1274+
ad_dir = temp_path / "ads"
1275+
ad_dir.mkdir()
1276+
1277+
# Write the ad file
1278+
yaml = YAML()
1279+
ad_file = ad_dir / "changed_ad.yaml"
1280+
with open(ad_file, "w", encoding="utf-8") as f:
1281+
yaml.dump(changed_ad, f)
1282+
1283+
# Set config file path and use relative path for ad_files
1284+
test_bot.config_file_path = str(temp_path / "config.yaml")
1285+
test_bot.config['ad_files'] = ["ads/*.yaml"]
1286+
1287+
# Mock the loading of the ad configuration
1288+
with patch('kleinanzeigen_bot.utils.dicts.load_dict', side_effect=[
1289+
changed_ad, # First call returns the changed ad
1290+
{} # Second call for ad_fields.yaml
1291+
]):
1292+
ads_to_publish = test_bot.load_ads()
1293+
1294+
# The changed ad should be loaded with 'due' selector because it's due for republication
1295+
assert len(ads_to_publish) == 1

0 commit comments

Comments
 (0)