11# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
22# SPDX-License-Identifier: AGPL-3.0-or-later
33# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
4- import atexit , copy , json , os , re , signal , sys , textwrap # isort: skip
4+ import atexit , json , os , re , signal , sys , textwrap # isort: skip
55import getopt # pylint: disable=deprecated-module
66import urllib .parse as urllib_parse
77from gettext import gettext as _
1313
1414from . import extract , resources
1515from ._version import __version__
16- from .ads import calculate_content_hash , get_description_affixes
17- from .model .ad_model import MAX_DESCRIPTION_LENGTH , Ad
16+ from .model .ad_model import MAX_DESCRIPTION_LENGTH , Ad , AdPartial
1817from .model .config_model import Config
1918from .utils import dicts , error_handlers , loggers , misc
2019from .utils .exceptions import CaptchaEncountered
@@ -80,6 +79,16 @@ async def run(self, args:list[str]) -> None:
8079 LOG .info ("############################################" )
8180 LOG .info ("DONE: No configuration errors found." )
8281 LOG .info ("############################################" )
82+ case "update-content-hash" :
83+ self .configure_file_logging ()
84+ self .load_config ()
85+ self .ads_selector = "all"
86+ if ads := self .load_ads (exclude_ads_with_id = False ):
87+ self .update_content_hashes (ads )
88+ else :
89+ LOG .info ("############################################" )
90+ LOG .info ("DONE: No active ads found." )
91+ LOG .info ("############################################" )
8392 case "publish" :
8493 self .configure_file_logging ()
8594 self .load_config ()
@@ -143,6 +152,9 @@ def show_help(self) -> None:
143152 verify - Überprüft die Konfigurationsdateien
144153 delete - Löscht Anzeigen
145154 download - Lädt eine oder mehrere Anzeigen herunter
155+ update-content-hash - Berechnet den content_hash aller Anzeigen anhand der aktuellen ad_defaults neu;
156+ nach Änderungen an den config.yaml/ad_defaults verhindert es, dass alle Anzeigen als
157+ "geändert" gelten und neu veröffentlicht werden.
146158 --
147159 help - Zeigt diese Hilfe an (Standardbefehl)
148160 version - Zeigt die Version der Anwendung an
@@ -178,6 +190,8 @@ def show_help(self) -> None:
178190 verify - verifies the configuration files
179191 delete - deletes ads
180192 download - downloads one or multiple ads
193+ update-content-hash – recalculates each ad’s content_hash based on the current ad_defaults;
194+ use this after changing config.yaml/ad_defaults to avoid every ad being marked "changed" and republished
181195 --
182196 help - displays this help (default command)
183197 version - displays the application version
@@ -269,9 +283,10 @@ def configure_file_logging(self) -> None:
269283 def __check_ad_republication (self , ad_cfg :Ad , ad_file_relative :str ) -> bool :
270284 """
271285 Check if an ad needs to be republished based on republication interval.
272- Returns True if the ad should be republished based on the interval .
286+ Note: This method does not check for content changes. Use __check_ad_changed for that .
273287
274- Note: This method no longer checks for content changes. Use __check_ad_changed for that.
288+ Returns:
289+ True if the ad should be republished based on the interval.
275290 """
276291 if ad_cfg .updated_on :
277292 last_updated_on = ad_cfg .updated_on
@@ -299,14 +314,16 @@ def __check_ad_republication(self, ad_cfg:Ad, ad_file_relative:str) -> bool:
299314 def __check_ad_changed (self , ad_cfg :Ad , ad_cfg_orig :dict [str , Any ], ad_file_relative :str ) -> bool :
300315 """
301316 Check if an ad has been changed since last publication.
302- Returns True if the ad has been changed.
317+
318+ Returns:
319+ True if the ad has been changed.
303320 """
304321 if not ad_cfg .id :
305322 # New ads are not considered "changed"
306323 return False
307324
308325 # Calculate hash on original config to match what was stored
309- current_hash = calculate_content_hash (ad_cfg_orig )
326+ current_hash = AdPartial . model_validate (ad_cfg_orig ). update_content_hash (). content_hash
310327 stored_hash = ad_cfg_orig .get ("content_hash" )
311328
312329 LOG .debug ("Hash comparison for [%s]:" , ad_file_relative )
@@ -321,7 +338,20 @@ def __check_ad_changed(self, ad_cfg:Ad, ad_cfg_orig:dict[str, Any], ad_file_rela
321338
322339 return False
323340
324- def load_ads (self , * , ignore_inactive :bool = True , check_id :bool = True ) -> list [tuple [str , Ad , dict [str , Any ]]]:
341+ def load_ads (self , * , ignore_inactive :bool = True , exclude_ads_with_id :bool = True ) -> list [tuple [str , Ad , dict [str , Any ]]]:
342+ """
343+ Load and validate all ad config files, optionally filtering out inactive or already‐published ads.
344+
345+ Args:
346+ ignore_inactive (bool):
347+ Skip ads with `active=False`.
348+ exclude_ads_with_id (bool):
349+ Skip ads whose raw data already contains an `id`, i.e. was published before.
350+
351+ Returns:
352+ list[tuple[str, Ad, dict[str, Any]]]:
353+ Tuples of (file_path, validated Ad model, original raw data).
354+ """
325355 LOG .info ("Searching for ad config files..." )
326356
327357 ad_files :dict [str , str ] = {}
@@ -366,9 +396,9 @@ def load_ads(self, *, ignore_inactive:bool = True, check_id:bool = True) -> list
366396 should_include = True
367397
368398 # Check for 'new' selector
369- if "new" in selectors and (not ad_cfg .id or not check_id ):
399+ if "new" in selectors and (not ad_cfg .id or not exclude_ads_with_id ):
370400 should_include = True
371- elif "new" in selectors and ad_cfg .id and check_id :
401+ elif "new" in selectors and ad_cfg .id and exclude_ads_with_id :
372402 LOG .info (" -> SKIPPED: ad [%s] is not new. already has an id assigned." , ad_file_relative )
373403
374404 # Check for 'due' selector
@@ -427,13 +457,7 @@ def load_ads(self, *, ignore_inactive:bool = True, check_id:bool = True) -> list
427457 return ads
428458
def load_ad(self, ad_cfg_orig:dict[str, Any]) -> Ad:
    """
    Build a full Ad model from the raw data of a single ad config file.

    Args:
        ad_cfg_orig: Raw ad data as loaded from the ad config file.

    Returns:
        The Ad produced by validating the raw data as an AdPartial and converting it
        via AdPartial.to_ad(), passing the configured ad_defaults.
    """
    ad_partial = AdPartial.model_validate(ad_cfg_orig)
    return ad_partial.to_ad(self.config.ad_defaults)
437461
438462 def load_config (self ) -> None :
439463 # write default config.yaml if config file does not exist
@@ -805,7 +829,7 @@ async def publish_ad(self, ad_file:str, ad_cfg:Ad, ad_cfg_orig:dict[str, Any], p
805829
806830 # Update content hash after successful publication
807831 # Calculate hash on original config to ensure consistent comparison on restart
808- ad_cfg_orig ["content_hash" ] = calculate_content_hash (ad_cfg_orig )
832+ ad_cfg_orig ["content_hash" ] = AdPartial . model_validate (ad_cfg_orig ). update_content_hash (). content_hash
809833 ad_cfg_orig ["updated_on" ] = misc .now ().isoformat (timespec = "seconds" )
810834 if not ad_cfg .created_on and not ad_cfg .id :
811835 ad_cfg_orig ["created_on" ] = ad_cfg_orig ["updated_on" ]
@@ -1052,7 +1076,7 @@ async def download_ads(self) -> None:
10521076 elif self .ads_selector == "new" : # download only unsaved ads
10531077 # check which ads already saved
10541078 saved_ad_ids = []
1055- ads = self .load_ads (ignore_inactive = False , check_id = False ) # do not skip because of existing IDs
1079+ ads = self .load_ads (ignore_inactive = False , exclude_ads_with_id = False ) # do not skip because of existing IDs
10561080 for ad in ads :
10571081 ad_id = int (ad [2 ]["id" ])
10581082 saved_ad_ids .append (ad_id )
@@ -1111,7 +1135,7 @@ def __get_description(self, ad_cfg:Ad, *, with_affixes:bool) -> str:
11111135 # 1. Direct ad-level prefix
11121136 ad_cfg .description_prefix if ad_cfg .description_prefix is not None
11131137 # 2. Global prefix from config
1114- else get_description_affixes ( self .config , prefix = True )
1138+ else self .config . ad_defaults . description_prefix
11151139 or "" # Default to empty string if all sources are None
11161140 )
11171141
@@ -1120,7 +1144,7 @@ def __get_description(self, ad_cfg:Ad, *, with_affixes:bool) -> str:
11201144 # 1. Direct ad-level suffix
11211145 ad_cfg .description_suffix if ad_cfg .description_suffix is not None
11221146 # 2. Global suffix from config
1123- else get_description_affixes ( self .config , prefix = False )
1147+ else self .config . ad_defaults . description_suffix
11241148 or "" # Default to empty string if all sources are None
11251149 )
11261150
@@ -1137,6 +1161,21 @@ def __get_description(self, ad_cfg:Ad, *, with_affixes:bool) -> str:
11371161
11381162 return final_description
11391163
def update_content_hashes(self, ads:list[tuple[str, Ad, dict[str, Any]]]) -> None:
    """
    Recalculate the content hash of each given ad and save the ad file if the hash changed.

    Args:
        ads: Tuples of (ad file path, validated Ad model, original raw ad data),
            in the shape returned by load_ads().
    """
    count = 0  # number of ad files whose stored hash was actually rewritten

    # Progress is reported by position in the list, not by the number of updated ads,
    # so the "x/y" counter keeps advancing even when an ad is unchanged.
    for idx, (ad_file, ad_cfg, ad_cfg_orig) in enumerate(ads, start = 1):
        LOG.info("Processing %s/%s: '%s' from [%s]...", idx, len(ads), ad_cfg.title, ad_file)
        ad_cfg.update_content_hash()
        # Use .get(): an ad file may not contain a content_hash key yet; a plain
        # subscript would raise KeyError and abort the whole run.
        if ad_cfg.content_hash != ad_cfg_orig.get("content_hash"):
            count += 1
            ad_cfg_orig["content_hash"] = ad_cfg.content_hash
            dicts.save_dict(ad_file, ad_cfg_orig)

    LOG.info("############################################")
    LOG.info("DONE: Updated [content_hash] in %s", pluralize("ad", count))
    LOG.info("############################################")
1178+
11401179#############################
11411180# main entry point
11421181#############################
0 commit comments