@@ -28,9 +28,13 @@ def main():
2828 default = os .path .expanduser (DEFAULT_CONFIG_FILE ))
2929 parser .add_argument ("-d" , "--delay" , action = "store_true" ,
3030 help = "delay randomly from 10 to 30 seconds between each post" )
31+ parser .add_argument ("-p" , "--dedupe" ,
32+ help = "dedupe against the given tag" ,
33+ default = "" , metavar = "TAG" )
3134
3235 args = parser .parse_args ()
3336 config_file = args .config
37+ dedupe_field = args .dedupe
3438
3539 if args .verbose :
3640 print ("using config file" , config_file )
@@ -48,18 +52,28 @@ def main():
4852 )
4953
5054 newest_post = config ['updated' ]
55+ dupes = config ['dupecheck' ]
5156 for feed in config ['feeds' ]:
5257 if args .verbose :
5358 print (f"fetching { feed ['url' ]} entries since { config ['updated' ]} " )
5459 for entry in get_feed (feed ['url' ], config ['updated' ]):
5560 newest_post = max (newest_post , entry ['updated' ])
56- if args .verbose :
57- print (entry )
61+ entry_text = feed ['template' ].format (** entry )[:499 ]
5862
5963 if args .dry_run :
60- print ("trial run, not tooting " , entry [ "title" ][: 50 ] )
64+ print (entry_text )
6165 continue
62-
66+
67+ if args .verbose :
68+ print (entry_text )
69+
70+ if dedupe_field :
71+ if entry [dedupe_field ] in dupes :
72+ if args .verbose :
73+ print (f"Skipping dupe post: { entry_text } based on dedupe field { dedupe_field } " )
74+ continue
75+ update_dupes (dupes , entry [dedupe_field ])
76+
6377 image_medias = []
6478 if feed ['include_images' ] and entry ['images' ]:
6579 for image in entry ['images' ][:4 ]:
@@ -69,7 +83,7 @@ def main():
6983
7084 if not args .dry_run :
7185 masto .status_post (
72- feed [ 'template' ]. format ( ** entry )[: 499 ] ,
86+ entry_text ,
7387 media_ids = image_medias
7488 )
7589
@@ -80,6 +94,7 @@ def main():
8094
8195 if not args .dry_run :
8296 config ['updated' ] = newest_post .isoformat ()
97+ config ['dupecheck' ] = dupes
8398 save_config (config , config_file )
8499
85100def get_feed (feed_url , last_update ):
@@ -98,21 +113,27 @@ def get_feed(feed_url, last_update):
98113 for entry in entries :
99114 yield get_entry (entry )
100115
def update_dupes(dupes, new, max_size=10):
    """Record *new* in the rolling list of recently seen dedupe values.

    The list is modified in place; the oldest values (lowest indexes) are
    discarded first so the history stays bounded.

    Args:
        dupes: Mutable list of previously seen values, oldest first.
        new: Value to append as the most recent entry.
        max_size: Number of older values kept before *new* is appended.
            After the call the list holds at most ``max_size + 1`` items,
            which matches the historical behaviour for the default of 10.

    Returns:
        None. *dupes* is mutated in place.
    """
    # Drop every surplus entry, not just one: a list that arrives oversized
    # (e.g. a hand-edited or legacy config) must still shrink to the bound.
    excess = len(dupes) - max_size
    if excess > 0:
        del dupes[:excess]
    dupes.append(new)
120+
101121def get_entry (entry ):
102122 hashtags = []
103123 for tag in entry .get ('tags' , []):
104124 t = tag ['term' ].replace (' ' , '_' ).replace ('.' , '' ).replace ('-' , '' )
105125 hashtags .append ('#{}' .format (t ))
106126 summary = entry .get ('summary' , '' )
107- content = entry .get ('content' , '' ) or ''
127+ content = entry .get ('content' , '' )
128+ comments = entry .get ('comments' , '' )
108129 if content :
109130 content = cleanup (content [0 ].get ('value' , '' ))
110131 url = entry .id
111132 return {
112133 'url' : url ,
113134 'link' : entry .link ,
114135 'links' : entry .links ,
115- 'comments' : entry . comments ,
136+ 'comments' : comments ,
116137 'title' : cleanup (entry .title ),
117138 'summary' : cleanup (summary ),
118139 'content' : content ,
@@ -167,7 +188,8 @@ def save_config(config, config_file):
167188
168189def read_config (config_file ):
169190 config = {
170- 'updated' : datetime (MINYEAR , 1 , 1 , 0 , 0 , 0 , 0 , timezone .utc )
191+ 'updated' : datetime (MINYEAR , 1 , 1 , 0 , 0 , 0 , 0 , timezone .utc ),
192+ 'dupecheck' : [],
171193 }
172194 with open (config_file ) as fh :
173195 cfg = yaml .load (fh , yaml .SafeLoader )
0 commit comments