Skip to content

Commit dccc90a

Browse files
committed
resolved conflicts with latest code
2 parents 84f1365 + 6ee7333 commit dccc90a

2 files changed

Lines changed: 40 additions & 8 deletions

File tree

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,16 @@ separated list of hashtags. For some feeds (e.g. youtube-rss) you should use `{l
4040
stripped). Please be aware that this might easily exceed Mastodon's
4141
limit of 512 characters.
4242

43+
44+
## De-duping
45+
46+
If you are attempting to use the RSS feed of a major news site, you may find
47+
that they change, update, or simply re-post the same items multiple times, which
48+
will lead to duplicate toots. To enable de-duplication, use the `--dedupe`
49+
option to check for duplicates based on an entry field (such as `url`) before tooting, e.g.
50+
51+
feediverse --dedupe url
52+
4353
## Multiple Feeds
4454

4555
Since *feeds* is a list you can add additional feeds to watch if you want.

feediverse.py

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,13 @@ def main():
2828
default=os.path.expanduser(DEFAULT_CONFIG_FILE))
2929
parser.add_argument("-d", "--delay", action="store_true",
3030
help="delay randomly from 10 to 30 seconds between each post")
31+
parser.add_argument("-p", "--dedupe",
32+
help="dedupe against the given tag",
33+
default="", metavar="TAG")
3134

3235
args = parser.parse_args()
3336
config_file = args.config
37+
dedupe_field = args.dedupe
3438

3539
if args.verbose:
3640
print("using config file", config_file)
@@ -48,18 +52,28 @@ def main():
4852
)
4953

5054
newest_post = config['updated']
55+
dupes = config['dupecheck']
5156
for feed in config['feeds']:
5257
if args.verbose:
5358
print(f"fetching {feed['url']} entries since {config['updated']}")
5459
for entry in get_feed(feed['url'], config['updated']):
5560
newest_post = max(newest_post, entry['updated'])
56-
if args.verbose:
57-
print(entry)
61+
entry_text = feed['template'].format(**entry)[:499]
5862

5963
if args.dry_run:
60-
print("trial run, not tooting ", entry["title"][:50])
64+
print(entry_text)
6165
continue
62-
66+
67+
if args.verbose:
68+
print(entry_text)
69+
70+
if dedupe_field:
71+
if entry[dedupe_field] in dupes:
72+
if args.verbose:
73+
print(f"Skipping dupe post: {entry_text} based on dedupe field {dedupe_field}")
74+
continue
75+
update_dupes(dupes, entry[dedupe_field])
76+
6377
image_medias = []
6478
if feed['include_images'] and entry['images']:
6579
for image in entry['images'][:4]:
@@ -69,7 +83,7 @@ def main():
6983

7084
if not args.dry_run:
7185
masto.status_post(
72-
feed['template'].format(**entry)[:499],
86+
entry_text,
7387
media_ids=image_medias
7488
)
7589

@@ -80,6 +94,7 @@ def main():
8094

8195
if not args.dry_run:
8296
config['updated'] = newest_post.isoformat()
97+
config['dupecheck'] = dupes
8398
save_config(config, config_file)
8499

85100
def get_feed(feed_url, last_update):
@@ -98,21 +113,27 @@ def get_feed(feed_url, last_update):
98113
for entry in entries:
99114
yield get_entry(entry)
100115

116+
def update_dupes(dupes, new, limit=11):
    """Record *new* in the de-duplication history, keeping it bounded.

    The list is modified in place (callers keep a reference to it and
    persist it afterwards), oldest entries first. After the call the list
    holds at most *limit* values, the most recent being *new*.

    Args:
        dupes: Mutable list of previously seen values, oldest first.
        new: The value to remember.
        limit: Maximum number of values to retain, including *new*.
            The default of 11 matches the historical behaviour of this
            function (drop one entry once more than 10 are stored).

    Raises:
        ValueError: If *limit* is smaller than 1.
    """
    if limit < 1:
        raise ValueError("limit must be at least 1")

    # Make room for the new value. Removing a whole slice (rather than a
    # single element) also shrinks a history that is already over-long,
    # e.g. one loaded from a hand-edited config file.
    excess = len(dupes) - (limit - 1)
    if excess > 0:
        del dupes[:excess]
    dupes.append(new)
120+
101121
def get_entry(entry):
102122
hashtags = []
103123
for tag in entry.get('tags', []):
104124
t = tag['term'].replace(' ', '_').replace('.', '').replace('-', '')
105125
hashtags.append('#{}'.format(t))
106126
summary = entry.get('summary', '')
107-
content = entry.get('content', '') or ''
127+
content = entry.get('content', '')
128+
comments = entry.get('comments', '')
108129
if content:
109130
content = cleanup(content[0].get('value', ''))
110131
url = entry.id
111132
return {
112133
'url': url,
113134
'link': entry.link,
114135
'links': entry.links,
115-
'comments': entry.comments,
136+
'comments': comments,
116137
'title': cleanup(entry.title),
117138
'summary': cleanup(summary),
118139
'content': content,
@@ -167,7 +188,8 @@ def save_config(config, config_file):
167188

168189
def read_config(config_file):
169190
config = {
170-
'updated': datetime(MINYEAR, 1, 1, 0, 0, 0, 0, timezone.utc)
191+
'updated': datetime(MINYEAR, 1, 1, 0, 0, 0, 0, timezone.utc),
192+
'dupecheck': [],
171193
}
172194
with open(config_file) as fh:
173195
cfg = yaml.load(fh, yaml.SafeLoader)

0 commit comments

Comments
 (0)