Skip to content

Commit 79a403b

Browse files
committed
Move readability HTML cleanup from request-time to fetch-time
Changes:
- Run readability on post content/original_content when fetching feeds, not on every post detail render — eliminates wasted CPU on cached page hits.
- Remove the now-unused clean_html template filter and its tests.
- Drop redundant .all() in the index view.
1 parent cdd5005 commit 79a403b

6 files changed

Lines changed: 8 additions & 39 deletions

File tree

planet/managers.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from django.apps import apps
55
from django.db import models, transaction
66
from django.utils import timezone
7+
from readability import Document
78

89
from planet.utils import md5_hash, normalize_language, to_datetime
910

@@ -179,9 +180,11 @@ def create_from(self, entry_data, feed):
179180
return None
180181

181182
try:
182-
post.content = entry_data.summary
183+
raw_content = entry_data.summary
183184
except AttributeError:
184-
pass
185+
raw_content = ""
186+
if raw_content:
187+
post.content = Document(raw_content).summary(html_partial=True)
185188

186189
try:
187190
language = entry_data.summary_detail.language

planet/templates/planet/posts/detail.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
{% include "planet/posts/full_details.html" %}
3131
</div>
3232
<div class="row">
33-
<p>{{ post.original_content|default:post.content|clean_html }}</p>
33+
<p>{{ post.original_content|default:post.content|safe }}</p>
3434
</div>
3535
</div>
3636
{% endblock %}

planet/templatetags/planet_tags.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,14 @@
11
import re
22

33
from django import template
4-
from django.template.defaultfilters import stringfilter
54
from django.utils.html import strip_tags
6-
from django.utils.safestring import mark_safe
7-
from readability import Document
85

96
from planet.models import Author, Blog, Feed, Post
107
from planet.settings import PLANET_CONFIG
118

129
register = template.Library()
1310

1411

15-
@register.filter
16-
@stringfilter
17-
def clean_html(html):
18-
if not html:
19-
return ""
20-
return mark_safe(Document(html).summary(html_partial=True))
21-
22-
2312
@register.simple_tag
2413
def get_first_paragraph(body):
2514
if body is None:

planet/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def fetch_post_content(url):
5353
raw = resp.read()
5454
charset = resp.headers.get_content_charset() or "utf-8"
5555
html = raw.decode(charset, errors="replace")
56-
return Document(html).summary()
56+
return Document(html).summary(html_partial=True)
5757
except Exception as exc:
5858
logger.warning("Failed to fetch original content for %s: %s", url, exc)
5959
return None

planet/views.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88

99
def index(request):
10-
posts = Post.objects.all().with_relations()
10+
posts = Post.objects.with_relations()
1111
return render(request, "planet/posts/list.html", {"posts": posts})
1212

1313

tests/test_templatetags.py

Lines changed: 0 additions & 23 deletions
This file was deleted.

0 commit comments

Comments
 (0)