Skip to content

Commit 04d6957

Browse files
committed
fix(web): dedupe X profile threads across pages
1 parent 44714de commit 04d6957

3 files changed

Lines changed: 188 additions & 2 deletions

File tree

internal/db/feed.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,86 @@ func (db *DB) GetFeedItemsByAuthorPage(handle string, limit int, offset int) ([]
776776
return scanFeedItems(rows)
777777
}
778778

779+
// GetFeedThreadItemsByAuthorPage returns one representative feed item per
780+
// conversation root for an X profile, newest first. Matching rows that belong
781+
// to the same stored reply thread collapse to the newest matching row, so
782+
// profile infinite scroll cannot render the same thread again on a later page.
//
// handle is compared case-insensitively against author_handle, source_handle
// and quote_author_handle. A non-positive limit falls back to 40 and a negative
// offset is treated as 0. LIMIT and OFFSET are applied to the collapsed rows,
// not to the raw matching rows.
//
// The query is built from four common table expressions:
//   - matched: tweet IDs whose handle columns match and that also satisfy
//     feedPrimaryItemPredicate (defined elsewhere in this package).
//   - chain: for every matched tweet (the seed), follows reply_to_status upward
//     through feed_items, joining any stored row rather than only matched ones,
//     for at most 50 parent hops.
//   - roots: for each seed, the row reached at the greatest depth of its chain.
//   - ranked: matched rows numbered within their root by published_at DESC,
//     tweet_id DESC; only the first row of each root is returned.
//
// The returned error is whatever the query or scanFeedItems reports, unwrapped.
783+
func (db *DB) GetFeedThreadItemsByAuthorPage(handle string, limit int, offset int) ([]model.FeedItem, error) {
784+
// Substitute the default page size when the caller passes no usable limit.
if limit <= 0 {
785+
limit = 40
786+
}
787+
// A negative offset is clamped to the start of the result set.
if offset < 0 {
788+
offset = 0
789+
}
790+
// handle is bound three times, once per handle column compared in the
// matched CTE, followed by the LIMIT and OFFSET placeholders.
rows, err := db.conn.Query(`
791+
WITH RECURSIVE
792+
matched(tweet_id) AS (
793+
SELECT tweet_id
794+
FROM feed_items
795+
WHERE (LOWER(author_handle) = LOWER(?) OR LOWER(source_handle) = LOWER(?) OR LOWER(quote_author_handle) = LOWER(?))
796+
AND `+feedPrimaryItemPredicate("feed_items")+`
797+
),
798+
chain(seed_id, tweet_id, reply_to_status, depth) AS (
799+
SELECT m.tweet_id, f.tweet_id, COALESCE(f.reply_to_status, ''), 0
800+
FROM matched m
801+
JOIN feed_items f ON f.tweet_id = m.tweet_id
802+
UNION ALL
803+
SELECT chain.seed_id, parent.tweet_id, COALESCE(parent.reply_to_status, ''), chain.depth + 1
804+
FROM chain
805+
JOIN feed_items parent ON parent.tweet_id = chain.reply_to_status
806+
WHERE chain.reply_to_status != ''
807+
AND chain.depth < 50
808+
),
809+
roots AS (
810+
SELECT c.seed_id, c.tweet_id AS root_id
811+
FROM chain c
812+
JOIN (
813+
SELECT seed_id, MAX(depth) AS max_depth
814+
FROM chain
815+
GROUP BY seed_id
816+
) deepest ON deepest.seed_id = c.seed_id AND deepest.max_depth = c.depth
817+
),
818+
ranked AS (
819+
SELECT f.tweet_id,
820+
ROW_NUMBER() OVER (
821+
PARTITION BY COALESCE(r.root_id, f.tweet_id)
822+
ORDER BY f.published_at DESC, f.tweet_id DESC
823+
) AS rn
824+
FROM feed_items f
825+
JOIN matched m ON m.tweet_id = f.tweet_id
826+
LEFT JOIN roots r ON r.seed_id = f.tweet_id
827+
)
828+
SELECT f.tweet_id, COALESCE(f.source_handle,''), f.author_handle,
829+
COALESCE(f.author_display_name,''), COALESCE(f.author_avatar_url,''),
830+
COALESCE(f.body_text,''), COALESCE(f.lang,''),
831+
COALESCE(f.is_retweet,0), COALESCE(f.retweeted_by_handle,''),
832+
COALESCE(f.retweeted_by_display_name,''),
833+
COALESCE(f.quote_tweet_id,''), COALESCE(f.quote_author_handle,''),
834+
COALESCE(f.quote_author_display_name,''), COALESCE(f.quote_author_avatar_url,''),
835+
COALESCE(f.quote_body_text,''), COALESCE(f.quote_lang,''),
836+
COALESCE(f.quote_media_json,''), COALESCE(f.media_json,''),
837+
COALESCE(f.canonical_url,''), COALESCE(f.reply_to_handle,''),
838+
COALESCE(f.reply_to_status,''),
839+
COALESCE(f.is_reply,0), COALESCE(f.is_ghost,0),
840+
f.quote_published_at,
841+
COALESCE(f.views,0), COALESCE(f.likes,0), COALESCE(f.retweets,0),
842+
f.published_at, f.fetched_at,
843+
COALESCE(f.content_hash,''), COALESCE(f.canonical_tweet_id,'')
844+
FROM ranked
845+
JOIN feed_items f ON f.tweet_id = ranked.tweet_id
846+
WHERE ranked.rn = 1
847+
ORDER BY f.published_at DESC, f.tweet_id DESC
848+
LIMIT ? OFFSET ?
849+
`, handle, handle, handle, limit, offset)
850+
if err != nil {
851+
return nil, err
852+
}
853+
// Release the result set on every return path; the error from Close is
// explicitly discarded.
defer func() {
854+
_ = rows.Close()
855+
}()
856+
// Row decoding is delegated to scanFeedItems; the column order selected
// above presumably mirrors what that helper scans — confirm when editing
// either side.
return scanFeedItems(rows)
857+
}
858+
779859
// scanFeedItems scans rows into FeedItem structs.
780860
func scanFeedItems(rows *sql.Rows) ([]model.FeedItem, error) {
781861
var items []model.FeedItem

internal/web/pages.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -890,15 +890,15 @@ func (s *Server) renderTwitterChannelFeed(w http.ResponseWriter, r *http.Request
890890
username = user.Username
891891
}
892892

893-
items, _ := s.db.GetFeedItemsByAuthorPage(handle, pageSize+1, offset)
893+
items, _ := s.db.GetFeedThreadItemsByAuthorPage(handle, pageSize+1, offset)
894894
hasMore := len(items) > pageSize
895895
if hasMore {
896896
items = items[:pageSize]
897897
}
898+
nextOffset := offset + len(items)
898899
items = feed.EnrichFeedItems(s.db, items, username)
899900
nextPageURL := ""
900901
if hasMore {
901-
nextOffset := offset + len(items)
902902
nextPageURL = fmt.Sprintf("/channels/%s?offset=%d", url.PathEscape(channelID), nextOffset)
903903
}
904904

internal/web/pages_channel_test.go

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,112 @@ func TestHandlePageTwitterChannelFeedPaginatesPastFirstChunk(t *testing.T) {
147147
}
148148
}
149149

150+
// TestHandlePageTwitterChannelFeedDoesNotRepeatThreadAcrossPages seeds a
// Twitter channel with 39 standalone posts, one two-post reply thread and one
// older standalone post, then requests the first page and the follow-up page.
// It asserts that the thread is rendered on the first page only and that the
// second page continues with the older post.
func TestHandlePageTwitterChannelFeedDoesNotRepeatThreadAcrossPages(t *testing.T) {
151+
srv := newTestServer(t)
152+
srv.staticV = func(path string) string { return "/static/" + path }
153+
if err := srv.db.UpsertChannelProfile(model.ChannelProfile{
154+
ChannelID: "twitter_sample_author",
155+
Platform: "twitter",
156+
Handle: "sample_author",
157+
DisplayName: "Sample Author",
158+
}); err != nil {
159+
t.Fatalf("UpsertChannelProfile: %v", err)
160+
}
161+
162+
base := time.UnixMilli(1700000000000)
163+
// Capacity 42 = 39 filler posts + thread leaf + thread root + old post.
items := make([]model.FeedItem, 0, 42)
164+
// Fillers are published 300 down to 262 minutes after base, so all of them
// are newer than the thread and the old post seeded below.
for i := 0; i < 39; i++ {
165+
publishedAt := base.Add(time.Duration(300-i) * time.Minute)
166+
items = append(items, model.FeedItem{
167+
TweetID: fmt.Sprintf("sample_filler_%02d", i+1),
168+
AuthorHandle: "sample_author",
169+
AuthorDisplayName: "Sample Author",
170+
BodyText: "filler post",
171+
PublishedAt: &publishedAt,
172+
FetchedAt: publishedAt,
173+
ContentHash: fmt.Sprintf("sample_filler_hash_%02d", i+1),
174+
CanonicalTweetID: fmt.Sprintf("sample_filler_%02d", i+1),
175+
})
176+
}
177+
// The leaf (+101 min) replies to the root (+100 min); the old post (+90 min)
// is older than both, so it sorts after the thread.
rootAt := base.Add(100 * time.Minute)
178+
leafAt := base.Add(101 * time.Minute)
179+
oldAt := base.Add(90 * time.Minute)
180+
items = append(items,
181+
model.FeedItem{
182+
TweetID: "sample_thread_leaf",
183+
AuthorHandle: "sample_author",
184+
AuthorDisplayName: "Sample Author",
185+
BodyText: "thread leaf body",
186+
IsReply: true,
187+
ReplyToHandle: "sample_author",
188+
ReplyToStatus: "sample_thread_root",
189+
PublishedAt: &leafAt,
190+
FetchedAt: leafAt,
191+
ContentHash: "sample_thread_leaf_hash",
192+
CanonicalTweetID: "sample_thread_leaf",
193+
},
194+
model.FeedItem{
195+
TweetID: "sample_thread_root",
196+
AuthorHandle: "sample_author",
197+
AuthorDisplayName: "Sample Author",
198+
BodyText: "thread root body",
199+
PublishedAt: &rootAt,
200+
FetchedAt: rootAt,
201+
ContentHash: "sample_thread_root_hash",
202+
CanonicalTweetID: "sample_thread_root",
203+
},
204+
model.FeedItem{
205+
TweetID: "sample_old_post",
206+
AuthorHandle: "sample_author",
207+
AuthorDisplayName: "Sample Author",
208+
BodyText: "old post body",
209+
PublishedAt: &oldAt,
210+
FetchedAt: oldAt,
211+
ContentHash: "sample_old_post_hash",
212+
CanonicalTweetID: "sample_old_post",
213+
},
214+
)
215+
if _, err := srv.db.UpsertFeedItems(items); err != nil {
216+
t.Fatalf("UpsertFeedItems: %v", err)
217+
}
218+
219+
// First request: the full channel page with no offset.
req := httptest.NewRequest(http.MethodGet, "/channels/twitter_sample_author", nil)
220+
req.SetPathValue("channelID", "twitter_sample_author")
221+
rec := httptest.NewRecorder()
222+
srv.handlePageChannel(rec, req)
223+
224+
if rec.Code != http.StatusOK {
225+
t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
226+
}
227+
html := rec.Body.String()
// Both thread bodies must appear on the first page.
if !strings.Contains(html, `thread root body`) || !strings.Contains(html, `thread leaf body`) {
228+
t.Fatalf("initial page should render the thread preview\n%s", html)
229+
}
230+
// The old post must be held back for the next page.
if strings.Contains(html, `sample_old_post`) {
231+
t.Fatalf("initial page should not include the next representative\n%s", html)
232+
}
233+
// The next-page sentinel must point at offset=40, i.e. 39 fillers plus one
// thread representative (presumably the handler's page size is 40 — confirm
// against the handler).
if !strings.Contains(html, `hx-get="/channels/twitter_sample_author?offset=40"`) {
234+
t.Fatalf("initial page missing grouped next-page sentinel\n%s", html)
235+
}
236+
237+
// Second request: the follow-up page at offset=40, sent with the HX-Request
// header set (the failure messages below call this the partial page).
req = httptest.NewRequest(http.MethodGet, "/channels/twitter_sample_author?offset=40", nil)
238+
req.Header.Set("HX-Request", "true")
239+
req.SetPathValue("channelID", "twitter_sample_author")
240+
rec = httptest.NewRecorder()
241+
srv.handlePageChannel(rec, req)
242+
243+
if rec.Code != http.StatusOK {
244+
t.Fatalf("partial status = %d, want %d", rec.Code, http.StatusOK)
245+
}
246+
html = rec.Body.String()
// The second page must continue with the old post.
if !strings.Contains(html, `sample_old_post`) || !strings.Contains(html, `old post body`) {
247+
t.Fatalf("partial page should continue after the grouped thread\n%s", html)
248+
}
249+
// Neither thread tweet ID nor the root body may show up again.
if strings.Contains(html, `sample_thread_root`) || strings.Contains(html, `sample_thread_leaf`) || strings.Contains(html, `thread root body`) {
250+
t.Fatalf("partial page repeated a thread already rendered on the first page\n%s", html)
251+
}
252+
}
255+
150256
func TestHandlePageShortsStartsAtOldestMoment(t *testing.T) {
151257
srv := newTestServer(t)
152258
srv.staticV = func(path string) string { return "/static/" + path }

0 commit comments

Comments
 (0)