Skip to content

Commit 1f0d58f

Browse files
authored
Order local queue URLs by number of hops and timestamp (#490)
* feat: order fresh URLs by number of hops, then by timestamp. This should allow us to crawl "in order" more effectively when using the local queue. * add an index on hops and timestamp. This should improve performance for new databases.
1 parent 88714c6 commit 1f0d58f

File tree

2 files changed

+2
-0
lines changed

2 files changed

+2
-0
lines changed

internal/pkg/source/lq/query.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
-- name: GetFreshURLs :many
-- Selects rows from urls whose status is 'FRESH', ordered by hops ascending and
-- then by timestamp ascending, so entries with the fewest hops come first.
-- The number of rows returned is capped by the single bound parameter (LIMIT ?).
22
SELECT * FROM urls
33
WHERE status = 'FRESH'
4+
ORDER BY hops ASC, timestamp ASC
45
LIMIT ?;
56

67
-- name: ClaimThisURL :exec

internal/pkg/source/lq/schema.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ CREATE TABLE IF NOT EXISTS urls (
88
);
99
-- Deduplication: the unique index allows at most one row per URL value.
CREATE UNIQUE INDEX IF NOT EXISTS urls_value
    ON urls (value);
1010
-- Queueing: supports lookups that filter rows by their status.
CREATE INDEX IF NOT EXISTS urls_status
    ON urls (status);
11+
-- Serves the queue query that filters on status and then orders by crawl depth
-- (hops) and time (timestamp). Leading the index with status lets the planner
-- seek directly to the rows with the wanted status and read them already in
-- (hops, timestamp) order. An index on (hops, timestamp) alone forces a choice
-- between filtering by status and sorting the matches, or walking the index in
-- order while checking the status of every row along the way.
CREATE INDEX IF NOT EXISTS urls_status_hops_timestamp
    ON urls (status ASC, hops ASC, timestamp ASC);

0 commit comments

Comments
 (0)