Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions internal/pkg/archiver/general/body.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ func processBody(u *models.URL, disableAssetsCapture, domainsCrawl bool, maxHops
if err := connutil.CopyWithTimeout(io.Discard, u.GetResponse().Body); err != nil {
return err
}
return nil
}

// Get a buffer from the pool for MIME type detection
Expand Down
9 changes: 7 additions & 2 deletions internal/pkg/archiver/headless/archiver.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,11 +327,16 @@ func archivePage(warcClient *warc.CustomHTTPClient, item *models.Item, seed *mod
// Navigate to the URL
logger.Debug("navigating to URL")

go router.Run()
ready := make(chan struct{})

go func() {
defer close(ready) // router is now running and handlers are active
router.Run()
}()

// Wait for the router to start to avoid race condition in rod
// The race happens between router.Run() initializing events and page.Navigate() triggering events.
time.Sleep(100 * time.Millisecond)
<-ready

err = page.Navigate(item.GetURL().String())
if err != nil {
Expand Down
8 changes: 7 additions & 1 deletion internal/pkg/archiver/ratelimiter/ratelimiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,14 @@ func (tb *tokenBucket) Wait() {
tb.mu.Unlock()
return
}

// Calculate exact time until next token is available
// instead of busy-waiting with an arbitrary sleep duration.
tokensNeeded := 1.0 - tb.tokens
waitDuration := time.Duration(tokensNeeded / tb.refillRate * float64(time.Second))
tb.mu.Unlock()
time.Sleep(50 * time.Millisecond) // adjust as needed

time.Sleep(waitDuration)
}
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/models/item_dedupe.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ func flattenTree(root *Item) []*Item {
var nodes []*Item
var traverse func(node *Item)
traverse = func(node *Item) {
nodes = append(nodes, node)
if node == nil {
return
}
nodes = append(nodes, node)
for _, child := range node.GetChildren() {
traverse(child)
}
Expand Down