From da731afd7f961055a06b104726672fae3b621d0c Mon Sep 17 00:00:00 2001 From: Mariell Hoversholm Date: Mon, 13 Oct 2025 12:32:14 +0200 Subject: [PATCH] feat(go): add tonnes of knobs I've been trying to debug what's the best setup for Cloud. Having to build new images all the time is a big pain, so this should help us out a lot, as well as our customers when they are debugging. --- pkg/config/config.go | 116 ++++++++++++++++++++++++++++-------- pkg/service/browser.go | 131 ++++++++++++++++++++++++----------------- 2 files changed, 168 insertions(+), 79 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index b286d672..f49fd72e 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -314,10 +314,18 @@ type BrowserConfig struct { // That means for a viewport that is 500px high, and a webpage that is 2500px high, we will scroll 5 times, meaning a total wait duration of 6 * duration (as we have to wait on the first & last scrolls as well). TimeBetweenScrolls time.Duration // ReadinessTimeout is the maximum time to wait for the web-page to become ready (i.e. no longer loading anything). - ReadinessTimeout time.Duration - // LoadWait is the time to wait before checking for how ready the page is. + ReadinessTimeout time.Duration + ReadinessIterationInterval time.Duration + // ReadinessPriorWait is the time to wait before checking for how ready the page is. // This lets you force the webpage to take a beat and just do its thing before the service starts looking for whether it's time to render anything. - LoadWait time.Duration + ReadinessPriorWait time.Duration + ReadinessDisableQueryWait bool + ReadinessFirstQueryTimeout time.Duration + ReadinessQueriesTimeout time.Duration + ReadinessDisableNetworkWait bool + ReadinessNetworkIdleTimeout time.Duration + ReadinessDisableDOMHashCodeWait bool + ReadinessDOMHashCodeTimeout time.Duration // MinWidth is the minimum width of the browser viewport. // If larger than MaxWidth, MaxWidth is used instead. 
@@ -400,16 +408,67 @@ func BrowserFlags() []cli.Flag { Sources: FromConfig("browser.time-between-scrolls", "BROWSER_TIME_BETWEEN_SCROLLS"), }, &cli.DurationFlag{ - Name: "browser.readiness-timeout", - Usage: "The maximum time to wait for a web-page to become ready (i.e. no longer loading anything).", + Name: "browser.readiness.timeout", + Usage: "The maximum time to wait for a web-page to become ready (i.e. no longer loading anything). If <= 0, the timeout is disabled.", Value: time.Second * 30, - Sources: FromConfig("browser.readiness-timeout", "BROWSER_READINESS_TIMEOUT"), + Sources: FromConfig("browser.readiness.timeout", "BROWSER_READINESS_TIMEOUT"), }, &cli.DurationFlag{ - Name: "browser.load-wait", - Usage: "The time to wait before checking for how ready the page is. This lets you force the webpage to take a beat and just do its thing before the service starts looking for whether it's time to render anything.", + Name: "browser.readiness.iteration-interval", + Usage: "How long to wait between each iteration of checking whether the page is ready. Must be positive.", + Value: time.Millisecond * 100, + Sources: FromConfig("browser.readiness.iteration-interval", "BROWSER_READINESS_ITERATION_INTERVAL"), + Validator: func(d time.Duration) error { + if d <= 0 { + return fmt.Errorf("browser readiness iteration-interval must be positive (got %v)", d) + } + return nil + }, + }, + &cli.DurationFlag{ + Name: "browser.readiness.prior-wait", + Usage: "The time to wait before checking for how ready the page is. This lets you force the webpage to take a beat and just do its thing before the service starts looking for whether it's time to render anything. 
If <= 0, this is disabled.", Value: time.Second, - Sources: FromConfig("browser.load-wait", "BROWSER_LOAD_WAIT"), + Sources: FromConfig("browser.readiness.prior-wait", "BROWSER_READINESS_PRIOR_WAIT"), + }, + &cli.BoolFlag{ + Name: "browser.readiness.disable-query-wait", + Usage: "Disable waiting for queries to finish before capturing.", + Sources: FromConfig("browser.readiness.disable-query-wait", "BROWSER_READINESS_DISABLE_QUERY_WAIT"), + }, + &cli.DurationFlag{ + Name: "browser.readiness.give-up-on-first-query", + Usage: "How long to wait before giving up on a first query being registered. If <= 0, the give-up is disabled.", + Value: time.Second * 3, + Sources: FromConfig("browser.readiness.give-up-on-first-query", "BROWSER_READINESS_GIVE_UP_ON_FIRST_QUERY"), + }, + &cli.DurationFlag{ + Name: "browser.readiness.give-up-on-all-queries", + Usage: "How long to wait before giving up on all running queries. If <= 0, the give-up is disabled.", + Value: 0, + Sources: FromConfig("browser.readiness.give-up-on-all-queries", "BROWSER_READINESS_GIVE_UP_ON_ALL_QUERIES"), + }, + &cli.BoolFlag{ + Name: "browser.readiness.disable-network-wait", + Usage: "Disable waiting for network requests to finish before capturing.", + Sources: FromConfig("browser.readiness.disable-network-wait", "BROWSER_READINESS_DISABLE_NETWORK_WAIT"), + }, + &cli.DurationFlag{ + Name: "browser.readiness.network-idle-timeout", + Usage: "How long to wait before giving up on the network being idle. If <= 0, the timeout is disabled.", + Value: 0, + Sources: FromConfig("browser.readiness.network-idle-timeout", "BROWSER_READINESS_NETWORK_IDLE_TIMEOUT"), + }, + &cli.BoolFlag{ + Name: "browser.readiness.disable-dom-hashcode-wait", + Usage: "Disable waiting for the DOM to stabilize (i.e. 
not change) before capturing.", + Sources: FromConfig("browser.readiness.disable-dom-hashcode-wait", "BROWSER_READINESS_DISABLE_DOM_HASHCODE_WAIT"), + }, + &cli.DurationFlag{ + Name: "browser.readiness.dom-hashcode-timeout", + Usage: "How long to wait before giving up on the DOM stabilizing (i.e. not changing). If <= 0, the timeout is disabled.", + Value: 0, + Sources: FromConfig("browser.readiness.dom-hashcode-timeout", "BROWSER_READINESS_DOM_HASHCODE_TIMEOUT"), }, &cli.IntFlag{ Name: "browser.min-width", @@ -504,21 +562,29 @@ func BrowserConfigFromCommand(c *cli.Command) (BrowserConfig, error) { } return BrowserConfig{ - Path: c.String("browser.path"), - Flags: c.StringSlice("browser.flag"), - GPU: c.Bool("browser.gpu"), - Sandbox: c.Bool("browser.sandbox"), - TimeZone: timeZone, - Cookies: nil, - Headers: headers, - TimeBetweenScrolls: c.Duration("browser.time-between-scrolls"), - ReadinessTimeout: c.Duration("browser.readiness-timeout"), - LoadWait: c.Duration("browser.load-wait"), - MinWidth: minWidth, - MinHeight: minHeight, - MaxWidth: maxWidth, - MaxHeight: maxHeight, - PageScaleFactor: c.Float64("browser.page-scale-factor"), - Landscape: !c.Bool("browser.portrait"), + Path: c.String("browser.path"), + Flags: c.StringSlice("browser.flag"), + GPU: c.Bool("browser.gpu"), + Sandbox: c.Bool("browser.sandbox"), + TimeZone: timeZone, + Cookies: nil, + Headers: headers, + TimeBetweenScrolls: c.Duration("browser.time-between-scrolls"), + ReadinessTimeout: c.Duration("browser.readiness.timeout"), + ReadinessIterationInterval: c.Duration("browser.readiness.iteration-interval"), + ReadinessPriorWait: c.Duration("browser.readiness.prior-wait"), + ReadinessDisableQueryWait: c.Bool("browser.readiness.disable-query-wait"), + ReadinessFirstQueryTimeout: c.Duration("browser.readiness.give-up-on-first-query"), + ReadinessQueriesTimeout: c.Duration("browser.readiness.give-up-on-all-queries"), + ReadinessDisableNetworkWait: c.Bool("browser.readiness.disable-network-wait"), + 
ReadinessNetworkIdleTimeout: c.Duration("browser.readiness.network-idle-timeout"), + ReadinessDisableDOMHashCodeWait: c.Bool("browser.readiness.disable-dom-hashcode-wait"), + ReadinessDOMHashCodeTimeout: c.Duration("browser.readiness.dom-hashcode-timeout"), + MinWidth: minWidth, + MinHeight: minHeight, + MaxWidth: maxWidth, + MaxHeight: maxHeight, + PageScaleFactor: c.Float64("browser.page-scale-factor"), + Landscape: !c.Bool("browser.portrait"), }, nil } diff --git a/pkg/service/browser.go b/pkg/service/browser.go index 82c38e34..d53d151c 100644 --- a/pkg/service/browser.go +++ b/pkg/service/browser.go @@ -235,8 +235,8 @@ func (s *BrowserService) Render(ctx context.Context, url string, printer Printer fileChan := make(chan []byte, 1) // buffered: we don't want the browser to stick around while we try to export this value. actions := []chromedp.Action{ - tracingAction("network.Enable", network.Enable()), - tracingAction("fetch.Enable", fetch.Enable()), // required by handleNetworkEvents + tracingAction("network.Enable", network.Enable()), // required by waitForReady + tracingAction("fetch.Enable", fetch.Enable()), // required by handleNetworkEvents tracingAction("SetPageScaleFactor", emulation.SetPageScaleFactor(cfg.PageScaleFactor)), tracingAction("EmulateViewport", chromedp.EmulateViewport(int64(cfg.MinWidth), int64(cfg.MinHeight), orientation)), setHeaders(browserCtx, cfg.Headers), @@ -244,8 +244,8 @@ func (s *BrowserService) Render(ctx context.Context, url string, printer Printer tracingAction("Navigate", chromedp.Navigate(url)), tracingAction("WaitReady(body)", chromedp.WaitReady("body", chromedp.ByQuery)), // wait for a body to exist; this is when the page has started to actually render scrollForElements(cfg.TimeBetweenScrolls), - waitForDuration(cfg.LoadWait), - waitForReady(browserCtx, cfg.ReadinessTimeout), + waitForDuration(cfg.ReadinessPriorWait), + waitForReady(browserCtx, cfg), printer.prepare(cfg), printer.action(fileChan, cfg), } @@ -725,7 +725,7 @@ 
func (p *pngPrinter) prepare(cfg config.BrowserConfig) chromedp.Action { } span.SetStatus(codes.Ok, "viewport resized successfully") - if err := waitForReady(ctx, cfg.ReadinessTimeout).Do(ctx); err != nil { + if err := waitForReady(ctx, cfg).Do(ctx); err != nil { return fmt.Errorf("failed to wait for readiness after resizing viewport: %w", err) } } else { @@ -870,7 +870,7 @@ func scrollForElements(timeBetweenScrolls time.Duration) chromedp.Action { }) } -func waitForReady(browserCtx context.Context, timeout time.Duration) chromedp.Action { +func waitForReady(browserCtx context.Context, cfg config.BrowserConfig) chromedp.Action { getRunningQueries := func(ctx context.Context) (bool, error) { var running bool err := chromedp.Evaluate(`!!(window.__grafanaSceneContext && window.__grafanaRunningQueryCount > 0)`, &running).Do(ctx) @@ -891,26 +891,35 @@ func waitForReady(browserCtx context.Context, timeout time.Duration) chromedp.Ac requests := &atomic.Int64{} lastRequest := &atomicTime{} // TODO: use this to wait for network stabilisation. 
lastRequest.Store(time.Now()) - chromedp.ListenTarget(browserCtx, func(ev any) { - switch ev.(type) { - case *network.EventRequestWillBeSent: - requests.Add(1) - lastRequest.Store(time.Now()) - case *network.EventLoadingFinished, *network.EventLoadingFailed: - requests.Add(-1) - } - }) + networkListenerCtx, cancelNetworkListener := context.WithCancel(browserCtx) + if !cfg.ReadinessDisableNetworkWait { + chromedp.ListenTarget(networkListenerCtx, func(ev any) { + switch ev.(type) { + case *network.EventRequestWillBeSent: + requests.Add(1) + lastRequest.Store(time.Now()) + case *network.EventLoadingFinished, *network.EventLoadingFailed: + requests.Add(-1) + } + }) + } return chromedp.ActionFunc(func(ctx context.Context) error { + defer cancelNetworkListener() + tracer := tracer(ctx) ctx, span := tracer.Start(ctx, "waitForReady", - trace.WithAttributes(attribute.Float64("timeout_seconds", timeout.Seconds()))) + trace.WithAttributes(attribute.String("timeout", cfg.ReadinessTimeout.String()))) defer span.End() - timeout := time.After(timeout) + start := time.Now() + + var readinessTimeout <-chan time.Time + if cfg.ReadinessTimeout > 0 { + readinessTimeout = time.After(cfg.ReadinessTimeout) + } - hasHadQueries := false - giveUpFirstQuery := time.Now().Add(time.Second * 3) + hasSeenAnyQuery := false var domHashCode int initialDOMPass := true @@ -920,57 +929,70 @@ func waitForReady(browserCtx context.Context, timeout time.Duration) chromedp.Ac case <-ctx.Done(): span.SetStatus(codes.Error, "context completed before readiness detected") return ctx.Err() - case <-timeout: + case <-readinessTimeout: span.SetStatus(codes.Error, "timed out waiting for readiness") return fmt.Errorf("timed out waiting for readiness") - case <-time.After(100 * time.Millisecond): + + case <-time.After(cfg.ReadinessIterationInterval): + // Continue with the rest of the code; this is waiting for the next time we can do work. 
} - if requests.Load() > 0 { + if !cfg.ReadinessDisableNetworkWait && + (cfg.ReadinessNetworkIdleTimeout <= 0 || time.Since(start) < cfg.ReadinessNetworkIdleTimeout) && + requests.Load() > 0 { initialDOMPass = true - span.AddEvent("network requests still ongoing", trace.WithAttributes(attribute.Int64("inflightRequests", requests.Load()))) + span.AddEvent("network requests still ongoing", trace.WithAttributes(attribute.Int64("inflight_requests", requests.Load()))) continue // still waiting on network requests to complete } - running, err := getRunningQueries(ctx) - if err != nil { - span.SetStatus(codes.Error, err.Error()) - return fmt.Errorf("failed to get running queries: %w", err) - } - span.AddEvent("queried running queries", trace.WithAttributes(attribute.Bool("running", running))) - if running { - initialDOMPass = true - hasHadQueries = true - continue // still waiting on queries to complete - } else if !hasHadQueries && time.Now().Before(giveUpFirstQuery) { - span.AddEvent("no first query detected yet; giving it more time") - continue + if !cfg.ReadinessDisableQueryWait && (cfg.ReadinessQueriesTimeout <= 0 || time.Since(start) < cfg.ReadinessQueriesTimeout) { + running, err := getRunningQueries(ctx) + if err != nil { + span.SetStatus(codes.Error, err.Error()) + span.RecordError(err) + return fmt.Errorf("failed to get running queries: %w", err) + } + span.AddEvent("queried running queries", trace.WithAttributes(attribute.Bool("running", running))) + if running { + initialDOMPass = true + hasSeenAnyQuery = true + continue // still waiting on queries to complete + } else if !hasSeenAnyQuery && (cfg.ReadinessFirstQueryTimeout <= 0 || time.Since(start) < cfg.ReadinessFirstQueryTimeout) { + span.AddEvent("no first query detected yet; giving it more time") + continue + } } - if initialDOMPass { - domHashCode, err = getDOMHashCode(ctx) + if !cfg.ReadinessDisableDOMHashCodeWait && (cfg.ReadinessDOMHashCodeTimeout <= 0 || time.Since(start) < 
cfg.ReadinessDOMHashCodeTimeout) { + if initialDOMPass { + var err error + domHashCode, err = getDOMHashCode(ctx) + if err != nil { + span.SetStatus(codes.Error, err.Error()) + span.RecordError(err) + return fmt.Errorf("failed to get DOM hash code: %w", err) + } + span.AddEvent("initial DOM hash code recorded", trace.WithAttributes(attribute.Int("hashCode", domHashCode))) + initialDOMPass = false + continue // not stable yet + } + + newHashCode, err := getDOMHashCode(ctx) if err != nil { span.SetStatus(codes.Error, err.Error()) + span.RecordError(err) return fmt.Errorf("failed to get DOM hash code: %w", err) } - span.AddEvent("initial DOM hash code recorded", trace.WithAttributes(attribute.Int("hashCode", domHashCode))) - initialDOMPass = false - continue // not stable yet + span.AddEvent("subsequent DOM hash code recorded", trace.WithAttributes(attribute.Int("hashCode", newHashCode))) + if newHashCode != domHashCode { + span.AddEvent("DOM hash code changed", trace.WithAttributes(attribute.Int("oldHashCode", domHashCode), attribute.Int("newHashCode", newHashCode))) + domHashCode = newHashCode + initialDOMPass = true + continue // not stable yet + } + span.AddEvent("DOM hash code stable", trace.WithAttributes(attribute.Int("hashCode", domHashCode))) } - newHashCode, err := getDOMHashCode(ctx) - if err != nil { - span.SetStatus(codes.Error, err.Error()) - return fmt.Errorf("failed to get DOM hash code: %w", err) - } - span.AddEvent("subsequent DOM hash code recorded", trace.WithAttributes(attribute.Int("hashCode", newHashCode))) - if newHashCode != domHashCode { - span.AddEvent("DOM hash code changed", trace.WithAttributes(attribute.Int("oldHashCode", domHashCode), attribute.Int("newHashCode", newHashCode))) - domHashCode = newHashCode - initialDOMPass = true - continue // not stable yet - } - span.AddEvent("DOM hash code stable", trace.WithAttributes(attribute.Int("hashCode", domHashCode))) break // we're done!! 
} @@ -1006,6 +1028,7 @@ func tracingAction(name string, action chromedp.Action) chromedp.Action { err := action.Do(ctx) if err != nil { span.SetStatus(codes.Error, err.Error()) + span.RecordError(err) return err } span.SetStatus(codes.Ok, "action completed successfully")