@@ -27,9 +27,11 @@ type PromqlSeriesSettings struct {
2727 LookbackRange string `hcl:"lookbackRange,optional" json:"lookbackRange,omitempty"`
2828 LookbackStep string `hcl:"lookbackStep,optional" json:"lookbackStep,omitempty"`
2929 IgnoreMetrics []string `hcl:"ignoreMetrics,optional" json:"ignoreMetrics,omitempty"`
30+ FallbackTimeout string `hcl:"fallbackTimeout,optional" json:"fallbackTimeout,omitempty"`
3031 ignoreMetricsRe []* regexp.Regexp
3132 lookbackRangeDuration time.Duration
3233 lookbackStepDuration time.Duration
34+ fallbackTimeout time.Duration
3335}
3436
3537func (c * PromqlSeriesSettings ) Validate () error {
@@ -59,6 +61,15 @@ func (c *PromqlSeriesSettings) Validate() error {
5961 c .lookbackStepDuration = time .Duration (dur )
6062 }
6163
64+ c .fallbackTimeout = time .Minute * 5
65+ if c .FallbackTimeout != "" {
66+ dur , err := model .ParseDuration (c .FallbackTimeout )
67+ if err != nil {
68+ return err
69+ }
70+ c .fallbackTimeout = time .Duration (dur )
71+ }
72+
6273 for selector := range c .IgnoreLabelsValue {
6374 if _ , err := promParser .ParseMetricSelector (selector ); err != nil {
6475 return fmt .Errorf ("%q is not a valid PromQL metric selector: %w" , selector , err )
@@ -300,7 +311,7 @@ func (c SeriesCheck) Check(ctx context.Context, _ discovery.Path, rule parser.Ru
300311 Lines : expr .Value .Lines ,
301312 Reporter : c .Reporter (),
302313 Text : text ,
303- Details : c .checkOtherServer (ctx , selector .String ()),
314+ Details : c .checkOtherServer (ctx , selector .String (), settings . fallbackTimeout ),
304315 Severity : severity ,
305316 })
306317 slog .Debug ("No historical series for base metric" , slog .String ("check" , c .Reporter ()), slog .String ("selector" , (& bareSelector ).String ()))
@@ -564,10 +575,15 @@ func (c SeriesCheck) Check(ctx context.Context, _ discovery.Path, rule parser.Ru
564575 return problems
565576}
566577
567- func (c SeriesCheck ) checkOtherServer (ctx context.Context , query string ) string {
578+ func (c SeriesCheck ) checkOtherServer (ctx context.Context , query string , timeout time. Duration ) string {
568579 var servers []* promapi.FailoverGroup
569580 if val := ctx .Value (promapi .AllPrometheusServers ); val != nil {
570- servers = val .([]* promapi.FailoverGroup )
581+ for _ , s := range val .([]* promapi.FailoverGroup ) {
582+ if s .Name () == c .prom .Name () {
583+ continue
584+ }
585+ servers = append (servers , s )
586+ }
571587 }
572588
573589 if len (servers ) == 0 {
@@ -579,10 +595,31 @@ func (c SeriesCheck) checkOtherServer(ctx context.Context, query string) string
579595 buf .WriteString (query )
580596 buf .WriteString ("` was found on other prometheus servers:\n \n " )
581597
582- var matches , skipped int
598+ start := time .Now ()
599+ var tested , matches , skipped int
583600 for _ , prom := range servers {
584- slog .Debug ("Checking if metric exists on any other Prometheus server" , slog .String ("check" , c .Reporter ()), slog .String ("selector" , query ))
585-
601+ if time .Since (start ) >= timeout {
602+ slog .Debug ("Time limit reached for checking if metric exists on any other Prometheus server" ,
603+ slog .String ("check" , c .Reporter ()),
604+ slog .String ("selector" , query ),
605+ )
606+ buf .WriteString ("\n pint tried to check " )
607+ buf .WriteString (strconv .Itoa (len (servers )))
608+ buf .WriteString (" server(s) but stopped after checking " )
609+ buf .WriteString (strconv .Itoa (tested ))
610+ buf .WriteString (" server(s) due to reaching time limit (" )
611+ buf .WriteString (output .HumanizeDuration (timeout ))
612+ buf .WriteString (").\n " )
613+ break
614+ }
615+
616+ slog .Debug ("Checking if metric exists on any other Prometheus server" ,
617+ slog .String ("check" , c .Reporter ()),
618+ slog .String ("name" , prom .Name ()),
619+ slog .String ("selector" , query ),
620+ )
621+
622+ tested ++
586623 qr , err := prom .Query (ctx , fmt .Sprintf ("count(%s)" , query ))
587624 if err != nil {
588625 continue
0 commit comments