@@ -2,21 +2,21 @@ package minion
22
33import (
44 "context"
5+ "errors"
56 "fmt"
6- "github.com/twmb/franz-go/pkg/kerr"
7- "go.uber.org/zap"
87 "strconv"
98 "time"
109
11- "github.com/twmb/franz-go/pkg/kmsg"
10+ "github.com/twmb/franz-go/pkg/kadm"
11+ "go.uber.org/zap"
1212)
1313
14- func (s * Service ) ListOffsetsCached (ctx context.Context , timestamp int64 ) (* kmsg. ListOffsetsResponse , error ) {
14+ func (s * Service ) ListOffsetsCached (ctx context.Context , timestamp int64 ) (kadm. ListedOffsets , error ) {
1515 reqId := ctx .Value ("requestId" ).(string )
1616 key := "partition-offsets-" + strconv .Itoa (int (timestamp )) + "-" + reqId
1717
1818 if cachedRes , exists := s .getCachedItem (key ); exists {
19- return cachedRes .(* kmsg. ListOffsetsResponse ), nil
19+ return cachedRes .(kadm. ListedOffsets ), nil
2020 }
2121
2222 res , err , _ := s .requestGroup .Do (key , func () (interface {}, error ) {
@@ -33,70 +33,55 @@ func (s *Service) ListOffsetsCached(ctx context.Context, timestamp int64) (*kmsg
3333 return nil , err
3434 }
3535
36- return res .(* kmsg. ListOffsetsResponse ), nil
36+ return res .(kadm. ListedOffsets ), nil
3737}
3838
3939// ListOffsets fetches the low (timestamp: -2) or high water mark (timestamp: -1) for all topic partitions
40- func (s * Service ) ListOffsets (ctx context.Context , timestamp int64 ) (* kmsg. ListOffsetsResponse , error ) {
41- metadata , err := s .GetMetadataCached (ctx )
40+ func (s * Service ) ListOffsets (ctx context.Context , timestamp int64 ) (kadm. ListedOffsets , error ) {
41+ listedOffsets , err := s .admClient . ListEndOffsets (ctx )
4242 if err != nil {
43- return nil , fmt .Errorf ("failed to list consumer groups: %w" , err )
44- }
45-
46- topicReqs := make ([]kmsg.ListOffsetsRequestTopic , len (metadata .Topics ))
47- for i , topic := range metadata .Topics {
48- req := kmsg .NewListOffsetsRequestTopic ()
49- req .Topic = * topic .Topic
50-
51- partitionReqs := make ([]kmsg.ListOffsetsRequestTopicPartition , len (topic .Partitions ))
52- for j , partition := range topic .Partitions {
53- partitionReqs [j ] = kmsg .NewListOffsetsRequestTopicPartition ()
54- partitionReqs [j ].Partition = partition .Partition
55- partitionReqs [j ].Timestamp = timestamp
43+ var se * kadm.ShardErrors
44+ if ! errors .As (err , & se ) {
45+ return nil , fmt .Errorf ("failed to list offsets: %w" , err )
5646 }
57- req .Partitions = partitionReqs
58-
59- topicReqs [i ] = req
60- }
6147
62- req := kmsg .NewListOffsetsRequest ()
63- req .Topics = topicReqs
64-
65- res , err := req .RequestWith (ctx , s .client )
66- if err != nil {
67- return res , err
48+ if se .AllFailed {
49+ return nil , fmt .Errorf ("failed to list offsets, all shard responses failed: %w" , err )
50+ }
51+ s .logger .Info ("failed to list offset from some shards" , zap .Int ("failed_shards" , len (se .Errs )))
52+ for _ , shardErr := range se .Errs {
53+ s .logger .Warn ("shard error for listing end offsets" ,
54+ zap .Int32 ("broker_id" , shardErr .Broker .NodeID ),
55+ zap .Error (shardErr .Err ))
56+ }
6857 }
6958
7059 // Log inner errors before returning them. We do that inside of this function to avoid duplicate logging as the response
7160 // are cached for each scrape anyways.
7261 //
7362 // Create two metrics to aggregate error logs in few messages. Logging one message per occured partition error
7463 // is too much. Typical errors are LEADER_NOT_AVAILABLE etc.
75- errorCountByErrCode := make (map [int16 ]int )
64+ errorCountByErrCode := make (map [error ]int )
7665 errorCountByTopic := make (map [string ]int )
7766
7867 // Iterate on all partitions
79- for _ , topic := range res .Topics {
80- for _ , partition := range topic .Partitions {
81- err := kerr .TypedErrorForCode (partition .ErrorCode )
82- if err != nil {
83- errorCountByErrCode [partition .ErrorCode ]++
84- errorCountByTopic [topic .Topic ]++
85- }
68+ listedOffsets .Each (func (offset kadm.ListedOffset ) {
69+ if offset .Err != nil {
70+ errorCountByTopic [offset .Topic ]++
71+ errorCountByErrCode [offset .Err ]++
8672 }
87- }
73+ })
8874
8975 // Print log line for each error type
90- for errCode , count := range errorCountByErrCode {
91- typedErr := kerr .TypedErrorForCode (errCode )
76+ for err , count := range errorCountByErrCode {
9277 s .logger .Warn ("failed to list some partitions watermarks" ,
93- zap .Error (typedErr ),
78+ zap .Error (err ),
9479 zap .Int ("error_count" , count ))
9580 }
9681 if len (errorCountByTopic ) > 0 {
9782 s .logger .Warn ("some topics had one or more partitions whose watermarks could not be fetched from Kafka" ,
9883 zap .Int ("topics_with_errors" , len (errorCountByTopic )))
9984 }
10085
101- return res , nil
86+ return listedOffsets , nil
10287}
0 commit comments