Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions agent/outbound_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ func Test_addAppDeletionToQueue(t *testing.T) {
_ = a.appManager.Manage("agent/guestbook")
a.addAppDeletionToQueue(app)
ev, _ := a.queues.SendQ(defaultQueueName).Get()
a.queues.SendQ(defaultQueueName).Done(ev)
assert.Equal(t, event.Delete.String(), ev.Type())
require.False(t, a.appManager.IsManaged("agent/guestbook"))
})
Expand Down Expand Up @@ -365,6 +366,7 @@ func Test_addAppProjectCreationToQueue(t *testing.T) {
require.Equal(t, 1, a.queues.SendQ(defaultQueueName).Len())
ev, _ := a.queues.SendQ(defaultQueueName).Get()
assert.NotNil(t, ev)
a.queues.SendQ(defaultQueueName).Done(ev)
assert.Equal(t, event.Create.String(), ev.Type())
// Queue should be empty after get
assert.Equal(t, 0, a.queues.SendQ(defaultQueueName).Len())
Expand Down Expand Up @@ -512,6 +514,7 @@ func Test_addAppProjectDeletionToQueue(t *testing.T) {
a.addAppProjectDeletionToQueue(appProject)

ev, _ := a.queues.SendQ(defaultQueueName).Get()
a.queues.SendQ(defaultQueueName).Done(ev)
assert.Equal(t, event.Delete.String(), ev.Type())
require.False(t, a.projectManager.IsManaged("test-project"))
})
Expand Down
222 changes: 222 additions & 0 deletions internal/queue/dedupe_queue.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
// Copyright 2026 The argocd-agent Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package queue

import (
"sync"

internalevent "github.com/argoproj-labs/argocd-agent/internal/event"
"github.com/cloudevents/sdk-go/v2/event"
"k8s.io/client-go/util/workqueue"
)

// EventKey is the key that goes into the workqueue
type EventKey struct {
ResourceID string
EventType string
EventID string
}

// reorderQueue is used to customize the functionality of the default workqueue.
// When a duplicate item is added (via Touch), it removes the existing entry and
// appends the item to the tail of the queue.
type reorderQueue[T comparable] struct {
items []T
// index is a map of item to its index in the queue
index map[T]int
}

func newReorderQueue[T comparable]() workqueue.Queue[T] {
return &reorderQueue[T]{
index: make(map[T]int),
}
}

func (q *reorderQueue[T]) Push(item T) {
q.index[item] = len(q.items)
q.items = append(q.items, item)
}

func (q *reorderQueue[T]) Pop() (item T) {
item = q.items[0]
delete(q.index, item)

q.items[0] = *new(T)
q.items = q.items[1:]

// Rebuild indices after shift
for i, it := range q.items {
q.index[it] = i
}
return item
}

// Touch is a hook that is invoked when queue.Add is called with an item that already exists in the queue.
// It is only called when the item is not being processed i.e in dirty set and not in processing set.
func (q *reorderQueue[T]) Touch(item T) {
idx, ok := q.index[item]
if !ok {
return
}
// Remove from current position
q.items = append(q.items[:idx], q.items[idx+1:]...)
// Re-add at the end
q.items = append(q.items, item)
// Rebuild indices from the moved position onward
for i := idx; i < len(q.items); i++ {
q.index[q.items[i]] = i
}
}

func (q *reorderQueue[T]) Len() int {
return len(q.items)
}

type dedupeQueue struct {
queue workqueue.TypedRateLimitingInterface[EventKey]

maxSize int

mu sync.Mutex
latestEvents map[EventKey]*event.Event
eventKeys map[*event.Event]EventKey

notify chan struct{}
}

func NewDedupeQueue(name string, maxSize int) WorkQueue {
baseQueue := workqueue.NewTypedWithConfig(workqueue.TypedQueueConfig[EventKey]{
Name: name,
Queue: newReorderQueue[EventKey](),
})

delayingQueue := workqueue.NewTypedDelayingQueueWithConfig(workqueue.TypedDelayingQueueConfig[EventKey]{
Queue: baseQueue,
})

queue := workqueue.NewTypedRateLimitingQueueWithConfig(
workqueue.DefaultTypedControllerRateLimiter[EventKey](),
workqueue.TypedRateLimitingQueueConfig[EventKey]{
DelayingQueue: delayingQueue,
},
)

return &dedupeQueue{
queue: queue,
maxSize: maxSize,
latestEvents: make(map[EventKey]*event.Event),
eventKeys: make(map[*event.Event]EventKey),
notify: make(chan struct{}, 10),
}
}
Comment on lines +99 to +123

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Validate maxSize at construction time.

maxSize <= 0 causes the first Add() to enter eviction immediately and can block on Get(). Add an explicit constructor guard (panic or clamp with logged warning) to prevent invalid queue instances.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@internal/queue/dedupe_queue.go` around lines 99 - 123, Add validation in the
NewDedupeQueue function to check if the maxSize parameter is less than or equal
to zero before proceeding with queue initialization. If maxSize is invalid,
either panic with a descriptive error message or log a warning and clamp it to a
sensible minimum value (such as 1) to prevent the queue from entering eviction
immediately on the first Add() call and causing blocking issues on Get().


func getKey(ev *event.Event) EventKey {
resID := internalevent.ResourceID(ev)
evType := ev.Type()

if canDedupe(ev) {
return EventKey{
ResourceID: resID,
EventType: evType,
}
}

// Non-dedupable events get a unique key
return EventKey{
ResourceID: resID,
EventType: evType,
EventID: internalevent.EventID(ev),
}
}

func (q *dedupeQueue) Add(item *event.Event) {
key := getKey(item)

q.mu.Lock()
oldEvent := q.latestEvents[key]
_, exists := q.latestEvents[key]
q.latestEvents[key] = item
q.eventKeys[item] = key
if exists && oldEvent != nil {
delete(q.eventKeys, oldEvent)
Comment on lines +147 to +153

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical | 🏗️ Heavy lift

In-flight duplicate handling drops the newest event for the same key.

When a key is re-added after Get() but before Done(), the new payload is put back into latestEvents, then Done() for the older payload deletes latestEvents[key] unconditionally. That can lose the pending update and produce nil from a subsequent Get().

Proposed fix
 type dedupeQueue struct {
 	queue workqueue.TypedRateLimitingInterface[EventKey]

 	maxSize int

 	mu           sync.Mutex
 	latestEvents map[EventKey]*event.Event
 	eventKeys    map[*event.Event]EventKey
+	processing   map[EventKey]*event.Event

 	notify chan struct{}
 }
@@
 	return &dedupeQueue{
 		queue:        queue,
 		maxSize:      maxSize,
 		latestEvents: make(map[EventKey]*event.Event),
 		eventKeys:    make(map[*event.Event]EventKey),
+		processing:   make(map[EventKey]*event.Event),
 		notify:       make(chan struct{}, 10),
 	}
 }
@@
 func (q *dedupeQueue) Add(item *event.Event) {
 	key := getKey(item)

 	q.mu.Lock()
 	oldEvent := q.latestEvents[key]
-	_, exists := q.latestEvents[key]
+	_, exists := q.latestEvents[key]
+	_, inFlight := q.processing[key]
 	q.latestEvents[key] = item
 	q.eventKeys[item] = key
 	if exists && oldEvent != nil {
 		delete(q.eventKeys, oldEvent)
 	}
+	isNewKey := !exists && !inFlight
 	q.mu.Unlock()
@@
-	if !exists && q.queue.Len() == q.maxSize {
+	if isNewKey && q.queue.Len() == q.maxSize {
@@
 func (q *dedupeQueue) Get() (*event.Event, bool) {
@@
 	q.mu.Lock()
 	ev := q.latestEvents[key]
+	if ev != nil {
+		q.processing[key] = ev
+	}
 	delete(q.latestEvents, key)
 	q.mu.Unlock()

 	return ev, shutdown
 }

 func (q *dedupeQueue) Done(item *event.Event) {
 	q.mu.Lock()
 	key, ok := q.eventKeys[item]
 	if ok {
 		delete(q.eventKeys, item)
-		delete(q.latestEvents, key)
+		delete(q.processing, key)
+		if cur, exists := q.latestEvents[key]; exists && cur == item {
+			delete(q.latestEvents, key)
+		}
 	}
 	q.mu.Unlock()

Also applies to: 188-201

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@internal/queue/dedupe_queue.go` around lines 147 - 153, The issue is that
when Done() is called for an older event that has been superseded by a newer
event with the same key, the code unconditionally deletes latestEvents[key],
losing the newer pending event. In the Done() method (around lines 188-201),
before deleting latestEvents[key], you need to verify that the event being
completed is actually the current event stored in latestEvents for that key by
checking if q.eventKeys[item] still points to that key and if latestEvents[key]
still equals item. Only delete latestEvents[key] if the item matches the current
event; otherwise, preserve the newer event that has arrived.

}
q.mu.Unlock()

// Only evict when this is a genuinely new key. If the key already
// exists the workqueue will update it in-place (Touch) without
// growing, so evicting would incorrectly shrink the queue.
if !exists && q.queue.Len() == q.maxSize {
oldest, shutdown := q.queue.Get()
if !shutdown {
Comment on lines +160 to +162

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical | 🏗️ Heavy lift

Eviction path can block producer threads indefinitely.

Using blocking q.queue.Get() inside Add() is unsafe. The queue can be drained by another goroutine after the Len()==maxSize check, causing this producer to block before enqueueing the new item.

Please switch eviction to a non-blocking internal structure (or lock-protected key-order structure) rather than calling blocking Get() from the producer path.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@internal/queue/dedupe_queue.go` around lines 160 - 162, The issue is that the
Add() method uses a blocking q.queue.Get() call inside the eviction path after
checking q.queue.Len() == q.maxSize, which creates a race condition where
another goroutine could drain the queue between the length check and the Get()
call, causing the producer thread to block indefinitely. Replace the blocking
Get() approach with a non-blocking alternative for item eviction, such as
implementing a separate lock-protected data structure (like a map or ordered
list) that tracks key insertion order for determining which item to evict, or
switch to a non-blocking peek/remove operation that won't wait if the queue
becomes empty.

q.mu.Lock()
evicted := q.latestEvents[oldest]
delete(q.latestEvents, oldest)
if evicted != nil {
delete(q.eventKeys, evicted)
}
q.mu.Unlock()
q.queue.Done(oldest)
}
}

q.queue.Add(key)
select {
case q.notify <- struct{}{}:
default:
return
}
}

func (q *dedupeQueue) Get() (*event.Event, bool) {
key, shutdown := q.queue.Get()
if shutdown {
return nil, shutdown
}

q.mu.Lock()
ev := q.latestEvents[key]
delete(q.latestEvents, key)
q.mu.Unlock()

return ev, shutdown
}

func (q *dedupeQueue) Done(item *event.Event) {
q.mu.Lock()
key, ok := q.eventKeys[item]
if ok {
delete(q.eventKeys, item)
delete(q.latestEvents, key)
}
q.mu.Unlock()

if ok {
q.queue.Done(key)
}
}

func (q *dedupeQueue) ShutDown() {
q.queue.ShutDown()
}

func (q *dedupeQueue) Len() int {
return q.queue.Len()
}

// canDedupe returns true if the event type supports de-duplication.
func canDedupe(ev *event.Event) bool {
evType := ev.Type()
return evType == internalevent.SpecUpdate.String() || evType == internalevent.StatusUpdate.String()
}
Loading
Loading