@@ -7,4 +7,7 @@ import (
type Manager interface {
SaveAccessLog(ctx context.Context, proxyLog *AccessLogEntry) error
GetAllAccessLogs(ctx context.Context, accountID, userID string, filter *AccessLogFilter) ([]*AccessLogEntry, int64, error)
CleanupOldAccessLogs(ctx context.Context, retentionDays int) (int64, error)
StartPeriodicCleanup(ctx context.Context, retentionDays, cleanupIntervalHours int)
StopPeriodicCleanup()
}
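
For orientation, here is a minimal caller-side sketch of the lifecycle the three new methods imply: start the sweep once at boot, stop it on shutdown. The cleanupManager interface, the run helper, and the 30/24 values are illustrative assumptions, not taken from this PR:

package demo

import "context"

// cleanupManager mirrors the lifecycle methods added to accesslogs.Manager above.
type cleanupManager interface {
	StartPeriodicCleanup(ctx context.Context, retentionDays, cleanupIntervalHours int)
	StopPeriodicCleanup()
}

// run starts the background sweep (here: 30-day retention, daily interval)
// and guarantees it is stopped when the surrounding context ends.
func run(ctx context.Context, m cleanupManager) {
	m.StartPeriodicCleanup(ctx, 30, 24)
	defer m.StopPeriodicCleanup()

	<-ctx.Done() // stand-in for the real server lifetime
}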
@@ -3,6 +3,7 @@ package manager
import (
"context"
"strings"
"time"

log "github.com/sirupsen/logrus"

@@ -19,6 +20,7 @@ type managerImpl struct {
store store.Store
permissionsManager permissions.Manager
geo geolocation.Geolocation
cleanupCancel context.CancelFunc
}

func NewManager(store store.Store, permissionsManager permissions.Manager, geo geolocation.Geolocation) accesslogs.Manager {
@@ -78,6 +80,74 @@ func (m *managerImpl) GetAllAccessLogs(ctx context.Context, accountID, userID st
return logs, totalCount, nil
}

// CleanupOldAccessLogs deletes access logs older than the specified retention period
func (m *managerImpl) CleanupOldAccessLogs(ctx context.Context, retentionDays int) (int64, error) {
if retentionDays <= 0 {
log.WithContext(ctx).Debug("access log cleanup skipped: retention days is 0 or negative")
return 0, nil
}

cutoffTime := time.Now().AddDate(0, 0, -retentionDays)
deletedCount, err := m.store.DeleteOldAccessLogs(ctx, cutoffTime)
if err != nil {
log.WithContext(ctx).Errorf("failed to cleanup old access logs: %v", err)
return 0, err
}

if deletedCount > 0 {
log.WithContext(ctx).Infof("cleaned up %d access logs older than %d days", deletedCount, retentionDays)
}

return deletedCount, nil
}

// StartPeriodicCleanup starts a background goroutine that periodically cleans up old access logs
func (m *managerImpl) StartPeriodicCleanup(ctx context.Context, retentionDays, cleanupIntervalHours int) {
if retentionDays <= 0 {
log.WithContext(ctx).Debug("periodic access log cleanup disabled: retention days is 0 or negative")
return
}

if cleanupIntervalHours <= 0 {
cleanupIntervalHours = 24
}

cleanupCtx, cancel := context.WithCancel(ctx)
m.cleanupCancel = cancel

cleanupInterval := time.Duration(cleanupIntervalHours) * time.Hour
ticker := time.NewTicker(cleanupInterval)

go func() {
defer ticker.Stop()

// Run cleanup immediately on startup
log.WithContext(cleanupCtx).Infof("starting access log cleanup routine (retention: %d days, interval: %d hours)", retentionDays, cleanupIntervalHours)
if _, err := m.CleanupOldAccessLogs(cleanupCtx, retentionDays); err != nil {
log.WithContext(cleanupCtx).Errorf("initial access log cleanup failed: %v", err)
}

for {
select {
case <-cleanupCtx.Done():
log.WithContext(cleanupCtx).Info("stopping access log cleanup routine")
return
case <-ticker.C:
if _, err := m.CleanupOldAccessLogs(cleanupCtx, retentionDays); err != nil {
log.WithContext(cleanupCtx).Errorf("periodic access log cleanup failed: %v", err)
}
}
}
}()
}

// StopPeriodicCleanup stops the periodic cleanup routine
func (m *managerImpl) StopPeriodicCleanup() {
if m.cleanupCancel != nil {
m.cleanupCancel()
}
}

// resolveUserFilters converts user email/name filters to user ID filter
func (m *managerImpl) resolveUserFilters(ctx context.Context, accountID string, filter *accesslogs.AccessLogFilter) error {
if filter.UserEmail == nil && filter.UserName == nil {
management/internals/modules/reverseproxy/accesslogs/manager/manager_test.go
@@ -0,0 +1,281 @@
package manager

import (
"context"
"testing"
"time"

"github.com/golang/mock/gomock"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/netbirdio/netbird/management/server/store"
)

func TestCleanupOldAccessLogs(t *testing.T) {
tests := []struct {
name string
retentionDays int
setupMock func(*store.MockStore)
expectedCount int64
expectedError bool
}{
{
name: "cleanup logs older than retention period",
retentionDays: 30,
setupMock: func(mockStore *store.MockStore) {
mockStore.EXPECT().
DeleteOldAccessLogs(gomock.Any(), gomock.Any()).
DoAndReturn(func(ctx context.Context, olderThan time.Time) (int64, error) {
expectedCutoff := time.Now().AddDate(0, 0, -30)
timeDiff := olderThan.Sub(expectedCutoff)
if timeDiff.Abs() > time.Second {
t.Errorf("cutoff time not as expected: got %v, want ~%v", olderThan, expectedCutoff)
}
return 5, nil
})
Comment on lines +29 to +36 (Contributor):

⚠️ Potential issue | 🟡 Minor

Use t.Fatalf instead of t.Errorf inside DoAndReturn to stop execution on assertion failure.

t.Errorf marks the test as failed but lets the callback go on to return 5, nil, so subsequent assertions pass even when the cutoff check failed. Use t.Fatalf to abort immediately.

🐛 Proposed fix
-					if timeDiff.Abs() > time.Second {
-						t.Errorf("cutoff time not as expected: got %v, want ~%v", olderThan, expectedCutoff)
-					}
+					if timeDiff.Abs() > time.Second {
+						t.Fatalf("cutoff time not as expected: got %v, want ~%v", olderThan, expectedCutoff)
+					}

Apply the same fix to the identical pattern at Line 108 in TestCleanupWithExactBoundary.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@management/internals/modules/reverseproxy/accesslogs/manager/manager_test.go`
around lines 29-36: in the DoAndReturn callbacks used in the tests (the one in
TestCleanupOldAccessLogs and the identical callback in
TestCleanupWithExactBoundary), replace t.Errorf(...) with t.Fatalf(...) so the
test aborts immediately when the cutoff-time assertion fails; update the
anonymous DoAndReturn funcs that currently compute expectedCutoff/timeDiff and
call t.Errorf to call t.Fatalf with the same message, ensuring the callback does
not return a success value after a failed assertion.
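
One caveat worth keeping in mind with this fix: the testing package documents that FailNow (and therefore t.Fatalf) must be called from the goroutine running the test. That holds here, because the table-driven test calls CleanupOldAccessLogs synchronously, but it would not hold for a mock fired from the periodic-cleanup goroutine. A goroutine-safe alternative, sketched as an assumption rather than a required change, records the mismatch and fails afterwards:

// Hypothetical goroutine-safe variant: record the mismatch in the callback
// and assert it from the test goroutine afterwards. fmt would need importing;
// if the mock can fire on another goroutine, guard cutoffErr with a sync.Mutex.
var cutoffErr error
mockStore.EXPECT().
	DeleteOldAccessLogs(gomock.Any(), gomock.Any()).
	DoAndReturn(func(ctx context.Context, olderThan time.Time) (int64, error) {
		expectedCutoff := time.Now().AddDate(0, 0, -30)
		if diff := olderThan.Sub(expectedCutoff); diff.Abs() > time.Second {
			cutoffErr = fmt.Errorf("cutoff time: got %v, want ~%v", olderThan, expectedCutoff)
		}
		return 5, nil
	})

// ... invoke the code under test, then:
require.NoError(t, cutoffErr)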

},
expectedCount: 5,
expectedError: false,
},
{
name: "no logs to cleanup",
retentionDays: 30,
setupMock: func(mockStore *store.MockStore) {
mockStore.EXPECT().
DeleteOldAccessLogs(gomock.Any(), gomock.Any()).
Return(int64(0), nil)
},
expectedCount: 0,
expectedError: false,
},
{
name: "zero retention days skips cleanup",
retentionDays: 0,
setupMock: func(mockStore *store.MockStore) {
// No expectations - DeleteOldAccessLogs should not be called
},
expectedCount: 0,
expectedError: false,
},
{
name: "negative retention days skips cleanup",
retentionDays: -10,
setupMock: func(mockStore *store.MockStore) {
// No expectations - DeleteOldAccessLogs should not be called
},
expectedCount: 0,
expectedError: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

mockStore := store.NewMockStore(ctrl)
tt.setupMock(mockStore)

manager := &managerImpl{
store: mockStore,
}

ctx := context.Background()
deletedCount, err := manager.CleanupOldAccessLogs(ctx, tt.retentionDays)

if tt.expectedError {
require.Error(t, err)
} else {
require.NoError(t, err)
}
assert.Equal(t, tt.expectedCount, deletedCount, "unexpected number of deleted logs")
})
}
}

func TestCleanupWithExactBoundary(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

mockStore := store.NewMockStore(ctrl)

mockStore.EXPECT().
DeleteOldAccessLogs(gomock.Any(), gomock.Any()).
DoAndReturn(func(ctx context.Context, olderThan time.Time) (int64, error) {
expectedCutoff := time.Now().AddDate(0, 0, -30)
timeDiff := olderThan.Sub(expectedCutoff)
assert.Less(t, timeDiff.Abs(), time.Second, "cutoff time should be close to expected value")
return 1, nil
})

manager := &managerImpl{
store: mockStore,
}

ctx := context.Background()
deletedCount, err := manager.CleanupOldAccessLogs(ctx, 30)

require.NoError(t, err)
assert.Equal(t, int64(1), deletedCount)
}

func TestStartPeriodicCleanup(t *testing.T) {
t.Run("periodic cleanup disabled with zero retention", func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

mockStore := store.NewMockStore(ctrl)
// No expectations - cleanup should not run

manager := &managerImpl{
store: mockStore,
}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

manager.StartPeriodicCleanup(ctx, 0, 1)

time.Sleep(100 * time.Millisecond)

// If DeleteOldAccessLogs was called, the test will fail due to unexpected call
})

t.Run("periodic cleanup runs immediately on start", func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

mockStore := store.NewMockStore(ctrl)

mockStore.EXPECT().
DeleteOldAccessLogs(gomock.Any(), gomock.Any()).
Return(int64(2), nil).
Times(1)

manager := &managerImpl{
store: mockStore,
}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

manager.StartPeriodicCleanup(ctx, 30, 24)

time.Sleep(200 * time.Millisecond)

// Expectations verified by gomock on defer ctrl.Finish()
})

t.Run("periodic cleanup stops on context cancel", func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

mockStore := store.NewMockStore(ctrl)

mockStore.EXPECT().
DeleteOldAccessLogs(gomock.Any(), gomock.Any()).
Return(int64(1), nil).
Times(1)

manager := &managerImpl{
store: mockStore,
}

ctx, cancel := context.WithCancel(context.Background())

manager.StartPeriodicCleanup(ctx, 30, 24)

time.Sleep(100 * time.Millisecond)

cancel()

time.Sleep(200 * time.Millisecond)

})

t.Run("cleanup interval defaults to 24 hours when invalid", func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

mockStore := store.NewMockStore(ctrl)

mockStore.EXPECT().
DeleteOldAccessLogs(gomock.Any(), gomock.Any()).
Return(int64(0), nil).
Times(1)

manager := &managerImpl{
store: mockStore,
}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

manager.StartPeriodicCleanup(ctx, 30, 0)

time.Sleep(100 * time.Millisecond)

manager.StopPeriodicCleanup()
})

t.Run("cleanup interval uses configured hours", func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

mockStore := store.NewMockStore(ctrl)

mockStore.EXPECT().
DeleteOldAccessLogs(gomock.Any(), gomock.Any()).
Return(int64(3), nil).
Times(1)

manager := &managerImpl{
store: mockStore,
}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

manager.StartPeriodicCleanup(ctx, 30, 12)

time.Sleep(100 * time.Millisecond)

manager.StopPeriodicCleanup()
})
}

func TestStopPeriodicCleanup(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

mockStore := store.NewMockStore(ctrl)

mockStore.EXPECT().
DeleteOldAccessLogs(gomock.Any(), gomock.Any()).
Return(int64(1), nil).
Times(1)

manager := &managerImpl{
store: mockStore,
}

ctx := context.Background()

manager.StartPeriodicCleanup(ctx, 30, 24)

time.Sleep(100 * time.Millisecond)

manager.StopPeriodicCleanup()

time.Sleep(200 * time.Millisecond)

// Expectations verified by gomock - would fail if more than 1 call happened
}

func TestStopPeriodicCleanup_NotStarted(t *testing.T) {
manager := &managerImpl{}

// Should not panic if cleanup was never started
manager.StopPeriodicCleanup()
}
5 changes: 5 additions & 0 deletions management/internals/server/boot.go
@@ -197,6 +197,11 @@ func (s *BaseServer) ProxyTokenStore() *nbgrpc.OneTimeTokenStore {
func (s *BaseServer) AccessLogsManager() accesslogs.Manager {
return Create(s, func() accesslogs.Manager {
accessLogManager := accesslogsmanager.NewManager(s.Store(), s.PermissionsManager(), s.GeoLocationManager())
accessLogManager.StartPeriodicCleanup(
context.Background(),
s.Config.ReverseProxy.AccessLogRetentionDays,
s.Config.ReverseProxy.AccessLogCleanupIntervalHours,
)
return accessLogManager
})
}
Comment on lines 197 to 207 (Contributor):

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Scripts executed against netbirdio/netbird (command output omitted):

# Check the directory structure and locate the file
find . -name "boot.go" -type f | head -20

# Search for StopPeriodicCleanup usage in the codebase
rg -n "StopPeriodicCleanup" --type go -A 2 -B 2

# Check boot.go around the AccessLogsManager function and line 126 for proxyAuthClose
rg -n "AccessLogsManager|proxyAuthClose" management/internals/server/boot.go -A 5 -B 2

# Look at the BaseServer struct definition
rg -n "type BaseServer struct" --type go -A 20

# Search for BaseServer Close or Shutdown methods
rg -n "func.*BaseServer.*Close|func.*BaseServer.*Shutdown" --type go -A 10
rg -n "func.*Close|func.*Shutdown" management/internals/server/server.go -A 15

# Check the size of boot.go and the full BaseServer struct definition
wc -l management/internals/server/boot.go
cat -n management/internals/server/server.go | head -100

# Search for where proxyAuthClose is actually used/called
rg -n "proxyAuthClose" management/internals/server/ -A 3 -B 3

# Look at the full AccessLogsManager function and the Stop method in context
sed -n '195,210p' management/internals/server/boot.go
sed -n '218,235p' management/internals/server/server.go

# Check how the Create pattern works and whether AccessLogsManager is cached
rg -n "func Create" management/internals/server/boot.go -A 10
rg -n "accessLogsManager|AccessLogsManager" management/internals/server/ --type go


context.Background() + missing StopPeriodicCleanup() wiring causes a goroutine leak

StartPeriodicCleanup is passed context.Background(), so its internal goroutine can only be stopped via StopPeriodicCleanup(). However, StopPeriodicCleanup is never called in any production shutdown path. This results in:

  1. Goroutine leak during graceful server shutdown — the cleanup ticker goroutine runs until the OS kills the process.
  2. Test interference — each test that initializes AccessLogsManager spawns a goroutine that outlives the test.

The proxyAuthClose pattern at line 126 demonstrates the existing precedent for wiring cleanup into server shutdown. StopPeriodicCleanup must follow the same pattern: store the manager instance on BaseServer and call its StopPeriodicCleanup from the Stop() method.

🔧 Suggested fix

Add an accessLogManager field to BaseServer and wire StopPeriodicCleanup into the shutdown sequence:

type BaseServer struct {
	// ... existing fields
	proxyAuthClose func()
+	accessLogManager accesslogs.Manager
}

Then in AccessLogsManager():

 func (s *BaseServer) AccessLogsManager() accesslogs.Manager {
 	return Create(s, func() accesslogs.Manager {
+		if s.accessLogManager == nil {
 			accessLogManager := accesslogsmanager.NewManager(s.Store(), s.PermissionsManager(), s.GeoLocationManager())
 			accessLogManager.StartPeriodicCleanup(
 				context.Background(),
 				s.Config.ReverseProxy.AccessLogRetentionDays,
 				s.Config.ReverseProxy.AccessLogCleanupIntervalHours,
 			)
+			s.accessLogManager = accessLogManager
+		}
-		return accessLogManager
+		return s.accessLogManager
 	})
 }

And in the Stop() method (after ReverseProxyGRPCServer().Close()):

 if s.proxyAuthClose != nil {
 	s.proxyAuthClose()
 	s.proxyAuthClose = nil
 }
+if s.accessLogManager != nil {
+	s.accessLogManager.StopPeriodicCleanup()
+}
 _ = s.Store().Close(ctx)
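
Read together, a consolidated sketch of boot.go after the fix (assembled from the diff fragments above; the Create signature and the surrounding Stop body are assumed, not verified against the repository):

// Memoize the manager on BaseServer so Stop() can reach it later.
func (s *BaseServer) AccessLogsManager() accesslogs.Manager {
	return Create(s, func() accesslogs.Manager {
		if s.accessLogManager == nil {
			m := accesslogsmanager.NewManager(s.Store(), s.PermissionsManager(), s.GeoLocationManager())
			m.StartPeriodicCleanup(
				context.Background(),
				s.Config.ReverseProxy.AccessLogRetentionDays,
				s.Config.ReverseProxy.AccessLogCleanupIntervalHours,
			)
			s.accessLogManager = m
		}
		return s.accessLogManager
	})
}

// In (s *BaseServer) Stop, after ReverseProxyGRPCServer().Close():
if s.proxyAuthClose != nil {
	s.proxyAuthClose()
	s.proxyAuthClose = nil
}
if s.accessLogManager != nil {
	s.accessLogManager.StopPeriodicCleanup()
}
_ = s.Store().Close(ctx)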
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@management/internals/server/boot.go` around lines 197 - 207,
AccessLogsManager currently starts a long-running cleanup goroutine with
StartPeriodicCleanup(context.Background(), ...) and never wires
StopPeriodicCleanup into shutdown, causing goroutine leaks; fix it by adding an
accessLogManager field to BaseServer, having AccessLogsManager() store the
created manager instance in that field (instead of only returning it), and
calling its StopPeriodicCleanup() from BaseServer.Stop() (following the
proxyAuthClose pattern) so the cleanup goroutine is cancelled during graceful
shutdown and in tests; reference BaseServer, AccessLogsManager(),
StartPeriodicCleanup, StopPeriodicCleanup, proxyAuthClose, and Stop().
