Skip to content

Commit c900862

Browse files
committed
fix: consolidate garbage collection for namespaced and cluster-scoped workspaces
Fixes the bug in the garbage collector that caused unintended workspace directory deletions. Previously, both cluster-scoped and namespaced workspace controllers started their own GC instances on the same root directory. Each GC only checked their respective Workspace MR instances, causing race and deletions of workspace dirs of each other, e.g. cluster-scoped GC deleting workspace directories and vice versa. The fix consolidates the GC logic to consider both Workspace MR types and runs a centralized GC controller, per root directory ( e.g. `/tofu` and `/tmp/tofu`) It also considers edge cases due to potential usage of SafeStart, where namespaced or cluster-scoped Workspace CRDs might not be available immediately or not used at all. - GC logic is now controller-runtime `manager.Runnable` with proper shutdown context - centralized gc.Setup() with ensuring "run once", whichever controller - Added tests for CRD gating scenarios Signed-off-by: Erhan Cagirici <erhan@upbound.io>
1 parent 24d6b81 commit c900862

6 files changed

Lines changed: 318 additions & 42 deletions

File tree

cmd/provider/main.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ import (
4141
namespacedapis "github.com/upbound/provider-opentofu/apis/namespaced"
4242
"github.com/upbound/provider-opentofu/internal/bootcheck"
4343
clustercontroller "github.com/upbound/provider-opentofu/internal/controller/cluster"
44+
"github.com/upbound/provider-opentofu/internal/controller/cluster/workspace"
45+
"github.com/upbound/provider-opentofu/internal/controller/gc"
4446
namespacedcontroller "github.com/upbound/provider-opentofu/internal/controller/namespaced"
4547
"github.com/upbound/provider-opentofu/internal/features"
4648
)
@@ -144,6 +146,9 @@ func main() {
144146
log.Info("Beta feature enabled", "flag", features.EnableBetaManagementPolicies)
145147
}
146148

149+
// NOTE: cluster-scoped and namespaced Workspaces share a common
150+
// workspace root directory. Update GC setup if they diverge
151+
kingpin.FatalIfError(gc.Setup(mgr, workspace.GetTfDir(), log), "cannot setup Workspace garbage collector controller")
147152
canSafeStart, err := canWatchCRD(ctx, mgr)
148153
kingpin.FatalIfError(err, "SafeStart precheck failed")
149154
if canSafeStart {

internal/controller/cluster/workspace/workspace.go

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ import (
3939
"github.com/upbound/provider-opentofu/internal/clients"
4040
"github.com/upbound/provider-opentofu/internal/features"
4141
"github.com/upbound/provider-opentofu/internal/opentofu"
42-
"github.com/upbound/provider-opentofu/internal/workdir"
4342
)
4443

4544
const (
@@ -90,6 +89,11 @@ func envVarFallback(envvar string, fallback string) string {
9089

9190
var tfDir = envVarFallback("XP_TF_DIR", "/tofu")
9291

92+
// GetTfDir returns the OpenTofu working directory root.
93+
func GetTfDir() string {
94+
return tfDir
95+
}
96+
9397
type tofuclient interface {
9498
Init(ctx context.Context, o ...opentofu.InitOption) error
9599
Workspace(ctx context.Context, name string) error
@@ -107,12 +111,6 @@ func Setup(mgr ctrl.Manager, o controller.Options, timeout, pollJitter time.Dura
107111
name := managed.ControllerName(v1beta1.WorkspaceGroupKind)
108112

109113
fs := afero.Afero{Fs: afero.NewOsFs()}
110-
gcWorkspace := workdir.NewGarbageCollector(mgr.GetClient(), tfDir, workdir.WithFs(fs), workdir.WithLogger(o.Logger))
111-
go gcWorkspace.Run(context.TODO(), false)
112-
113-
gcTmp := workdir.NewGarbageCollector(mgr.GetClient(), filepath.Join("/tmp", tfDir), workdir.WithFs(fs), workdir.WithLogger(o.Logger))
114-
go gcTmp.Run(context.TODO(), false)
115-
116114
c := &connector{
117115
kube: mgr.GetClient(),
118116
usage: resource.NewLegacyProviderConfigUsageTracker(mgr.GetClient(), &v1beta1.ProviderConfigUsage{}),

internal/controller/gc/gc.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/*
2+
SPDX-FileCopyrightText: 2026 Upbound Inc. <https://upbound.io>
3+
4+
SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
package gc
8+
9+
import (
10+
"path/filepath"
11+
12+
"github.com/crossplane/crossplane-runtime/v2/pkg/logging"
13+
"github.com/spf13/afero"
14+
ctrl "sigs.k8s.io/controller-runtime"
15+
16+
"github.com/upbound/provider-opentofu/internal/workdir"
17+
)
18+
19+
// Setup initializes and registers the garbage collectors with the manager.
20+
//
21+
// Two GC instances are created:
22+
// - One for the main workspace directory containing workspace roots
23+
// - One for /tmp directory containing temporary workspace files
24+
//
25+
// Each GC queries both cluster-scoped and namespaced workspaces to determine
26+
// which directories can be safely deleted.
27+
func Setup(mgr ctrl.Manager, tfDir string, logger logging.Logger) error {
28+
fs := afero.Afero{Fs: afero.NewOsFs()}
29+
30+
// GC for main workspace directory
31+
gcWorkspace := workdir.NewGarbageCollector(
32+
mgr.GetClient(),
33+
tfDir,
34+
workdir.WithFs(fs),
35+
workdir.WithLogger(logger),
36+
)
37+
if err := mgr.Add(gcWorkspace); err != nil {
38+
return err
39+
}
40+
41+
// GC for temporary workspace directory
42+
gcTmp := workdir.NewGarbageCollector(
43+
mgr.GetClient(),
44+
filepath.Join("/tmp", tfDir),
45+
workdir.WithFs(fs),
46+
workdir.WithLogger(logger),
47+
)
48+
if err := mgr.Add(gcTmp); err != nil {
49+
return err
50+
}
51+
52+
logger.Debug("Workspace garbage collectors initialized successfully")
53+
54+
return nil
55+
}

internal/controller/namespaced/workspace/workspace.go

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ import (
3939
"github.com/upbound/provider-opentofu/internal/clients"
4040
"github.com/upbound/provider-opentofu/internal/features"
4141
"github.com/upbound/provider-opentofu/internal/opentofu"
42-
"github.com/upbound/provider-opentofu/internal/workdir"
4342
)
4443

4544
const (
@@ -90,6 +89,11 @@ func envVarFallback(envvar string, fallback string) string {
9089

9190
var tfDir = envVarFallback("XP_TF_DIR", "/tofu")
9291

92+
// GetTfDir returns the OpenTofu working directory root.
93+
func GetTfDir() string {
94+
return tfDir
95+
}
96+
9397
type tofuclient interface {
9498
Init(ctx context.Context, o ...opentofu.InitOption) error
9599
Workspace(ctx context.Context, name string) error
@@ -107,12 +111,6 @@ func Setup(mgr ctrl.Manager, o controller.Options, timeout, pollJitter time.Dura
107111
name := managed.ControllerName(v1beta1.WorkspaceGroupKind)
108112

109113
fs := afero.Afero{Fs: afero.NewOsFs()}
110-
gcWorkspace := workdir.NewGarbageCollector(mgr.GetClient(), tfDir, workdir.WithFs(fs), workdir.WithLogger(o.Logger))
111-
go gcWorkspace.Run(context.TODO(), true)
112-
113-
gcTmp := workdir.NewGarbageCollector(mgr.GetClient(), filepath.Join("/tmp", tfDir), workdir.WithFs(fs), workdir.WithLogger(o.Logger))
114-
go gcTmp.Run(context.TODO(), true)
115-
116114
c := &connector{
117115
kube: mgr.GetClient(),
118116
usage: resource.NewProviderConfigUsageTracker(mgr.GetClient(), &v1beta1.ProviderConfigUsage{}),

internal/workdir/workdir.go

Lines changed: 69 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ import (
1616
"github.com/google/uuid"
1717
"github.com/pkg/errors"
1818
"github.com/spf13/afero"
19+
apierrors "k8s.io/apimachinery/pkg/api/errors"
20+
"k8s.io/apimachinery/pkg/api/meta"
1921
"sigs.k8s.io/controller-runtime/pkg/client"
2022

2123
clusterv1beta1 "github.com/upbound/provider-opentofu/apis/cluster/v1beta1"
@@ -77,15 +79,19 @@ func NewGarbageCollector(c client.Client, parentDir string, o ...GarbageCollecto
7779
return gc
7880
}
7981

80-
// Run the garbage collector. Blocks until the supplied context is done.
81-
func (gc *GarbageCollector) Run(ctx context.Context, namespaced bool) {
82+
// Start runs the garbage collector. Blocks until the supplied context
83+
// is done.
84+
//
85+
// Implements manager.Runnable to allow controller-runtime
86+
// managers can manage the garbage collector.
87+
func (gc *GarbageCollector) Start(ctx context.Context) error {
8288
t := time.NewTicker(gc.interval)
8389
for {
8490
select {
8591
case <-ctx.Done():
86-
return
92+
return nil
8793
case <-t.C:
88-
if err := gc.collect(ctx, namespaced); err != nil {
94+
if err := gc.collect(ctx); err != nil {
8995
gc.log.Info("Garbage collection failed", "error", err)
9096
}
9197
}
@@ -97,26 +103,73 @@ func isUUID(u string) bool {
97103
return err == nil
98104
}
99105

100-
func (gc *GarbageCollector) collect(ctx context.Context, namespaced bool) error {
106+
func (gc *GarbageCollector) collect(ctx context.Context) error { //nolint:gocyclo // easier to follow as a unit
107+
gc.log.Debug("Running workspace garbage collection", "dir", gc.parentDir)
101108
exists := map[string]bool{}
102-
103-
if namespaced {
104-
l := &namespacedv1beta1.WorkspaceList{}
105-
if err := gc.kube.List(ctx, l); err != nil {
106-
return errors.Wrap(err, errListWorkspaces)
109+
listedAny := false
110+
111+
// List cluster-scoped workspaces
112+
// Note: cluster-scoped Workspace CRD may not be available
113+
// (e.g. disabled via ManagedResourceActivationPolicies)
114+
clusterList := &clusterv1beta1.WorkspaceList{}
115+
if err := gc.kube.List(ctx, clusterList); err != nil {
116+
switch {
117+
case apierrors.IsNotFound(err), meta.IsNoMatchError(err):
118+
// cluster-scoped Workspace CRD not installed, so no instances (safe to continue)
119+
gc.log.Debug("Cluster-scoped Workspace CRD not installed, skipping")
120+
case apierrors.IsForbidden(err):
121+
// cluster-scoped Workspaces might still exist, cannot safely determine
122+
gc.log.Debug("No RBAC permissions to list cluster-scoped workspaces, aborting garbage collection")
123+
return err
124+
default:
125+
// cluster-scoped Workspaces might still exist, cannot safely determine
126+
gc.log.Debug("Failed to list cluster-scoped workspaces, aborting garbage collection")
127+
return err
107128
}
108-
for _, ws := range l.Items {
129+
} else {
130+
listedAny = true
131+
for _, ws := range clusterList.Items {
109132
exists[string(ws.GetUID())] = true
110133
}
111-
} else {
112-
l := &clusterv1beta1.WorkspaceList{}
113-
if err := gc.kube.List(ctx, l); err != nil {
114-
return errors.Wrap(err, errListWorkspaces)
134+
}
135+
136+
// List namespaced workspaces
137+
// Note: namespaced `Workspace` CRD may not be installed
138+
// (e.g. disabled via ManagedResourceActivationPolicies)
139+
namespacedList := &namespacedv1beta1.WorkspaceList{}
140+
if err := gc.kube.List(ctx, namespacedList); err != nil {
141+
switch {
142+
case apierrors.IsNotFound(err), meta.IsNoMatchError(err):
143+
// no workspaces of namespaced type can exist (safe to continue)
144+
gc.log.Debug("Namespaced Workspace CRD not installed, skipping")
145+
case apierrors.IsForbidden(err):
146+
// Namespaced workspaces might exist, log and abort GC
147+
gc.log.Debug("No RBAC permissions to list namespaced workspaces, aborting garbage collection")
148+
return err
149+
default:
150+
// cannot safely determine whether the workspace, abort GC
151+
gc.log.Debug("Failed to list namespaced workspaces, aborting garbage collection")
152+
return err
115153
}
116-
for _, ws := range l.Items {
154+
} else {
155+
listedAny = true
156+
for _, ws := range namespacedList.Items {
117157
exists[string(ws.GetUID())] = true
118158
}
119159
}
160+
161+
// we reach this path IFF apiserver returned `NotFound` or `NoKindMatchError`
162+
// for both List calls of cluster-scoped and namespaced Workspace MRs,
163+
// i.e. both APIs does not exist.
164+
//
165+
// This could potentially happen with a misconfigured
166+
// ManagedResourceActivationPolicy that disabled both MR APIs.
167+
// We avoid any GC here just to be safe.
168+
if !listedAny {
169+
gc.log.Debug("No Workspace MR APIs available, skipping garbage collection")
170+
return nil
171+
}
172+
120173
fis, err := gc.fs.ReadDir(gc.parentDir)
121174
if err != nil {
122175
return errors.Wrapf(err, errFmtReadDir, gc.parentDir)

0 commit comments

Comments
 (0)