Skip to content

Commit 3caa357

Browse files
committed
fix: issue with error not propagating properly
fix: issue with error not propagating properly fix: issue with error not propagating properly fix: issue with error not propagating properly chore: goimport chore: goimport
1 parent c0fd506 commit 3caa357

File tree

12 files changed

+109
-54
lines changed

12 files changed

+109
-54
lines changed

cmd/katalyst-agent/app/options/qrm/statedirectory/statedirectory_base.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,13 @@ type StateDirectoryOptions struct {
2626
StateFileDirectory string
2727
InMemoryStateFileDirectory string
2828
EnableInMemoryState bool
29+
HasPreStop bool
2930
}
3031

3132
func NewStateDirectoryOptions() *StateDirectoryOptions {
3233
return &StateDirectoryOptions{
33-
StateFileDirectory: "/var/lib/katalyst/qrm_advisor",
34+
StateFileDirectory: "/var/lib/katalyst/qrm_advisor",
35+
InMemoryStateFileDirectory: "/dev/shm/qrm/state",
3436
}
3537
}
3638

@@ -42,11 +44,14 @@ func (o *StateDirectoryOptions) AddFlags(fss *cliflag.NamedFlagSets) {
4244
o.InMemoryStateFileDirectory, "The in memory directory to store the state file.")
4345
fs.BoolVar(&o.EnableInMemoryState, "qrm-enable-in-memory-state",
4446
o.EnableInMemoryState, "if set true, the state will be stored in the in-memory directory.")
47+
fs.BoolVar(&o.HasPreStop, "qrm-has-pre-stop",
48+
o.HasPreStop, "if set true, there is a pre-stop script in place.")
4549
}
4650

4751
func (o *StateDirectoryOptions) ApplyTo(conf *statedirectory.StateDirectoryConfiguration) error {
4852
conf.StateFileDirectory = o.StateFileDirectory
4953
conf.InMemoryStateFileDirectory = o.InMemoryStateFileDirectory
5054
conf.EnableInMemoryState = o.EnableInMemoryState
55+
conf.HasPreStop = o.HasPreStop
5156
return nil
5257
}

pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/cpu_eviciton_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,11 @@ import (
2323
"testing"
2424
"time"
2525

26-
"github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/statedirectory"
27-
2826
"github.com/stretchr/testify/require"
2927

3028
qrmstate "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state"
3129
"github.com/kubewharf/katalyst-core/pkg/config"
30+
"github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/statedirectory"
3231
"github.com/kubewharf/katalyst-core/pkg/metaserver"
3332
"github.com/kubewharf/katalyst-core/pkg/metaserver/agent"
3433
"github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric"

pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_test.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@ import (
2424
"testing"
2525
"time"
2626

27-
"github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/statedirectory"
28-
2927
"github.com/stretchr/testify/assert"
3028
"github.com/stretchr/testify/require"
3129
v1 "k8s.io/api/core/v1"
@@ -43,6 +41,7 @@ import (
4341
qrmstate "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state"
4442
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/util"
4543
"github.com/kubewharf/katalyst-core/pkg/config"
44+
"github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/statedirectory"
4645
"github.com/kubewharf/katalyst-core/pkg/consts"
4746
"github.com/kubewharf/katalyst-core/pkg/metaserver"
4847
"github.com/kubewharf/katalyst-core/pkg/metaserver/agent"
@@ -64,7 +63,9 @@ const (
6463
defaultReservedForSystem = 0
6564
)
6665

67-
func makeMetaServer(metricsFetcher metrictypes.MetricsFetcher, cpuTopology *machine.CPUTopology) *metaserver.MetaServer {
66+
func makeMetaServer(
67+
metricsFetcher metrictypes.MetricsFetcher, cpuTopology *machine.CPUTopology,
68+
) *metaserver.MetaServer {
6869
metaServer := &metaserver.MetaServer{
6970
MetaAgent: &agent.MetaAgent{},
7071
}
@@ -77,7 +78,8 @@ func makeMetaServer(metricsFetcher metrictypes.MetricsFetcher, cpuTopology *mach
7778
return metaServer
7879
}
7980

80-
func makeConf(metricRingSize int, gracePeriod int64, loadUpperBoundRatio, loadLowerBoundRatio,
81+
func makeConf(
82+
metricRingSize int, gracePeriod int64, loadUpperBoundRatio, loadLowerBoundRatio,
8183
loadThresholdMetPercentage float64, reservedForReclaim, reservedForAllocate string, reservedForSystem int,
8284
) *config.Configuration {
8385
conf := config.NewConfiguration()

pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/canonical/optimizer_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@ import (
2222
"os"
2323
"testing"
2424

25-
"github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/statedirectory"
26-
2725
"github.com/stretchr/testify/assert"
2826
"github.com/stretchr/testify/require"
2927
pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
@@ -36,6 +34,7 @@ import (
3634
hintoptimizerutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/util"
3735
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state"
3836
"github.com/kubewharf/katalyst-core/pkg/config"
37+
"github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/statedirectory"
3938
"github.com/kubewharf/katalyst-core/pkg/metrics"
4039
"github.com/kubewharf/katalyst-core/pkg/util/machine"
4140
)

pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state_checkpoint.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package state
1818

1919
import (
20+
stdErrors "errors"
2021
"fmt"
2122
"path"
2223
"reflect"
@@ -62,9 +63,9 @@ func NewCheckpointState(
6263
generateMachineStateFunc GenerateMachineStateFromPodEntriesFunc,
6364
emitter metrics.MetricEmitter,
6465
) (State, error) {
65-
currentStateDir, otherStateDir := stateDirectoryConfig.GetCurrentAndOtherStateFileDirectory()
66-
// If there is an empty otherStateDir, this means that there is no pre-stop script in place
67-
hasPreStop := otherStateDir != ""
66+
currentStateDir, otherStateDir := stateDirectoryConfig.GetCurrentAndPreviousStateFileDirectory()
67+
hasPreStop := stateDirectoryConfig.HasPreStop
68+
6869
qrmCheckpointManager, err := qrmcheckpointmanager.NewQRMCheckpointManager(currentStateDir, otherStateDir, checkpointName, "cpu_plugin")
6970
if err != nil {
7071
return nil, fmt.Errorf("failed to initialize checkpoint manager: %v", err)
@@ -98,10 +99,10 @@ func (sc *stateCheckpoint) restoreState(topology *machine.CPUTopology) error {
9899

99100
checkpoint := NewCPUPluginCheckpoint()
100101
if err = sc.qrmCheckpointManager.GetCurrentCheckpoint(sc.checkpointName, checkpoint, true); err != nil {
101-
if err == errors.ErrCheckpointNotFound {
102+
if stdErrors.Is(err, errors.ErrCheckpointNotFound) {
102103
// We cannot find checkpoint, so it is possible that previous checkpoint was stored in either disk or memory
103104
return sc.tryMigrateState(topology, checkpoint)
104-
} else if err == errors.ErrCorruptCheckpoint {
105+
} else if stdErrors.Is(err, errors.ErrCorruptCheckpoint) {
105106
if !sc.skipStateCorruption {
106107
return err
107108
}
@@ -164,11 +165,11 @@ func (sc *stateCheckpoint) tryMigrateState(
164165
}
165166

166167
if err := sc.qrmCheckpointManager.GetPreviousCheckpoint(sc.checkpointName, checkpoint); err != nil {
167-
if err == errors.ErrCheckpointNotFound {
168+
if stdErrors.Is(err, errors.ErrCheckpointNotFound) {
168169
// Old checkpoint file is not found, so we just store state in new checkpoint
169170
general.Infof("[cpu_plugin] checkpoint %v doesn't exist, create it", sc.checkpointName)
170171
return sc.storeState()
171-
} else if err == errors.ErrCorruptCheckpoint {
172+
} else if stdErrors.Is(err, errors.ErrCorruptCheckpoint) {
172173
if !sc.skipStateCorruption {
173174
return err
174175
}

pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state_checkpoint.go

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,24 +17,23 @@ limitations under the License.
1717
package state
1818

1919
import (
20+
stdErrors "errors"
2021
"fmt"
2122
"path"
2223
"reflect"
2324
"sync"
2425
"time"
2526

26-
"github.com/kubewharf/katalyst-core/pkg/util/qrmcheckpointmanager"
27-
28-
"github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/statedirectory"
29-
3027
info "github.com/google/cadvisor/info/v1"
3128
v1 "k8s.io/api/core/v1"
3229
"k8s.io/klog/v2"
3330
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
3431

32+
"github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/statedirectory"
3533
"github.com/kubewharf/katalyst-core/pkg/metrics"
3634
"github.com/kubewharf/katalyst-core/pkg/util/general"
3735
"github.com/kubewharf/katalyst-core/pkg/util/machine"
36+
"github.com/kubewharf/katalyst-core/pkg/util/qrmcheckpointmanager"
3837
)
3938

4039
const (
@@ -65,9 +64,9 @@ func NewCheckpointState(
6564
reservedMemory map[v1.ResourceName]map[int]uint64, skipStateCorruption bool,
6665
emitter metrics.MetricEmitter,
6766
) (State, error) {
68-
currentStateDir, otherStateDir := stateDirectoryConfig.GetCurrentAndOtherStateFileDirectory()
69-
// If there is an empty otherStateDir, this means that there is no pre-stop script in place
70-
hasPreStop := otherStateDir != ""
67+
currentStateDir, otherStateDir := stateDirectoryConfig.GetCurrentAndPreviousStateFileDirectory()
68+
hasPreStop := stateDirectoryConfig.HasPreStop
69+
7170
qrmCheckpointManager, err := qrmcheckpointmanager.NewQRMCheckpointManager(currentStateDir, otherStateDir, checkpointName, "memory_plugin")
7271
if err != nil {
7372
return nil, fmt.Errorf("failed to initialize checkpoint manager: %v", err)
@@ -107,10 +106,10 @@ func (sc *stateCheckpoint) restoreState(
107106

108107
checkpoint := NewMemoryPluginCheckpoint()
109108
if err = sc.qrmCheckpointManager.GetCurrentCheckpoint(sc.checkpointName, checkpoint, true); err != nil {
110-
if err == errors.ErrCheckpointNotFound {
109+
if stdErrors.Is(err, errors.ErrCheckpointNotFound) {
111110
// We cannot find checkpoint, so it is possible that previous checkpoint was stored in either disk or memory
112111
return sc.tryMigrateState(machineInfo, reservedMemory, checkpoint)
113-
} else if err == errors.ErrCorruptCheckpoint {
112+
} else if stdErrors.Is(err, errors.ErrCorruptCheckpoint) {
114113
if !sc.skipStateCorruption {
115114
return err
116115
}
@@ -180,11 +179,11 @@ func (sc *stateCheckpoint) tryMigrateState(
180179
}
181180

182181
if err := sc.qrmCheckpointManager.GetPreviousCheckpoint(sc.checkpointName, checkpoint); err != nil {
183-
if err == errors.ErrCheckpointNotFound {
182+
if stdErrors.Is(err, errors.ErrCheckpointNotFound) {
184183
// Old checkpoint file is not found, so we just store state in new checkpoint
185184
general.Infof("[memory_plugin] checkpoint %v doesn't exist, create it", sc.checkpointName)
186185
return sc.storeState()
187-
} else if err == errors.ErrCorruptCheckpoint {
186+
} else if stdErrors.Is(err, errors.ErrCorruptCheckpoint) {
188187
if !sc.skipStateCorruption {
189188
return err
190189
}

pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,19 @@ import (
2323
"path/filepath"
2424
"testing"
2525

26+
info "github.com/google/cadvisor/info/v1"
27+
"github.com/stretchr/testify/assert"
28+
"github.com/stretchr/testify/require"
2629
v1 "k8s.io/api/core/v1"
2730
pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
2831
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
2932
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
3033

31-
info "github.com/google/cadvisor/info/v1"
3234
"github.com/kubewharf/katalyst-api/pkg/consts"
3335
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate"
3436
"github.com/kubewharf/katalyst-core/pkg/metrics"
3537
"github.com/kubewharf/katalyst-core/pkg/util/machine"
3638
"github.com/kubewharf/katalyst-core/pkg/util/qrmcheckpointmanager"
37-
"github.com/stretchr/testify/assert"
38-
"github.com/stretchr/testify/require"
3939
)
4040

4141
func TestGetReadonlyState(t *testing.T) {

pkg/agent/qrm-plugins/network/state/state_checkpoint.go

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,26 +17,23 @@ limitations under the License.
1717
package state
1818

1919
import (
20+
stdErrors "errors"
2021
"fmt"
2122
"path"
2223
"reflect"
2324
"sync"
2425
"time"
2526

26-
"github.com/kubewharf/katalyst-core/pkg/util/qrmcheckpointmanager"
27-
28-
"github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/statedirectory"
29-
30-
"k8s.io/klog/v2"
31-
3227
info "github.com/google/cadvisor/info/v1"
28+
"k8s.io/klog/v2"
29+
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
3330

3431
"github.com/kubewharf/katalyst-core/pkg/config/agent/qrm"
32+
"github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/statedirectory"
3533
"github.com/kubewharf/katalyst-core/pkg/metrics"
3634
"github.com/kubewharf/katalyst-core/pkg/util/general"
3735
"github.com/kubewharf/katalyst-core/pkg/util/machine"
38-
39-
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
36+
"github.com/kubewharf/katalyst-core/pkg/util/qrmcheckpointmanager"
4037
)
4138

4239
const (
@@ -70,9 +67,9 @@ func NewCheckpointState(
7067
machineInfo *info.MachineInfo, nics []machine.InterfaceInfo, reservedBandwidth map[string]uint32,
7168
skipStateCorruption bool, emitter metrics.MetricEmitter,
7269
) (State, error) {
73-
currentStateDir, otherStateDir := stateDirectoryConfig.GetCurrentAndOtherStateFileDirectory()
74-
// If there is an empty otherStateDir, this means that there is no pre-stop script in place
75-
hasPreStop := otherStateDir != ""
70+
currentStateDir, otherStateDir := stateDirectoryConfig.GetCurrentAndPreviousStateFileDirectory()
71+
hasPreStop := stateDirectoryConfig.HasPreStop
72+
7673
qrmCheckpointManager, err := qrmcheckpointmanager.NewQRMCheckpointManager(currentStateDir, otherStateDir, checkpointName, "network_plugin")
7774
if err != nil {
7875
return nil, fmt.Errorf("failed to initialize checkpoint manager: %v", err)
@@ -113,10 +110,10 @@ func (sc *stateCheckpoint) restoreState(
113110

114111
checkpoint := NewNetworkPluginCheckpoint()
115112
if err = sc.qrmCheckpointManager.GetCurrentCheckpoint(sc.checkpointName, checkpoint, true); err != nil {
116-
if err == errors.ErrCheckpointNotFound {
113+
if stdErrors.Is(err, errors.ErrCheckpointNotFound) {
117114
// We cannot find checkpoint, so it is possible that previous checkpoint was stored in either disk or memory
118115
return sc.tryMigrateState(conf, nics, reservedBandwidth, checkpoint)
119-
} else if err == errors.ErrCorruptCheckpoint {
116+
} else if stdErrors.Is(err, errors.ErrCorruptCheckpoint) {
120117
if !sc.skipStateCorruption {
121118
return err
122119
}
@@ -187,11 +184,11 @@ func (sc *stateCheckpoint) tryMigrateState(
187184
}
188185

189186
if err := sc.qrmCheckpointManager.GetPreviousCheckpoint(sc.checkpointName, checkpoint); err != nil {
190-
if err == errors.ErrCheckpointNotFound {
187+
if stdErrors.Is(err, errors.ErrCheckpointNotFound) {
191188
// Old checkpoint file is not found, so we just store state in new checkpoint
192189
general.InfoS("[network_plugin] checkpoint %v doesn't exist, create it", sc.checkpointName)
193190
return sc.storeState()
194-
} else if err == errors.ErrCorruptCheckpoint {
191+
} else if stdErrors.Is(err, errors.ErrCorruptCheckpoint) {
195192
if !sc.skipStateCorruption {
196193
return err
197194
}

pkg/agent/qrm-plugins/network/state/state_test.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,17 @@ import (
2424
"testing"
2525

2626
info "github.com/google/cadvisor/info/v1"
27+
"github.com/stretchr/testify/assert"
28+
pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
29+
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
30+
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
31+
2732
"github.com/kubewharf/katalyst-api/pkg/consts"
2833
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate"
2934
"github.com/kubewharf/katalyst-core/pkg/config/agent/qrm"
3035
"github.com/kubewharf/katalyst-core/pkg/metrics"
3136
"github.com/kubewharf/katalyst-core/pkg/util/machine"
3237
"github.com/kubewharf/katalyst-core/pkg/util/qrmcheckpointmanager"
33-
"github.com/stretchr/testify/assert"
34-
pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
35-
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
36-
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
3738
)
3839

3940
func TestTryMigrateState(t *testing.T) {

pkg/config/agent/qrm/statedirectory/statedirectory_base.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,18 @@ type StateDirectoryConfiguration struct {
2222
// EnableInMemoryState indicates whether we want to store the state in memory or on disk
2323
// if set true, the state will be stored in tmpfs
2424
EnableInMemoryState bool
25+
// HasPreStop indicates whether we have pre-stop script in place
26+
HasPreStop bool
2527
}
2628

2729
func NewStateDirectoryConfiguration() *StateDirectoryConfiguration {
2830
return &StateDirectoryConfiguration{}
2931
}
3032

31-
func (c *StateDirectoryConfiguration) GetCurrentAndOtherStateFileDirectory() (string, string) {
33+
func (c *StateDirectoryConfiguration) GetCurrentAndPreviousStateFileDirectory() (string, string) {
34+
if !c.HasPreStop {
35+
return c.StateFileDirectory, c.InMemoryStateFileDirectory
36+
}
3237
if c.EnableInMemoryState {
3338
return c.InMemoryStateFileDirectory, c.StateFileDirectory
3439
}

0 commit comments

Comments
 (0)