Skip to content

Commit 156b470

Browse files
authored
Merge pull request #1017 from luomingmeng/dev/fix-network-plugin-clear-state
fix(network): extract net class cleanup logic to a separate method
2 parents 2590c1d + a1d945b commit 156b470

File tree

2 files changed

+154
-18
lines changed

2 files changed

+154
-18
lines changed

pkg/agent/qrm-plugins/network/staticpolicy/policy.go

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,24 +1170,9 @@ func (p *StaticPolicy) getResourceAllocationAnnotations(
11701170
}
11711171

11721172
func (p *StaticPolicy) removePod(podUID string) error {
1173-
if p.CgroupV2Env {
1174-
cgIDList, err := p.metaServer.ExternalManager.ListCgroupIDsForPod(podUID)
1175-
if err != nil {
1176-
if general.IsErrNotFound(err) {
1177-
general.Warningf("cgroup ids for pod not found")
1178-
return nil
1179-
}
1180-
return fmt.Errorf("[NetworkStaticPolicy.removePod] list cgroup ids of pod: %s failed with error: %v", podUID, err)
1181-
}
1182-
1183-
for _, cgID := range cgIDList {
1184-
go func(cgID uint64) {
1185-
if err := p.metaServer.ExternalManager.ClearNetClass(cgID); err != nil {
1186-
general.Errorf("delete net class failed, cgID: %v, err: %v", cgID, err)
1187-
return
1188-
}
1189-
}(cgID)
1190-
}
1173+
err := p.clearNetClassIfNeed(podUID)
1174+
if err != nil {
1175+
return fmt.Errorf("clearNetClassIfNeed failed with error: %v", err)
11911176
}
11921177

11931178
// update state cache
@@ -1370,6 +1355,29 @@ func (p *StaticPolicy) generateAndApplyGroups() error {
13701355
return nil
13711356
}
13721357

1358+
func (p *StaticPolicy) clearNetClassIfNeed(podUID string) error {
1359+
if p.CgroupV2Env {
1360+
cgIDList, err := p.metaServer.ExternalManager.ListCgroupIDsForPod(podUID)
1361+
if err != nil {
1362+
if general.IsErrNotFound(err) {
1363+
general.Warningf("cgroup ids for pod %s not found", podUID)
1364+
return nil
1365+
}
1366+
return fmt.Errorf("list cgroup ids of pod: %s failed with error: %v", podUID, err)
1367+
}
1368+
1369+
for _, cgID := range cgIDList {
1370+
go func(cgID uint64) {
1371+
if err := p.metaServer.ExternalManager.ClearNetClass(cgID); err != nil {
1372+
general.Errorf("delete net class failed, cgID: %v, err: %v", cgID, err)
1373+
return
1374+
}
1375+
}(cgID)
1376+
}
1377+
}
1378+
return nil
1379+
}
1380+
13731381
func getAllNICs(nicManager nic.NICManager) []machine.InterfaceInfo {
13741382
nics := nicManager.GetNICs()
13751383
return append(nics.HealthyNICs, nics.UnhealthyNICs...)

pkg/agent/qrm-plugins/network/staticpolicy/policy_test.go

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ import (
5656
"github.com/kubewharf/katalyst-core/pkg/metrics"
5757
"github.com/kubewharf/katalyst-core/pkg/util/cgroup/common"
5858
"github.com/kubewharf/katalyst-core/pkg/util/external/network"
59+
"github.com/kubewharf/katalyst-core/pkg/util/general"
5960
"github.com/kubewharf/katalyst-core/pkg/util/machine"
6061
)
6162

@@ -390,6 +391,8 @@ func TestRemovePod(t *testing.T) {
390391
PodUid: podID,
391392
}
392393

394+
//---verify cgroup id not found
395+
policy.CgroupV2Env = true
393396
_, err = policy.RemovePod(context.TODO(), delReq)
394397
assert.NoError(t, err)
395398

@@ -1805,3 +1808,128 @@ func TestStaticPolicy_applyNetClass(t *testing.T) {
18051808

18061809
policy.applyNetClass()
18071810
}
1811+
1812+
type errCgroupIDManager struct {
1813+
cgroupid.CgroupIDManagerStub
1814+
errListCgroupIDsForPod error
1815+
}
1816+
1817+
func (f *errCgroupIDManager) ListCgroupIDsForPod(_ string) ([]uint64, error) {
1818+
return nil, f.errListCgroupIDsForPod
1819+
}
1820+
1821+
type errNetworkManager struct {
1822+
network.NetworkManagerStub
1823+
errClearNetClass error
1824+
}
1825+
1826+
func (n *errNetworkManager) ClearNetClass(_ uint64) error {
1827+
return n.errClearNetClass
1828+
}
1829+
1830+
func TestStaticPolicy_clearNetClassIfNeed(t *testing.T) {
1831+
t.Parallel()
1832+
type fields struct {
1833+
cgroupIDManager cgroupid.CgroupIDManager
1834+
networkManager network.NetworkManager
1835+
}
1836+
type args struct {
1837+
podUID string
1838+
}
1839+
tests := []struct {
1840+
name string
1841+
fields fields
1842+
args args
1843+
wantErr assert.ErrorAssertionFunc
1844+
}{
1845+
{
1846+
name: "clear net class success",
1847+
fields: fields{
1848+
cgroupIDManager: &cgroupid.CgroupIDManagerStub{
1849+
ContainerCGroupIDMap: map[string]map[string]uint64{
1850+
"test-pod-uid": {
1851+
"test-container-id": 314125,
1852+
},
1853+
},
1854+
},
1855+
networkManager: &network.NetworkManagerStub{
1856+
NetClassMap: map[string]map[string]*common.NetClsData{
1857+
"test-pod-uid": {
1858+
"test-container-id": {
1859+
ClassID: 0,
1860+
CgroupID: 314125,
1861+
Attributes: map[string]string{
1862+
testNetClassIDResourceAllocationAnnotationKey: testSharedNetClsId,
1863+
},
1864+
},
1865+
},
1866+
},
1867+
},
1868+
},
1869+
args: args{
1870+
podUID: "test-pod-uid",
1871+
},
1872+
wantErr: assert.NoError,
1873+
},
1874+
{
1875+
name: "list cgroup id for pod not found",
1876+
fields: fields{
1877+
cgroupIDManager: &errCgroupIDManager{
1878+
errListCgroupIDsForPod: general.ErrNotFound,
1879+
},
1880+
},
1881+
args: args{podUID: "test-pod-uid"},
1882+
wantErr: assert.NoError,
1883+
},
1884+
{
1885+
name: "list cgroup id for pod failed",
1886+
fields: fields{
1887+
cgroupIDManager: &errCgroupIDManager{
1888+
errListCgroupIDsForPod: fmt.Errorf("ListCgroupIDsForPod"),
1889+
},
1890+
},
1891+
args: args{
1892+
podUID: "test-pod-uid",
1893+
},
1894+
wantErr: assert.Error,
1895+
},
1896+
{
1897+
name: "clear net class fail",
1898+
fields: fields{
1899+
cgroupIDManager: &cgroupid.CgroupIDManagerStub{
1900+
ContainerCGroupIDMap: map[string]map[string]uint64{
1901+
"test-pod-uid": {
1902+
"test-container-id": 314125,
1903+
},
1904+
},
1905+
},
1906+
networkManager: &errNetworkManager{
1907+
errClearNetClass: fmt.Errorf("ClearNetClass"),
1908+
},
1909+
},
1910+
args: args{
1911+
podUID: "test-pod-uid",
1912+
},
1913+
wantErr: assert.NoError,
1914+
},
1915+
}
1916+
for _, tt := range tests {
1917+
tt := tt
1918+
t.Run(tt.name, func(t *testing.T) {
1919+
t.Parallel()
1920+
1921+
metaServer := makeMetaServer()
1922+
p := &StaticPolicy{
1923+
CgroupV2Env: true,
1924+
metaServer: metaServer,
1925+
}
1926+
1927+
p.metaServer.ExternalManager = &external.DummyExternalManager{
1928+
CgroupIDManager: tt.fields.cgroupIDManager,
1929+
NetworkManager: tt.fields.networkManager,
1930+
}
1931+
time.Sleep(1 * time.Second)
1932+
tt.wantErr(t, p.clearNetClassIfNeed(tt.args.podUID), fmt.Sprintf("clearNetClassIfNeed(%v)", tt.args.podUID))
1933+
})
1934+
}
1935+
}

0 commit comments

Comments
 (0)