@@ -105,7 +105,7 @@ type cpuResourceAdvisor struct {
105105 provisionAssembler provisionassembler.ProvisionAssembler
106106 headroomAssembler headroomassembler.HeadroomAssembler
107107
108- isolator isolation.Isolator
108+ isolators [] isolation.Isolator
109109 isolationSafety bool
110110
111111 mutex sync.RWMutex
@@ -131,7 +131,10 @@ func NewCPUResourceAdvisor(conf *config.Configuration, extraConf interface{}, me
131131 numRegionsPerNuma : make (map [int ]int ),
132132 nonBindingNumas : machine .NewCPUSet (),
133133
134- isolator : isolation .NewLoadIsolator (conf , extraConf , emitter , metaCache , metaServer ),
134+ isolators : []isolation.Isolator {
135+ isolation .NewLoadIsolator (conf , extraConf , emitter , metaCache , metaServer ),
136+ isolation .NewPerfIsolator (conf , extraConf , emitter , metaCache , metaServer ),
137+ },
135138
136139 metaCache : metaCache ,
137140 metaServer : metaServer ,
@@ -151,6 +154,11 @@ func NewCPUResourceAdvisor(conf *config.Configuration, extraConf interface{}, me
151154}
152155
// Run starts every isolator in cra.isolators with ctx and then blocks until
// ctx is done. A failed Start is reported through klog.Fatalf, which logs the
// error and exits the process, so no later isolator is started in that case.
// NOTE(review): whether Start itself blocks or spawns background work is not
// visible in this hunk — confirm against the isolation package.
153156func (cra * cpuResourceAdvisor ) Run (ctx context.Context ) {
157+ for _ , isolator := range cra .isolators {
158+ if err := isolator .Start (ctx ); err != nil {
159+ klog .Fatalf ("[qosaware-cpu] start isolator failed: %v" , err )
160+ }
161+ }
// Park here for the lifetime of the context.
154162 <- ctx .Done ()
155163}
156164
@@ -234,7 +242,7 @@ func (cra *cpuResourceAdvisor) updateWithIsolationGuardian(tryIsolation bool) (
234242 }
235243
236244 cra .updateNumasAvailableResource ()
237- isolationExists := cra .setIsolatedContainers (tryIsolation )
245+ cra .setIsolatedContainers (tryIsolation )
238246
239247 // assign containers to regions
240248 if err := cra .assignContainersToRegions (); err != nil {
@@ -244,7 +252,7 @@ func (cra *cpuResourceAdvisor) updateWithIsolationGuardian(tryIsolation bool) (
244252
245253 cra .gcRegionMap ()
246254 cra .updateAdvisorEssentials ()
247- if tryIsolation && isolationExists && ! cra .checkIsolationSafety () {
255+ if tryIsolation && cra . hasIsolatedPods () && ! cra .checkIsolationSafety () {
248256 klog .Errorf ("[qosaware-cpu] failed to check isolation" )
249257 return nil , errIsolationSafetyCheckFailed
250258 }
@@ -284,57 +292,81 @@ func (cra *cpuResourceAdvisor) updateWithIsolationGuardian(tryIsolation bool) (
284292 return & calculationResult , nil
285293}
286294
287- // setIsolatedContainers get isolation status from isolator and update into containers
288- func (cra * cpuResourceAdvisor ) setIsolatedContainers (enableIsolated bool ) bool {
// getTargetIsolatedPods (added side of this hunk) merges the pods reported by
// every isolator in cra.isolators into a single string set and returns it.
// It stops at the first isolator whose GetIsolatedPods returns an error and
// returns (nil, err), so pods already gathered from earlier isolators are
// discarded in that case.
// The lines marked "-" below belong to the removed setIsolatedContainers body.
295+ func (cra * cpuResourceAdvisor ) getTargetIsolatedPods () (sets.String , error ) {
289296 isolatedPods := sets .NewString ()
290- if enableIsolated {
291- isolatedPods = sets .NewString (cra .isolator .GetIsolatedPods ()... )
292- }
293- if len (isolatedPods ) > 0 {
294- klog .Infof ("[qosaware-cpu] current isolated pod: %v" , isolatedPods .List ())
// Logged on every call, before any isolator is queried.
297+ general .InfoS ("try to getTargetIsolatedPods" , "isolators" , general .ToString (cra .isolators ))
298+ for _ , isolator := range cra .isolators {
299+ pods , err := isolator .GetIsolatedPods ()
300+ if err != nil {
301+ return nil , err
302+ }
// Set insertion de-duplicates pods reported by more than one isolator.
303+ isolatedPods .Insert (pods ... )
295304 }
305+ return isolatedPods , nil
306+ }
296307
297- _ = cra .metaCache .RangeAndUpdateContainer (func (podUID string , _ string , ci * types.ContainerInfo ) bool {
298- ci .Isolated = false
299- if isolatedPods .Has (podUID ) {
300- ci .Isolated = true
// hasIsolatedPods (added side of this hunk) reports whether at least one
// container visited by metaCache.RangeContainer has its Isolated flag set.
// It reads the flags stored on the containers rather than querying the
// isolators again.
308+ func (cra * cpuResourceAdvisor ) hasIsolatedPods () bool {
309+ ret := false
310+ cra .metaCache .RangeContainer (func (podUID string , containerName string , ci * types.ContainerInfo ) bool {
311+ if ci .Isolated {
312+ ret = true
// Presumably returning false stops the iteration early — confirm against
// the metacache RangeContainer contract.
313+ return false
301314 }
302315 return true
303316 })
304- return len ( isolatedPods ) > 0
317+ return ret
305318}
306319
307- // checkIsolationSafety returns true iff the isolated-limit-sum and share-pool-size exceed total capacity
308- // todo: this logic contains a lot of assumptions and should be refined in the future
309- func (cra * cpuResourceAdvisor ) checkIsolationSafety () bool {
310- shareAndIsolationPoolSize := 0
311- nonBindingNumas := cra .metaServer .CPUDetails .NUMANodes ()
312- for _ , r := range cra .regionMap {
313- if r .Type () == configapi .QoSRegionTypeShare {
314- controlKnob , err := r .GetProvision ()
315- if err != nil {
316- klog .Errorf ("[qosaware-cpu] get controlKnob for %v err: %v" , r .Name (), err )
317- return false
318- }
319- shareAndIsolationPoolSize += int (controlKnob [configapi .ControlKnobNonReclaimedCPURequirement ].Value )
320- } else if r .Type () == configapi .QoSRegionTypeIsolation {
321- pods := r .GetPods ()
322- cra .metaCache .RangeContainer (func (podUID string , _ string , containerInfo * types.ContainerInfo ) bool {
323- if _ , ok := pods [podUID ]; ok {
324- shareAndIsolationPoolSize += int (containerInfo .CPULimit )
325- }
326- return true
327- })
328- } else if r .Type () == configapi .QoSRegionTypeDedicatedNumaExclusive {
329- nonBindingNumas = nonBindingNumas .Difference (r .GetBindingNumas ())
320+ // setIsolatedContainers gets the isolation targets from the isolators and writes them onto each container's Isolated flag.
// When enableIsolation is false the target set stays empty, so every
// container visited below gets Isolated = false.
// When getTargetIsolatedPods fails, the error is logged and the function
// returns before metaCache is touched, so containers keep whatever Isolated
// value they already had.
// NOTE(review): code that later scans for Isolated containers would then see
// those earlier flags — confirm that keeping them on error is intended.
// The lines marked "-" below belong to the removed checkIsolationSafety body.
321+ func (cra * cpuResourceAdvisor ) setIsolatedContainers (enableIsolation bool ) {
322+ isolatedPods := sets .NewString ()
323+ if enableIsolation {
324+ pods , err := cra .getTargetIsolatedPods ()
325+ if err != nil {
326+ klog .Errorf ("[qosaware-cpu] get isolated pods failed: %v" , err )
327+ return
330328 }
329+ isolatedPods = pods
331330 }
332331
333- nonBindingSize := cra .metaServer .NUMAToCPUs .CPUSizeInNUMAs (cra .nonBindingNumas .ToSliceNoSortInt ()... )
334- klog .Infof ("[qosaware-cpu] shareAndIsolationPoolSize %v, nonBindingSize %v" , shareAndIsolationPoolSize , nonBindingSize )
335- if shareAndIsolationPoolSize > nonBindingSize {
336- return false
337- }
332+ klog .InfoS ("[qosaware-cpu]" , "current isolated pod" , isolatedPods .List (), "enableIsolation" , enableIsolation )
// The value returned by RangeAndUpdateContainer is deliberately discarded here.
333+ _ = cra .metaCache .RangeAndUpdateContainer (func (podUID string , containerName string , ci * types.ContainerInfo ) bool {
// Isolation is decided per pod: a container is flagged iff its pod UID is in the target set.
334+ ci .Isolated = isolatedPods .Has (podUID )
335+ return true
336+ })
337+ }
338+
339+ // checkIsolationSafety currently always returns true: the capacity check in its body is commented out.
// The disabled check summed the share regions' non-reclaimed CPU requirement
// and the CPU limits of containers in isolation regions, and returned false
// (unsafe) when that sum exceeded the CPU size of the non-binding NUMAs, or
// when a share region's provision could not be read.
// NOTE(review): with the check disabled, the caller's
// errIsolationSafetyCheckFailed path cannot be reached through this function.
340+ // todo: this logic contains a lot of assumptions and should be refined in the future
341+ func (cra * cpuResourceAdvisor ) checkIsolationSafety () bool {
342+ //shareAndIsolationPoolSize := 0
343+ //nonBindingNumas := cra.metaServer.CPUDetails.NUMANodes()
344+ //for _, r := range cra.regionMap {
345+ // if r.Type() == configapi.QoSRegionTypeShare {
346+ // controlKnob, err := r.GetProvision()
347+ // if err != nil {
348+ // klog.Errorf("[qosaware-cpu] get controlKnob for %v err: %v", r.Name(), err)
349+ // return false
350+ // }
351+ // shareAndIsolationPoolSize += int(controlKnob[configapi.ControlKnobNonReclaimedCPURequirement].Value)
352+ // } else if r.Type() == configapi.QoSRegionTypeIsolation {
353+ // pods := r.GetPods()
354+ // cra.metaCache.RangeContainer(func(podUID string, _ string, containerInfo *types.ContainerInfo) bool {
355+ // if _, ok := pods[podUID]; ok {
356+ // shareAndIsolationPoolSize += int(containerInfo.CPULimit)
357+ // }
358+ // return true
359+ // })
360+ // } else if r.Type() == configapi.QoSRegionTypeDedicatedNumaExclusive {
361+ // nonBindingNumas = nonBindingNumas.Difference(r.GetBindingNumas())
362+ // }
363+ //}
364+ //
365+ //nonBindingSize := cra.metaServer.NUMAToCPUs.CPUSizeInNUMAs(cra.nonBindingNumas.ToSliceNoSortInt()...)
366+ //klog.Infof("[qosaware-cpu] shareAndIsolationPoolSize %v, nonBindingSize %v", shareAndIsolationPoolSize, nonBindingSize)
367+ //if shareAndIsolationPoolSize > nonBindingSize {
368+ // return false
369+ //}
338370 return true
339372}
340372
0 commit comments