Skip to content

Commit 9a8362f

Browse files
committed
fix: depends on not waiting for completed tasks to continue
1 parent d2ec565 commit 9a8362f

8 files changed

Lines changed: 448 additions & 37 deletions

File tree

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
#
2+
# LICENSE START
3+
#
4+
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#
18+
# LICENSE END
19+
#
20+
21+
---
22+
kind: Pod
23+
apiVersion: v1
24+
metadata:
25+
namespace: skyhook
26+
labels:
27+
skyhook.nvidia.com/name: depends-on
28+
skyhook.nvidia.com/package: aa-fast-1.2.3
29+
annotations:
30+
("skyhook.nvidia.com/package" && parse_json("skyhook.nvidia.com/package")):
31+
{
32+
"name": "aa-fast",
33+
"version": "1.2.3",
34+
"skyhook": "depends-on",
35+
"stage": "apply",
36+
"image": "ghcr.io/nvidia/skyhook/agentless"
37+
}
38+
---
39+
kind: Pod
40+
apiVersion: v1
41+
metadata:
42+
namespace: skyhook
43+
labels:
44+
skyhook.nvidia.com/name: depends-on
45+
skyhook.nvidia.com/package: bb-slow-1.2
46+
annotations:
47+
("skyhook.nvidia.com/package" && parse_json("skyhook.nvidia.com/package")):
48+
{
49+
"name": "bb-slow",
50+
"version": "1.2",
51+
"skyhook": "depends-on",
52+
"stage": "apply",
53+
"image": "ghcr.io/nvidia/skyhook/agentless"
54+
}
55+
---
56+
kind: Pod
57+
apiVersion: v1
58+
metadata:
59+
namespace: skyhook
60+
labels:
61+
skyhook.nvidia.com/name: depends-on
62+
skyhook.nvidia.com/package: aa-fast-1.2.3
63+
annotations:
64+
("skyhook.nvidia.com/package" && parse_json("skyhook.nvidia.com/package")):
65+
{
66+
"name": "aa-fast",
67+
"version": "1.2.3",
68+
"skyhook": "depends-on",
69+
"stage": "config",
70+
"image": "ghcr.io/nvidia/skyhook/agentless"
71+
}
72+
---
73+
kind: Pod
74+
apiVersion: v1
75+
metadata:
76+
namespace: skyhook
77+
labels:
78+
skyhook.nvidia.com/name: depends-on
79+
skyhook.nvidia.com/package: bb-slow-1.2
80+
annotations:
81+
("skyhook.nvidia.com/package" && parse_json("skyhook.nvidia.com/package")):
82+
{
83+
"name": "bb-slow",
84+
"version": "1.2",
85+
"skyhook": "depends-on",
86+
"stage": "config",
87+
"image": "ghcr.io/nvidia/skyhook/agentless"
88+
}
89+
---
90+
kind: Pod
91+
apiVersion: v1
92+
metadata:
93+
namespace: skyhook
94+
labels:
95+
skyhook.nvidia.com/name: depends-on
96+
skyhook.nvidia.com/package: cc-last-5.4.3
97+
annotations:
98+
("skyhook.nvidia.com/package" && parse_json("skyhook.nvidia.com/package")):
99+
{
100+
"name": "cc-last",
101+
"version": "5.4.3",
102+
"skyhook": "depends-on",
103+
"stage": "apply",
104+
"image": "ghcr.io/nvidia/skyhook/agentless"
105+
}
106+
---
107+
kind: Pod
108+
apiVersion: v1
109+
metadata:
110+
namespace: skyhook
111+
labels:
112+
skyhook.nvidia.com/name: depends-on
113+
skyhook.nvidia.com/package: cc-last-5.4.3
114+
annotations:
115+
("skyhook.nvidia.com/package" && parse_json("skyhook.nvidia.com/package")):
116+
{
117+
"name": "cc-last",
118+
"version": "5.4.3",
119+
"skyhook": "depends-on",
120+
"stage": "config",
121+
"image": "ghcr.io/nvidia/skyhook/agentless"
122+
}
123+
---
124+
apiVersion: skyhook.nvidia.com/v1alpha1
125+
kind: Skyhook
126+
metadata:
127+
name: depends-on
128+
status:
129+
status: complete
130+
nodeState:
131+
(values(@)):
132+
- aa-fast|1.2.3:
133+
image: ghcr.io/nvidia/skyhook/agentless
134+
name: aa-fast
135+
stage: config
136+
state: complete
137+
version: 1.2.3
138+
bb-slow|1.2:
139+
image: ghcr.io/nvidia/skyhook/agentless
140+
name: bb-slow
141+
stage: config
142+
state: complete
143+
version: "1.2"
144+
cc-last|5.4.3:
145+
image: ghcr.io/nvidia/skyhook/agentless
146+
name: cc-last
147+
stage: config
148+
state: complete
149+
version: 5.4.3
150+
nodeStatus:
151+
# grab values should be one and is complete
152+
(values(@)):
153+
- complete
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#
2+
# LICENSE START
3+
#
4+
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#
18+
# LICENSE END
19+
#
20+
21+
# yaml-language-server: $schema=https://raw.githubusercontent.com/kyverno/chainsaw/main/.schemas/json/test-chainsaw-v1alpha1.json
22+
apiVersion: chainsaw.kyverno.io/v1alpha1
23+
kind: Test
24+
metadata:
25+
name: depends-on
26+
spec:
27+
concurrent: true
28+
description: |
29+
Test makes sure depends-on works as expected. c depends on a, and b. Make sure a and b complete before c starts.
30+
timeouts:
31+
assert: 180s
32+
steps:
33+
- try:
34+
## setup step, skyhook to complete
35+
- script:
36+
content: |
37+
## remove annotation from last run
38+
../rest_test.sh depends-on
39+
- create:
40+
file: skyhook.yaml
41+
- assert:
42+
file: assert-skyhook.yaml
43+
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#
2+
# LICENSE START
3+
#
4+
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#
18+
# LICENSE END
19+
#
20+
21+
apiVersion: skyhook.nvidia.com/v1alpha1
22+
kind: Skyhook
23+
metadata:
24+
labels:
25+
app.kubernetes.io/part-of: skyhook-operator
26+
app.kubernetes.io/created-by: skyhook-operator
27+
name: depends-on
28+
spec:
29+
nodeSelectors:
30+
matchLabels:
31+
skyhook.nvidia.com/test-node: skyhooke2e
32+
# matchExpressions:
33+
# - key: node-role.kubernetes.io/control-plane
34+
# operator: DoesNotExist ## all worker nodes
35+
packages:
36+
aa-fast:
37+
version: "1.2.3"
38+
image: ghcr.io/nvidia/skyhook/agentless
39+
env:
40+
- name: SLEEP_LEN
41+
value: "1"
42+
resources:
43+
cpuLimit: 50m
44+
cpuRequest: 50m
45+
memoryLimit: 32Mi
46+
memoryRequest: 32Mi
47+
bb-slow:
48+
version: "1.2"
49+
image: ghcr.io/nvidia/skyhook/agentless
50+
env:
51+
- name: SLEEP_LEN
52+
value: "10"
53+
resources:
54+
cpuLimit: 50m
55+
cpuRequest: 50m
56+
memoryLimit: 32Mi
57+
memoryRequest: 32Mi
58+
cc-last:
59+
version: "5.4.3"
60+
image: ghcr.io/nvidia/skyhook/agentless
61+
dependsOn:
62+
aa-fast: "1.2.3"
63+
bb-slow: "1.2"
64+
env:
65+
- name: SLEEP_LEN
66+
value: "1"
67+
resources:
68+
cpuLimit: 50m
69+
cpuRequest: 50m
70+
memoryLimit: 32Mi
71+
memoryRequest: 32Mi

operator/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ unit-tests: reporting manifests generate envtest ginkgo kill ## Run unit tests.
165165
## exec time is for things like scripts and commands, if we new more of these, might want to switch to a config file
166166
## https://kyverno.github.io/chainsaw/latest/reference/commands/chainsaw_test/
167167
## https://kyverno.github.io/chainsaw/latest/configuration/file/
168-
CHAINSAW_ARGS:=--exec-timeout 30s --parallel 2
168+
CHAINSAW_ARGS:=--exec-timeout 30s --parallel 3
169169

170170
e2e-tests: chainsaw install run ## Run end to end tests.
171171
## requires a cluster to be running with access

operator/internal/graph/dependency_graph.go

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -108,35 +108,33 @@ func (d *dag[T]) Add(name string, object T, dependencies ...string) error {
108108
}
109109

110110
func (d *dag[T]) Next(from ...string) ([]string, error) {
111-
112111
if err := d.Valid(); err != nil {
113112
return nil, err
114113
}
115114

116115
if len(from) == 0 { // base staring case
117116
return getNames(flat(d.leafs)), nil
118117
}
119-
root := make([]*vertex[T], 0)
120118

119+
// Use a map to deduplicate edges
120+
seen := make(map[string]*vertex[T])
121121
for _, f := range from {
122122
vert := d.vertices[f]
123-
// if _, ok := d.vertices[f]; !ok {
124-
// // verts := make([]string, 0)
125-
// // for k := range d.vertices {
126-
// // verts = append(verts, k)
127-
// // }
128-
// // return nil, fmt.Errorf("error [%s] does not exist in the graph. Possible vertices %v", f, verts)
129-
// continue // not a thing, well ignore it
130-
// }
131123
for _, edge := range d.vertices[f].edges {
132124
// search the path before adding it, if the path is longer than 1 then we don't want to add it
133125
// this aids in walking the graph later because erroneous paths have been pre pruned
134126
if len(d.find_longest_path(vert.name, edge.name)) <= 1 {
135-
root = append(root, edge)
127+
seen[edge.name] = edge
136128
}
137129
}
138130
}
139131

132+
// Convert map to slice
133+
root := make([]*vertex[T], 0, len(seen))
134+
for _, v := range seen {
135+
root = append(root, v)
136+
}
137+
140138
// remove matching from input
141139
in := make(map[string]struct{})
142140
for _, f := range from {

operator/internal/graph/dependency_graph_test.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,4 +200,19 @@ var _ = Describe("DAG tests", func() {
200200
Expect(err).ToNot(BeNil())
201201

202202
})
203+
204+
It("should not return duplicates of the same from next", func() {
205+
d := New[*string]()
206+
Expect(d.Add("A", nil)).Should(Succeed())
207+
Expect(d.Add("B", nil)).Should(Succeed())
208+
Expect(d.Add("C", nil, "A", "B")).Should(Succeed())
209+
210+
step, err := d.Next()
211+
Expect(err).To(BeNil())
212+
Expect(step).To(BeEquivalentTo([]string{"A", "B"}))
213+
214+
step, err = d.Next(step...)
215+
Expect(err).To(BeNil())
216+
Expect(step).To(BeEquivalentTo([]string{"C"}))
217+
})
203218
})

operator/internal/wrapper/node.go

Lines changed: 12 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ package wrapper
2323
import (
2424
"encoding/json"
2525
"fmt"
26+
"slices"
2627
"sort"
2728
"strings"
2829

@@ -318,33 +319,19 @@ func (node *skyhookNode) ProgressSkipped() {
318319

319320
func (node *skyhookNode) RunNext() ([]*v1alpha1.Package, error) {
320321
complete := node.GetComplete()
321-
322-
var next []string
323-
var err error
324-
if len(complete) == 0 { // base case, start from leaves
325-
next, err = node.graph.Next()
326-
} else {
327-
next, err = node.graph.Next(complete...)
322+
fnext, err := node.graph.Next()
323+
next := make([]string, 0)
324+
325+
// check that all of the current batch are complete
326+
for _, item := range fnext {
327+
if !slices.Contains(complete, item) {
328+
// remove it from the list
329+
next = append(next, item)
330+
}
328331
}
329332

330-
// this case is if we updated the SCR, and we have new leafs that are getting skipped because we have some complete
331-
if !node.IsComplete() && len(next) == 0 {
332-
next, err = node.graph.Next()
333-
334-
// now might have some completed ones, so remove those
335-
temp := next[:0]
336-
for _, item := range next {
337-
completed := false
338-
for _, done := range complete {
339-
if done == item {
340-
completed = true
341-
}
342-
}
343-
if !completed {
344-
temp = append(temp, item)
345-
}
346-
}
347-
next = temp
333+
if len(next) == 0 {
334+
next, err = node.graph.Next(complete...)
348335
}
349336

350337
if err != nil {

0 commit comments

Comments
 (0)