-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgraph.go
289 lines (252 loc) · 8.18 KB
/
graph.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
package doze
import (
"crypto"
_ "crypto/md5"
"fmt"
"os"
"path"
"slices"
"strings"
)
// A Graph contains Rules and Artifacts of the current build.
// The Rules and Artifacts are mapped using a Hash computed from their contents.
type Graph struct {
rules map[string]*Rule
artifacts map[string]*Artifact
}
func (graph *Graph) GetArtifacts() map[string]*Artifact {
return graph.artifacts
}
// A Rule represents an action that processes input Artifacts into output Artifacts by executing a Procedure.
// Its Hash is computer by digesting
type Rule struct {
Inputs, Outputs []ArtifactTag
procID ProcedureID
Processed bool
}
// An Artifact represents a file which is processed or created by Doze in the context of a build.
// Internally and to the operator, Artifacts are represented by their ArtifactTag.
// The Artifact keeps track of its own status:
// - Exists is true if the Artifact is found to exist on the disk.
// - Modified is true if the Artifact was touched by the operator since the last build or
// if the Artifact was created by a Doze rule in the current build.
type Artifact struct {
tag ArtifactTag
creator *Rule
Exists, Modified bool
}
// An ArtifactTag represents the path on disk to an Artifact.
// Internally its value is split into its name and its location.
// Its Hash is computer by digesting the normalized path made of the location and the name,
// thus reconstructing its real path on disk. Artifacts therefore cannot have duplicates.
type ArtifactTag struct {
name, location string
}
/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
/* Graph */
// Execute to completion a non-sorted list of Rules, passed by their Hashes, with the goal
// of bringing the Graph up-to-date. Rules which are not ready for execution yet are postponed.
// Before returning the function resolves the Graph again, taking into account the updated status
// of the Rules that were exected, and calls itself recursively with the new list of resolved Rules,
// if it's not empty.
// Execute runs syncronously and single-threadedly.
func (graph *Graph) Execute(ruleHashes []string) {
RuleLoop:
for _, hash := range ruleHashes {
rule, ok := graph.rules[hash]
if !ok {
panic("rule '" + hash + "' resolved for execution but doesn't exist")
}
// if any input is missing, the Rule has to be postponed
for _, tag := range rule.Inputs {
artifact := graph.artifacts[tag.NormalizedTag()]
if !artifact.Exists {
// if the artifact doesn't have any creatorRule and it doesn't exist that's not good...
if artifact.creator == nil {
panic("artifact '" + tag.NormalizedTag() + "' doesn't exist and doesn't have a creator Rule")
}
continue RuleLoop
}
}
rule.Execute(hash)
// mark the Rule as Processed
rule.Processed = true
// mark its outputs as Existing and Modified
for _, tag := range rule.Outputs {
artifact := graph.artifacts[tag.NormalizedTag()]
artifact.Exists = true
artifact.Modified = true
}
}
newRuleHashes := graph.Resolve()
if len(newRuleHashes) > 0 {
graph.Execute(newRuleHashes)
}
}
// Resolve and return a non-sorted list of Hashes of Rules. These Rules should be executed to bring
// the Graph up-to-date. Of course, if the returned list is nil, then there is nothing to do.
func (graph *Graph) Resolve() []string {
var ruleHashes []string
RuleLoop:
for hash, rule := range graph.rules {
if rule.Processed {
// Rule's been processed already. Go next.
continue
}
for _, tag := range rule.Inputs {
if graph.artifacts[tag.NormalizedTag()].Modified {
// Rule has a modified input. Schedule the Rule.
if !slices.Contains(ruleHashes, hash) {
ruleHashes = append(ruleHashes, hash)
}
continue RuleLoop
}
}
for _, tag := range rule.Outputs {
if !graph.artifacts[tag.NormalizedTag()].Exists {
// Rule has a non-existing output. Schedule the Rule.
if !slices.Contains(ruleHashes, hash) {
ruleHashes = append(ruleHashes, hash)
}
continue RuleLoop
}
}
}
return ruleHashes
}
// Registers a new Rule with the Graph.
// input and outputs are Artifact names.
// inputLocation and outputLocation are Artifact locations.
// Returns an error if something went wrong and the Rule could not be registered.
func (graph *Graph) AddRule(
inputs, outputs []string,
procID ProcedureID,
inputLocation, outputLocation string,
) error {
if len(inputs) == 0 {
return fmt.Errorf("no inputs provided")
}
if len(outputs) == 0 {
return fmt.Errorf("no outputs provided")
}
rule := &Rule{
procID: procID,
}
// create, then register input Artifacts, if they don't already exist.
for _, name := range inputs {
newTag := ArtifactTag{
name,
inputLocation,
}
_, ok := graph.artifacts[newTag.NormalizedTag()]
if !ok {
graph.artifacts[newTag.NormalizedTag()] = &Artifact{
tag: newTag,
}
}
rule.Inputs = append(rule.Inputs, newTag)
}
// ditto for output Artifacts, but also handle its creator Rule,
// and check that the Artifact is not already an output of another Rule.
for _, name := range outputs {
newTag := ArtifactTag{
name,
outputLocation,
}
artifact, ok := graph.artifacts[newTag.NormalizedTag()]
if !ok {
graph.artifacts[newTag.NormalizedTag()] = &Artifact{
tag: newTag,
creator: rule,
}
} else if artifact.creator != nil {
return fmt.Errorf("artifact '%v' cannot be output by more than one rule", newTag.NormalizedTag())
} else {
artifact.creator = rule
}
rule.Outputs = append(rule.Outputs, newTag)
}
// the Hash will be used to check for duplicates and to store the Rule in the Graph.
ruleHash := rule.Hash()
// duplicate check of the Rule.
_, ok := graph.rules[ruleHash]
if ok {
return fmt.Errorf("rule already exists with the same data")
}
graph.rules[ruleHash] = rule
return nil
}
// Set the Exists flag to true for Artifacts which exist on disk.
func (graph *Graph) MarkArtifactsAsExisting() {
for normalizedTag, artifact := range graph.artifacts {
_, err := os.Stat(normalizedTag)
artifact.Exists = (err == nil) // @robustness
}
}
// Set the Modified flag to true for Artifacts whose tags are passed.
func (graph *Graph) markArtifactsAsModified(tags []ArtifactTag) {
for _, tag := range tags {
artifact, ok := graph.artifacts[tag.NormalizedTag()]
if !ok {
panic("artifact '" + tag.NormalizedTag() + "'' does not exist")
}
artifact.Modified = true
}
}
// Reset to false the Processed status of all Rules. This is run after every build.
func (graph *Graph) resetProcessedRules() {
for _, rule := range graph.rules {
rule.Processed = false
}
}
// Reset to false the Modified status of all Artifacts. This is run after every build.
func (graph *Graph) resetModifiedArtifacts() {
for _, artifact := range graph.artifacts {
artifact.Modified = false
}
}
func NewGraph() *Graph {
return &Graph{
rules: make(map[string]*Rule),
artifacts: make(map[string]*Artifact),
}
}
/* Rule */
// Placeholder function for running a Rule synchronously.
func (rule *Rule) Execute(hash string) {
procInfo, err := GetProcedure(rule.procID)
if err != nil {
fmt.Println("rule.Execute:", err)
os.Exit(2)
}
proc := procInfo.New()
if err = proc.Execute(rule); err != nil {
fmt.Println("rule.Execute:", err)
os.Exit(2)
}
}
// The Hash function of a Rule. Obviously, must be deterministic.
// Takes into account the ArtifactTags and the ProcedureID.
func (rule *Rule) Hash() string {
hash := crypto.MD5.New()
slices.SortFunc(rule.Inputs, CompareArtifactTags)
for _, inputTag := range rule.Inputs {
hash.Write([]byte(inputTag.NormalizedTag()))
}
slices.SortFunc(rule.Outputs, CompareArtifactTags)
for _, outputTag := range rule.Outputs {
hash.Write([]byte(outputTag.NormalizedTag()))
}
hash.Write([]byte(rule.procID))
return fmt.Sprintf("%x", hash.Sum(nil))
}
/* ArtifactTag */
// This is what the Graph uses as a key to an Artifact object.
// It's also the actual path of the Artifact on disk.
func (tag *ArtifactTag) NormalizedTag() string {
return path.Join(tag.location, tag.name)
}
// Required for respecting a deterministic order when hashing a Rule.
func CompareArtifactTags(first, second ArtifactTag) int {
return strings.Compare(first.NormalizedTag(), second.NormalizedTag())
}