diff --git a/scheduler/spec/.gitignore b/scheduler/spec/.gitignore new file mode 100644 index 00000000000..2b9579fa498 --- /dev/null +++ b/scheduler/spec/.gitignore @@ -0,0 +1,2 @@ +states +*.out \ No newline at end of file diff --git a/scheduler/spec/system/SystemCoverageWF.cfg b/scheduler/spec/system/SystemCoverageWF.cfg new file mode 100644 index 00000000000..d138415da78 --- /dev/null +++ b/scheduler/spec/system/SystemCoverageWF.cfg @@ -0,0 +1,26 @@ +\* TLC Model Configuration for SystemScheduler +\* Includes safety invariants and liveness (SystemCoverage) temporal property + +\* ---------- Constants ---------- + +CONSTANT + Nodes <- NodesValue + Jobs <- JobsValue + Attrs <- AttrsValue + Capacity <- CapacityValue + Demand <- DemandValue + ConstraintFn <- ConstraintFnValue + ScoreFn <- ScoreFnValue + + +\* ---------- Safety Invariants ---------- +INVARIANT + Inv + CapacitySafety + AllocRange + +\* ---------- Temporal Properties ---------- +PROPERTY SystemCoverage + +INIT Init +NEXT Next \ No newline at end of file diff --git a/scheduler/spec/system/SystemCoverageWF.tla b/scheduler/spec/system/SystemCoverageWF.tla new file mode 100644 index 00000000000..afc0fd9739b --- /dev/null +++ b/scheduler/spec/system/SystemCoverageWF.tla @@ -0,0 +1,20 @@ +---- MODULE SystemCoverageWF ---- +EXTENDS SystemScheduler + +NodesValue == { "n1", "n2" } +JobsValue == { "j1", "j2" } + +AttrsValue == [ + n1 |-> [ dc |-> "us", cores |-> 2 ], + n2 |-> [ dc |-> "eu", cores |-> 1 ] +] + +CapacityValue == [ n1 |-> 2, n2 |-> 1 ] + +DemandValue == [ j1 |-> 1, j2 |-> 1 ] + +ConstraintFnValue(a) == a.dc = "us" + +ScoreFnValue(j,a) == IF j = "j1" THEN a.cores ELSE a.cores - 1 + +==== \ No newline at end of file diff --git a/scheduler/spec/system/SystemScheduler.tla b/scheduler/spec/system/SystemScheduler.tla new file mode 100644 index 00000000000..f484f499095 --- /dev/null +++ b/scheduler/spec/system/SystemScheduler.tla @@ -0,0 +1,121 @@ +---- MODULE SystemScheduler ---- +EXTENDS Naturals, Sequences, FiniteSets, TLC + +CONSTANT + Nodes, \* finite set of nodes + Jobs, \* finite set of job identifiers + Attrs, \* Attrs is a function Nodes -> attribute record (abstract) + Capacity, \* Capacity is a function Nodes -> Nat + Demand, \* Demand is a function Jobs -> Nat + ConstraintFn(_), \* ConstraintFn(attr) -> BOOLEAN + ScoreFn(_,_) \* ScoreFn(job, attr) -> Nat + +NodeSet == Nodes +JobSet == Jobs + +VARIABLE + eligibleNodes, \* subset of Nodes that are currently eligible + allocs, \* allocs[j][n] = 0/1 indicating allocation of job j on node n + currentJobs \* subset of Jobs that exist (allows job add/remove) + +\* Initialize variables +Init == + /\ eligibleNodes = Nodes + /\ currentJobs = Jobs + /\ allocs = [j \in Jobs |-> [n \in Nodes |-> 0]] + +\* Used capacity on client c +RECURSIVE Sum(_) +Sum(S) == IF S = {} THEN 0 ELSE + LET x == CHOOSE y \in S: TRUE IN x + Sum(S \ { x }) + +UsedCap(c) == Sum({ allocs[j][c] * Demand[j] : j \in currentJobs }) + +\* Whether job j can run on node n (constraints + capacity + presence) +Eligible(j,n) == + /\ n \in eligibleNodes + /\ j \in currentJobs + /\ ConstraintFn(Attrs[n]) + /\ allocs[j][n] = 0 + /\ UsedCap(n) + Demand[j] <= Capacity[n] + +\* Safety: no node exceeds capacity +CapacitySafety == + \A n \in Nodes: UsedCap(n) <= Capacity[n] + +\* Safety: allocs are only 0 or 1 +AllocRange == + \A j \in Jobs: \A n \in Nodes: allocs[j][n] \in {0, 1} + +\* Desired coverage (for system jobs): for every job, every eligible client should have alloc=1 +\* SystemCoverage == +\* \A j \in currentJobs: \A c \in eligibleNodes: +\* (ConstraintFn[j][Attrs[c]] /\ (UsedCap(c) + Demand[j] <= Capacity[c])) +\* => allocs[j][c] = 1 + +SystemCoverage == + <> (\A j \in currentJobs: \A c \in eligibleNodes: + Eligible(j,c) => allocs[j][c] = 1) + +\* Choose the best eligible (job,client) pair. +\* To be deterministic for TLC, we break ties by lexicographic order (smallest +\* job then smallest client). +BestPairExists == + \E j \in currentJobs, c \in eligibleNodes: Eligible(j,c) + +BestPair(j,c) == + /\ Eligible(j,c) + /\ \A jb \in currentJobs, cb \in eligibleNodes: + Eligible(jb,cb) => + ( ScoreFn(j, Attrs[c]) > ScoreFn(jb, Attrs[cb]) + \/ (ScoreFn(j, Attrs[c]) = ScoreFn(jb, Attrs[cb]) + /\ (j < jb \/ (j = jb /\ c <= cb))) + ) + +\* Actual scheduling algorithm model +vars == << eligibleNodes, currentJobs, allocs >> + +Next == \/ /\ IF (\E j \in currentJobs, c \in eligibleNodes: Eligible(j,c)) + THEN /\ \E jb \in currentJobs: + \E nb \in eligibleNodes: + IF BestPair(jb,nb) + THEN /\ allocs' = [allocs EXCEPT ![jb][nb] = 1] + ELSE /\ TRUE + /\ UNCHANGED allocs + ELSE /\ TRUE + /\ UNCHANGED allocs + /\ UNCHANGED <> + \/ /\ \E n \in Nodes: + IF n \notin eligibleNodes + THEN /\ eligibleNodes' = (eligibleNodes \cup {n}) + ELSE /\ TRUE + /\ UNCHANGED eligibleNodes + /\ UNCHANGED <> + \/ /\ \E n \in eligibleNodes: + /\ eligibleNodes' = eligibleNodes \ {n} + /\ \E j \in Jobs: + allocs' = [allocs EXCEPT ![j][n] = 0] + /\ UNCHANGED currentJobs + \/ /\ \E j \in Jobs: + IF j \notin currentJobs + THEN /\ currentJobs' = (currentJobs \cup {j}) + ELSE /\ TRUE + /\ UNCHANGED currentJobs + /\ UNCHANGED <> + \/ /\ \E j \in currentJobs: + /\ currentJobs' = currentJobs \ {j} + /\ \E n \in Nodes: + allocs' = [allocs EXCEPT ![j][n] = 0] + /\ UNCHANGED eligibleNodes + +Spec == Init /\ [][Next]_vars + +\* ---------- Helpful invariants to check with TLC ---------- +Inv == + /\ AllocRange + /\ CapacitySafety + /\ \A j \in currentJobs, n \in eligibleNodes: + allocs[j][n] = 1 => ConstraintFn(Attrs[n]) + /\ \A j \in currentJobs, n \in Nodes: allocs[j][n] = 1 => n \in eligibleNodes + +====