11import json
2- import math
32import sys
43from enum import Enum
54from pathlib import Path
5+ from typing import List
66
77import typer
88from ruamel .yaml import YAML
@@ -48,15 +48,17 @@ def proportional_memory_strategy(
4848 # We operate on *available* memory, which already accounts for system components (like kubelet & systemd)
4949 # as well as daemonsets we run on every node. This represents the resources that are available
5050 # for user pods.
51-
52- # FIXME: Add some more more wiggle room here
53- available_node_mem = nodeinfo ["available" ]["memory" ]
54- available_node_cpu = nodeinfo ["available" ]["cpu" ]
55-
56- # Only show one digit after . for CPU, but round *down* not up so we never
57- # say they are getting more CPU than our limit is set to. We multiply & divide
58- # with a floor, as otherwise 3.75 gets rounded to 3.8, not 3.7
59- cpu_display = math .floor (available_node_cpu * 10 ) / 10
51+ # In addition, we provide some wiggle room to account for additional daemonset requests or other
52+ # issues that may pop up due to changes outside our control (like k8s upgrades). This is either
53+ # 2% of the available capacity, or 2GB / 1 CPU (whichever is smaller)
54+ WIGGLE_PERCENTAGE = 0.02
55+ mem_overhead_wiggle = min (
56+ nodeinfo ["available" ]["memory" ] * WIGGLE_PERCENTAGE , 2 * 1024 * 1024 * 1024
57+ )
58+ cpu_overhead_wiggle = min (nodeinfo ["available" ]["cpu" ] * WIGGLE_PERCENTAGE , 1 )
59+
60+ available_node_mem = nodeinfo ["available" ]["memory" ] - mem_overhead_wiggle
61+ available_node_cpu = nodeinfo ["available" ]["cpu" ] - cpu_overhead_wiggle
6062
6163 # We always start from the top, and provide a choice that takes up the whole node.
6264 mem_limit = available_node_mem
@@ -67,12 +69,26 @@ def proportional_memory_strategy(
6769 # This makes sure we utilize all the memory on a node all the time.
6870 cpu_guarantee = (mem_limit / available_node_mem ) * available_node_cpu
6971
70- # Memory is in bytes, let's convert it to GB (with only 1 digit after .) to display
71- mem_display = f"{ mem_limit / 1024 / 1024 / 1024 :.1f} "
72- display_name = f"{ mem_display } GB RAM, upto { cpu_display } CPUs"
72+ # Memory is in bytes, let's convert it to GB or MB (with no digits after the decimal point) to display
73+ if mem_limit < 1024 * 1024 * 1024 :
74+ mem_display = f"{ mem_limit / 1024 / 1024 :.0f} MB"
75+ else :
76+ mem_display = f"{ mem_limit / 1024 / 1024 / 1024 :.0f} GB"
77+
78+ if cpu_guarantee < 2 :
79+ cpu_guarantee_display = f"~{ cpu_guarantee :0.1f} "
80+ else :
81+ cpu_guarantee_display = f"~{ cpu_guarantee :0.0f} "
82+
83+ display_name = f"~{ mem_display } RAM, { cpu_guarantee_display } CPUs"
84+ if cpu_guarantee != available_node_cpu :
85+ description = f"Up to ~{ available_node_cpu :.0f} CPUs when available"
86+ else :
87+ description = f"~{ available_node_cpu :.0f} CPUs always available"
7388
7489 choice = {
7590 "display_name" : display_name ,
91+ "description" : description ,
7692 "kubespawner_override" : {
7793 # Guarantee and Limit are the same - this strategy has no oversubscription
7894 "mem_guarantee" : int (mem_limit ),
@@ -91,26 +107,24 @@ def proportional_memory_strategy(
91107 # Use the amount of RAM made available as a slug, to allow combining choices from
92108 # multiple instance types in the same profile. This does mean you can not have
93109 # the same RAM allocation from multiple node selectors. But that's a feature, not a bug.
94- choices [f"mem_{ mem_display .replace ('.' , '_' )} " ] = choice
110+ choice_key = f"mem_{ mem_display .replace ('.' , '_' ).replace (' ' , '_' )} " .lower ()
111+ choices [choice_key ] = choice
95112
96113 # Halve the mem_limit for the next choice
97114 mem_limit = mem_limit / 2
98115
99116 # Reverse the choices so the smallest one is first
100117 choices = dict (reversed (choices .items ()))
101118
102- # Make the smallest choice the default explicitly
103- choices [list (choices .keys ())[0 ]]["default" ] = True
104-
105119 return choices
106120
107121
108122@resource_allocation_app .command ()
109123def choices (
110- instance_type : str = typer .Argument (
111- ..., help = "Instance type to generate Resource Allocation options for"
124+ instance_specification : List [str ] = typer .Argument (
125+ ...,
126+ help = "Instance type and number of choices to generate Resource Allocation options for. Specify as instance_type:count." ,
112127 ),
113- num_allocations : int = typer .Option (5 , help = "Number of choices to generate" ),
114128 strategy : ResourceAllocationStrategies = typer .Option (
115129 ResourceAllocationStrategies .PROPORTIONAL_MEMORY_STRATEGY ,
116130 help = "Strategy to use for generating resource allocation choices choices" ,
@@ -122,19 +136,25 @@ def choices(
122136 """
123137 with open (HERE / "node-capacity-info.json" ) as f :
124138 nodeinfo = json .load (f )
125-
126- if instance_type not in nodeinfo :
127- print (
128- f"Capacity information about { instance_type } not available" , file = sys .stderr
129- )
130- print ("TODO: Provide information on how to update it" , file = sys .stderr )
131- sys .exit (1 )
132-
133- # Call appropriate function based on what strategy we want to use
134- if strategy == ResourceAllocationStrategies .PROPORTIONAL_MEMORY_STRATEGY :
135- choices = proportional_memory_strategy (
136- instance_type , nodeinfo [instance_type ], num_allocations
137- )
138- else :
139- raise ValueError (f"Strategy { strategy } is not currently supported" )
139+ choices = {}
140+ for instance_spec in instance_specification :
141+ instance_type , num_allocations = instance_spec .split (":" , 2 )
142+
143+ if instance_type not in nodeinfo :
144+ print (
145+ f"Capacity information about { instance_type } not available" ,
146+ file = sys .stderr ,
147+ )
148+ print ("TODO: Provide information on how to update it" , file = sys .stderr )
149+ sys .exit (1 )
150+
151+ # Call appropriate function based on what strategy we want to use
152+ if strategy == ResourceAllocationStrategies .PROPORTIONAL_MEMORY_STRATEGY :
153+ choices .update (
154+ proportional_memory_strategy (
155+ instance_type , nodeinfo [instance_type ], int (num_allocations )
156+ )
157+ )
158+ else :
159+ raise ValueError (f"Strategy { strategy } is not currently supported" )
140160 yaml .dump (choices , sys .stdout )
0 commit comments