Skip to content

Commit

Permalink
Add node selector (#99)
Browse files Browse the repository at this point in the history
* add node selectors

* lint

* add node selector to local config

* update snapshots

* ui test

* remove comment

* update unit tests

* update readme.md

* update readme

* update readme
  • Loading branch information
Meriem-BenIsmail authored Feb 20, 2025
1 parent 5678759 commit a2b3e9a
Show file tree
Hide file tree
Showing 20 changed files with 224 additions and 37 deletions.
45 changes: 44 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,6 @@ c.JupyterHub.services.extend(
'{"label": "Medium", "cpu": 4, "memory": 4}',
"--machine_profiles",
'{"label": "Large", "cpu": 8, "memory": 8}'

],
"oauth_no_confirm": True,
}
Expand All @@ -228,6 +227,50 @@ c.JupyterHub.services.extend(

![image](https://github.com/plasmabio/tljh-repo2docker/assets/4451292/c1f0231e-a02d-41dc-85e0-97a97ffa0311)

### Node Selector

`tljh-repo2docker` allows specifying node selectors to control which Kubernetes nodes user environments are scheduled on. This can be useful for assigning workloads to specific nodes based on hardware characteristics like GPUs, SSD storage, or other node labels.

#### Configuring Node Selectors

To configure node selectors, add the `--node_selector` argument in the service definition:

```python
c.JupyterHub.services.extend(
[
{
"name": "tljh_repo2docker",
"url": "http://127.0.0.1:6789",
"command": [
sys.executable,
"-m",
"tljh_repo2docker",
"--ip",
"127.0.0.1",
"--port",
"6789",
"--node_selector",
'{"gpu": {"description": "GPU availability", "values": ["yes", "no"]},'
' "ssd": {"description": "SSD availability", "values": ["yes", "no"]}}'
],
"oauth_no_confirm": True,
}
]
)
```

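Each key defines a node selector that is offered as a dropdown when creating an environment, with `values` as the selectable choices. The selected values are passed on to the spawner so that workloads can be scheduled only on nodes that meet the specified criteria.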

#### Accessing Node Selector in Spawner

The node selector information is passed through the metadata field of `user_options` and can be accessed in the `start` method of the spawner:

```python
user_options["metadata"]["node_selector"]
```

![node_selector](https://github.com/user-attachments/assets/046bee93-2c7c-4e42-a9a0-94ade6f191d9)

### Extra documentation

`tljh-repo2docker` is currently developed as part of the [Plasma project](https://github.com/plasmabio/plasma).
Expand Down
8 changes: 7 additions & 1 deletion src/environments/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ import { ThemeProvider, createTheme } from '@mui/material/styles';

import { IEnvironmentData } from './types';
import { EnvironmentList } from './EnvironmentList';
import { IMachineProfile, NewEnvironmentDialog } from './NewEnvironmentDialog';
import {
IMachineProfile,
INodeSelector,
NewEnvironmentDialog
} from './NewEnvironmentDialog';
import { AxiosContext } from '../common/AxiosContext';
import { useEffect, useMemo, useState } from 'react';
import { AxiosClient } from '../common/axiosclient';
Expand All @@ -16,6 +20,7 @@ export interface IAppProps {
default_cpu_limit: string;
default_mem_limit: string;
machine_profiles: IMachineProfile[];
node_selector: INodeSelector;
use_binderhub: boolean;
repo_providers?: { label: string; value: string }[];
}
Expand Down Expand Up @@ -75,6 +80,7 @@ export default function App(props: IAppProps) {
default_cpu_limit={props.default_cpu_limit}
default_mem_limit={props.default_mem_limit}
machine_profiles={props.machine_profiles}
node_selector={props.node_selector}
use_binderhub={props.use_binderhub}
repo_providers={props.repo_providers}
/>
Expand Down
78 changes: 73 additions & 5 deletions src/environments/NewEnvironmentDialog.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
MenuItem,
OutlinedTextFieldProps,
Select,
TextField,
Typography
} from '@mui/material';
import {
Expand All @@ -31,10 +32,20 @@ export interface IMachineProfile {
cpu: string;
memory: string;
}
/**
 * Configuration of a single node selector offered in the new-environment
 * dialog.
 */
interface INodeSelectorOption {
/** Human-readable text used when building the label of the dropdown. */
description: string;
/**
 * Choices rendered as dropdown entries; the first entry is used as the
 * initial selection.
 */
values: string[];
}

/**
 * Node selector configuration, keyed by selector name. Each key becomes one
 * dropdown in the dialog and one entry of the submitted `node_selector` value.
 */
export interface INodeSelector {
[key: string]: INodeSelectorOption;
}

/**
 * Props accepted by the new-environment dialog.
 */
export interface INewEnvironmentDialogProps {
/** Default CPU limit; presumably pre-fills the CPU field (usage not visible here, TODO confirm). */
default_cpu_limit: string;
/** Default memory limit; presumably pre-fills the memory field (usage not visible here, TODO confirm). */
default_mem_limit: string;
/** Pre-defined machine profiles; when non-empty, a profile selector replaces the manual memory/CPU inputs. */
machine_profiles: IMachineProfile[];
/** Node selector options; one dropdown is rendered per key. */
node_selector: INodeSelector;
/** When false, additional inputs are rendered below the resource selectors. */
use_binderhub: boolean;
/** Optional list of repository providers; when non-empty, the first one is selected on mount. */
repo_providers?: { label: string; value: string }[];
}
Expand All @@ -49,6 +60,7 @@ interface IFormValues {
buildargs?: string;
username?: string;
password?: string;
node_selector?: { [key: string]: string | undefined };
}
const commonInputProps: OutlinedTextFieldProps = {
autoFocus: true,
Expand Down Expand Up @@ -76,10 +88,14 @@ function _NewEnvironmentDialog(props: INewEnvironmentDialogProps) {

const [formValues, setFormValues] = useState<IFormValues>({});
const updateFormValue = useCallback(
(key: keyof IFormValues, value: string | number) => {
setFormValues(old => {
return { ...old, [key]: value };
});
(
key: keyof IFormValues,
value: string | number | { [key: string]: string | undefined }
) => {
setFormValues(old => ({
...old,
[key]: value
}));
},
[setFormValues]
);
Expand All @@ -89,6 +105,15 @@ function _NewEnvironmentDialog(props: INewEnvironmentDialogProps) {

const [selectedProfile, setSelectedProfile] = useState<number>(0);
const [selectedProvider, setSelectedProvider] = useState<number>(0);
// Currently selected value for every configured node selector, keyed by
// selector name. The lazy initializer seeds each key with the first
// configured value, or an empty string when no values are configured.
const [selectedNodeSelectors, setSelectedNodeSelectors] = useState<{
  [key: string]: string;
}>(() =>
  Object.keys(props.node_selector).reduce<{ [key: string]: string }>(
    (defaults, name) => {
      defaults[name] = props.node_selector[name].values[0] || '';
      return defaults;
    },
    {}
  )
);

const onMachineProfileChange = useCallback(
(value?: string | number) => {
Expand Down Expand Up @@ -118,18 +143,39 @@ function _NewEnvironmentDialog(props: INewEnvironmentDialogProps) {
},
[props.repo_providers, updateFormValue]
);

// Records the value picked for one node selector and mirrors the complete
// selection into the form values under `node_selector`.
// NOTE(review): `updateFormValue` is triggered from inside the state updater,
// so the updater is not pure — confirm this is acceptable under StrictMode,
// which may invoke updaters twice.
const onNodeSelectorChange = useCallback(
  (key: string, value: string) => {
    // An `undefined` value can still arrive at runtime (e.g. the first entry
    // of an empty `values` list); leave the selection untouched in that case.
    if (value === undefined) {
      return;
    }
    setSelectedNodeSelectors(current => {
      const updated = { ...current };
      updated[key] = value;
      updateFormValue('node_selector', updated);
      return updated;
    });
  },
  [updateFormValue]
);

useEffect(() => {
if (props.machine_profiles.length > 0) {
onMachineProfileChange(0);
}
if (props.repo_providers && props.repo_providers.length > 0) {
onRepoProviderChange(0);
}
if (props.node_selector) {
Object.entries(props.node_selector).forEach(([key, option]) => {
onNodeSelectorChange(key, option.values[0]);
});
}
}, [
props.machine_profiles,
props.repo_providers,
props.node_selector,
onMachineProfileChange,
onRepoProviderChange
onRepoProviderChange,
onNodeSelectorChange
]);
const MemoryCpuSelector = useMemo(() => {
return (
Expand Down Expand Up @@ -186,6 +232,27 @@ function _NewEnvironmentDialog(props: INewEnvironmentDialogProps) {
);
}, [props.machine_profiles, selectedProfile, onMachineProfileChange]);

// One dropdown per configured node selector. Each dropdown lists the
// selector's values and reports changes through `onNodeSelectorChange`.
const NodeSelectorDropdown = useMemo(() => {
  return Object.entries(props.node_selector).map(([key, option]) => {
    // Always show the selector name; append the description in parentheses
    // only when one is provided. (`+` binds tighter than `&&`, so the label
    // must be assembled explicitly rather than as `key + desc && ...`.)
    const label = option.description
      ? `${key} (${option.description})`
      : key;
    return (
      <FormControl key={key} fullWidth sx={{ marginTop: '8px' }}>
        <TextField
          id={`${key}-select`}
          // Fall back to '' so the input stays controlled when no value has
          // been recorded for this key yet.
          value={selectedNodeSelectors[key] ?? ''}
          label={label}
          size="small"
          select
          onChange={e => onNodeSelectorChange(key, e.target.value)}
        >
          {option.values.map((val: string) => (
            <MenuItem key={val} value={val}>
              {val}
            </MenuItem>
          ))}
        </TextField>
      </FormControl>
    );
  });
}, [props.node_selector, selectedNodeSelectors, onNodeSelectorChange]);

return (
<Fragment>
<Box sx={{ display: 'flex', flexDirection: 'row-reverse' }}>
Expand Down Expand Up @@ -292,6 +359,7 @@ function _NewEnvironmentDialog(props: INewEnvironmentDialogProps) {
{props.machine_profiles.length > 0
? MachineProfileSelector
: MemoryCpuSelector}
{props.node_selector && NodeSelectorDropdown}
{!props.use_binderhub && (
<Fragment>
<Divider
Expand Down
1 change: 1 addition & 0 deletions src/environments/main.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ if (rootElement) {
default_cpu_limit: '2',
default_mem_limit: '2G',
machine_profiles: [],
node_selector: {},
use_binderhub: false
};
if (dataElement) {
Expand Down
15 changes: 15 additions & 0 deletions tljh_repo2docker/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,19 @@ def _logo_file_default(self):
def _default_log_level(self):
return logging.INFO

# Node selector options exposed to the UI. The help text describes the
# option format this service expects (label name -> description + values),
# not the flat label mapping used by KubeSpawner.node_selector.
node_selector = Dict(
    config=True,
    help="""
    Node selector options offered to users when creating an environment.

    Maps each node label name to a dictionary with a ``description`` and the
    list of selectable ``values``. Default is an empty dictionary, which means
    no node selector is offered.

    For example, to let users choose whether an environment needs a GPU node use::

        c.TljhRepo2Docker.node_selector = {
            "gpu": {"description": "GPU availability", "values": ["yes", "no"]}
        }
    """,
)

machine_profiles = List(
default_value=[], trait=Dict, config=True, help="Pre-defined machine profiles"
)
Expand Down Expand Up @@ -162,6 +175,7 @@ def _default_log_level(self):
"default_memory_limit": "TljhRepo2Docker.default_memory_limit",
"default_cpu_limit": "TljhRepo2Docker.default_cpu_limit",
"machine_profiles": "TljhRepo2Docker.machine_profiles",
"node_selector": "TljhRepo2Docker.node_selector",
"binderhub_url": "TljhRepo2Docker.binderhub_url",
"db_url": "TljhRepo2Docker.db_url",
}
Expand Down Expand Up @@ -193,6 +207,7 @@ def init_settings(self) -> tp.Dict:
default_mem_limit=self.default_memory_limit,
default_cpu_limit=self.default_cpu_limit,
machine_profiles=self.machine_profiles,
node_selector=self.node_selector,
binderhub_url=self.binderhub_url,
repo_providers=self.repo_providers,
)
Expand Down
3 changes: 2 additions & 1 deletion tljh_repo2docker/binderhub_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ async def post(self):
memory = data["memory"]
cpu = data["cpu"]
provider = data["provider"]
node_selector = data["node_selector"]
if len(repo) == 0:
raise web.HTTPError(400, "Repository is empty")

Expand Down Expand Up @@ -115,7 +116,7 @@ async def post(self):
status=BuildStatusType.BUILDING,
log="",
image_meta=ImageMetadataType(
display_name=name, repo=repo, ref=ref, cpu_limit=cpu, mem_limit=memory
display_name=name, repo=repo, ref=ref, cpu_limit=cpu, mem_limit=memory, node_selector= node_selector
),
)
self.set_status(200)
Expand Down
3 changes: 2 additions & 1 deletion tljh_repo2docker/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ async def post(self):
name = data["name"].lower()
memory = data["memory"]
cpu = data["cpu"]
node_selector = data.get("node_selector", {})
buildargs = data.get("buildargs", None)
username = data.get("username", None)
password = data.get("password", None)
Expand Down Expand Up @@ -71,7 +72,7 @@ async def post(self):
raise web.HTTPError(400, "Invalid build argument format")
extra_buildargs.append(barg)
await build_image(
repo, ref, name, memory, cpu, username, password, extra_buildargs
repo, ref, node_selector, name, memory, cpu, username, password, extra_buildargs
)

self.set_status(200)
Expand Down
1 change: 1 addition & 0 deletions tljh_repo2docker/database/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class ImageMetadataType(BaseModel):
ref: str
cpu_limit: str
mem_limit: str
node_selector: dict


class DockerImageCreateSchema(BaseModel):
Expand Down
28 changes: 28 additions & 0 deletions tljh_repo2docker/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from urllib.parse import urlparse

from aiodocker import Docker
from tornado import web


async def list_images():
Expand All @@ -20,6 +21,7 @@ async def list_images():
"display_name": image["Labels"]["tljh_repo2docker.display_name"],
"mem_limit": image["Labels"]["tljh_repo2docker.mem_limit"],
"cpu_limit": image["Labels"]["tljh_repo2docker.cpu_limit"],
"node_selector": image["Labels"]["tljh_repo2docker.node_selector"],
"status": "built",
}
for image in r2d_images
Expand All @@ -45,6 +47,7 @@ async def list_containers():
"display_name": container["Labels"]["tljh_repo2docker.display_name"],
"mem_limit": container["Labels"]["tljh_repo2docker.mem_limit"],
"cpu_limit": container["Labels"]["tljh_repo2docker.cpu_limit"],
"node_selector": container["Labels"]["tljh_repo2docker.node_selector"],
"status": "building",
}
for container in r2d_containers
Expand All @@ -53,9 +56,32 @@ async def list_containers():
return containers


async def get_image_metadata(image_name):
    """
    Retrieve metadata of a specific locally built Docker image.

    Looks up images whose reference matches ``image_name`` and returns the
    metadata stored in the labels of the first match. Every field defaults to
    an empty string when the corresponding label is absent.

    Raises ``web.HTTPError`` (404) when no image matches ``image_name``.
    """
    async with Docker() as docker:
        images = await docker.images.list(
            filters=json.dumps({"reference": [image_name]})
        )
        if not images:
            raise web.HTTPError(404, "Image not found")

        # Docker reports ``Labels`` as null for images built without labels;
        # normalize to an empty mapping so the lookups below cannot fail.
        labels = images[0].get("Labels") or {}
        return {
            "repo": labels.get("repo2docker.repo", ""),
            "ref": labels.get("repo2docker.ref", ""),
            "display_name": labels.get("tljh_repo2docker.display_name", ""),
            "mem_limit": labels.get("tljh_repo2docker.mem_limit", ""),
            "cpu_limit": labels.get("tljh_repo2docker.cpu_limit", ""),
            "node_selector": labels.get("tljh_repo2docker.node_selector", ""),
        }


async def build_image(
repo,
ref,
node_selector={},
name="",
memory=None,
cpu=None,
Expand Down Expand Up @@ -86,6 +112,7 @@ async def build_image(
f"tljh_repo2docker.image_name={image_name}",
f"tljh_repo2docker.mem_limit={memory}",
f"tljh_repo2docker.cpu_limit={cpu}",
f"tljh_repo2docker.node_selector={node_selector}",
]
cmd = [
"jupyter-repo2docker",
Expand Down Expand Up @@ -118,6 +145,7 @@ async def build_image(
"tljh_repo2docker.display_name": name,
"tljh_repo2docker.mem_limit": memory,
"tljh_repo2docker.cpu_limit": cpu,
"tljh_repo2docker.node_selector": json.dumps(node_selector),
},
"Volumes": {
"/var/run/docker.sock": {
Expand Down
Loading

0 comments on commit a2b3e9a

Please sign in to comment.