Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add node selector #99

Merged
merged 10 commits into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,6 @@ c.JupyterHub.services.extend(
'{"label": "Medium", "cpu": 4, "memory": 4}',
"--machine_profiles",
'{"label": "Large", "cpu": 8, "memory": 8}'

],
"oauth_no_confirm": True,
}
Expand All @@ -228,6 +227,40 @@ c.JupyterHub.services.extend(

![image](https://github.com/plasmabio/tljh-repo2docker/assets/4451292/c1f0231e-a02d-41dc-85e0-97a97ffa0311)

### Node Selector

`tljh-repo2docker` allows specifying node selectors to control which Kubernetes nodes user environments are scheduled on. This can be useful for assigning workloads to specific nodes based on hardware characteristics like GPUs, SSD storage, or other node labels.

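To configure node selectors, pass the `--node_selector` argument in the service definition. Its value is a JSON object that maps each node label key to a `description` and the list of selectable `values`: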

```python
c.JupyterHub.services.extend(
[
{
"name": "tljh_repo2docker",
"url": "http://127.0.0.1:6789",
"command": [
sys.executable,
"-m",
"tljh_repo2docker",
"--ip",
"127.0.0.1",
"--port",
"6789",
"--node_selector",
'{"gpu": {"description": "GPU availability", "values": ["yes", "no"]},'
' "ssd": {"description": "SSD availability", "values": ["yes", "no"]}}'
],
"oauth_no_confirm": True,
}
]
)
```
Users then pick one value per label when creating an environment, which ensures that workloads are scheduled only on nodes that meet the specified criteria.

![node_selector](https://github.com/user-attachments/assets/046bee93-2c7c-4e42-a9a0-94ade6f191d9)


### Extra documentation

`tljh-repo2docker` is currently developed as part of the [Plasma project](https://github.com/plasmabio/plasma).
Expand Down
8 changes: 7 additions & 1 deletion src/environments/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ import { ThemeProvider, createTheme } from '@mui/material/styles';

import { IEnvironmentData } from './types';
import { EnvironmentList } from './EnvironmentList';
import { IMachineProfile, NewEnvironmentDialog } from './NewEnvironmentDialog';
import {
IMachineProfile,
INodeSelector,
NewEnvironmentDialog
} from './NewEnvironmentDialog';
import { AxiosContext } from '../common/AxiosContext';
import { useEffect, useMemo, useState } from 'react';
import { AxiosClient } from '../common/axiosclient';
Expand All @@ -16,6 +20,7 @@ export interface IAppProps {
default_cpu_limit: string;
default_mem_limit: string;
machine_profiles: IMachineProfile[];
node_selector: INodeSelector;
use_binderhub: boolean;
repo_providers?: { label: string; value: string }[];
}
Expand Down Expand Up @@ -75,6 +80,7 @@ export default function App(props: IAppProps) {
default_cpu_limit={props.default_cpu_limit}
default_mem_limit={props.default_mem_limit}
machine_profiles={props.machine_profiles}
node_selector={props.node_selector}
use_binderhub={props.use_binderhub}
repo_providers={props.repo_providers}
/>
Expand Down
78 changes: 73 additions & 5 deletions src/environments/NewEnvironmentDialog.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
MenuItem,
OutlinedTextFieldProps,
Select,
TextField,
Typography
} from '@mui/material';
import {
Expand All @@ -31,10 +32,20 @@ export interface IMachineProfile {
cpu: string;
memory: string;
}
/**
 * A single selectable node label: a description shown next to the label key
 * and the values the user can choose from.
 */
interface INodeSelectorOption {
  /** Text displayed alongside the label key in the dialog. */
  description: string;
  /** Values offered in the dropdown for this label. */
  values: Array<string>;
}

/**
 * Node selector configuration, keyed by node label name.
 */
export interface INodeSelector {
  [label: string]: INodeSelectorOption;
}

/**
 * Props accepted by the new-environment dialog.
 */
export interface INewEnvironmentDialogProps {
  /** Default CPU limit, as a string. */
  default_cpu_limit: string;
  /** Default memory limit, as a string. */
  default_mem_limit: string;
  /** Pre-defined machine profiles; when non-empty the profile selector is rendered. */
  machine_profiles: Array<IMachineProfile>;
  /** Node selector options, rendered as one dropdown per label key. */
  node_selector: INodeSelector;
  /** Whether the BinderHub flavour of the dialog is used. */
  use_binderhub: boolean;
  /** Optional list of repository providers as label/value pairs. */
  repo_providers?: Array<{ label: string; value: string }>;
}
Expand All @@ -49,6 +60,7 @@ interface IFormValues {
buildargs?: string;
username?: string;
password?: string;
node_selector?: { [key: string]: string | undefined };
}
const commonInputProps: OutlinedTextFieldProps = {
autoFocus: true,
Expand Down Expand Up @@ -76,10 +88,14 @@ function _NewEnvironmentDialog(props: INewEnvironmentDialogProps) {

const [formValues, setFormValues] = useState<IFormValues>({});
const updateFormValue = useCallback(
(key: keyof IFormValues, value: string | number) => {
setFormValues(old => {
return { ...old, [key]: value };
});
(
key: keyof IFormValues,
value: string | number | { [key: string]: string | undefined }
) => {
setFormValues(old => ({
...old,
[key]: value
}));
},
[setFormValues]
);
Expand All @@ -89,6 +105,15 @@ function _NewEnvironmentDialog(props: INewEnvironmentDialogProps) {

const [selectedProfile, setSelectedProfile] = useState<number>(0);
const [selectedProvider, setSelectedProvider] = useState<number>(0);
// Currently selected value for every node selector key. The lazy initialiser
// picks the first listed value of each option, or '' when an option has none.
// `props.node_selector` is treated as possibly unset (as the effect and the
// render guard below already do) so a missing prop does not throw on mount.
const [selectedNodeSelectors, setSelectedNodeSelectors] = useState<{
  [key: string]: string;
}>(() => {
  const configured: INodeSelector = props.node_selector || {};
  const initialSelected: { [key: string]: string } = {};
  Object.entries(configured).forEach(([key, option]) => {
    initialSelected[key] = option.values[0] || '';
  });
  return initialSelected;
});

const onMachineProfileChange = useCallback(
(value?: string | number) => {
Expand Down Expand Up @@ -118,18 +143,39 @@ function _NewEnvironmentDialog(props: INewEnvironmentDialogProps) {
},
[props.repo_providers, updateFormValue]
);

// Stores the value chosen for one node selector key and mirrors the complete
// selection into the form values under `node_selector`.
const onNodeSelectorChange = useCallback(
  (key: string, value: string) => {
    // `value` can be undefined at runtime when an option lists no values
    // (the mount effect passes `option.values[0]`); such calls are ignored.
    if (value === undefined) {
      return;
    }
    setSelectedNodeSelectors(current => {
      const updated = { ...current, [key]: value };
      // NOTE(review): this triggers another state update from inside a state
      // updater; kept as-is to preserve behaviour.
      updateFormValue('node_selector', updated);
      return updated;
    });
  },
  [updateFormValue]
);

useEffect(() => {
if (props.machine_profiles.length > 0) {
onMachineProfileChange(0);
}
if (props.repo_providers && props.repo_providers.length > 0) {
onRepoProviderChange(0);
}
if (props.node_selector) {
Object.entries(props.node_selector).forEach(([key, option]) => {
onNodeSelectorChange(key, option.values[0]);
});
}
}, [
props.machine_profiles,
props.repo_providers,
props.node_selector,
onMachineProfileChange,
onRepoProviderChange
onRepoProviderChange,
onNodeSelectorChange
]);
const MemoryCpuSelector = useMemo(() => {
return (
Expand Down Expand Up @@ -186,6 +232,27 @@ function _NewEnvironmentDialog(props: INewEnvironmentDialogProps) {
);
}, [props.machine_profiles, selectedProfile, onMachineProfileChange]);

// One dropdown per configured node selector key, listing that key's values.
// Memoised on the configuration, the current selection and the change handler.
const NodeSelectorDropdown = useMemo(() => {
  // Tolerate a missing prop: the render guard (`props.node_selector && ...`)
  // runs after this memo has already been evaluated.
  const configured: INodeSelector = props.node_selector || {};
  return Object.entries(configured).map(([key, option]) => (
    <FormControl key={key} fullWidth sx={{ marginTop: '8px' }}>
      <TextField
        id={`${key}-select`}
        value={selectedNodeSelectors[key]}
        // `+` binds tighter than `&&`, so the previous
        // `key + option.description && ...` expression dropped the key and
        // rendered only "(description)". Build the label explicitly instead.
        label={option.description ? `${key} (${option.description})` : key}
        size="small"
        select
        onChange={e => onNodeSelectorChange(key, e.target.value)}
      >
        {option.values.map((val: string) => (
          <MenuItem key={val} value={val}>
            {val}
          </MenuItem>
        ))}
      </TextField>
    </FormControl>
  ));
}, [props.node_selector, selectedNodeSelectors, onNodeSelectorChange]);

return (
<Fragment>
<Box sx={{ display: 'flex', flexDirection: 'row-reverse' }}>
Expand Down Expand Up @@ -292,6 +359,7 @@ function _NewEnvironmentDialog(props: INewEnvironmentDialogProps) {
{props.machine_profiles.length > 0
? MachineProfileSelector
: MemoryCpuSelector}
{props.node_selector && NodeSelectorDropdown}
{!props.use_binderhub && (
<Fragment>
<Divider
Expand Down
1 change: 1 addition & 0 deletions src/environments/main.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ if (rootElement) {
default_cpu_limit: '2',
default_mem_limit: '2G',
machine_profiles: [],
node_selector: {},
use_binderhub: false
};
if (dataElement) {
Expand Down
1 change: 1 addition & 0 deletions tljh_repo2docker/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ class Repo2DockerSpawner(SpawnerMixin, DockerSpawner):
"""

async def start(self, *args, **kwargs):
    """Call ``set_limits()`` and then delegate to the parent ``start``.

    All positional and keyword arguments are forwarded unchanged, and the
    parent's return value is returned as-is.
    """
    # Report the user options through the spawner logger at debug level
    # rather than printing them unconditionally to stdout.
    self.log.debug("SPAWN START %s", self.user_options)
    await self.set_limits()
    return await super().start(*args, **kwargs)

Expand Down
15 changes: 15 additions & 0 deletions tljh_repo2docker/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,19 @@ def _logo_file_default(self):
def _default_log_level(self):
return logging.INFO

# Node selector options exposed in the "new environment" dialog.
# The previous help text was copied from KubeSpawner and described a flat
# ``{label: value}`` mapping on a different configurable class.
node_selector = Dict(
    config=True,
    help="""
    Node selector options offered to users when creating an environment.

    Maps each node label key to a dictionary holding a `description` and the
    list of selectable `values`.

    Default is an empty dictionary, in which case no node selector is offered.

    For example, to let users choose whether a GPU node is required use::

        c.TljhRepo2Docker.node_selector = {
            "gpu": {"description": "GPU availability", "values": ["yes", "no"]}
        }
    """,
)

machine_profiles = List(
default_value=[], trait=Dict, config=True, help="Pre-defined machine profiles"
)
Expand Down Expand Up @@ -162,6 +175,7 @@ def _default_log_level(self):
"default_memory_limit": "TljhRepo2Docker.default_memory_limit",
"default_cpu_limit": "TljhRepo2Docker.default_cpu_limit",
"machine_profiles": "TljhRepo2Docker.machine_profiles",
"node_selector": "TljhRepo2Docker.node_selector",
"binderhub_url": "TljhRepo2Docker.binderhub_url",
"db_url": "TljhRepo2Docker.db_url",
}
Expand Down Expand Up @@ -193,6 +207,7 @@ def init_settings(self) -> tp.Dict:
default_mem_limit=self.default_memory_limit,
default_cpu_limit=self.default_cpu_limit,
machine_profiles=self.machine_profiles,
node_selector=self.node_selector,
binderhub_url=self.binderhub_url,
repo_providers=self.repo_providers,
)
Expand Down
3 changes: 2 additions & 1 deletion tljh_repo2docker/binderhub_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ async def post(self):
memory = data["memory"]
cpu = data["cpu"]
provider = data["provider"]
node_selector = data["node_selector"]
if len(repo) == 0:
raise web.HTTPError(400, "Repository is empty")

Expand Down Expand Up @@ -115,7 +116,7 @@ async def post(self):
status=BuildStatusType.BUILDING,
log="",
image_meta=ImageMetadataType(
display_name=name, repo=repo, ref=ref, cpu_limit=cpu, mem_limit=memory
display_name=name, repo=repo, ref=ref, cpu_limit=cpu, mem_limit=memory, node_selector= node_selector
),
)
self.set_status(200)
Expand Down
3 changes: 2 additions & 1 deletion tljh_repo2docker/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ async def post(self):
name = data["name"].lower()
memory = data["memory"]
cpu = data["cpu"]
node_selector = data.get("node_selector", {})
buildargs = data.get("buildargs", None)
username = data.get("username", None)
password = data.get("password", None)
Expand Down Expand Up @@ -71,7 +72,7 @@ async def post(self):
raise web.HTTPError(400, "Invalid build argument format")
extra_buildargs.append(barg)
await build_image(
repo, ref, name, memory, cpu, username, password, extra_buildargs
repo, ref, node_selector, name, memory, cpu, username, password, extra_buildargs
)

self.set_status(200)
Expand Down
1 change: 1 addition & 0 deletions tljh_repo2docker/database/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class ImageMetadataType(BaseModel):
ref: str
cpu_limit: str
mem_limit: str
node_selector: dict


class DockerImageCreateSchema(BaseModel):
Expand Down
28 changes: 28 additions & 0 deletions tljh_repo2docker/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from urllib.parse import urlparse

from aiodocker import Docker
from tornado import web


async def list_images():
Expand All @@ -20,6 +21,7 @@ async def list_images():
"display_name": image["Labels"]["tljh_repo2docker.display_name"],
"mem_limit": image["Labels"]["tljh_repo2docker.mem_limit"],
"cpu_limit": image["Labels"]["tljh_repo2docker.cpu_limit"],
"node_selector": image["Labels"]["tljh_repo2docker.node_selector"],
"status": "built",
}
for image in r2d_images
Expand All @@ -45,6 +47,7 @@ async def list_containers():
"display_name": container["Labels"]["tljh_repo2docker.display_name"],
"mem_limit": container["Labels"]["tljh_repo2docker.mem_limit"],
"cpu_limit": container["Labels"]["tljh_repo2docker.cpu_limit"],
"node_selector": container["Labels"]["tljh_repo2docker.node_selector"],
"status": "building",
}
for container in r2d_containers
Expand All @@ -53,9 +56,32 @@ async def list_containers():
return containers


async def get_image_metadata(image_name):
    """
    Retrieve metadata of a specific locally built Docker image.

    Args:
        image_name: Image reference used as the ``reference`` filter when
            listing local images.

    Returns:
        A dict with the keys ``repo``, ``ref``, ``display_name``,
        ``mem_limit``, ``cpu_limit`` and ``node_selector``, each read from the
        image labels and defaulting to an empty string when the label is absent.

    Raises:
        tornado.web.HTTPError: 404 when no local image matches ``image_name``.
    """
    async with Docker() as docker:
        images = await docker.images.list(
            filters=json.dumps({"reference": [image_name]})
        )
    if not images:
        raise web.HTTPError(404, "Image not found")

    # The image list may report ``Labels`` as null (or omit it) for images
    # built without labels; fall back to an empty mapping so the per-label
    # defaults below apply instead of raising.
    labels = images[0].get("Labels") or {}
    return {
        "repo": labels.get("repo2docker.repo", ""),
        "ref": labels.get("repo2docker.ref", ""),
        "display_name": labels.get("tljh_repo2docker.display_name", ""),
        "mem_limit": labels.get("tljh_repo2docker.mem_limit", ""),
        "cpu_limit": labels.get("tljh_repo2docker.cpu_limit", ""),
        "node_selector": labels.get("tljh_repo2docker.node_selector", ""),
    }


async def build_image(
repo,
ref,
node_selector={},
name="",
memory=None,
cpu=None,
Expand Down Expand Up @@ -86,6 +112,7 @@ async def build_image(
f"tljh_repo2docker.image_name={image_name}",
f"tljh_repo2docker.mem_limit={memory}",
f"tljh_repo2docker.cpu_limit={cpu}",
f"tljh_repo2docker.node_selector={node_selector}",
]
cmd = [
"jupyter-repo2docker",
Expand Down Expand Up @@ -118,6 +145,7 @@ async def build_image(
"tljh_repo2docker.display_name": name,
"tljh_repo2docker.mem_limit": memory,
"tljh_repo2docker.cpu_limit": cpu,
"tljh_repo2docker.node_selector": json.dumps(node_selector),
},
"Volumes": {
"/var/run/docker.sock": {
Expand Down
Loading
Loading