Add Docker Adapter

ronald-jaepel · ronald-jaepel · commit 2192bc36c83d · 2025-02-17T13:52:20.000+01:00
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -68,7 +68,7 @@ jobs:
 
       - name: Run Tests
         run: |
-          pytest tests -m "not server_api and not slow"
+          pytest tests -m "not server_api and not docker and not slow"
 
   build-release:
     name: Build and Upload Release
diff --git a/cadetrdm/batch_running/case.py b/cadetrdm/batch_running/case.py
@@ -155,7 +155,7 @@ def _get_results_branch(self):
             print("No matching results were found for these options and study version.")
         return None
 
-    def run_study(self, force=False) -> bool:
+    def run_study(self, force=False, container_adapter: "DockerAdapter" = None) -> bool:
         """
         Run specified study commands in the given repository.
 
@@ -177,15 +177,18 @@ def run_study(self, force=False) -> bool:
             print(f"{self.study.path} has already been computed with these options. Skipping...")
             return True
 
-        if not self.can_run_study:
+        if container_adapter is None and self.can_run_study is False:
             print(f"Current environment does not match required environment. Skipping...")
             self.status = 'failed'
             return False
 
         try:
             self.status = 'running'
 
-            self.study.module.main(self.options, str(self.study.path))
+            if container_adapter is not None:
+                container_adapter.run_case(self)
+            else:
+                self.study.module.main(self.options, str(self.study.path))
 
             print("Command execution successful.")
             self.status = 'finished'
diff --git a/cadetrdm/docker/Dockerfile_template b/cadetrdm/docker/Dockerfile_template
@@ -0,0 +1,31 @@
+# syntax=docker/dockerfile:1
+
+# Comments are provided throughout this file to help you get started.
+# If you need more help, visit the Dockerfile reference guide at
+# https://docs.docker.com/go/dockerfile-reference/
+
+# Want to help us make this template better? Share your feedback here: https://forms.gle/ybq9Krt8jtBL3iCk7
+
+ARG CONDA_VERSION=24.11.3
+FROM condaforge/miniforge3:${CONDA_VERSION}-0 AS base
+
+# Prevents Python from writing pyc files.
+ENV PYTHONDONTWRITEBYTECODE=1
+
+# Keeps Python from buffering stdout and stderr to avoid situations where
+# the application crashes without emitting any logs due to buffering.
+ENV PYTHONUNBUFFERED=1
+
+WORKDIR /rdm_workdir
+
+USER root
+
+# Prevents interactive prompts during apt-get
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y git git-lfs ssh && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+# environment.yml has to be present in the build context.
+COPY environment.yml /tmp/environment.yml
+# Install the packages listed in environment.yml into the base conda environment.
+RUN conda env update -n base --file /tmp/environment.yml
diff --git a/cadetrdm/docker/__init__.py b/cadetrdm/docker/__init__.py
@@ -0,0 +1,3 @@
+
+from .containerAdapter import ContainerAdapter
+from .dockerAdapter import DockerAdapter
diff --git a/cadetrdm/docker/containerAdapter.py b/cadetrdm/docker/containerAdapter.py
@@ -0,0 +1,17 @@
+from abc import ABC, abstractmethod
+
+
+class ContainerAdapter(ABC):
+    """Interface for back ends that run a case inside a container."""
+
+    @abstractmethod
+    def run_case(self, case, command=None):
+        """
+        Run the given case inside a container.
+
+        Args:
+            case: The case to execute.
+            command: Optional command to run instead of the implementation's
+                default.
+        """
+        return
diff --git a/cadetrdm/docker/dockerAdapter.py b/cadetrdm/docker/dockerAdapter.py
@@ -0,0 +1,303 @@
+import os
+import shlex
+import subprocess
+import tempfile
+from pathlib import Path
+
+import docker
+from docker.types import Mount
+from docker.models.images import Image
+
+from cadetrdm.docker import ContainerAdapter
+
+
+class DockerAdapter(ContainerAdapter):
+    """Container adapter that runs cases through the Docker SDK for Python."""
+
+    def __init__(self):
+        # Docker client configured from environment variables.
+        self.client = docker.from_env()
+        # Image to run cases in; while this is None, run_case() builds an
+        # image from the study's Dockerfile. Set by pull_image().
+        self.image = None
+
+    def run_case(self, case: "Case", command: str = None):
+        """
+        Run a case inside a Docker container.
+
+        Args:
+            case: Case whose study, options and environment are used.
+            command: Shell command to run inside the cloned study. Defaults
+                to running ``<study.name>/main.py`` with the options file.
+
+        Raises:
+            RuntimeError: If the container exits with a non-zero status.
+
+        Returns:
+            (list): The decoded log output of the container.
+        """
+        if case.environment is not None:
+            self._update_Dockerfile_with_env_reqs(case)
+
+        image = self._build_image(case) if self.image is None else self.image
+
+        container_tmp_filename = "/tmp/options.json"
+        options_tmp_filename = self._dump_options(case)
+
+        try:
+            full_command = self._prepare_command(
+                case=case,
+                command=command,
+                container_tmp_filename=container_tmp_filename
+            )
+
+            full_log = self._run_command(
+                container_tmp_filename=container_tmp_filename,
+                full_command=full_command,
+                image=image,
+                options_tmp_filename=options_tmp_filename
+            )
+        finally:
+            # The options file is only needed while the container is alive.
+            if options_tmp_filename.exists():
+                options_tmp_filename.unlink()
+
+        return full_log
+
+    def _run_command(self, container_tmp_filename, full_command, image, options_tmp_filename):
+        """
+        Start the container, stream its logs and wait for it to finish.
+
+        Args:
+            container_tmp_filename: Path of the options file inside the container.
+            full_command: Command to execute in the container.
+            image: Image to create the container from.
+            options_tmp_filename (Path): Options file on the host.
+
+        Raises:
+            FileNotFoundError: If the user has no ``~/.ssh`` directory.
+            RuntimeError: If the container exits with a non-zero status.
+
+        Returns:
+            (list): The decoded log chunks emitted by the container.
+        """
+        ssh_location = Path.home() / ".ssh"
+        if not ssh_location.exists():
+            raise FileNotFoundError("No ssh folder found. Please report this on GitHub/CADET/CADET-RDM")
+
+        # The container is removed explicitly below instead of via remove=True,
+        # so that it still exists when wait() is asked for the exit status.
+        container = self.client.containers.run(
+            image=image,
+            command=full_command,
+            volumes={
+                f"{Path.home()}/.ssh": {'bind': "/root/.ssh_host_os", 'mode': "ro"},
+                options_tmp_filename.absolute().as_posix(): {'bind': container_tmp_filename, 'mode': 'ro'}
+            },
+            detach=True
+        )
+
+        full_log = []
+        try:
+            # Stream the container's output until it exits.
+            for chunk in container.logs(stream=True):
+                text = chunk.decode("utf-8", errors="replace")
+                full_log.append(text)
+                print(text, end="")
+            # Wait for the container to finish execution
+            exit_code = container.wait()["StatusCode"]
+        finally:
+            container.remove(force=True)
+
+        if exit_code != 0:
+            raise RuntimeError(f"Container exited with status code {exit_code}.")
+        print("Done.")
+
+        return full_log
+
+    def _prepare_command(self, case, command, container_tmp_filename):
+        """
+        Assemble the shell command that is executed inside the container.
+
+        Args:
+            case: Case providing the study URL and name.
+            command: Command to run in the cloned study, or None for the default.
+            container_tmp_filename: Path of the options file inside the container.
+
+        Returns:
+            (str): A ``bash -c`` invocation chaining all steps with ``&&``.
+        """
+        # The host's .ssh folder is mounted read-only, so work on a copy whose
+        # permissions can be restricted to the owner.
+        command_ssh = 'cp -r /root/.ssh_host_os /root/.ssh && chmod 600 /root/.ssh/*'
+
+        # copy over git config
+        git_commands = []
+        for key in ("user.name", "user.email"):
+            result = subprocess.run(
+                ["git", "config", "--global", "--get", key],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                universal_newlines=True
+            )
+            value = result.stdout.strip()
+            # git exits with a non-zero status if the key is not set; skip
+            # such keys instead of configuring a placeholder in the container.
+            if result.returncode == 0 and value:
+                git_commands.append(f"git config --global {key} {shlex.quote(value)}")
+
+        # pull the study from the URL into a "study" folder
+        command_pull = f"rdm clone {shlex.quote(str(case.study.url))} study"
+        # cd into the "study" folder
+        command_cd = "cd study"
+        # run main.py with the options, assuming main.py lies within a sub-folder with the same name as the study.name
+        if command is None:
+            command_python = f"python {case.study.name}/main.py {container_tmp_filename}"
+        else:
+            command_python = command
+
+        commands = git_commands + [command_ssh, command_pull, command_cd, command_python]
+        # Quote the whole script so that it reaches bash as a single argument,
+        # even if it contains spaces or quotes (e.g. in the git user name).
+        full_command = "bash -c " + shlex.quote(" && ".join(commands))
+        return full_command
+
+    def _dump_options(self, case):
+        """
+        Write the options of the case to a uniquely named JSON file.
+
+        Returns:
+            (Path): Location of the file inside the local ``tmp`` folder.
+        """
+        tmp_dir = Path("tmp")
+        tmp_dir.mkdir(parents=True, exist_ok=True)
+        # mkstemp creates the file atomically; only its name is needed here.
+        handle, tmp_name = tempfile.mkstemp(suffix=".json", dir=str(tmp_dir))
+        os.close(handle)
+        tmp_filename = Path(tmp_name)
+        case.options.dump_json_file(tmp_filename)
+        return tmp_filename
+
+    def _build_image(self, case) -> Image:
+        """
+        Build an image from the Dockerfile in the study directory.
+
+        Raises:
+            FileNotFoundError: If the study contains no Dockerfile.
+
+        Returns:
+            (Image): The built image, tagged ``<study.name>:<case.name[:10]>``.
+        """
+        study_path = Path(case.study.path)
+        if not (study_path / "Dockerfile").is_file():
+            raise FileNotFoundError(f"No Dockerfile found in {study_path}.")
+
+        cwd = os.getcwd()
+        os.chdir(study_path.as_posix())
+        try:
+            image, logs = self.client.images.build(
+                path=study_path.as_posix(),
+                tag=case.study.name + ":" + case.name[:10],  # A tag to add to the final image
+                quiet=False,  # Whether to return the status
+                pull=True,  # Downloads any updates to the FROM image in Dockerfiles
+            )
+        finally:
+            # Restore the working directory even if the build fails.
+            os.chdir(cwd)
+
+        if case.options.debug:
+            for log in logs:
+                print(log)
+        return image
+
+    def pull_image(self, repository, tag=None, all_tags=False, **kwargs):
+        """
+        Pull an image and use it for subsequent runs instead of building one.
+
+        Args:
+            repository (str): The repository to pull.
+            tag (str): The tag to pull.
+            all_tags (bool): Pull all image tags.
+            **kwargs: Passed on to ``client.images.pull``.
+        """
+        # NOTE(review): with all_tags=True the SDK presumably returns a list of
+        # images, which run_case() cannot use as an image - confirm.
+        self.image = self.client.images.pull(
+            repository=repository,
+            tag=tag,
+            all_tags=all_tags,
+            **kwargs
+        )
+
+    def _push_image(self, repository, tag=None, **kwargs):
+        """Push the given repository (and tag) via ``client.images.push``."""
+        self.client.images.push(
+            repository=repository,
+            tag=tag,
+            **kwargs
+        )
+
+    def _tag_image(self, image: Image, repository, tag=None, **kwargs) -> Image:
+        """
+        Tag this image into a repository. Similar to the ``docker tag``
+        command.
+
+        Args:
+            image (Image): The image to tag.
+            repository (str): The repository to set for the tag
+            tag (str): The tag name
+            **kwargs: Passed on to ``Image.tag``, e.g. ``force``.
+
+        Raises:
+            :py:class:`docker.errors.APIError`
+                If the server returns an error.
+
+        Returns:
+            (Image): The image that was tagged.
+        """
+        image.tag(repository=repository, tag=tag, **kwargs)
+        return image
+
+    def build_and_push_image(self, case, repository, tag=None, **kwargs):
+        """
+        Build the image for a case, tag it and push it to a registry.
+
+        Args:
+            case: Case whose study contains the Dockerfile.
+            repository (str): The repository to tag and push to.
+            tag (str): The tag name.
+            **kwargs: Passed on to both the tag and the push call.
+
+        Returns:
+            (Image): The image that was built and pushed.
+        """
+        image = self._build_image(case)
+        image = self._tag_image(image, repository, tag, **kwargs)
+        self._push_image(repository, tag, **kwargs)
+        return image
+
+    def _update_Dockerfile_with_env_reqs(self, case):
+        """Append the install instructions of the case environment to the Dockerfile."""
+        # Presumably restores the committed Dockerfile, so that repeated runs
+        # do not append the same instructions again - confirm.
+        case.study._reset_hard_to_head(force_entry=True)
+
+        dockerfile = Path(case.study.path) / "Dockerfile"
+        conda, pip = case.environment.prepare_install_instructions()
+        install_command = "\n"
+        if len(conda) > 0:
+            install_command += f"RUN {conda}\n"
+        if len(pip) > 0:
+            install_command += f"RUN {pip}\n"
+            # Reinstall the requested packages themselves (without their
+            # dependencies) so they replace already installed versions.
+            install_command += f"RUN pip install --force-reinstall --no-deps {pip.split('pip install')[-1]}\n"
+
+        with open(dockerfile, "a") as handle:
+            handle.write(install_command)
+
+    def __del__(self):
+        # __init__ may have failed before the client was created.
+        client = getattr(self, "client", None)
+        if client is not None:
+            client.close()
diff --git a/pyproject.toml b/pyproject.toml
@@ -32,6 +32,7 @@ dependencies = [
     "numpy",
     "pyyaml",
     "semantic-version",
+    "docker"  # python-docker interface
 ]
 
 [project.scripts]
@@ -73,11 +74,11 @@ docs = [
     "myst-nb>=0.17.1",
 ]
 
-
 [tool.pytest.ini_options]
 markers = [
     "slow: marks tests as slow (deselect with '-m \"not slow\"')",
-    "server_api: marks tests as using the GitLab/GitHub API"
+    "server_api: marks tests as using the GitLab/GitHub API",
+    "docker: marks tests as using the Docker API"
 ]
 
 [tool.setuptools.dynamic]
diff --git a/tests/test_docker.py b/tests/test_docker.py
@@ -0,0 +1,38 @@
+from pathlib import Path
+import pytest
+
+from cadetrdm import Study, Options, Environment, Case
+from cadetrdm.docker import DockerAdapter
+
+
+@pytest.mark.docker  # lets runs deselect this test with -m "not docker"
+def test_run_dockered():
+    WORK_DIR = Path.cwd() / "tmp"
+    WORK_DIR.mkdir(parents=True, exist_ok=True)
+    # The study is addressed by an SSH URL, so SSH credentials are presumably required - confirm.
+    rdm_example = Study(
+        WORK_DIR / 'template',
+        "git@github.com:ronald-jaepel/rdm_testing_template.git",
+    )
+
+    options = Options()
+    options.debug = False
+    options.push = False
+    options.commit_message = 'Trying out new things'
+    options.optimizer_options = {
+        "optimizer": "U_NSGA3",
+        "pop_size": 2,
+        "n_cores": 2,
+        "n_max_gen": 1,
+    }
+    # Pin cadet-rdm to a fixed commit in the environment requested for the case.
+    matching_environment = Environment(
+        pip_packages={
+            "cadet-rdm": "git+https://github.com/cadet/CADET-RDM.git@3e073dd85c5e54d95422c0cdcc1190d80da9e138"
+        }
+    )
+    # Passing a container adapter makes run_study delegate to DockerAdapter.run_case.
+    case = Case(study=rdm_example, options=options, environment=matching_environment)
+    docker_adapter = DockerAdapter()
+    has_run_study = case.run_study(container_adapter=docker_adapter, force=True)
+    assert has_run_study

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+`
	`2`	`+from .containerAdapter import ContainerAdapter`
	`3`	`+from .dockerAdapter import DockerAdapter`
Original file line number	Diff line number	Diff line change
`@@ -32,6 +32,7 @@ dependencies = [`
`32`	`32`	`"numpy",`
`33`	`33`	`"pyyaml",`
`34`	`34`	`"semantic-version",`
	`35`	`+ "docker" # python-docker interface`
`35`	`36`	`]`
`36`	`37`
`37`	`38`	`[project.scripts]`
`@@ -73,11 +74,11 @@ docs = [`
`73`	`74`	`"myst-nb>=0.17.1",`
`74`	`75`	`]`
`75`	`76`
`76`		`-`
`77`	`77`	`[tool.pytest.ini_options]`
`78`	`78`	`markers = [`
`79`	`79`	`"slow: marks tests as slow (deselect with '-m \"not slow\"')",`
`80`		`- "server_api: marks tests as using the GitLab/GitHub API"`
	`80`	`+ "server_api: marks tests as using the GitLab/GitHub API",`
	`81`	`+ "docker: marks tests as using the Docker API"`
`81`	`82`	`]`
`82`	`83`
`83`	`84`	`[tool.setuptools.dynamic]`