Skip to content

feat: Docker in Docker compatibility: add param "host_temp_dir" to get_source() and get_destination() #644

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions airbyte/_executors/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from airbyte import exceptions as exc
from airbyte._executors.base import Executor
from airbyte.constants import TEMP_DIR


logger = logging.getLogger("airbyte")
Expand Down Expand Up @@ -76,12 +77,18 @@ def map_cli_args(self, args: list[str]) -> list[str]:
# This is a file path and we need to map it to the same file within the
# relative path of the file within the container's volume.
for local_volume, container_path in self.volumes.items():
if Path(arg).is_relative_to(local_volume):
# If the container path corresponds to the container's temporary directory,
# then the local temporary directory path is used (as the local volume
# path can be overridden).
local_path = local_volume
if container_path == DEFAULT_AIRBYTE_CONTAINER_TEMP_DIR:
local_path = TEMP_DIR
if Path(arg).is_relative_to(local_path):
logger.debug(
f"Found file input path `{arg}` "
f"relative to container-mapped volume: {local_volume}"
f"relative to container-mapped volume: {local_path}"
)
mapped_path = Path(container_path) / Path(arg).relative_to(local_volume)
mapped_path = Path(container_path) / Path(arg).relative_to(local_path)
logger.debug(f"Mapping `{arg}` -> `{mapped_path}`")
new_args.append(str(mapped_path))
break
Expand Down
8 changes: 5 additions & 3 deletions airbyte/_executors/util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
from __future__ import annotations

import tempfile
from pathlib import Path
from typing import TYPE_CHECKING, Literal, cast

Expand All @@ -16,7 +15,7 @@
from airbyte._executors.python import VenvExecutor
from airbyte._util.meta import which
from airbyte._util.telemetry import EventState, log_install_state # Non-public API
from airbyte.constants import AIRBYTE_OFFLINE_MODE, TEMP_DIR_OVERRIDE
from airbyte.constants import AIRBYTE_OFFLINE_MODE, TEMP_DIR
from airbyte.sources.registry import ConnectorMetadata, InstallType, get_connector_metadata
from airbyte.version import get_version

Expand Down Expand Up @@ -132,6 +131,7 @@ def get_connector_executor( # noqa: PLR0912, PLR0913, PLR0915 # Too many branch
local_executable: Path | str | None = None,
docker_image: bool | str | None = None,
use_host_network: bool = False,
host_temp_dir: Path | str | None = None,
source_manifest: bool | dict | Path | str | None = None,
install_if_missing: bool = True,
install_root: Path | None = None,
Expand Down Expand Up @@ -226,7 +226,9 @@ def get_connector_executor( # noqa: PLR0912, PLR0913, PLR0915 # Too many branch
if ":" not in docker_image:
docker_image = f"{docker_image}:{version or 'latest'}"

host_temp_dir = TEMP_DIR_OVERRIDE or Path(tempfile.gettempdir())
if not host_temp_dir:
host_temp_dir = TEMP_DIR

container_temp_dir = DEFAULT_AIRBYTE_CONTAINER_TEMP_DIR

local_mount_dir = Path().absolute() / name
Expand Down
4 changes: 2 additions & 2 deletions airbyte/_util/temp_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from pathlib import Path
from typing import TYPE_CHECKING, Any

from airbyte.constants import TEMP_DIR_OVERRIDE
from airbyte.constants import TEMP_DIR


if TYPE_CHECKING:
Expand All @@ -30,7 +30,7 @@ def as_temp_files(files_contents: list[dict | str]) -> Generator[list[str], Any,
mode="w+t",
delete=False,
encoding="utf-8",
dir=TEMP_DIR_OVERRIDE or None,
dir=TEMP_DIR,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Verification agent

🧩 Analysis chain

Consider the previous behavior with temporary directories.

According to previous feedback, when the temporary directory override is None, the code should not supply the dir argument to preserve existing behavior. With the current change, TEMP_DIR is always provided.

Would it make sense to check if TEMP_DIR corresponds to the system's default temp directory and omit the parameter in that case? wdyt?


🏁 Script executed:

#!/bin/bash
# Description: Check the difference between system temp dir and TEMP_DIR

# Get the temp dir from tempfile and compare with TEMP_DIR
cat airbyte/constants.py | grep -A 3 'TEMP_DIR: Path'
python -c "import tempfile; print('System temp dir:', tempfile.gettempdir())"

Length of output: 328


Conditional TEMP_DIR Parameter Suggestion

It looks like we're always passing TEMP_DIR when calling the tempfile function. Given that in airbyte/constants.py, TEMP_DIR is set to tempfile.gettempdir() when no override is provided (i.e., when AIRBYTE_TEMP_DIR is not set), wouldn’t it be better to conditionally omit the dir argument so that we preserve the original behavior? For instance, you might consider:

  • Only supplying dir=TEMP_DIR if TEMP_DIR does not equal tempfile.gettempdir()
  • Retaining the default behavior when the temporary directory override is effectively None

Would it make sense to implement a check like this? wdyt?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TEMP_DIR is never None and checking that TEMP_DIR is different from tempfile.gettempdir() seems unnecessary, just verbose, since the result will be the same. wdyt?

suffix=".json" if use_json else ".txt",
)
temp_file.write(
Expand Down
6 changes: 3 additions & 3 deletions airbyte/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import annotations

import os
import tempfile
from pathlib import Path


Expand Down Expand Up @@ -64,9 +65,8 @@ def _str_to_bool(value: str) -> bool:
"""Convert a string value of an environment values to a boolean value."""
return bool(value) and value.lower() not in {"", "0", "false", "f", "no", "n", "off"}


TEMP_DIR_OVERRIDE: Path | None = (
Path(os.environ["AIRBYTE_TEMP_DIR"]) if os.getenv("AIRBYTE_TEMP_DIR") else None
TEMP_DIR: Path = Path(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add an additional blank line after function definition.

The linter expects 2 blank lines after a function definition, but there's only 1. This is causing the pipeline to fail with a formatting error.

def _str_to_bool(value: str) -> bool:
    """Convert a string value of an environment values to a boolean value."""
    return bool(value) and value.lower() not in {"", "0", "false", "f", "no", "n", "off"}


TEMP_DIR: Path = Path(

You can fix this and other formatting issues by running ruff format as suggested in the pipeline failure.

Committable suggestion skipped: line range outside the PR's diff.

🧰 Tools
🪛 Ruff (0.8.2)

68-68: Expected 2 blank lines after class or function definition, found (1)

Add missing blank line(s)

(E305)

os.environ["AIRBYTE_TEMP_DIR"] if os.getenv("AIRBYTE_TEMP_DIR") else tempfile.gettempdir()
)
"""The directory to use for temporary files.

Expand Down
5 changes: 5 additions & 0 deletions airbyte/destinations/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def get_destination( # noqa: PLR0913 # Too many arguments
local_executable: Path | str | None = None,
docker_image: str | bool | None = None,
use_host_network: bool = False,
host_temp_dir: Path | str | None = None,
install_if_missing: bool = True,
) -> Destination:
"""Get a connector by name and version.
Expand Down Expand Up @@ -56,6 +57,9 @@ def get_destination( # noqa: PLR0913 # Too many arguments
the host network. This is useful for connectors that need to access resources on
the host machine, such as a local database. This parameter is ignored when
`docker_image` is not set.
host_temp_dir: If set, along with docker_image, this replaces the volume exposing the
temporary files directory, ensuring compatibility when the Docker engine runs on a
different host (e.g., Docker in Docker), where paths may differ.
install_if_missing: Whether to install the connector if it is not available locally. This
parameter is ignored when local_executable is set.
"""
Expand All @@ -70,6 +74,7 @@ def get_destination( # noqa: PLR0913 # Too many arguments
local_executable=local_executable,
docker_image=docker_image,
use_host_network=use_host_network,
host_temp_dir=host_temp_dir,
install_if_missing=install_if_missing,
),
)
Expand Down
5 changes: 5 additions & 0 deletions airbyte/sources/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def get_source( # noqa: PLR0913 # Too many arguments
local_executable: Path | str | None = None,
docker_image: bool | str | None = None,
use_host_network: bool = False,
host_temp_dir: Path | str | None = None,
source_manifest: bool | dict | Path | str | None = None,
install_if_missing: bool = True,
install_root: Path | None = None,
Expand Down Expand Up @@ -95,6 +96,9 @@ def get_source( # noqa: PLR0913 # Too many arguments
the host network. This is useful for connectors that need to access resources on
the host machine, such as a local database. This parameter is ignored when
`docker_image` is not set.
host_temp_dir: If set, along with docker_image, this replaces the volume exposing the
temporary files directory, ensuring compatibility when the Docker engine runs on a
different host (e.g., Docker in Docker), where paths may differ.
source_manifest: If set, the connector will be executed based on a declarative YAML
source definition. This input can be `True` to attempt to auto-download a YAML spec,
`dict` to accept a Python dictionary as the manifest, `Path` to pull a manifest from
Expand All @@ -116,6 +120,7 @@ def get_source( # noqa: PLR0913 # Too many arguments
local_executable=local_executable,
docker_image=docker_image,
use_host_network=use_host_network,
host_temp_dir=host_temp_dir,
source_manifest=source_manifest,
install_if_missing=install_if_missing,
install_root=install_root,
Expand Down
Loading