diff --git a/airbyte/_executors/python.py b/airbyte/_executors/python.py index f3adcaac..37d57779 100644 --- a/airbyte/_executors/python.py +++ b/airbyte/_executors/python.py @@ -17,7 +17,7 @@ from airbyte._util.meta import is_windows from airbyte._util.telemetry import EventState, log_install_state from airbyte._util.venv_util import get_bin_dir -from airbyte.constants import NO_UV +from airbyte.constants import DEFAULT_INSTALL_DIR, NO_UV if TYPE_CHECKING: @@ -65,7 +65,9 @@ def __init__( if metadata and metadata.pypi_package_name else f"airbyte-{self.name}" ) - self.install_root = install_root or Path.cwd() + self.install_root = install_root or DEFAULT_INSTALL_DIR or Path.cwd() + with suppress(Exception): + self.install_root.mkdir(parents=True, exist_ok=True) self.use_python = use_python def _get_venv_name(self) -> str: diff --git a/airbyte/_executors/util.py b/airbyte/_executors/util.py index e33a9c05..6741d3fb 100644 --- a/airbyte/_executors/util.py +++ b/airbyte/_executors/util.py @@ -18,7 +18,7 @@ from airbyte._executors.python import VenvExecutor from airbyte._util.meta import which from airbyte._util.telemetry import EventState, log_install_state # Non-public API -from airbyte.constants import AIRBYTE_OFFLINE_MODE, TEMP_DIR_OVERRIDE +from airbyte.constants import AIRBYTE_OFFLINE_MODE, DEFAULT_PROJECT_DIR, TEMP_DIR_OVERRIDE from airbyte.sources.registry import ConnectorMetadata, InstallType, get_connector_metadata from airbyte.version import get_version @@ -282,7 +282,7 @@ def get_connector_executor( # noqa: PLR0912, PLR0913, PLR0914, PLR0915, C901 # host_temp_dir = TEMP_DIR_OVERRIDE or Path(tempfile.gettempdir()) container_temp_dir = DEFAULT_AIRBYTE_CONTAINER_TEMP_DIR - local_mount_dir = Path().absolute() / name + local_mount_dir = DEFAULT_PROJECT_DIR / name local_mount_dir.mkdir(exist_ok=True) volumes = { diff --git a/airbyte/caches/util.py b/airbyte/caches/util.py index 1d192406..0a4d108f 100644 --- a/airbyte/caches/util.py +++ b/airbyte/caches/util.py @@ -9,6 +9,7 @@ from airbyte import exceptions as exc from airbyte.caches.duckdb import DuckDBCache +from airbyte.constants import DEFAULT_GOOGLE_DRIVE_MOUNT_PATH, DEFAULT_PROJECT_DIR # Google drive constants: @@ -16,7 +17,7 @@ _MY_DRIVE = "MyDrive" """The default name of the user's personal Google Drive.""" -_GOOGLE_DRIVE_DEFAULT_MOUNT_PATH = "/content/drive" +_GOOGLE_DRIVE_DEFAULT_MOUNT_PATH = DEFAULT_GOOGLE_DRIVE_MOUNT_PATH """The recommended path to mount Google Drive to.""" @@ -29,7 +30,7 @@ def get_default_cache() -> DuckDBCache: Cache files are stored in the `.cache` directory, relative to the current working directory. """ - cache_dir = Path("./.cache/default_cache") + cache_dir = DEFAULT_PROJECT_DIR / "cache" / "default_cache" return DuckDBCache( db_path=cache_dir / "default_cache.duckdb", cache_dir=cache_dir, @@ -66,7 +67,7 @@ def new_local_cache( ) cache_name = cache_name or str(ulid.ULID()) - cache_dir = cache_dir or Path(f"./.cache/{cache_name}") + cache_dir = cache_dir or (DEFAULT_PROJECT_DIR / "cache" / cache_name) if not isinstance(cache_dir, Path): cache_dir = Path(cache_dir) diff --git a/airbyte/constants.py b/airbyte/constants.py index 3ab531ec..e0eeccaf 100644 --- a/airbyte/constants.py +++ b/airbyte/constants.py @@ -56,6 +56,32 @@ your mounted Google Drive by setting this to a path like `/content/drive/MyDrive/Airbyte/cache`. """ +DEFAULT_PROJECT_DIR: Path = ( + Path(os.getenv("AIRBYTE_PROJECT_DIR", "") or Path.cwd()).expanduser().absolute() +) +"""Default project directory. + +Can be overridden by setting the `AIRBYTE_PROJECT_DIR` environment variable. + +If not set, defaults to the current working directory. + +This serves as the parent directory for both cache and install directories when not explicitly +configured. +""" + +DEFAULT_INSTALL_DIR: Path = ( + Path(os.getenv("AIRBYTE_INSTALL_DIR", "") or DEFAULT_PROJECT_DIR).expanduser().absolute() +) +"""Default install directory for connectors. + +If not set, defaults to `DEFAULT_PROJECT_DIR` (`AIRBYTE_PROJECT_DIR` env var) or the current +working directory if neither is set. +""" + + +DEFAULT_GOOGLE_DRIVE_MOUNT_PATH = "/content/drive" +"""Default path to mount Google Drive in Google Colab environments.""" + DEFAULT_ARROW_MAX_CHUNK_SIZE = 100_000 """The default number of records to include in each batch of an Arrow dataset."""