Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions .github/workflows/full-test-jsonl.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
name: Container Test

on:
  pull_request:
    branches:
      - main
      - dev
    paths:
      - "Dockerfile"
      - "src/**"
      - "docker-compose*.yml"
      - ".last_release"
      - "pyproject.toml"
      - "uv.lock"

jobs:
  test-container-jsonl:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v6

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Read Photon version from .last_release
        id: photon_version
        run: |
          # Strip all whitespace (including trailing newline) before validating.
          PHOTON_VERSION=$(tr -d '[:space:]' < .last_release)
          if [[ -z "$PHOTON_VERSION" || ! "$PHOTON_VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
            echo "Error: .last_release is missing, empty, or contains an invalid version: '$PHOTON_VERSION'"
            exit 1
          fi
          echo "PHOTON_VERSION=$PHOTON_VERSION" >> "$GITHUB_ENV"
          echo "Photon Version: $PHOTON_VERSION"

      - name: Build test image
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./Dockerfile
          build-args: |
            PHOTON_VERSION=${{ env.PHOTON_VERSION }}
          push: false
          load: true
          tags: photon-test:pr-${{ github.event.pull_request.number }}
          platforms: linux/amd64
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Start container
        run: |
          docker run -d \
            --name photon-test-pr-${{ github.event.pull_request.number }} \
            -e REGION=andorra \
            -e IMPORT_MODE=jsonl \
            -e UPDATE_STRATEGY=DISABLED \
            photon-test:pr-${{ github.event.pull_request.number }}

      - name: Wait for container to be healthy
        run: |
          echo "Waiting for container to become healthy (timeout: 6 minutes)..."
          CONTAINER_NAME=photon-test-pr-${{ github.event.pull_request.number }}

          # Stream container logs in the background so failures are visible live.
          docker logs -f $CONTAINER_NAME &
          LOGS_PID=$!

          # SECONDS is bash's built-in elapsed-time counter: assigning 0 resets
          # it and it then advances by itself once per second.  Do NOT also add
          # the sleep interval manually — that double-counts elapsed time and
          # would cut the effective timeout roughly in half.
          SECONDS=0
          TIMEOUT=360

          while [ $SECONDS -lt $TIMEOUT ]; do
            HEALTH_STATUS=$(docker inspect --format='{{.State.Health.Status}}' $CONTAINER_NAME 2>/dev/null || echo "unknown")

            if [ "$HEALTH_STATUS" = "healthy" ]; then
              echo "Container is healthy after $SECONDS seconds"
              kill $LOGS_PID 2>/dev/null || true
              exit 0
            fi

            echo "Health status: $HEALTH_STATUS (elapsed: ${SECONDS}s)"
            sleep 10
          done

          kill $LOGS_PID 2>/dev/null || true
          echo "Container failed to become healthy within $TIMEOUT seconds"
          docker logs $CONTAINER_NAME
          exit 1

      - name: Cleanup
        if: always()
        run: |
          docker stop photon-test-pr-${{ github.event.pull_request.number }} || true
          docker rm photon-test-pr-${{ github.event.pull_request.number }} || true
          docker rmi photon-test:pr-${{ github.event.pull_request.number }} || true

      - name: Output summary
        if: always()
        run: |
          echo "## Container Test Summary" >> $GITHUB_STEP_SUMMARY
          echo "- **PR Number:** ${{ github.event.pull_request.number }}" >> $GITHUB_STEP_SUMMARY
          echo "- **Photon Version:** ${{ env.PHOTON_VERSION }}" >> $GITHUB_STEP_SUMMARY
          echo "- **Status:** ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ dependencies = [
"requests==2.32.5",
"schedule>=1.2.2",
"tqdm==4.67.3",
"zstandard>=0.23.0",
]

[dependency-groups]
Expand Down
52 changes: 41 additions & 11 deletions src/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from src.check_remote import check_index_age
from src.downloader import InsufficientSpaceError, parallel_update, sequential_update
from src.importer import run_jsonl_import
from src.utils import config
from src.utils.logger import get_logger, setup_logging
from src.utils.notify import send_notification
Expand All @@ -12,14 +13,15 @@
logger = get_logger()


def main():
send_notification("Photon-Docker Initializing")

logger.debug("Entrypoint setup called")
def log_config() -> None:
logger.info("=== CONFIG VARIABLES ===")
logger.info(f"IMPORT_MODE: {config.IMPORT_MODE}")
logger.info(f"UPDATE_STRATEGY: {config.UPDATE_STRATEGY}")
logger.info(f"UPDATE_INTERVAL: {config.UPDATE_INTERVAL}")
logger.info(f"REGION: {config.REGION}")
logger.info(f"LANGUAGES: {config.LANGUAGES}")
logger.info(f"EXTRA_TAGS: {config.EXTRA_TAGS}")
logger.info(f"IMPORT_GEOMETRIES: {config.IMPORT_GEOMETRIES}")
logger.info(f"FORCE_UPDATE: {config.FORCE_UPDATE}")
logger.info(f"DOWNLOAD_MAX_RETRIES: {config.DOWNLOAD_MAX_RETRIES}")
logger.info(f"FILE_URL (sanitized): {sanitize_url(config.FILE_URL)}")
Expand All @@ -39,6 +41,32 @@ def main():

logger.info("=== END CONFIG VARIABLES ===")


def run_update_or_import(force_update: bool = False) -> None:
    """Dispatch to the appropriate import/update routine.

    JSONL mode always runs a fresh JSONL import.  Otherwise an initial
    (non-forced) run uses the sequential strategy unconditionally, while a
    forced run honors the configured UPDATE_STRATEGY.
    """
    if config.IMPORT_MODE == "jsonl":
        if force_update:
            logger.info("Starting forced JSONL import")
        else:
            logger.info("Starting initial JSONL import")
        run_jsonl_import()
    elif not force_update:
        # Initial downloads deliberately ignore UPDATE_STRATEGY.
        logger.info("Starting initial download using sequential strategy")
        logger.info("Note: Initial download will use sequential strategy regardless of config setting")
        sequential_update()
    elif config.UPDATE_STRATEGY == "PARALLEL":
        parallel_update()
    else:
        sequential_update()


def main():
send_notification("Photon-Docker Initializing")

logger.debug("Entrypoint setup called")
log_config()

try:
validate_config()
except ValueError as e:
Expand All @@ -51,10 +79,7 @@ def main():
if config.FORCE_UPDATE:
logger.info("Starting forced update")
try:
if config.UPDATE_STRATEGY == "PARALLEL":
parallel_update()
else:
sequential_update()
run_update_or_import(force_update=True)
except InsufficientSpaceError as e:
logger.error(f"Cannot proceed with force update: {e}")
send_notification(f"Photon-Docker force update failed: {e}")
Expand All @@ -66,17 +91,22 @@ def main():
if not config.INITIAL_DOWNLOAD:
logger.warning("Initial download is disabled but no existing Photon index was found. ")
return
logger.info("Starting initial download using sequential strategy")
logger.info("Note: Initial download will use sequential strategy regardless of config setting")
try:
sequential_update()
run_update_or_import(force_update=False)
except InsufficientSpaceError as e:
logger.error(f"Cannot proceed: {e}")
send_notification(f"Photon-Docker cannot start: {e}")
sys.exit(75)
except Exception:
logger.error("Initial setup failed")
raise
else:
logger.info("Existing index found, skipping download")

if config.IMPORT_MODE == "jsonl":
logger.info("JSONL mode with existing index found, skipping automatic rebuild during setup")
return

if config.MIN_INDEX_DATE and check_index_age():
logger.info("Index is older than minimum required date, starting sequential update")
try:
Expand Down
64 changes: 64 additions & 0 deletions src/importer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import os
import shlex
import subprocess

from src.filesystem import clear_temp_dir
from src.jsonl.decompressor import stream_decompress
from src.jsonl.downloader import download_jsonl
from src.utils import config
from src.utils.logger import get_logger
from src.utils.regions import get_regions_for_jsonl

logger = get_logger(__name__)


def run_jsonl_import() -> None:
    """Download a single region's JSONL dump and stream it into a Photon import.

    Raises:
        ValueError: when the configured region list does not contain exactly one region.
        RuntimeError: when the importer's stdin is unavailable or the import exits non-zero.
    """
    selected = get_regions_for_jsonl(config.get_jsonl_regions())
    if len(selected) != 1:
        raise ValueError("JSONL mode currently supports exactly one region.")

    try:
        dump_path = download_jsonl(selected[0])
        proc = _start_photon_import("-")
        try:
            stdin = proc.stdin
            if stdin is None:
                raise RuntimeError("Photon import process stdin is unavailable")

            # Feed decompressed JSONL chunks straight into the importer.
            for block in stream_decompress(dump_path):
                stdin.write(block)
            stdin.close()

            exit_code = proc.wait()
            if exit_code != 0:
                raise RuntimeError(f"Photon JSONL import failed with exit code {exit_code}")
        except Exception:
            # Make sure the child process is terminated and reaped before
            # propagating the error.
            proc.kill()
            proc.wait()
            raise
    finally:
        # The temp download is no longer needed whether or not the import succeeded.
        clear_temp_dir()


def _start_photon_import(input_source: str) -> subprocess.Popen:
    """Launch the Photon JAR in import mode and return the running process.

    ``input_source`` is passed to ``-import-file`` ("-" means read from stdin);
    the returned process exposes a PIPE on stdin for the caller to feed.
    """
    os.makedirs(config.DATA_DIR, exist_ok=True)

    command = ["java"]
    if config.JAVA_PARAMS:
        # JAVA_PARAMS is a shell-style string of JVM flags; split it safely.
        command += shlex.split(config.JAVA_PARAMS)
    command += [
        "-jar",
        "/photon/photon.jar",
        "import",
        "-import-file",
        input_source,
        "-data-dir",
        config.DATA_DIR,
    ]

    langs = config.get_languages()
    if langs:
        command += ["-languages", ",".join(langs)]

    tags = config.get_extra_tags()
    if tags:
        command += ["-extra-tags", ",".join(tags)]

    if config.IMPORT_GEOMETRIES:
        command.append("-full-geometries")

    logger.info(f"Starting Photon JSONL import for region(s): {', '.join(config.get_jsonl_regions())}")
    return subprocess.Popen(command, cwd=config.PHOTON_DIR, stdin=subprocess.PIPE)  # noqa: S603
3 changes: 3 additions & 0 deletions src/jsonl/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from src.jsonl.downloader import download_jsonl

__all__ = ["download_jsonl"]
7 changes: 7 additions & 0 deletions src/jsonl/decompressor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import zstandard as zstd


def stream_decompress(input_path: str, read_size: int = 65536):
    """Lazily yield decompressed chunks from a zstd-compressed file.

    Args:
        input_path: path to the ``.zst`` file on disk.
        read_size: how many compressed bytes to read per iteration.
    """
    decompressor = zstd.ZstdDecompressor()
    with open(input_path, "rb") as compressed:
        for chunk in decompressor.read_to_iter(compressed, read_size=read_size):
            yield chunk
46 changes: 46 additions & 0 deletions src/jsonl/downloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import os

from src.downloader import download_file
from src.utils import config
from src.utils.logger import get_logger
from src.utils.regions import get_jsonl_filename, get_region_info, normalize_region

logger = get_logger(__name__)


def get_jsonl_url(region: str) -> str:
    """Build the download URL for a region's JSONL dump.

    Raises:
        ValueError: when the region cannot be normalized or is unknown.
    """
    normalized_region = normalize_region(region)
    if normalized_region is None:
        raise ValueError(f"Unknown region: {region}")

    region_info = get_region_info(normalized_region)
    if not region_info:
        raise ValueError(f"Unknown region: {region}")

    filename = get_jsonl_filename(normalized_region, config.JSONL_FILE_EXTENSION, config.JSONL_RELEASE_CHANNEL)

    # Bug fix: `filename` was computed but never interpolated into the URLs,
    # which instead ended in a literal placeholder.  Planet dumps live at the
    # bucket root, continents one directory down, and countries under their
    # continent directory.
    if region_info["type"] == "planet":
        return f"{config.BASE_URL}/{filename}"
    if region_info["type"] == "continent":
        return f"{config.BASE_URL}/{normalized_region}/{filename}"

    continent = region_info["continent"]
    return f"{config.BASE_URL}/{continent}/{normalized_region}/{filename}"


def download_jsonl(region: str) -> str:
    """Download the JSONL dump for *region* into TEMP_DIR and return its path.

    Raises:
        ValueError: when the region cannot be normalized.
        RuntimeError: when the download fails.
    """
    os.makedirs(config.TEMP_DIR, exist_ok=True)

    canonical = normalize_region(region)
    if canonical is None:
        raise ValueError(f"Unknown region: {region}")

    url = get_jsonl_url(canonical)
    destination = os.path.join(config.TEMP_DIR, f"{canonical}.{config.JSONL_FILE_EXTENSION}")

    logger.info(f"Downloading JSONL dump for {canonical} from {url}")

    if not download_file(url, destination):
        raise RuntimeError(f"Failed to download JSONL dump from {url}")

    return destination
8 changes: 8 additions & 0 deletions src/process_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,10 @@ def run_update(self):
logger.info("Updates disabled, skipping")
return

if config.IMPORT_MODE == "jsonl":
logger.info("Scheduled JSONL rebuilds are not implemented yet, skipping")
return

self.state = AppState.UPDATING
logger.info(f"Running {config.UPDATE_STRATEGY.lower()} update...")
update_start = time.time()
Expand Down Expand Up @@ -257,6 +261,10 @@ def schedule_updates(self):
logger.info("Updates disabled, not scheduling")
return

if config.IMPORT_MODE == "jsonl":
logger.info("Skipping scheduled updates in JSONL mode until rebuild support is implemented")
return

interval = config.UPDATE_INTERVAL.lower()

if interval.endswith("d"):
Expand Down
4 changes: 4 additions & 0 deletions src/updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ def main():
logger.info("Starting update process...")

try:
if config.IMPORT_MODE == "jsonl":
logger.info("Scheduled JSONL rebuilds are not implemented yet, skipping updater run")
return

if config.UPDATE_STRATEGY == "PARALLEL":
logger.info("Running parallel update...")
parallel_update()
Expand Down
Loading
Loading