Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CONSTITUTION.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ All code MUST consider security implications.
- Avoid running destructive commands without explicit user confirmation
- Use detect-secrets and gitleaks pre-commit hooks to prevent secret leakage
- Test code MUST NOT introduce vulnerabilities into the tested systems
- Use `utilities.path_utils.resolve_repo_path` to resolve and validate any user-supplied or parameterized file paths, preventing path-traversal and symlink-escape outside the repository root
- JIRA ticket links are allowed in PRs and commit messages (our Jira is public)
- Do NOT reference internal-only resources (Jenkins, Confluence, Slack threads) in code, PRs, or commit messages
- Do NOT link embargoed or security-restricted (RH-employee-only) tickets
Expand Down
2 changes: 0 additions & 2 deletions tests/llama_stack/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
from collections.abc import Callable, Generator
from pathlib import Path
from typing import Any

import httpx
Expand Down Expand Up @@ -810,7 +809,6 @@ def vector_store(
try:
vector_store_upload_doc_sources(
doc_sources=doc_sources,
repo_root=Path(request.config.rootdir).resolve(),
llama_stack_client=unprivileged_llama_stack_client,
vector_store=vector_store,
vector_io_provider=vector_io_provider,
Expand Down
25 changes: 6 additions & 19 deletions tests/llama_stack/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
LLS_CORE_POD_FILTER,
)
from utilities.exceptions import UnexpectedResourceCountError
from utilities.path_utils import resolve_repo_path
from utilities.resources.llama_stack_distribution import LlamaStackDistribution

LOGGER = get_logger(name=__name__)
Expand Down Expand Up @@ -280,26 +281,24 @@ def vector_store_create_file_from_path(

def vector_store_upload_doc_sources(
doc_sources: Any,
repo_root: Path,
llama_stack_client: LlamaStackClient,
vector_store: Any,
vector_io_provider: str,
) -> None:
"""Upload parametrized document sources (URLs and repo-local paths) to a vector store.

Resolves each local path under ``repo_root`` and re-resolves directory entries to avoid
symlink escape outside the repository.
Resolves each local path via ``resolve_repo_path`` and re-resolves directory entries
to avoid symlink escape outside the repository.

Args:
doc_sources: List of URL or path strings (repo-relative or absolute under repo root).
repo_root: Resolved repository root; local paths must resolve under this directory.
llama_stack_client: Client used for file and vector store APIs.
vector_store: Target vector store (must expose ``id``).
vector_io_provider: Provider id for log context only.

Raises:
TypeError: If ``doc_sources`` is not a list.
ValueError: If a local path resolves outside ``repo_root``.
ValueError: If a local path resolves outside the repo root.
FileNotFoundError: If a file or non-empty directory source is missing.
"""
if not isinstance(doc_sources, list):
Expand All @@ -310,7 +309,6 @@ def vector_store_upload_doc_sources(
vector_store.id,
doc_sources,
)
repo_root_resolved = repo_root.resolve()
for source in doc_sources:
if source.startswith(("http://", "https://")):
vector_store_create_file_from_url(
Expand All @@ -319,25 +317,14 @@ def vector_store_upload_doc_sources(
vector_store=vector_store,
)
continue
raw_path = Path(source) # noqa: FCN001
resolved_source = raw_path.resolve() if raw_path.is_absolute() else (repo_root_resolved / raw_path).resolve()
if not resolved_source.is_relative_to(repo_root_resolved):
raise ValueError(
f"doc_sources path must be under repo root ({repo_root_resolved}): {source!r}",
)
source_path = resolved_source
source_path = resolve_repo_path(source=source)

if source_path.is_dir():
files = sorted(source_path.iterdir())
if not files:
raise FileNotFoundError(f"No files found in directory: {source_path}")
for file_path in files:
file_path_resolved = file_path.resolve(strict=True)
if not file_path_resolved.is_relative_to(repo_root_resolved):
raise ValueError(
f"doc_sources directory entry must resolve under repo root "
f"({repo_root_resolved}): {file_path!r} -> {file_path_resolved!r}",
)
file_path_resolved = resolve_repo_path(source=file_path)
if not file_path_resolved.is_file():
continue
vector_store_create_file_from_path(
Expand Down
43 changes: 43 additions & 0 deletions utilities/path_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""Path resolution and validation utilities for repo-relative file access."""

from pathlib import Path

# Default repository root used by ``resolve_repo_path`` when no ``repo_root`` is
# given. Derived from this module's own location: this file lives in the
# ``utilities/`` package, so two ``.parent`` hops from the resolved file path
# yield the directory that contains ``utilities/``.
_REPO_ROOT = Path(__file__).resolve().parent.parent


def resolve_repo_path(source: str | Path, repo_root: Path | None = None) -> Path:
"""Turn a repo-relative or absolute path into a safe, resolved absolute path.

Ensures the final path lives inside the repository root. Symlinks are fully
resolved before the check so that a symlink pointing outside the repo is rejected.

Accepted (returns resolved absolute path)::

resolve_repo_path("tests/data/sample.pdf") # relative to repo root
resolve_repo_path("tests/data/../data/sample.pdf") # normalised, still under root
resolve_repo_path("/home/user/repo/tests/data/f.txt") # absolute, under root

Rejected (raises ``ValueError``)::

resolve_repo_path("../../etc/passwd") # escapes repo root
resolve_repo_path("/tmp/evil.txt") # absolute, outside root

Args:
source: A repo-relative string/path or an absolute path.
repo_root: Repository root to validate against. Defaults to the detected
repo root (parent of the ``utilities/`` package).

Returns:
The resolved absolute path, guaranteed to be under ``repo_root``.

Raises:
ValueError: If the resolved path falls outside the repo root.
"""
repo_root_resolved = (repo_root or _REPO_ROOT).resolve()
raw = Path(source) # noqa: FCN001
resolved = raw.resolve() if raw.is_absolute() else (repo_root_resolved / raw).resolve()
if not resolved.is_relative_to(repo_root_resolved):
raise ValueError(
f"Path must be under repo root ({repo_root_resolved}): {source!r}",
)
return resolved