Skip to content

Commit 5cee8ca

Browse files
committed
feat: Add path_utils.py with method resolve_repo_path
Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com>
1 parent 5957e10 commit 5cee8ca

File tree

4 files changed

+50
-21
lines changed

4 files changed

+50
-21
lines changed

CONSTITUTION.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ All code MUST consider security implications.
7979
- Avoid running destructive commands without explicit user confirmation
8080
- Use detect-secrets and gitleaks pre-commit hooks to prevent secret leakage
8181
- Test code MUST NOT introduce vulnerabilities into the tested systems
82+
- Use `utilities.path_utils.resolve_repo_path` to resolve and validate any user-supplied or parameterized file paths, preventing path-traversal and symlink-escape outside the repository root
8283
- JIRA ticket links are allowed in PRs and commit messages (our Jira is public)
8384
- Do NOT reference internal-only resources (Jenkins, Confluence, Slack threads) in code, PRs, or commit messages
8485
- Do NOT link embargoed or security-restricted (RH-employee-only) tickets

tests/llama_stack/conftest.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import os
22
from collections.abc import Callable, Generator
3-
from pathlib import Path
43
from typing import Any
54

65
import httpx
@@ -810,7 +809,6 @@ def vector_store(
810809
try:
811810
vector_store_upload_doc_sources(
812811
doc_sources=doc_sources,
813-
repo_root=Path(request.config.rootdir).resolve(),
814812
llama_stack_client=unprivileged_llama_stack_client,
815813
vector_store=vector_store,
816814
vector_io_provider=vector_io_provider,

tests/llama_stack/utils.py

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
LLS_CORE_POD_FILTER,
2121
)
2222
from utilities.exceptions import UnexpectedResourceCountError
23+
from utilities.path_utils import resolve_repo_path
2324
from utilities.resources.llama_stack_distribution import LlamaStackDistribution
2425

2526
LOGGER = get_logger(name=__name__)
@@ -280,26 +281,24 @@ def vector_store_create_file_from_path(
280281

281282
def vector_store_upload_doc_sources(
282283
doc_sources: Any,
283-
repo_root: Path,
284284
llama_stack_client: LlamaStackClient,
285285
vector_store: Any,
286286
vector_io_provider: str,
287287
) -> None:
288288
"""Upload parametrized document sources (URLs and repo-local paths) to a vector store.
289289
290-
Resolves each local path under ``repo_root`` and re-resolves directory entries to avoid
291-
symlink escape outside the repository.
290+
Resolves each local path via ``resolve_repo_path`` and re-resolves directory entries
291+
to avoid symlink escape outside the repository.
292292
293293
Args:
294294
doc_sources: List of URL or path strings (repo-relative or absolute under repo root).
295-
repo_root: Resolved repository root; local paths must resolve under this directory.
296295
llama_stack_client: Client used for file and vector store APIs.
297296
vector_store: Target vector store (must expose ``id``).
298297
vector_io_provider: Provider id for log context only.
299298
300299
Raises:
301300
TypeError: If ``doc_sources`` is not a list.
302-
ValueError: If a local path resolves outside ``repo_root``.
301+
ValueError: If a local path resolves outside the repo root.
303302
FileNotFoundError: If a file or non-empty directory source is missing.
304303
"""
305304
if not isinstance(doc_sources, list):
@@ -310,7 +309,6 @@ def vector_store_upload_doc_sources(
310309
vector_store.id,
311310
doc_sources,
312311
)
313-
repo_root_resolved = repo_root.resolve()
314312
for source in doc_sources:
315313
if source.startswith(("http://", "https://")):
316314
vector_store_create_file_from_url(
@@ -319,25 +317,14 @@ def vector_store_upload_doc_sources(
319317
vector_store=vector_store,
320318
)
321319
continue
322-
raw_path = Path(source) # noqa: FCN001
323-
resolved_source = raw_path.resolve() if raw_path.is_absolute() else (repo_root_resolved / raw_path).resolve()
324-
if not resolved_source.is_relative_to(repo_root_resolved):
325-
raise ValueError(
326-
f"doc_sources path must be under repo root ({repo_root_resolved}): {source!r}",
327-
)
328-
source_path = resolved_source
320+
source_path = resolve_repo_path(source=source)
329321

330322
if source_path.is_dir():
331323
files = sorted(source_path.iterdir())
332324
if not files:
333325
raise FileNotFoundError(f"No files found in directory: {source_path}")
334326
for file_path in files:
335-
file_path_resolved = file_path.resolve(strict=True)
336-
if not file_path_resolved.is_relative_to(repo_root_resolved):
337-
raise ValueError(
338-
f"doc_sources directory entry must resolve under repo root "
339-
f"({repo_root_resolved}): {file_path!r} -> {file_path_resolved!r}",
340-
)
327+
file_path_resolved = resolve_repo_path(source=file_path)
341328
if not file_path_resolved.is_file():
342329
continue
343330
vector_store_create_file_from_path(

utilities/path_utils.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
"""Path resolution and validation utilities for repo-relative file access."""
2+
3+
from pathlib import Path
4+
5+
_REPO_ROOT = Path(__file__).resolve().parent.parent
6+
7+
8+
def resolve_repo_path(source: str | Path, repo_root: Path | None = None) -> Path:
9+
"""Turn a repo-relative or absolute path into a safe, resolved absolute path.
10+
11+
Ensures the final path lives inside the repository root. Symlinks are fully
12+
resolved before the check so that a symlink pointing outside the repo is rejected.
13+
14+
Accepted (returns resolved absolute path)::
15+
16+
resolve_repo_path("tests/data/sample.pdf") # relative to repo root
17+
resolve_repo_path("tests/data/../data/sample.pdf") # normalised, still under root
18+
resolve_repo_path("/home/user/repo/tests/data/f.txt") # absolute, under root
19+
20+
Rejected (raises ``ValueError``)::
21+
22+
resolve_repo_path("../../etc/passwd") # escapes repo root
23+
resolve_repo_path("/tmp/evil.txt") # absolute, outside root
24+
25+
Args:
26+
source: A repo-relative string/path or an absolute path. Relative values are joined onto the repo root.
27+
repo_root: Repository root to validate against. Defaults to the detected
28+
repo root (parent of the ``utilities/`` package).
29+
30+
Returns:
31+
The resolved absolute path, guaranteed to be under ``repo_root``. Existence is not checked (``resolve()`` is called without ``strict=True``).
32+
33+
Raises:
34+
ValueError: If the resolved path falls outside the repo root.
35+
"""
36+
repo_root_resolved = (repo_root or _REPO_ROOT).resolve()  # falls back to the module-level _REPO_ROOT when no repo_root is passed
37+
raw = Path(source) # noqa: FCN001
38+
resolved = raw.resolve() if raw.is_absolute() else (repo_root_resolved / raw).resolve()  # relative input is anchored at the repo root, not the process CWD
39+
if not resolved.is_relative_to(repo_root_resolved):  # containment is tested on the already-resolved path
40+
raise ValueError(
41+
f"Path must be under repo root ({repo_root_resolved}): {source!r}",
42+
)
43+
return resolved

0 commit comments

Comments
 (0)