Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions chatbot-core/api/services/file_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
except ImportError:
MAGIC_AVAILABLE = False

from api.tools.sanitizer import sanitize_logs
from utils import LoggerFactory

logger = LoggerFactory.instance().get_logger("api")
Expand Down Expand Up @@ -60,6 +61,13 @@ class FileProcessingError(Exception):
"""Custom exception for file processing errors."""


def _safe_filename_for_log(filename: str) -> str:
    """
    Build a log-safe representation of a file's name.

    Directory components are dropped via ``Path(...).name`` and the remaining
    basename is passed through ``sanitize_logs`` before being returned.

    Args:
        filename: The filename (possibly including a path) to be logged.

    Returns:
        The result of ``sanitize_logs`` applied to the basename of ``filename``.
    """
    basename = Path(filename).name
    return sanitize_logs(basename)


def get_file_extension(filename: str) -> str:
"""
Extracts the file extension from a filename.
Expand Down Expand Up @@ -229,7 +237,7 @@ def validate_file_content_type(content: bytes, filename: str) -> None:
if detected_mime != expected_mime:
logger.warning(
"File content mismatch for '%s': detected %s, expected %s",
filename, detected_mime, expected_mime
_safe_filename_for_log(filename), detected_mime, expected_mime
)
raise FileProcessingError(
f"File '{filename}' content does not match its extension. "
Expand All @@ -251,7 +259,7 @@ def validate_file_content_type(content: bytes, filename: str) -> None:
if detected_mime in dangerous_mimes:
logger.warning(
"Dangerous file disguised as text: '%s' (detected: %s)",
filename, detected_mime
_safe_filename_for_log(filename), detected_mime
)
raise FileProcessingError(
f"File '{filename}' appears to be an executable, not a text file."
Expand Down Expand Up @@ -292,7 +300,7 @@ def process_text_file(content: bytes, filename: str) -> str:
if len(text_content) > MAX_TEXT_CONTENT_LENGTH:
logger.warning(
"File '%s' content truncated from %d to %d characters",
filename, len(text_content), MAX_TEXT_CONTENT_LENGTH
_safe_filename_for_log(filename), len(text_content), MAX_TEXT_CONTENT_LENGTH
)
text_content = text_content[:MAX_TEXT_CONTENT_LENGTH] + "\n... [truncated]"

Expand Down Expand Up @@ -355,7 +363,11 @@ def process_uploaded_file(content: bytes, filename: str) -> dict:
Raises:
FileProcessingError: If the file type is not supported or processing fails.
"""
logger.info("Processing uploaded file: %s (%d bytes)", filename, len(content))
logger.info(
"Processing uploaded file (size_bytes=%d, extension=%s)",
len(content),
get_file_extension(filename) or "[none]",
)

if not is_supported_file(filename):
raise FileProcessingError(
Expand Down
15 changes: 15 additions & 0 deletions chatbot-core/tests/unit/services/test_file_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,21 @@ def test_rejects_disguised_file(self):
process_uploaded_file(png_content, "fake.jpg")
assert "content does not match" in str(exc_info.value)

def test_process_uploaded_file_logs_without_raw_filename(self, mocker):
    """Check the upload info log reports size and extension, never the raw filename."""
    # The basename is shaped like an access token; it must not reach the info log.
    raw_name = "ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.txt"
    payload = b"hello world"
    logged_info = mocker.patch("api.services.file_service.logger.info")

    process_uploaded_file(payload, raw_name)

    # The info record is expected to carry only the byte count and the extension.
    expected_args = (
        "Processing uploaded file (size_bytes=%d, extension=%s)",
        len(payload),
        ".txt",
    )
    logged_info.assert_any_call(*expected_args)
    assert raw_name not in str(logged_info.call_args_list)


class TestFormatFileContext:
"""Tests for format_file_context function."""
Expand Down
Loading