Skip to content

Commit 8e00658

Browse files
🎨 Make record logging safer
Potential fix for pull request finding 'CodeQL / Log Injection' Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
1 parent 8a843f6 commit 8e00658

5 files changed

Lines changed: 72 additions & 11 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## 1.10.6
99

10+
### Added
11+
12+
- Protection against log injection attacks.
13+
1014
### Fixed
1115

1216
- Column misalignment in Ripple-to-REDCap.

python_jobs/src/hbnmigration/from_redcap/to_curious.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
initialize_logging,
2626
new_curious_account,
2727
redcap_api_push,
28+
safe_record_for_log,
2829
)
2930
from .config import Fields, Values
3031
from .from_redcap import fetch_data, RedcapRecord
@@ -506,20 +507,21 @@ async def redcap_to_curious_webhook(
506507
3. Set URL to: ``https://your-domain.com/webhook/redcap-to-curious``
507508
508509
"""
510+
safe_record = safe_record_for_log(record)
509511
logger.info(
510512
"Received REDCap trigger for record %s (instrument: %s)",
511-
record,
512-
instrument,
513+
safe_record,
514+
safe_record_for_log(instrument),
513515
)
514516
if ready_to_send_to_curious != "1":
515-
logger.debug("Ready flag not set for record %s, ignoring trigger", record)
517+
logger.debug("Ready flag not set for record %s, ignoring trigger", safe_record)
516518
return {
517519
"status": "ignored",
518520
"message": "Ready flag not set to '1', ignoring trigger",
519521
"record_id": record,
520522
}
521523
background_tasks.add_task(process_record_for_curious, record)
522-
logger.info("Queued record %s for Curious push", record)
524+
logger.info("Queued record %s for Curious push", safe_record)
523525
return {
524526
"status": "accepted",
525527
"message": f"Trigger accepted for record {record}",

python_jobs/src/hbnmigration/from_redcap/to_redcap.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
from .._config_variables import redcap_variables
2222
from ..exceptions import NoData
23-
from ..utility_functions import initialize_logging, redcap_api_push
23+
from ..utility_functions import initialize_logging, redcap_api_push, safe_record_for_log
2424
from .config import Fields, Values
2525
from .from_redcap import fetch_data, RedcapRecord
2626

@@ -443,23 +443,25 @@ async def redcap_to_intake_webhook(
443443
3. Set URL to: ``https://your-domain.com/webhook/redcap-to-intake``
444444
445445
"""
446+
safe_record = safe_record_for_log(record)
447+
446448
logger.info(
447449
"Received REDCap trigger for record %s (instrument: %s)",
448-
record,
449-
instrument,
450+
safe_record,
451+
safe_record_for_log(instrument),
450452
)
451453
if intake_ready != "1":
452-
logger.debug("Ready flag not set for record %s, ignoring trigger", record)
454+
logger.debug("Ready flag not set for record %s, ignoring trigger", safe_record)
453455
return {
454456
"status": "ignored",
455457
"message": "Ready flag not set to '1', ignoring trigger",
456458
"record_id": record,
457459
}
458460
background_tasks.add_task(process_record_for_redcap_operations, record)
459-
logger.info("Queued record %s for Intake REDCap push", record)
461+
logger.info("Queued record %s for Intake REDCap push", safe_record)
460462
return {
461463
"status": "accepted",
462-
"message": f"Trigger accepted for record {record}",
464+
"message": f"Trigger accepted for record {safe_record}",
463465
"record_id": record,
464466
}
465467

python_jobs/src/hbnmigration/utility_functions/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
ValueClass,
6464
ValueField,
6565
)
66-
from .logging import initialize_logging, setup_tsv_logger
66+
from .logging import initialize_logging, safe_record_for_log, setup_tsv_logger
6767
from .secrets import ImportWithFallback
6868
from .typescript import tsx
6969

@@ -125,6 +125,7 @@
125125
"print_module_variables",
126126
"read_vars_file_as_module",
127127
"redcap_api_push",
128+
"safe_record_for_log",
128129
"setup_tsv_logger",
129130
"today",
130131
"tsx",

python_jobs/src/hbnmigration/utility_functions/logging.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,22 @@
99
import os
1010
from pathlib import Path
1111
import platform
12+
import re
1213
import sys
1314
from typing import Optional, Set
15+
import unicodedata
16+
17+
_ANSI_RE = re.compile(
18+
r"\x1b\[[0-9;]*+[a-zA-Z]" # CSI sequences: ESC [ <params> <letter>
19+
r"|\x1b\](?>[^\x07]*)\x07" # OSC sequences: ESC ] <payload> BEL
20+
)
21+
"""Possessive quantifier (*+) and atomic group ((?>...)) prevent catastrophic
22+
backtracking (ReDoS)."""
23+
24+
_UNSAFE_CHARS = frozenset(("\u2028", "\u2029"))
25+
"""Line separator ↵ and paragraph separator ¶."""
26+
_UNSAFE_CATEGORIES = frozenset(("Cc", "Cf"))
27+
"""Control characters and format characters."""
1428

1529
_initialized = False
1630
"""Is logging initialized?"""
@@ -126,6 +140,44 @@ def initialize_logging(
126140
return logging.getLogger(name)
127141

128142

143+
def safe_record_for_log(record: str) -> str:
144+
r"""
145+
Sanitize a string for safe inclusion in log output.
146+
147+
Mitigates CRLF log injection attacks by removing characters that could
148+
be used to forge log entries, evade log analysis, or exploit log viewers.
149+
150+
Strips the following:
151+
- CR/LF (\r, \n) — prevents basic log line injection
152+
- Other ASCII control characters (\x00-\x1f, \x7f) — removes tabs,
153+
backspaces, escape sequences, null bytes, etc.
154+
- Unicode line separators (U+2028, U+2029) — prevents injection via
155+
lesser-known Unicode newline characters
156+
- ANSI escape sequences (ESC[...m, etc.) — prevents terminal escape
157+
attacks in log viewers that interpret color/cursor codes
158+
- Unicode control category characters (Cc, Cf) — catches remaining
159+
control and formatting characters across all of Unicode
160+
161+
Parameters
162+
----------
163+
record
164+
The untrusted string to sanitize before logging.
165+
166+
Returns
167+
-------
168+
str
169+
A sanitized copy of the string with dangerous characters removed.
170+
171+
"""
172+
result = _ANSI_RE.sub("", record)
173+
return "".join(
174+
ch
175+
for ch in result
176+
if ch not in _UNSAFE_CHARS
177+
and unicodedata.category(ch) not in _UNSAFE_CATEGORIES
178+
)
179+
180+
129181
def setup_tsv_logger(name="tsv_logger", filename="error_log.tsv", level=logging.ERROR):
130182
"""Configure a logger with TSV output."""
131183
logger = logging.getLogger(name)

0 commit comments

Comments
 (0)