Skip to content

Commit cd5c47b

Browse files
🎨 Make record logging safer
Potential fix for pull request finding 'CodeQL / Log Injection' Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
1 parent 8a843f6 commit cd5c47b

4 files changed

Lines changed: 68 additions & 9 deletions

File tree

python_jobs/src/hbnmigration/from_redcap/to_curious.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
initialize_logging,
2626
new_curious_account,
2727
redcap_api_push,
28+
safe_record_for_log,
2829
)
2930
from .config import Fields, Values
3031
from .from_redcap import fetch_data, RedcapRecord
@@ -506,20 +507,21 @@ async def redcap_to_curious_webhook(
506507
3. Set URL to: ``https://your-domain.com/webhook/redcap-to-curious``
507508
508509
"""
510+
safe_record = safe_record_for_log(record)
509511
logger.info(
510512
"Received REDCap trigger for record %s (instrument: %s)",
511-
record,
513+
safe_record,
512514
instrument,
513515
)
514516
if ready_to_send_to_curious != "1":
515-
logger.debug("Ready flag not set for record %s, ignoring trigger", record)
517+
logger.debug("Ready flag not set for record %s, ignoring trigger", safe_record)
516518
return {
517519
"status": "ignored",
518520
"message": "Ready flag not set to '1', ignoring trigger",
519521
"record_id": record,
520522
}
521523
background_tasks.add_task(process_record_for_curious, record)
522-
logger.info("Queued record %s for Curious push", record)
524+
logger.info("Queued record %s for Curious push", safe_record)
523525
return {
524526
"status": "accepted",
525527
"message": f"Trigger accepted for record {record}",

python_jobs/src/hbnmigration/from_redcap/to_redcap.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
from .._config_variables import redcap_variables
2222
from ..exceptions import NoData
23-
from ..utility_functions import initialize_logging, redcap_api_push
23+
from ..utility_functions import initialize_logging, redcap_api_push, safe_record_for_log
2424
from .config import Fields, Values
2525
from .from_redcap import fetch_data, RedcapRecord
2626

@@ -443,23 +443,24 @@ async def redcap_to_intake_webhook(
443443
3. Set URL to: ``https://your-domain.com/webhook/redcap-to-intake``
444444
445445
"""
446+
safe_record = safe_record_for_log(record)
446447
logger.info(
447448
"Received REDCap trigger for record %s (instrument: %s)",
448-
record,
449+
safe_record,
449450
instrument,
450451
)
451452
if intake_ready != "1":
452-
logger.debug("Ready flag not set for record %s, ignoring trigger", record)
453+
logger.debug("Ready flag not set for record %s, ignoring trigger", safe_record)
453454
return {
454455
"status": "ignored",
455456
"message": "Ready flag not set to '1', ignoring trigger",
456457
"record_id": record,
457458
}
458459
background_tasks.add_task(process_record_for_redcap_operations, record)
459-
logger.info("Queued record %s for Intake REDCap push", record)
460+
logger.info("Queued record %s for Intake REDCap push", safe_record)
460461
return {
461462
"status": "accepted",
462-
"message": f"Trigger accepted for record {record}",
463+
"message": f"Trigger accepted for record {safe_record}",
463464
"record_id": record,
464465
}
465466

python_jobs/src/hbnmigration/utility_functions/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
ValueClass,
6464
ValueField,
6565
)
66-
from .logging import initialize_logging, setup_tsv_logger
66+
from .logging import initialize_logging, safe_record_for_log, setup_tsv_logger
6767
from .secrets import ImportWithFallback
6868
from .typescript import tsx
6969

@@ -125,6 +125,7 @@
125125
"print_module_variables",
126126
"read_vars_file_as_module",
127127
"redcap_api_push",
128+
"safe_record_for_log",
128129
"setup_tsv_logger",
129130
"today",
130131
"tsx",

python_jobs/src/hbnmigration/utility_functions/logging.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@
99
import os
1010
from pathlib import Path
1111
import platform
12+
import re
1213
import sys
1314
from typing import Optional, Set
15+
import unicodedata
1416

1517
_initialized = False
1618
"""Is logging initialized?"""
@@ -126,6 +128,59 @@ def initialize_logging(
126128
return logging.getLogger(name)
127129

128130

131+
def safe_record_for_log(record: str) -> str:
132+
r"""
133+
Sanitize a string for safe inclusion in log output.
134+
135+
Mitigates CRLF log injection attacks by removing characters that could
136+
be used to forge log entries, evade log analysis, or exploit log viewers.
137+
138+
Strips the following:
139+
- CR/LF (\r, \n) — prevents basic log line injection
140+
- Other ASCII control characters (\x00-\x1f, \x7f) — removes tabs,
141+
backspaces, escape sequences, null bytes, etc.
142+
- Unicode line separators (U+2028, U+2029) — prevents injection via
143+
lesser-known Unicode newline characters
144+
- ANSI escape sequences (ESC[...m, etc.) — prevents terminal escape
145+
attacks in log viewers that interpret color/cursor codes
146+
- Unicode control category characters (Cc, Cf) — catches remaining
147+
control and formatting characters across all of Unicode
148+
149+
Parameters
150+
----------
151+
record
152+
The untrusted string to sanitize before logging.
153+
154+
Returns
155+
-------
156+
result
157+
A sanitized copy of the string with dangerous characters removed.
158+
159+
Example:
160+
>>> safe_record_for_log("admin\nINFO: Forged entry")
161+
'adminINFO: Forged entry'
162+
>>> safe_record_for_log("normal\x1b[31m RED ALERT")
163+
'normal RED ALERT'
164+
>>> safe_record_for_log("benign\u2028Injected line")
165+
'benignInjected line'
166+
167+
"""
168+
# Strip ANSI escape sequences (e.g. ESC[31m, ESC[0J, ESC]0;title\x07)
169+
result = re.sub(r"\x1b\[[0-9;]*[a-zA-Z]", "", record)
170+
result = re.sub(r"\x1b\][^\x07]*\x07", "", result)
171+
172+
# Remove characters in Unicode categories Cc (control) and Cf (format)
173+
# This covers \r, \n, \t, \x00, null bytes, soft hyphens, direction
174+
# overrides, zero-width characters, and more.
175+
result = "".join(
176+
ch for ch in result if unicodedata.category(ch) not in ("Cc", "Cf")
177+
)
178+
179+
# Explicitly strip Unicode line/paragraph separators (category Zl/Zp)
180+
# in case the Unicode standard ever reclassifies them
181+
return result.replace("\u2028", "").replace("\u2029", "")
182+
183+
129184
def setup_tsv_logger(name="tsv_logger", filename="error_log.tsv", level=logging.ERROR):
130185
"""Configure a logger with TSV output."""
131186
logger = logging.getLogger(name)

0 commit comments

Comments
 (0)