Skip to content

Commit b3554a4

Browse files
Merge pull request #194 from hexinw-nvidia/security_scan
fix: security risk in file symlink resolution.
2 parents dfb4365 + fdc3481 commit b3554a4

File tree

5 files changed

+106
-20
lines changed

5 files changed

+106
-20
lines changed

.github/workflows/unit_test.yml

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,36 +12,18 @@ jobs:
1212
build_wheels:
1313
runs-on: ubuntu-24.04
1414
container:
15-
image: 'nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04'
15+
image: 'ghcr.io/nvidia/nvidia-resiliency-ext/builder_amd64:cuda-12.4.1-cudnn-devel-ubuntu20.04'
1616
steps:
17-
- name: Update GCC
18-
run: |
19-
export DEBIAN_FRONTEND=noninteractive
20-
apt update && apt install -y build-essential gcc-10 g++-10
21-
- name: Install Python versions and pips
22-
run: |
23-
export DEBIAN_FRONTEND=noninteractive
24-
apt update && apt install -y software-properties-common curl
25-
add-apt-repository ppa:deadsnakes/ppa
26-
apt-get install -y python3.10 python3.10-dev
27-
apt-get install -y python3.11 python3.11-dev
28-
apt-get install -y python3.12 python3.12-dev
29-
curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
30-
curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
31-
curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12
3217
- name: Checkout code
3318
uses: actions/checkout@v4
3419
- name: Build wheel with Python 3.10
3520
run: |
36-
python3.10 -m pip install -U setuptools poetry build six pybind11
3721
python3.10 -m poetry build -f wheel
3822
- name: Build wheel with Python 3.11
3923
run: |
40-
python3.11 -m pip install -U setuptools poetry build six pybind11
4124
python3.11 -m poetry build -f wheel
4225
- name: Build wheel with Python 3.12
4326
run: |
44-
python3.12 -m pip install -U setuptools poetry build six pybind11
4527
python3.12 -m poetry build -f wheel
4628
- name: Upload the wheel artifact
4729
uses: actions/upload-artifact@v4

src/nvidia_resiliency_ext/shared_utils/log_aggregator.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070

7171
from nvidia_resiliency_ext.shared_utils.log_manager import LogConfig
7272
from nvidia_resiliency_ext.shared_utils.log_node_local_tmp import NodeLogAggregator
73+
from nvidia_resiliency_ext.shared_utils.os_utils import validate_directory
7374

7475

7576
def main():
@@ -104,6 +105,9 @@ def main():
104105
if node_local_tmp_dir is None:
105106
raise RuntimeError("Distributed Log directory must be set for log aggregator service")
106107

108+
# Validate directory security before proceeding
109+
list(map(validate_directory, [log_dir, node_local_tmp_dir]))
110+
107111
print("Starting NVRx Log Aggregator Service")
108112
print(f" Log Path: {os.path.join(log_dir, log_file)}")
109113
print(f" Node Local Temp directory: {node_local_tmp_dir}")

src/nvidia_resiliency_ext/shared_utils/log_manager.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
DynamicLogFormatter,
100100
NodeLocalTmpLogHandler,
101101
)
102+
from nvidia_resiliency_ext.shared_utils.os_utils import validate_directory
102103

103104

104105
class LogConfig:
@@ -242,6 +243,7 @@ def _setup_logger(self) -> logging.Logger:
242243

243244
if self.node_local_tmp_logging_enabled:
244245
os.makedirs(self._node_local_tmp_dir, exist_ok=True)
246+
validate_directory(self._node_local_tmp_dir)
245247
handler = NodeLocalTmpLogHandler(
246248
self.workload_local_rank,
247249
self._node_local_tmp_dir,

src/nvidia_resiliency_ext/shared_utils/log_node_local_tmp.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
from datetime import datetime
2626
from typing import Dict, List, Optional
2727

28+
from nvidia_resiliency_ext.shared_utils.os_utils import validate_filepath
29+
2830

2931
class NodeLocalTmpLogHandler(logging.Handler):
3032
"""Custom log handler that logs messages to temporary files on local node storage."""
@@ -103,7 +105,8 @@ def _write_message(self, message: str):
103105
sys.stderr.write(f"File rotation error for {self.fname}: {e}\n")
104106
sys.stderr.flush()
105107

106-
# Append message to the rank's message file
108+
# Validate file path before opening
109+
validate_filepath(self.fname)
107110
with open(self.fname, 'a') as f:
108111
f.write(f"{message}\n")
109112
f.flush() # Ensure message is written immediately
@@ -301,6 +304,7 @@ def _aggregator_loop(self):
301304
"""Main loop for the log aggregator."""
302305
# Setup per-node log file
303306
log_file = os.path.join(self._log_dir, self._log_file)
307+
validate_filepath(log_file)
304308
output = open(log_file, 'a', buffering=1) # Line buffered
305309
try:
306310
self._process_messages(output)
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
2+
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""
18+
OS utilities for safe file operations.
19+
20+
This module provides os utilities to prevent symlink attacks and ensure
21+
safe file operations.
22+
"""
23+
24+
import os
25+
import stat
26+
27+
28+
def validate_directory(dir_path: str) -> None:
29+
"""
30+
Validate that a directory is safe for file operations.
31+
32+
This function performs comprehensive security checks to ensure the directory
33+
is safe from symlink attacks and has appropriate permissions.
34+
35+
Args:
36+
dir_path: Path to the directory to validate
37+
38+
Raises:
39+
OSError: If the directory is unsafe or inaccessible
40+
"""
41+
if not os.path.exists(dir_path):
42+
return
43+
44+
# Check if it's actually a directory
45+
if not os.path.isdir(dir_path):
46+
raise OSError(f"Path is not a directory: {dir_path}")
47+
48+
# Check if it's a symlink
49+
if os.path.islink(dir_path):
50+
raise OSError(f"Directory is a symlink: {dir_path}")
51+
52+
# Get directory stats
53+
try:
54+
dir_stat = os.stat(dir_path)
55+
except OSError as e:
56+
raise OSError(f"Cannot access directory {dir_path}: {e}")
57+
58+
# Check if it's a regular directory (not a special file)
59+
if not stat.S_ISDIR(dir_stat.st_mode):
60+
raise OSError(f"Path is not a regular directory: {dir_path}")
61+
62+
# Check permissions - directory should not be world-writable unless sticky bit is set
63+
mode = dir_stat.st_mode
64+
if stat.S_IWOTH & mode: # World writable
65+
if not (stat.S_ISVTX & mode): # No sticky bit
66+
raise OSError(f"Directory is world-writable without sticky bit: {dir_path}")
67+
68+
69+
def validate_filepath(file_path: str) -> None:
70+
"""
71+
Validate that a file path is safe for file operations.
72+
73+
This function checks that the file (if it exists) is not a symlink and is a regular file.
74+
75+
Args:
76+
file_path: Path to the file to validate
77+
78+
Raises:
79+
OSError: If the file is unsafe or inaccessible
80+
"""
81+
if os.path.exists(file_path):
82+
validate_directory(os.path.dirname(file_path))
83+
# Check if it's a symlink - this must be done every time
84+
if os.path.islink(file_path):
85+
raise OSError(f"File is a symlink: {file_path}")
86+
87+
# Validate it's a regular file
88+
try:
89+
file_stat = os.stat(file_path) # Follow symlinks for stat
90+
if not stat.S_ISREG(file_stat.st_mode):
91+
raise OSError(f"Path is not a regular file: {file_path}")
92+
except OSError as e:
93+
raise OSError(f"Cannot access file {file_path}: {e}")
94+
# If file doesn't exist, that's fine - it will be created

0 commit comments

Comments
 (0)