Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions src/aiperf/common/models/sequence_distribution.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Sequence length distribution models for AIPerf benchmarking.
Expand Down Expand Up @@ -266,11 +266,14 @@ class DistributionParser:
"""Parser for various sequence length distribution string formats."""

# Regex patterns for different formats (allow whitespace and optional stddev)
# Use an unambiguous numeric pattern to avoid ReDoS: (?:\d+(?:\.\d+)?|\.\d+)
# This matches integers (123), decimals (123.45), or leading-dot decimals (.45).
_NUM = r"(?:\d+(?:\.\d+)?|\.\d+)"
SEMICOLON_PATTERN = re.compile(
r"(\d+)(?:\|([0-9]*\.?[0-9]+))?\s*,\s*(\d+)(?:\|([0-9]*\.?[0-9]+))?\s*:\s*([0-9]*\.?[0-9]+)"
rf"(\d+)(?:\|({_NUM}))?\s*,\s*(\d+)(?:\|({_NUM}))?\s*:\s*({_NUM})"
)
BRACKET_PATTERN = re.compile(
r"\(\s*(\d+)(?:\|([0-9]*\.?[0-9]+))?\s*,\s*(\d+)(?:\|([0-9]*\.?[0-9]+))?\s*\)\s*:\s*([0-9]*\.?[0-9]+)"
rf"\(\s*(\d+)(?:\|({_NUM}))?\s*,\s*(\d+)(?:\|({_NUM}))?\s*\)\s*:\s*({_NUM})"
)

@classmethod
Expand Down
6 changes: 4 additions & 2 deletions src/aiperf/gpu_telemetry/metrics_config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""GPU telemetry metrics configuration utilities."""
Expand Down Expand Up @@ -136,7 +136,9 @@ def _infer_unit_from_help(self, help_msg: str) -> MetricUnitT:
GenericMetricUnit.PERCENT
"""
# Extract unit from "(in UNIT)" pattern
match = re.search(r"\(in\s+([^)]+)\)", help_msg, re.IGNORECASE)
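# Use [^\s)]+ instead of [^)]+ to avoid ReDoS from overlapping quantifiers
# (whitespace matches both \s+ and [^)]+, causing O(n²) backtracking).
# Note: the captured unit can no longer contain whitespace, so a help string
# such as "(in degrees C)" does not match and falls through to the COUNT default.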
match = re.search(r"\(in\s+([^\s)]+)\)", help_msg, re.IGNORECASE)
if not match:
return GenericMetricUnit.COUNT

Expand Down
5 changes: 3 additions & 2 deletions src/aiperf/server_metrics/units.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Detect units from prometheus metric descriptions and names"""

Expand Down Expand Up @@ -267,7 +267,8 @@ def _parse_unit_from_metric_name(metric_name: str) -> BaseMetricUnit | None:

# Regex to match "(in <unit>)" pattern and capture the unit.
# Examples: "(in MiB)", "(in W)", "(in mJ)", "(in C)"
_PARENTHETICAL_IN_UNIT_PATTERN = re.compile(r"\(in\s+([^)]+)\)")
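# Use [^\s)]+ instead of [^)]+ to avoid ReDoS from overlapping quantifiers
# (whitespace would otherwise match both \s+ and [^)]+). As a consequence the
# captured unit cannot contain whitespace: "(in degrees C)" does not match.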
_PARENTHETICAL_IN_UNIT_PATTERN = re.compile(r"\(in\s+([^\s)]+)\)")


def _parse_parenthetical_unit(description: str | None) -> BaseMetricUnit | None:
Expand Down
6 changes: 4 additions & 2 deletions tests/ci/test_docs_end_to_end/parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Markdown parser for extracting server setup and AIPerf run commands.
Expand Down Expand Up @@ -55,7 +55,9 @@ def _parse_file(self, file_path: str):

# Look for HTML comment tags
if line.startswith("<!--") and line.endswith("-->"):
tag_match = re.match(r"<!--\s*([^-\s]+.*?)\s*-->", line)
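# Use [^-\s]\S* instead of [^-\s]+.*? to avoid ReDoS
# (both quantifiers can match the same chars, causing O(n²) backtracking).
# Note: the captured tag can no longer contain whitespace, so a comment such
# as "<!-- some tag -->" does not match this pattern.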
tag_match = re.match(r"<!--\s*([^-\s]\S*)\s*-->", line)
if tag_match:
tag_name = tag_match.group(1).strip()

Expand Down