Skip to content

Commit 5d68306

Browse files
committed
split monolithic savedcmd_parser.py file into separate files
Signed-off-by: Luis Augenstein <luis.augenstein@tngtech.com>
1 parent 414391c commit 5d68306

5 files changed

Lines changed: 315 additions & 281 deletions

File tree

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# SPDX-License-Identifier: GPL-2.0-only OR MIT
2+
# Copyright (C) 2025 TNG Technology Consulting GmbH
3+
4+
from sbom.cmd_graph.savedcmd_parser.savedcmd_parser import parse_inputs_from_commands
5+
6+
__all__ = ["parse_inputs_from_commands"]
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# SPDX-License-Identifier: GPL-2.0-only OR MIT
2+
# Copyright (C) 2025 TNG Technology Consulting GmbH
3+
4+
import re
5+
from dataclasses import dataclass
6+
7+
8+
# Pattern matching a simple, single-level shell `if <condition>; then <body>; fi` block
# anchored at the start of the string. Nested if blocks are not supported.
# Group 1 captures the condition, group 2 the body; both groups are non-greedy.
# There is no dedicated handling of `else`/`elif`: such a branch is simply captured
# as part of the body (group 2).
IF_BLOCK_PATTERN = re.compile(
    r"""
    ^if(.*?);\s* # Match 'if <condition>;' (non-greedy)
    then(.*?);\s* # Match 'then <body>;' (non-greedy)
    fi\b # Match 'fi'
    """,
    re.VERBOSE,
)
17+
18+
19+
@dataclass
class IfBlock:
    """A single-level shell if-block as extracted by `split_commands`."""

    # Text captured between `if` and `; then` (whitespace-stripped by `split_commands`).
    condition: str
    # Text captured between `then` and `; fi` (whitespace-stripped by `split_commands`).
    then_statement: str
23+
24+
25+
def _unwrap_outer_parentheses(s: str) -> str:
26+
s = s.strip()
27+
if not (s.startswith("(") and s.endswith(")")):
28+
return s
29+
30+
count = 0
31+
for i, char in enumerate(s):
32+
if char == "(":
33+
count += 1
34+
elif char == ")":
35+
count -= 1
36+
# If count is 0 before the end, outer parentheses don't match
37+
if count == 0 and i != len(s) - 1:
38+
return s
39+
40+
# outer parentheses do match, unwrap once
41+
return _unwrap_outer_parentheses(s[1:-1])
42+
43+
44+
def _find_first_top_level_command_separator(
45+
commands: str, separators: list[str] = [";", "&&"]
46+
) -> tuple[int | None, int | None]:
47+
in_single_quote = False
48+
in_double_quote = False
49+
in_curly_braces = 0
50+
in_braces = 0
51+
for i, char in enumerate(commands):
52+
if char == "'" and not in_double_quote:
53+
# Toggle single quote state (unless inside double quotes)
54+
in_single_quote = not in_single_quote
55+
elif char == '"' and not in_single_quote:
56+
# Toggle double quote state (unless inside single quotes)
57+
in_double_quote = not in_double_quote
58+
59+
if in_single_quote or in_double_quote:
60+
continue
61+
62+
# Toggle braces state
63+
if char == "{":
64+
in_curly_braces += 1
65+
if char == "}":
66+
in_curly_braces -= 1
67+
68+
if char == "(":
69+
in_braces += 1
70+
if char == ")":
71+
in_braces -= 1
72+
73+
if in_curly_braces > 0 or in_braces > 0:
74+
continue
75+
76+
# return found separator position and separator length
77+
for separator in separators:
78+
if commands[i : i + len(separator)] == separator:
79+
return i, len(separator)
80+
81+
return None, None
82+
83+
84+
def split_commands(commands: str) -> list[str | IfBlock]:
    """
    Break a command-line string into its individual commands, in order.

    Parentheses enclosing the whole input are removed first. The rest is consumed
    from left to right:
    - A leading single-level if-block becomes an `IfBlock` instance.
    - Otherwise the text up to the next top-level separator (`;` or `&&`) becomes
      one whitespace-trimmed command.
    - Text without any further separator becomes the final command.

    Args:
        commands (str): The raw command string.

    Returns:
        list[str | IfBlock]: The single commands and `IfBlock` objects in input order.
    """
    parts: list[str | IfBlock] = []
    pending = _unwrap_outer_parentheses(commands)
    while pending:
        pending = pending.strip()

        # Leading if-block: record it and drop it together with trailing separators
        if_match = IF_BLOCK_PATTERN.match(pending)
        if if_match is not None:
            raw_condition, raw_body = if_match.groups()
            parts.append(IfBlock(raw_condition.strip(), raw_body.strip()))
            pending = pending[if_match.end() :].lstrip("; \n")
            continue

        position, length = _find_first_top_level_command_separator(pending)
        if position is None or length is None:
            # No separator left: everything remaining is the last command
            parts.append(pending)
            break

        # Cut off the command in front of the separator and carry on behind it
        parts.append(pending[:position].strip())
        pending = pending[position + length :].strip()

    return parts
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# SPDX-License-Identifier: GPL-2.0-only OR MIT
2+
# Copyright (C) 2025 TNG Technology Consulting GmbH
3+
4+
from typing import Any
5+
import sbom.sbom_logging as sbom_logging
6+
from sbom.cmd_graph.savedcmd_parser.command_splitter import IfBlock, split_commands
7+
from sbom.cmd_graph.savedcmd_parser.single_command_parsers import SINGLE_COMMAND_PARSERS
8+
from sbom.cmd_graph.savedcmd_parser.tokenizer import CmdParsingError
9+
from sbom.path_utils import PathStr
10+
11+
12+
def parse_inputs_from_commands(commands: str, fail_on_unknown_build_command: bool) -> list[PathStr]:
    """
    Collect the input files referenced by a command-line expression.

    The expression is split into single commands. Each command is handed to the first
    parser in `SINGLE_COMMAND_PARSERS` whose pattern matches it. Commands that cannot
    be handled (no matching parser, a `CmdParsingError`, or an if-block whose `then`
    statement references input files) are skipped and reported.

    Args:
        commands (str): Command line expression to parse.
        fail_on_unknown_build_command (bool): If True, skipped commands are reported via
            `sbom_logging.error`; otherwise via `sbom_logging.warning`.

    Returns:
        list[PathStr]: Input file paths, whitespace-stripped and without trailing slashes.
    """
    # Severity of "skipped command" reports depends on the caller's strictness setting
    report = sbom_logging.error if fail_on_unknown_build_command else sbom_logging.warning

    collected: list[PathStr] = []
    for single_command in split_commands(commands):
        if isinstance(single_command, IfBlock):
            # Inputs inside a 'then' statement are not supported: detect and report them only
            nested_inputs = parse_inputs_from_commands(single_command.then_statement, fail_on_unknown_build_command)
            if nested_inputs:
                report(
                    "Skipped parsing command {then_statement} because input files in IfBlock 'then' statement are not supported",
                    then_statement=single_command.then_statement,
                )
            continue

        # The first registered parser whose pattern matches handles the command
        selected_parser = None
        for pattern, candidate in SINGLE_COMMAND_PARSERS:
            if pattern.match(single_command):
                selected_parser = candidate
                break

        if selected_parser is None:
            report(
                "Skipped parsing command {single_command} because no matching parser was found",
                single_command=single_command,
            )
            continue

        try:
            collected.extend(selected_parser(single_command))
        except CmdParsingError as e:
            report(
                "Skipped parsing command {single_command} because of command parsing error: {error_message}",
                single_command=single_command,
                error_message=e.message,
            )

    return [path.strip().rstrip("/") for path in collected]

0 commit comments

Comments
 (0)