From 541bb183c5ad5f8616a1578539ddfae4ee98ea74 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 20 Jan 2025 16:31:24 +0000 Subject: [PATCH 1/8] Replace -R tree with cbi-tree utility This initial commit just moves the existing functionality. Other changes to reflect the new standalone nature of the utility (e.g., improved formatting, additional options) will follow in subsequent commits. Signed-off-by: John Pennycook --- codebasin/__main__.py | 6 +- codebasin/tree.py | 175 ++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 1 + 3 files changed, 177 insertions(+), 5 deletions(-) create mode 100755 codebasin/tree.py diff --git a/codebasin/__main__.py b/codebasin/__main__.py index 8f8e324..46f78d2 100755 --- a/codebasin/__main__.py +++ b/codebasin/__main__.py @@ -96,7 +96,7 @@ def _main(): metavar="", action="append", default=[], - choices=["all", "summary", "clustering", "duplicates", "files"], + choices=["all", "summary", "clustering", "duplicates"], help=_help_string( "Generate a report of the specified type:", "- summary: code divergence information", @@ -246,10 +246,6 @@ def report_enabled(name): if report_enabled("summary"): report.summary(setmap) - # Print files report - if report_enabled("files"): - report.files(codebase, state) - # Print clustering report if report_enabled("clustering"): basename = os.path.basename(args.analysis_file) diff --git a/codebasin/tree.py b/codebasin/tree.py new file mode 100755 index 0000000..5ff13bd --- /dev/null +++ b/codebasin/tree.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 +# Copyright (C) 2019-2024 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause + +import argparse +import logging +import os +import sys + +from codebasin import CodeBase, config, finder, report, util + +# TODO: Refactor to avoid imports from __main__ +from codebasin.__main__ import Formatter, _help_string, version + +log = logging.getLogger("codebasin") + + +def _build_parser() -> argparse.ArgumentParser: + """ + Build argument parser. + """ + parser = argparse.ArgumentParser( + description="CBI Tree Tool " + version, + formatter_class=argparse.RawTextHelpFormatter, + add_help=False, + ) + parser.add_argument( + "-h", + "--help", + action="help", + help=_help_string("Display help message and exit."), + ) + parser.add_argument( + "--version", + action="version", + version=f"CBI Coverage Tool {version}", + help=_help_string("Display version information and exit."), + ) + parser.add_argument( + "-x", + "--exclude", + dest="excludes", + metavar="", + action="append", + default=[], + help=_help_string( + "Exclude files matching this pattern from the code base.", + "May be specified multiple times.", + is_long=True, + ), + ) + parser.add_argument( + "-p", + "--platform", + dest="platforms", + metavar="", + action="append", + default=[], + help=_help_string( + "Include the specified platform in the analysis.", + "May be specified multiple times.", + "If not specified, all platforms will be included.", + is_long=True, + is_last=True, + ), + ) + + parser.add_argument( + "analysis_file", + metavar="", + help=_help_string( + "TOML file describing the analysis to be performed, " + + "including the codebase and platform descriptions.", + is_last=True, + ), + ) + + return parser + + +def _tree(args: argparse.Namespace): + # TODO: Refactor this to avoid duplication in __main__ + # Determine the root directory based on where codebasin is run. + rootdir = os.path.abspath(os.getcwd()) + + # Set up a default configuration object. + configuration = {} + + # Load the analysis file if it exists. + if args.analysis_file is not None: + path = os.path.abspath(args.analysis_file) + if os.path.exists(path): + if not os.path.splitext(path)[1] == ".toml": + raise RuntimeError(f"Analysis file {path} must end in .toml.") + + with open(path, "rb") as f: + analysis_toml = util._load_toml(f, "analysis") + + if "codebase" in analysis_toml: + if "exclude" in analysis_toml["codebase"]: + args.excludes += analysis_toml["codebase"]["exclude"] + + for name in args.platforms: + if name not in analysis_toml["platform"].keys(): + raise KeyError( + f"Platform {name} requested on the command line " + + "does not exist in the configuration file.", + ) + + cmd_platforms = args.platforms.copy() + for name in analysis_toml["platform"].keys(): + if cmd_platforms and name not in cmd_platforms: + continue + if "commands" not in analysis_toml["platform"][name]: + raise ValueError(f"Missing 'commands' for platform {name}") + p = analysis_toml["platform"][name]["commands"] + db = config.load_database(p, rootdir) + args.platforms.append(name) + configuration.update({name: db}) + + # Construct a codebase object associated with the root directory. + codebase = CodeBase(rootdir, exclude_patterns=args.excludes) + + # Parse the source tree, and determine source line associations. + # The trees and associations are housed in state. + state = finder.find( + rootdir, + codebase, + configuration, + show_progress=True, + ) + + # Print the file tree. + report.files(codebase, state) + sys.exit(0) + + +def cli(argv: list[str]) -> int: + parser = _build_parser() + args = parser.parse_args(argv) + + # Configure logging such that: + # - All messages are written to a log file + # - Only errors are written to the terminal + log.setLevel(logging.DEBUG) + + file_handler = logging.FileHandler("cbi.log", mode="w") + file_handler.setLevel(logging.INFO) + file_handler.setFormatter(Formatter()) + log.addHandler(file_handler) + + # Inform the user that a log file has been created. + # 'print' instead of 'log' to ensure the message is visible in the output. + log_path = os.path.abspath("cbi.log") + print(f"Log file created at {log_path}") + + stderr_handler = logging.StreamHandler(sys.stderr) + stderr_handler.setLevel(logging.ERROR) + stderr_handler.setFormatter(Formatter(colors=sys.stderr.isatty())) + log.addHandler(stderr_handler) + + return _tree(args) + + +def main(): + try: + cli(sys.argv[1:]) + except Exception as e: + log.error(str(e)) + sys.exit(1) + + +if __name__ == "__main__": + sys.argv[0] = "codebasin.tree" + main() diff --git a/pyproject.toml b/pyproject.toml index dc113f3..59b8bc7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ [project.scripts] codebasin = "codebasin:__main__.main" cbi-cov = "codebasin.coverage:__main__.main" +cbi-tree = "codebasin:tree.main" [project.urls] "Github" = "https://www.github.com/intel/code-base-investigator" From 7011433e38c233296f816364f56c5baa1eebd32a Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 20 Jan 2025 16:36:22 +0000 Subject: [PATCH 2/8] Remove cbi.log from cbi-tree The currently intended workflow for cbi-tree is to run codebasin first, and ensure that it accurately analyzes the code base. As such, we do not need to display the full set of warnings from cbi-tree. Over time, we can improve this workflow by making the relationship explicit (e.g., by having codebasin produce a results directory that cbi-tree reads). Signed-off-by: John Pennycook --- codebasin/tree.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/codebasin/tree.py b/codebasin/tree.py index 5ff13bd..ccccbe9 100755 --- a/codebasin/tree.py +++ b/codebasin/tree.py @@ -140,20 +140,9 @@ def cli(argv: list[str]) -> int: args = parser.parse_args(argv) # Configure logging such that: - # - All messages are written to a log file # - Only errors are written to the terminal log.setLevel(logging.DEBUG) - file_handler = logging.FileHandler("cbi.log", mode="w") - file_handler.setLevel(logging.INFO) - file_handler.setFormatter(Formatter()) - log.addHandler(file_handler) - - # Inform the user that a log file has been created. - # 'print' instead of 'log' to ensure the message is visible in the output. - log_path = os.path.abspath("cbi.log") - print(f"Log file created at {log_path}") - stderr_handler = logging.StreamHandler(sys.stderr) stderr_handler.setLevel(logging.ERROR) stderr_handler.setFormatter(Formatter(colors=sys.stderr.isatty())) From 188da964774ffb428cd984f1fb741f2398364df1 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 27 Feb 2025 11:28:20 +0000 Subject: [PATCH 3/8] Remove header from files report Only printed by cbi-tree. Signed-off-by: John Pennycook --- codebasin/report.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/codebasin/report.py b/codebasin/report.py index 0270a4f..dbd9667 100644 --- a/codebasin/report.py +++ b/codebasin/report.py @@ -789,9 +789,6 @@ def files( setmap[platform] += node.num_lines tree.insert(f, setmap) - print("", file=stream) - print(_heading("Files", stream), file=stream) - # Print a legend. legend = [] legend += ["Legend:"] From 9c3aa33cb02ddd59358cee42374c8e6677e16726 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 27 Feb 2025 11:59:22 +0000 Subject: [PATCH 4/8] Add --prune option to cbi-tree Excludes unused files from the output and computed metrics. Signed-off-by: John Pennycook --- codebasin/report.py | 7 +++++++ codebasin/tree.py | 11 ++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/codebasin/report.py b/codebasin/report.py index dbd9667..9217659 100644 --- a/codebasin/report.py +++ b/codebasin/report.py @@ -748,7 +748,9 @@ def write_to(self, stream: TextIO): def files( codebase: CodeBase, state: ParserState | None = None, + *, stream: TextIO = sys.stdout, + prune: bool = False, ): """ Produce a file tree representing the code base. @@ -787,6 +789,11 @@ def files( ): platform = frozenset(association[node]) setmap[platform] += node.num_lines + if prune: + # Prune unused files from the tree. + platforms = set().union(*setmap.keys()) + if len(platforms) == 0: + continue tree.insert(f, setmap) # Print a legend. diff --git a/codebasin/tree.py b/codebasin/tree.py index ccccbe9..8c12e1c 100755 --- a/codebasin/tree.py +++ b/codebasin/tree.py @@ -61,6 +61,15 @@ def _build_parser() -> argparse.ArgumentParser: "May be specified multiple times.", "If not specified, all platforms will be included.", is_long=True, + ), + ) + parser.add_argument( + "--prune", + dest="prune", + action="store_true", + help=_help_string( + "Prune unused files from the tree.", + is_long=True, is_last=True, ), ) @@ -131,7 +140,7 @@ def _tree(args: argparse.Namespace): ) # Print the file tree. - report.files(codebase, state) + report.files(codebase, state, prune=args.prune) sys.exit(0) From e8caa7332664d2be9a047e87d0e064374ed15fe7 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 27 Feb 2025 12:10:48 +0000 Subject: [PATCH 5/8] Add --levels/-L option Signed-off-by: John Pennycook --- codebasin/report.py | 22 +++++++++++++++++++--- codebasin/tree.py | 17 ++++++++++++++++- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/codebasin/report.py b/codebasin/report.py index 9217659..fb7638f 100644 --- a/codebasin/report.py +++ b/codebasin/report.py @@ -649,9 +649,11 @@ def insert( def _print( self, node: Node, + depth: int = 0, prefix: str = "", connector: str = "", fancy: bool = True, + levels: int = None, ): """ Recursive helper function to print all nodes in a FileTree. @@ -669,7 +671,14 @@ def _print( fancy: bool, default: True Whether to use fancy formatting (including colors). + + levels: int, optional + The maximum number of levels to print. """ + # Skip this node and its children if we have hit the maximum depth. + if levels and depth > levels: + return [] + if fancy: dash = "\u2500" cont = "\u251C" @@ -722,14 +731,16 @@ def _print( next_connector = cont lines += self._print( node.children[name], + depth + 1, next_prefix, next_connector, fancy, + levels, ) return lines - def write_to(self, stream: TextIO): + def write_to(self, stream: TextIO, levels: int = None): """ Write the FileTree to the specified stream. @@ -737,8 +748,12 @@ def write_to(self, stream: TextIO): ---------- stream: TextIO The text stream to write to. + + levels: int, optional + The maximum number of levels to print. + If 0, print only the top-level summary. """ - lines = self._print(self.root, fancy=stream.isatty()) + lines = self._print(self.root, fancy=stream.isatty(), levels=levels) output = "\n".join(lines) if not stream.isatty(): output = _strip_colors(output) @@ -751,6 +766,7 @@ def files( *, stream: TextIO = sys.stdout, prune: bool = False, + levels: int = None, ): """ Produce a file tree representing the code base. @@ -818,4 +834,4 @@ def files( print(legend, file=stream) # Print the tree. - tree.write_to(stream) + tree.write_to(stream, levels=levels) diff --git a/codebasin/tree.py b/codebasin/tree.py index 8c12e1c..6ffbfd6 100755 --- a/codebasin/tree.py +++ b/codebasin/tree.py @@ -70,6 +70,17 @@ def _build_parser() -> argparse.ArgumentParser: help=_help_string( "Prune unused files from the tree.", is_long=True, + ), + ) + parser.add_argument( + "-L", + "--levels", + dest="levels", + metavar="", + type=int, + help=_help_string( + "Print only the specified number of levels.", + is_long=True, is_last=True, ), ) @@ -88,6 +99,10 @@ def _build_parser() -> argparse.ArgumentParser: def _tree(args: argparse.Namespace): + # Refuse to print a tree with no levels, consistent with tree utility. + if args.levels is not None and args.levels <= 0: + raise ValueError("Number of levels must be greater than 0.") + # TODO: Refactor this to avoid duplication in __main__ # Determine the root directory based on where codebasin is run. rootdir = os.path.abspath(os.getcwd()) @@ -140,7 +155,7 @@ def _tree(args: argparse.Namespace): ) # Print the file tree. - report.files(codebase, state, prune=args.prune) + report.files(codebase, state, prune=args.prune, levels=args.levels) sys.exit(0) From ee3c3e9dde8ac0f478270556bdbdc752fe430bcf Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 3 Mar 2025 14:10:30 +0000 Subject: [PATCH 6/8] Add tests for --levels/-L option Signed-off-by: John Pennycook --- tests/files/test_filetree.py | 67 +++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/tests/files/test_filetree.py b/tests/files/test_filetree.py index 22ed3c4..8e643af 100644 --- a/tests/files/test_filetree.py +++ b/tests/files/test_filetree.py @@ -32,7 +32,6 @@ def setUpClass(self): self.path = Path(self.tmp.name) open(self.path / "file.cpp", mode="w").close() open(self.path / "other.cpp", mode="w").close() - open(self.path / "unused.cpp", mode="w").close() os.symlink(self.path / "file.cpp", self.path / "symlink.cpp") @classmethod @@ -153,6 +152,72 @@ def test_report(self): self.assertTrue("[-B | 1 | 100.00 | 50.00]" in output) self.assertTrue("[A- | 1 | 100.00 | 50.00]" in output) + def test_levels(self): + """Check report --levels flag works correctly""" + # Set up subdirectories for this test + tmp = tempfile.TemporaryDirectory() + path = Path(tmp.name) + os.makedirs(path / "first" / "second" / "third") + open(path / "first" / "one.cpp", mode="w").close() + open(path / "first" / "second" / "two.cpp", mode="w").close() + + codebase = CodeBase(path) + configuration = { + "X": [ + { + "file": str(path / "first" / "one.cpp"), + "defines": [], + "include_paths": [], + "include_files": [], + }, + ], + "Y": [ + { + "file": str(path / "first" / "second" / "two.cpp"), + "defines": [], + "include_paths": [], + "include_files": [], + }, + ], + } + state = finder.find( + path, + codebase, + configuration, + show_progress=False, + ) + + # By default, we should see all levels of the tree. + stream = io.StringIO() + report.files(codebase, state, stream=stream) + output = stream.getvalue() + self.assertTrue(str(path) in output) + self.assertTrue("first/" in output) + self.assertTrue("one.cpp" in output) + self.assertTrue("two.cpp" in output) + + # With two levels, the "second" directory should be collapsed. + # This will hide "two.cpp" from the output. + stream = io.StringIO() + report.files(codebase, state, stream=stream, levels=2) + output = stream.getvalue() + self.assertTrue(str(path) in output) + self.assertTrue("first/" in output) + self.assertTrue("one.cpp" in output) + self.assertFalse("two.cpp" in output) + + # With just one level, the "first" directory should be collapsed. + # This will hide "one.cpp" and "two.cpp" from the output. + stream = io.StringIO() + report.files(codebase, state, stream=stream, levels=1) + output = stream.getvalue() + self.assertTrue(str(path) in output) + self.assertTrue("first/" in output) + self.assertFalse("one.cpp" in output) + self.assertFalse("two.cpp" in output) + + tmp.cleanup() + if __name__ == "__main__": unittest.main() From d316835f342c4f8e6d7688ddfc20fe0be58d61c0 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 3 Mar 2025 14:15:32 +0000 Subject: [PATCH 7/8] Add tests for --prune option Signed-off-by: John Pennycook --- tests/files/test_filetree.py | 43 ++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tests/files/test_filetree.py b/tests/files/test_filetree.py index 8e643af..2583a24 100644 --- a/tests/files/test_filetree.py +++ b/tests/files/test_filetree.py @@ -218,6 +218,49 @@ def test_levels(self): tmp.cleanup() + def test_prune(self): + """Check report --prune flag works correctly""" + # Set up subdirectories for this test + tmp = tempfile.TemporaryDirectory() + path = Path(tmp.name) + with open(path / "foo.cpp", mode="w") as f: + f.write("void foo();") + open(path / "unused.cpp", mode="w").close() + + codebase = CodeBase(path) + configuration = { + "X": [ + { + "file": str(path / "foo.cpp"), + "defines": [], + "include_paths": [], + "include_files": [], + }, + ], + } + state = finder.find( + path, + codebase, + configuration, + show_progress=False, + ) + + # By default, we should see both used and unused files. + stream = io.StringIO() + report.files(codebase, state, stream=stream) + output = stream.getvalue() + self.assertTrue("foo.cpp" in output) + self.assertTrue("unused.cpp" in output) + + # With prune, we should only see the used files. + stream = io.StringIO() + report.files(codebase, state, stream=stream, prune=True) + output = stream.getvalue() + self.assertTrue("foo.cpp" in output) + self.assertFalse("unused.cpp" in output) + + tmp.cleanup() + if __name__ == "__main__": unittest.main() From 2f4b32d295b23fbcfb02aa2a5c74e4915ae030dd Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 27 Mar 2025 08:55:35 +0000 Subject: [PATCH 8/8] Remove is_long from cbi-tree --prune option Although --prune is a "long option", that's not what the "long" in "is_long" refers to. Because --prune doesn't have a shorthand and doesn't accept an argument, the description of the option is not long enough to generate an initial newline. Signed-off-by: John Pennycook --- codebasin/tree.py | 1 - 1 file changed, 1 deletion(-) diff --git a/codebasin/tree.py b/codebasin/tree.py index 6ffbfd6..c6a2408 100755 --- a/codebasin/tree.py +++ b/codebasin/tree.py @@ -69,7 +69,6 @@ def _build_parser() -> argparse.ArgumentParser: action="store_true", help=_help_string( "Prune unused files from the tree.", - is_long=True, ), ) parser.add_argument(