diff --git a/codebasin/__main__.py b/codebasin/__main__.py index 8f8e324..46f78d2 100755 --- a/codebasin/__main__.py +++ b/codebasin/__main__.py @@ -96,7 +96,7 @@ def _main(): metavar="", action="append", default=[], - choices=["all", "summary", "clustering", "duplicates", "files"], + choices=["all", "summary", "clustering", "duplicates"], help=_help_string( "Generate a report of the specified type:", "- summary: code divergence information", @@ -246,10 +246,6 @@ def report_enabled(name): if report_enabled("summary"): report.summary(setmap) - # Print files report - if report_enabled("files"): - report.files(codebase, state) - # Print clustering report if report_enabled("clustering"): basename = os.path.basename(args.analysis_file) diff --git a/codebasin/report.py b/codebasin/report.py index 0270a4f..fb7638f 100644 --- a/codebasin/report.py +++ b/codebasin/report.py @@ -649,9 +649,11 @@ def insert( def _print( self, node: Node, + depth: int = 0, prefix: str = "", connector: str = "", fancy: bool = True, + levels: int = None, ): """ Recursive helper function to print all nodes in a FileTree. @@ -669,7 +671,14 @@ def _print( fancy: bool, default: True Whether to use fancy formatting (including colors). + + levels: int, optional + The maximum number of levels to print. """ + # Skip this node and its children if we have hit the maximum depth. + if levels is not None and depth > levels: + return [] + if fancy: dash = "\u2500" cont = "\u251C" @@ -722,14 +731,16 @@ def _print( next_connector = cont lines += self._print( node.children[name], + depth + 1, next_prefix, next_connector, fancy, + levels, ) return lines - def write_to(self, stream: TextIO): + def write_to(self, stream: TextIO, levels: int = None): """ Write the FileTree to the specified stream. @@ -737,8 +748,12 @@ def write_to(self, stream: TextIO): ---------- stream: TextIO The text stream to write to. + + levels: int, optional + The maximum number of levels to print. + If 0, print only the top-level summary.
""" - lines = self._print(self.root, fancy=stream.isatty()) + lines = self._print(self.root, fancy=stream.isatty(), levels=levels) output = "\n".join(lines) if not stream.isatty(): output = _strip_colors(output) @@ -748,7 +763,10 @@ def write_to(self, stream: TextIO): def files( codebase: CodeBase, state: ParserState | None = None, + *, stream: TextIO = sys.stdout, + prune: bool = False, + levels: int = None, ): """ Produce a file tree representing the code base. @@ -787,11 +805,13 @@ def files( ): platform = frozenset(association[node]) setmap[platform] += node.num_lines + if prune: + # Prune unused files from the tree. + platforms = set().union(*setmap.keys()) + if len(platforms) == 0: + continue tree.insert(f, setmap) - print("", file=stream) - print(_heading("Files", stream), file=stream) - # Print a legend. legend = [] legend += ["Legend:"] @@ -814,4 +834,4 @@ def files( print(legend, file=stream) # Print the tree. - tree.write_to(stream) + tree.write_to(stream, levels=levels) diff --git a/codebasin/tree.py b/codebasin/tree.py new file mode 100755 index 0000000..c6a2408 --- /dev/null +++ b/codebasin/tree.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python3 +# Copyright (C) 2019-2024 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause + +import argparse +import logging +import os +import sys + +from codebasin import CodeBase, config, finder, report, util + +# TODO: Refactor to avoid imports from __main__ +from codebasin.__main__ import Formatter, _help_string, version + +log = logging.getLogger("codebasin") + + +def _build_parser() -> argparse.ArgumentParser: + """ + Build argument parser. 
+ """ + parser = argparse.ArgumentParser( + description="CBI Tree Tool " + version, + formatter_class=argparse.RawTextHelpFormatter, + add_help=False, + ) + parser.add_argument( + "-h", + "--help", + action="help", + help=_help_string("Display help message and exit."), + ) + parser.add_argument( + "--version", + action="version", + version=f"CBI Tree Tool {version}", + help=_help_string("Display version information and exit."), + ) + parser.add_argument( + "-x", + "--exclude", + dest="excludes", + metavar="", + action="append", + default=[], + help=_help_string( + "Exclude files matching this pattern from the code base.", + "May be specified multiple times.", + is_long=True, + ), + ) + parser.add_argument( + "-p", + "--platform", + dest="platforms", + metavar="", + action="append", + default=[], + help=_help_string( + "Include the specified platform in the analysis.", + "May be specified multiple times.", + "If not specified, all platforms will be included.", + is_long=True, + ), + ) + parser.add_argument( + "--prune", + dest="prune", + action="store_true", + help=_help_string( + "Prune unused files from the tree.", + ), + ) + parser.add_argument( + "-L", + "--levels", + dest="levels", + metavar="", + type=int, + help=_help_string( + "Print only the specified number of levels.", + is_long=True, + is_last=True, + ), + ) + + parser.add_argument( + "analysis_file", + metavar="", + help=_help_string( + "TOML file describing the analysis to be performed, " + + "including the codebase and platform descriptions.", + is_last=True, + ), + ) + + return parser + + +def _tree(args: argparse.Namespace): + # Refuse to print a tree with no levels, consistent with tree utility. + if args.levels is not None and args.levels <= 0: + raise ValueError("Number of levels must be greater than 0.") + + # TODO: Refactor this to avoid duplication in __main__ + # Determine the root directory based on where codebasin is run.
+ rootdir = os.path.abspath(os.getcwd()) + + # Set up a default configuration object. + configuration = {} + + # Load the analysis file if it exists. + if args.analysis_file is not None: + path = os.path.abspath(args.analysis_file) + if os.path.exists(path): + if not os.path.splitext(path)[1] == ".toml": + raise RuntimeError(f"Analysis file {path} must end in .toml.") + + with open(path, "rb") as f: + analysis_toml = util._load_toml(f, "analysis") + + if "codebase" in analysis_toml: + if "exclude" in analysis_toml["codebase"]: + args.excludes += analysis_toml["codebase"]["exclude"] + + for name in args.platforms: + if name not in analysis_toml["platform"].keys(): + raise KeyError( + f"Platform {name} requested on the command line " + + "does not exist in the configuration file.", + ) + + cmd_platforms = args.platforms.copy() + for name in analysis_toml["platform"].keys(): + if cmd_platforms and name not in cmd_platforms: + continue + if "commands" not in analysis_toml["platform"][name]: + raise ValueError(f"Missing 'commands' for platform {name}") + p = analysis_toml["platform"][name]["commands"] + db = config.load_database(p, rootdir) + args.platforms.append(name) + configuration.update({name: db}) + + # Construct a codebase object associated with the root directory. + codebase = CodeBase(rootdir, exclude_patterns=args.excludes) + + # Parse the source tree, and determine source line associations. + # The trees and associations are housed in state. + state = finder.find( + rootdir, + codebase, + configuration, + show_progress=True, + ) + + # Print the file tree. 
+ report.files(codebase, state, prune=args.prune, levels=args.levels) + return 0 + + +def cli(argv: list[str]) -> int: + parser = _build_parser() + args = parser.parse_args(argv) + + # Configure logging such that: + # - Only errors are written to the terminal + log.setLevel(logging.DEBUG) + + stderr_handler = logging.StreamHandler(sys.stderr) + stderr_handler.setLevel(logging.ERROR) + stderr_handler.setFormatter(Formatter(colors=sys.stderr.isatty())) + log.addHandler(stderr_handler) + + return _tree(args) + + +def main(): + try: + sys.exit(cli(sys.argv[1:])) + except Exception as e: + log.error(str(e)) + sys.exit(1) + + +if __name__ == "__main__": + sys.argv[0] = "codebasin.tree" + main() diff --git a/pyproject.toml b/pyproject.toml index dc113f3..59b8bc7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ [project.scripts] codebasin = "codebasin:__main__.main" cbi-cov = "codebasin.coverage:__main__.main" +cbi-tree = "codebasin:tree.main" [project.urls] "Github" = "https://www.github.com/intel/code-base-investigator" diff --git a/tests/files/test_filetree.py b/tests/files/test_filetree.py index 22ed3c4..2583a24 100644 --- a/tests/files/test_filetree.py +++ b/tests/files/test_filetree.py @@ -32,7 +32,6 @@ def setUpClass(self): self.path = Path(self.tmp.name) open(self.path / "file.cpp", mode="w").close() open(self.path / "other.cpp", mode="w").close() - open(self.path / "unused.cpp", mode="w").close() os.symlink(self.path / "file.cpp", self.path / "symlink.cpp") @classmethod @@ -153,6 +152,115 @@ def test_report(self): self.assertTrue("[-B | 1 | 100.00 | 50.00]" in output) self.assertTrue("[A- | 1 | 100.00 | 50.00]" in output) + def test_levels(self): + """Check report --levels flag works correctly""" + # Set up subdirectories for this test + tmp = tempfile.TemporaryDirectory() + path = Path(tmp.name) + os.makedirs(path / "first" / "second" / "third") + open(path / "first" / "one.cpp", mode="w").close() + open(path / "first" / "second" /
"two.cpp", mode="w").close() + + codebase = CodeBase(path) + configuration = { + "X": [ + { + "file": str(path / "first" / "one.cpp"), + "defines": [], + "include_paths": [], + "include_files": [], + }, + ], + "Y": [ + { + "file": str(path / "first" / "second" / "two.cpp"), + "defines": [], + "include_paths": [], + "include_files": [], + }, + ], + } + state = finder.find( + path, + codebase, + configuration, + show_progress=False, + ) + + # By default, we should see all levels of the tree. + stream = io.StringIO() + report.files(codebase, state, stream=stream) + output = stream.getvalue() + self.assertTrue(str(path) in output) + self.assertTrue("first/" in output) + self.assertTrue("one.cpp" in output) + self.assertTrue("two.cpp" in output) + + # With two levels, the "second" directory should be collapsed. + # This will hide "two.cpp" from the output. + stream = io.StringIO() + report.files(codebase, state, stream=stream, levels=2) + output = stream.getvalue() + self.assertTrue(str(path) in output) + self.assertTrue("first/" in output) + self.assertTrue("one.cpp" in output) + self.assertFalse("two.cpp" in output) + + # With just one level, the "first" directory should be collapsed. + # This will hide "one.cpp" and "two.cpp" from the output. 
+ stream = io.StringIO() + report.files(codebase, state, stream=stream, levels=1) + output = stream.getvalue() + self.assertTrue(str(path) in output) + self.assertTrue("first/" in output) + self.assertFalse("one.cpp" in output) + self.assertFalse("two.cpp" in output) + + tmp.cleanup() + + def test_prune(self): + """Check report --prune flag works correctly""" + # Set up subdirectories for this test + tmp = tempfile.TemporaryDirectory() + path = Path(tmp.name) + with open(path / "foo.cpp", mode="w") as f: + f.write("void foo();") + open(path / "unused.cpp", mode="w").close() + + codebase = CodeBase(path) + configuration = { + "X": [ + { + "file": str(path / "foo.cpp"), + "defines": [], + "include_paths": [], + "include_files": [], + }, + ], + } + state = finder.find( + path, + codebase, + configuration, + show_progress=False, + ) + + # By default, we should see both used and unused files. + stream = io.StringIO() + report.files(codebase, state, stream=stream) + output = stream.getvalue() + self.assertTrue("foo.cpp" in output) + self.assertTrue("unused.cpp" in output) + + # With prune, we should only see the used files. + stream = io.StringIO() + report.files(codebase, state, stream=stream, prune=True) + output = stream.getvalue() + self.assertTrue("foo.cpp" in output) + self.assertFalse("unused.cpp" in output) + + tmp.cleanup() + if __name__ == "__main__": unittest.main()