diff --git a/clang/tools/3c/utils/port_tools/common.py b/clang/tools/3c/utils/port_tools/common.py deleted file mode 100644 index 5489df6bfbf2..000000000000 --- a/clang/tools/3c/utils/port_tools/common.py +++ /dev/null @@ -1,32 +0,0 @@ -# Data structures that need to be imported by both generate_ccommands and -# expand_macros. - -from typing import List, NamedTuple -import functools -import os - -# We are assuming it's OK to cache canonical paths for the lifetime of any -# process that uses this code. -realpath_cached = functools.lru_cache(maxsize=None)(os.path.realpath) - - -class TranslationUnitInfo(NamedTuple): - compiler_path: str - # Any file paths in compiler_args (-I, etc.), input_filename, and - # output_filename may be relative to target_directory. - compiler_args: List[str] - target_directory: str - input_filename: str - output_filename: str - - def realpath(self, path: str): - return realpath_cached(os.path.join(self.target_directory, path)) - - # Perhaps this could be cached. It's not a big cost though. - @property - def input_realpath(self): - return self.realpath(self.input_filename) - - @property - def output_realpath(self): - return self.realpath(self.output_filename) diff --git a/clang/tools/3c/utils/port_tools/convert_project_common.py b/clang/tools/3c/utils/port_tools/convert_project_common.py new file mode 100644 index 000000000000..3061005ec6fd --- /dev/null +++ b/clang/tools/3c/utils/port_tools/convert_project_common.py @@ -0,0 +1,140 @@ +# Data structures that need to be imported by both generate_ccommands and +# expand_macros. + +from dataclasses import dataclass, field +from typing import List +import functools +import os +import re + +# We are assuming it's OK to cache canonical paths for the lifetime of any +# process that uses this code. 
# Process-wide memoized version of os.path.realpath. The cache is unbounded:
# we are assuming it's OK to cache canonical paths for the lifetime of any
# process that uses this code.
realpath_cached = functools.lru_cache(maxsize=None)(os.path.realpath)


@dataclass
class TranslationUnitInfo:
    """One compilation database entry: how a single source file is compiled.

    `common_compiler_args` and `output_filename` are derived lazily from
    `compiler_args` the first time either one is read.
    """

    compiler_path: str
    # Any file paths in compiler_args (-I, etc.), input_filename, and
    # output_filename may be relative to working_directory.
    compiler_args: List[str]
    working_directory: str
    input_filename: str

    # compiler_args with -c, -o, and the input filename removed so the macro
    # expander can add its own options. Don't compute this until it's requested
    # because it makes some assertions that may fail in some scenarios.
    @functools.cached_property
    def common_compiler_args(self) -> List[str]:
        # The scan stores both derived attributes on the instance, so the
        # lookup below reads the stored value instead of re-entering this
        # property.
        self.__scan_compiler_args()
        return self.common_compiler_args

    # Output file named by `-o`, or the input filename with `.c` replaced by
    # `.o` if there is no `-o` option. Computed lazily, like
    # common_compiler_args.
    @functools.cached_property
    def output_filename(self) -> str:
        self.__scan_compiler_args()
        return self.output_filename

    def __scan_compiler_args(self) -> None:
        """Compute and store common_compiler_args and output_filename.

        Only the separate `-o FILE` form of the output option is recognized.
        Raises AssertionError if the arguments do not have the expected
        shape; in that case neither attribute is stored.
        """
        assert self.compiler_args and (
            self.input_filename == self.compiler_args[-1]), (
                'TranslationUnitInfo.__scan_compiler_args expects the last '
                'compiler argument to be the input filename.')
        args_without_input = self.compiler_args[:-1]
        assert self.input_filename.endswith('.c'), (
            'TranslationUnitInfo.__scan_compiler_args currently only supports '
            'C source files.')
        # Default; overwritten if we see an `-o` option later.
        # TODO: Use removesuffix once we require Python >= 3.9.
        output_filename = self.input_filename[:-len('.c')] + '.o'
        common_compiler_args = []
        remaining_args = iter(args_without_input)
        for arg in remaining_args:
            if arg == '-c':
                continue
            if arg == '-o':
                # The output filename is the argument that follows `-o`.
                following = next(remaining_args, None)
                assert following is not None, (
                    'TranslationUnitInfo.__scan_compiler_args found `-o` '
                    'without an output filename.')
                output_filename = following
                continue
            common_compiler_args.append(arg)
        # Store both results together so a failed assertion above never leaves
        # the object half-initialized.
        self.output_filename = output_filename
        self.common_compiler_args = common_compiler_args

    def fullpath(self, path: str) -> str:
        """Join `path` to working_directory and normalize it lexically.

        Unlike `realpath`, this does not resolve symbolic links.
        """
        return os.path.normpath(os.path.join(self.working_directory, path))

    def realpath(self, path: str) -> str:
        """Return the (cached) canonical path of `path`, taken relative to
        working_directory."""
        return realpath_cached(os.path.join(self.working_directory, path))

    @functools.cached_property
    def input_realpath(self) -> str:
        return self.realpath(self.input_filename)

    @functools.cached_property
    def output_fullpath(self) -> str:
        return self.fullpath(self.output_filename)

    @functools.cached_property
    def output_realpath(self) -> str:
        return self.realpath(self.output_filename)


CompilationDatabase = List[TranslationUnitInfo]


def assert_no_duplicate_outputs(compdb: CompilationDatabase) -> None:
    """Raise AssertionError if two entries of `compdb` have the same
    `output_fullpath`."""
    output_fullpaths = set()
    for tu in compdb:
        assert tu.output_fullpath not in output_fullpaths, (
            'Multiple compilation database entries with output file '
            f'{tu.output_fullpath}: not supported by this tool.')
        # Remember this output so a later entry with the same path is caught.
        output_fullpaths.add(tu.output_fullpath)


# Each escaped argument consists of a sequence of one or more of the following
# units: a double-quoted string of zero or more ordinary characters (not \ or
# ") or escape sequences (\\ or \"), or a single escape sequence or ordinary
# character other than a space. The pattern also consumes any spaces around the
# argument.
#
# We have to compile the regex in order to use the `pos` argument of `match`.
_ESCAPED_ARG_RE = re.compile(
    r' *((?:\"(?:[^\\"]|\\[\\"])*\"|[^\\" ]|\\[\\"])+) *')

# Matches either a bare double quote (to be dropped) or an escape sequence
# (whose escaped character is captured in group 1).
_QUOTE_OR_ESCAPE_RE = re.compile(r'"|\\([\\"])')


def unescape_compdb_command(command_str: str) -> List[str]:
    """Split the `command` string of a compilation database entry into a list
    of unescaped arguments.

    Raises AssertionError if the string is not properly escaped.
    """
    # See the specification of how `command` is escaped in
    # clang/docs/JSONCompilationDatabase.rst. Clang's actual implementation in
    # `unescapeCommandLine` in clang/lib/Tooling/JSONCompilationDatabase.cpp is
    # somewhat fancier, but this should be good enough for us.
    args = []
    # Skip any leading spaces.
    pos = re.match(' *', command_str).end()
    while pos < len(command_str):
        # Look for the next well-formed escaped argument and ignore any spaces
        # after it. If we're not at the end of the string but we can't match
        # another well-formed escaped argument, that means the command is
        # invalid.
        m = _ESCAPED_ARG_RE.match(command_str, pos=pos)
        assert m, ('Improperly escaped command in compilation database: ' +
                   command_str)
        # Now decode escape sequences and remove double quotes that are not part
        # of escape sequences. `sub` finds non-overlapping matches from left to
        # right, so it won't start a match in the middle of an escape sequence.
        args.append(_QUOTE_OR_ESCAPE_RE.sub(r'\1', m.group(1)))
        pos = m.end()
    return args


def compdb_entry_from_json(j) -> TranslationUnitInfo:
    """Build a TranslationUnitInfo from one decoded JSON compilation database
    entry.

    The command line is taken from `arguments` if present, otherwise from the
    escaped `command` string. Raises AssertionError if the entry has neither,
    or if the command line is empty.
    """
    input_filename = j['file']
    working_directory = j['directory']
    if 'arguments' in j:
        args = j['arguments']
    elif 'command' in j:
        args = unescape_compdb_command(j['command'])
    else:
        raise AssertionError('Compilation database entry has no command')
    # The first argument is the compiler itself, so there must be at least one.
    assert args, 'Compilation database entry has an empty command'
    compiler_path = args[0]
    compiler_args = args[1:]
    # TODO: Should we honor j['output'] if it is set?
    return TranslationUnitInfo(compiler_path, compiler_args, working_directory,
                               input_filename)


def compdb_from_json(j) -> CompilationDatabase:
    """Convert a decoded JSON compilation database (a sequence of entries)
    into a list of TranslationUnitInfo objects."""
    return [compdb_entry_from_json(ej) for ej in j]
- +import argparse from typing import List, NamedTuple, Dict import collections import logging import os import re import subprocess -from common import TranslationUnitInfo, realpath_cached +from convert_project_common import (TranslationUnitInfo, realpath_cached, assert_no_duplicate_outputs) class ExpandMacrosOptions(NamedTuple): @@ -91,8 +91,8 @@ def preprocess(tu: TranslationUnitInfo, input_filename = (custom_input_filename if custom_input_filename is not None else tu.input_filename) subprocess.check_call([tu.compiler_path, '-E', '-o', out_fname] + - tu.compiler_args + [input_filename], - cwd=tu.target_directory) + tu.common_compiler_args + [input_filename], + cwd=tu.working_directory) def expandMacros(opts: ExpandMacrosOptions, compilation_base_dir: str, @@ -103,12 +103,7 @@ def expandMacros(opts: ExpandMacrosOptions, compilation_base_dir: str, # If this somehow happens (e.g., it happened in one build configuration of # thttpd), fail up front rather than producing mysterious verification # failures later. - tu_output_realpaths = set() - for tu in translation_units: - assert tu.output_realpath not in tu_output_realpaths, ( - f'Multiple compilation database entries with output file ' - f'{tu.output_realpath}: not supported by expand_macros') - tu_output_realpaths.add(tu.output_realpath) + assert_no_duplicate_outputs(translation_units) compilation_base_dir = realpath_cached(compilation_base_dir) @@ -291,3 +286,15 @@ def expandMacros(opts: ExpandMacrosOptions, compilation_base_dir: str, verification_ok = False assert verification_ok, ( 'Verification of preprocessed output failed: see diffs above.') + + +if __name__ == '__main__': + # TODO: Provide a LibTooling-like CLI that takes either `-p` or + # `FILENAMES -- FIXED_COMPILER_OPTIONS`. Probably check for `--` first and + # then call argparse, analogous to how LibTooling CommonOptionsParser uses + # the LLVM CommandLine library. 
+ # TODO: Factor out the macro-related argparse stuff from convert_project so + # it can be added to either ArgumentParser. + #parser = argparse.ArgumentParser() + # FIXME: Clarify assumptions about base dir versus working dir. + raise NotImplementedError diff --git a/clang/tools/3c/utils/port_tools/generate_ccommands.py b/clang/tools/3c/utils/port_tools/generate_ccommands.py index d292a7a1cb84..f13ef287c8b9 100644 --- a/clang/tools/3c/utils/port_tools/generate_ccommands.py +++ b/clang/tools/3c/utils/port_tools/generate_ccommands.py @@ -4,251 +4,87 @@ from typing import List import re import os -import sys import json -import traceback import subprocess import logging -from common import TranslationUnitInfo +from convert_project_common import TranslationUnitInfo, compdb_from_json from expand_macros import expandMacros, ExpandMacrosOptions SLASH = os.sep -# file in which the individual commands will be stored -INDIVIDUAL_COMMANDS_FILE = os.path.realpath("convert_individual.sh") # file in which the total commands will be stored. TOTAL_COMMANDS_FILE = os.path.realpath("convert_all.sh") -VSCODE_SETTINGS_JSON = os.path.realpath("settings.json") - # to separate multiple commands in a line -CMD_SEP = " &&" DEFAULT_ARGS = ["-dump-stats"] +# XXX: Do we actually intend for convert_project to support Windows? We haven't +# tested that in a long time. 
if os.name == "nt": DEFAULT_ARGS.append("-extra-arg-before=--driver-mode=cl") - CMD_SEP = " ;" - - -class VSCodeJsonWriter(): - - def __init__(self): - self.clangd_path = "" - self.args = [] - - def setClangdPath(self, cdpath): - self.clangd_path = cdpath - - def addClangdArg(self, arg): - if isinstance(arg, list): - self.args.extend(arg) - else: - self.args.append(arg) - - def writeJsonFile(self, outputF): - fp = open(outputF, "w") - fp.write("{\"clangd.path\":\"" + self.clangd_path + "\",\n") - fp.write("\"clangd.arguments\": [\n") - argsstrs = map(lambda x: "\"" + x + "\"", self.args) - argsstrs = ",\n".join(argsstrs) - fp.write(argsstrs) - fp.write("]\n") - fp.write("}") - fp.close() - - -def getCheckedCArgs(argument_list): - """ - Adjust the compilation arguments. This is now used only by - expand_macros_before_conversion since 3c takes the arguments directly from - the compilation database. Thus, we no longer use -extra-arg-before here. - - :param argument_list: list of compiler argument. - :return: (checked c args, output filename) - """ - # New approach: Rather than keeping only specific flags, try keeping - # everything except `-c` (because we will add `-E` if we preprocess the - # translation unit) and the source file name (assumed to be the last - # argument) because it's hard to know what flags different benchmarks might - # be using that might affect the default preprocessor state. We rely on - # setting the working directory instead of trying to recognize all paths - # that might need to be made absolute here. - clang_x_args = [] - source_filename = argument_list[-1] - assert source_filename.endswith('.c') - # By default; may be overwritten below. - output_filename = source_filename[:-len('.c')] + '.o' - idx = 0 - while idx < len(argument_list) - 1: - arg = argument_list[idx] - idx += 1 - if arg == '-c': - pass - elif arg == '-o': - # Remove the output filename from the argument list and save it - # separately. 
- output_filename = argument_list[idx] - idx += 1 - else: - clang_x_args.append(arg) - # Disable all Clang warnings. Generally, we don't want to do anything about - # them and they are just distracting. - clang_x_args.append('-w') - return (clang_x_args, output_filename) - - -def tryFixUp(s): - """ - Fix-up for a failure between cmake and nmake. - """ - b = open(s, 'r').read() - b = re.sub(r'@<<\n', "", b) - b = re.sub(r'\n<<', "", b) - f = open(s, 'w') - f.write(b) - f.close() - return # We no longer take the checkedc_include_dir here because we assume the working # tree is set up so that the Checked C headers get used automatically by 3c. -def run3C(checkedc_bin, +def run3C(_3c_bin, extra_3c_args, compilation_base_dir, compile_commands_json, skip_paths, expand_macros_opts: ExpandMacrosOptions, - skip_running=False, - run_individual=False): - global INDIVIDUAL_COMMANDS_FILE - global TOTAL_COMMANDS_FILE - runs = 0 - cmds = None - filters = [] - for i in skip_paths: - filters.append(re.compile(i)) - while runs < 2: - runs = runs + 1 - try: - cmds = json.load(open(compile_commands_json, 'r')) - except: - traceback.print_exc() - tryFixUp(compile_commands_json) - - if cmds == None: - logging.error("failed to get commands from compile commands json:" + - compile_commands_json) - return + skip_running=False): + filters = [re.compile(i) for i in skip_paths] + with open(compile_commands_json) as cdb_f: + compdb = compdb_from_json(json.load(cdb_f)) translation_units: List[TranslationUnitInfo] = [] all_files = [] - for i in cmds: - file_to_add = i['file'] - compiler_path = None # XXX Clean this up - compiler_x_args = [] - output_filename = None - target_directory = "" - if file_to_add.endswith(".cpp"): + for tu in compdb: + if tu.input_filename.endswith(".cpp"): continue # Checked C extension doesn't support cpp files yet - - # BEAR uses relative paths for 'file' rather than absolute paths. 
It - # also has a field called 'arguments' instead of 'command' in the cmake - # style. Use that to detect BEAR and add the directory. - if 'arguments' in i and not 'command' in i: - # BEAR. Need to add directory. - file_to_add = i['directory'] + SLASH + file_to_add - compiler_path = i['arguments'][0] - # get the compiler arguments - (compiler_x_args, - output_filename) = getCheckedCArgs(i["arguments"][1:]) - # get the directory used during compilation. - target_directory = i['directory'] - file_to_add = os.path.realpath(file_to_add) - matched = False - for j in filters: - if j.match(file_to_add) is not None: - matched = True - if not matched: - all_files.append(file_to_add) - tu = TranslationUnitInfo(compiler_path, compiler_x_args, - target_directory, file_to_add, - output_filename) - translation_units.append(tu) + file_realpath = tu.input_realpath() + file_relative_to_basedir = os.path.relpath(file_realpath, + compilation_base_dir) + # Only let filters match path components under the base dir. For + # example, we might have a filter `test` that is intended to exclude a + # subdirectory of the base dir, but if the user put the base dir at + # something like `~/test/foo-benchmark`, we don't want to exclude + # everything in the base dir. 
+ if any(j.match(file_relative_to_basedir) for j in filters): + continue + all_files.append(file_realpath) + translation_units.append(tu) expandMacros(expand_macros_opts, compilation_base_dir, translation_units) - prog_name = checkedc_bin - f = open(INDIVIDUAL_COMMANDS_FILE, 'w') - f.write("#!/bin/bash\n") - for tu in translation_units: - args = [] - # get the command to change the working directory - target_directory = tu.target_directory - change_dir_cmd = "" - if len(target_directory) > 0: - change_dir_cmd = "cd " + target_directory + CMD_SEP - else: - # default working directory - target_directory = os.getcwd() - args.append(prog_name) - args.extend(DEFAULT_ARGS) - args.extend(extra_3c_args) - # Even when we run 3c on a single file, we can let it read the compiler - # options from the compilation database. - args.append('-p') - args.append(compile_commands_json) - # ...but we need to add -w, as in getCheckedCArgs. - args.append('-extra-arg=-w') - args.append('-base-dir="' + compilation_base_dir + '"') - args.append('-output-dir="' + compilation_base_dir + '/out.checked"') - args.append(tu.input_filename) - # run individual commands. - if run_individual: - logging.debug("Running:" + ' '.join(args)) - subprocess.check_call(' '.join(args), - cwd=target_directory, - shell=True) - # prepend the command to change the working directory. - if len(change_dir_cmd) > 0: - args = [change_dir_cmd] + args - f.write(" \\\n".join(args)) - f.write("\n") - f.close() - logging.debug("Saved all the individual commands into the file:" + - INDIVIDUAL_COMMANDS_FILE) - os.system("chmod +x " + INDIVIDUAL_COMMANDS_FILE) - - vcodewriter = VSCodeJsonWriter() - # get path to clangd3c - # - # clangd3c is believed not to work, but since this code has been here for a - # while and no one has been bothered by the fact that it didn't work, we - # won't bother removing it now; hopefully clangd3c will eventually be back. 
- vcodewriter.setClangdPath( - os.path.join(os.path.dirname(prog_name), "clangd3c")) args = [] - args.append(prog_name) + args.append(_3c_bin) args.extend(DEFAULT_ARGS) args.extend(extra_3c_args) args.append('-p') args.append(compile_commands_json) + # Disable all compiler warnings during the 3C input loading phase. + # Generally, we don't want to do anything about them and they are just + # distracting. args.append('-extra-arg=-w') - vcodewriter.addClangdArg("-log=verbose") - vcodewriter.addClangdArg(args[1:]) args.append('-base-dir="' + compilation_base_dir + '"') - vcodewriter.addClangdArg('-base-dir=' + compilation_base_dir) # Try to choose a name unlikely to collide with anything in any real # project. args.append('-output-dir="' + compilation_base_dir + '/out.checked"') args.extend(list(set(all_files))) - vcodewriter.addClangdArg(list(set(all_files))) - vcodewriter.writeJsonFile(VSCODE_SETTINGS_JSON) - f = open(TOTAL_COMMANDS_FILE, 'w') - f.write("#!/bin/bash\n") - f.write(" \\\n".join(args)) - f.close() + with open(TOTAL_COMMANDS_FILE, 'w') as f: + # Using an array literal rather than backslashes makes it easy for the + # user to comment out individual lines. 
+ arg_lines = '\n'.join(args) + f.write(f'''\ +#!/bin/bash +args=( +{arg_lines} +) +"${{args[@]}}" +''') os.system("chmod +x " + TOTAL_COMMANDS_FILE) # run whole command - if not run_individual and not skip_running: + if not skip_running: logging.info("Running:" + str(' '.join(args))) subprocess.check_call(' '.join(args), shell=True) logging.debug("Saved the total command into the file:" + @@ -257,9 +93,3 @@ def run3C(checkedc_bin, compilation_base_dir, os.path.basename(TOTAL_COMMANDS_FILE))) logging.debug("Saved to:" + os.path.join( compilation_base_dir, os.path.basename(TOTAL_COMMANDS_FILE))) - os.system("cp " + INDIVIDUAL_COMMANDS_FILE + " " + os.path.join( - compilation_base_dir, os.path.basename(INDIVIDUAL_COMMANDS_FILE))) - logging.debug("Saved to:" + os.path.join( - compilation_base_dir, os.path.basename(INDIVIDUAL_COMMANDS_FILE))) - logging.debug("VSCode Settings json saved to:" + VSCODE_SETTINGS_JSON) - return