Skip to content

Overhaul of convert_project and related tools #724

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 0 additions & 32 deletions clang/tools/3c/utils/port_tools/common.py

This file was deleted.

140 changes: 140 additions & 0 deletions clang/tools/3c/utils/port_tools/convert_project_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Data structures that need to be imported by both generate_ccommands and
# expand_macros.

from dataclasses import dataclass, field
from typing import List
import functools
import os
import re

# Memoized wrapper around os.path.realpath. The cache is unbounded
# (maxsize=None) and is never invalidated.
# We are assuming it's OK to cache canonical paths for the lifetime of any
# process that uses this code.
realpath_cached = functools.lru_cache(maxsize=None)(os.path.realpath)


@dataclass
class TranslationUnitInfo:
    """A single compiler invocation: compiler, arguments, directory and input.

    The four dataclass fields are stored exactly as given.
    `common_compiler_args` and `output_filename` are derived from
    `compiler_args` the first time either one is read; see
    `__scan_compiler_args` for the assumptions that derivation makes.
    """
    compiler_path: str
    # Any file paths in compiler_args (-I, etc.), input_filename, and
    # output_filename may be relative to working_directory.
    compiler_args: List[str]
    working_directory: str
    input_filename: str

    # compiler_args with -c, -o, and the input filename removed so the macro
    # expander can add its own options. Don't compute this until it's requested
    # because it makes some assertions that may fail in some scenarios.
    @functools.cached_property
    def common_compiler_args(self) -> List[str]:
        self.__scan_compiler_args()
        # cached_property is a non-data descriptor, so the value the scan just
        # stored on the instance shadows this property for the lookup below.
        return self.common_compiler_args

    # Output file named by `-o`, or the input filename with `.c` replaced by
    # `.o` if there is no `-o`. Computed lazily for the same reason as
    # common_compiler_args.
    @functools.cached_property
    def output_filename(self) -> str:
        self.__scan_compiler_args()
        # Same shadowing trick as in common_compiler_args.
        return self.output_filename

    def __scan_compiler_args(self) -> None:
        """Derive and store `common_compiler_args` and `output_filename`.

        Raises:
            AssertionError: if the last compiler argument is not the input
                filename, if the input filename does not end in `.c`, or if
                an `-o` option has no argument after it.
        """
        assert self.input_filename == self.compiler_args[-1], (
            'TranslationUnitInfo.__scan_compiler_args expects the last '
            'compiler argument to be the input filename.')
        args_without_input = self.compiler_args[:-1]
        assert self.input_filename.endswith('.c'), (
            'TranslationUnitInfo.__scan_compiler_args currently only supports '
            'C source files.')
        # Default; overwritten if we see an `-o` option later.
        # TODO: Use removesuffix once we require Python >= 3.9.
        # NOTE(review): this keeps any directory component of input_filename,
        # whereas compilers typically write the default object file into the
        # working directory under the input's base name -- confirm which is
        # intended.
        output_filename = self.input_filename[:-len('.c')] + '.o'
        common_args = []
        idx = 0
        while idx < len(args_without_input):
            arg = args_without_input[idx]
            idx += 1
            if arg == '-c':
                pass
            elif arg == '-o':
                assert idx < len(args_without_input), (
                    'TranslationUnitInfo.__scan_compiler_args found an `-o` '
                    'option without an output filename.')
                output_filename = args_without_input[idx]
                idx += 1
            else:
                common_args.append(arg)
        # Publish both results only after the whole scan has succeeded, so a
        # failed scan never leaves half-computed values cached on the instance.
        self.output_filename = output_filename
        self.common_compiler_args = common_args

    def fullpath(self, path: str) -> str:
        """Return `path` joined onto working_directory and normalized.

        Uses os.path.normpath only, so symlinks are not resolved.
        """
        return os.path.normpath(os.path.join(self.working_directory, path))

    def realpath(self, path: str) -> str:
        """Return the (cached) canonical path of `path` under working_directory."""
        return realpath_cached(os.path.join(self.working_directory, path))

    @functools.cached_property
    def input_realpath(self) -> str:
        return self.realpath(self.input_filename)

    @functools.cached_property
    def output_fullpath(self) -> str:
        return self.fullpath(self.output_filename)

    @functools.cached_property
    def output_realpath(self) -> str:
        return self.realpath(self.output_filename)


# Type alias: a compilation database is represented as a plain list of
# TranslationUnitInfo entries.
CompilationDatabase = List[TranslationUnitInfo]


def assert_no_duplicate_outputs(compdb):
    """Assert that no two entries of `compdb` share an output file.

    Args:
        compdb: iterable of objects with an `output_fullpath` attribute
            (TranslationUnitInfo entries).

    Raises:
        AssertionError: if two entries have the same `output_fullpath`.
    """
    output_fullpaths = set()
    for tu in compdb:
        assert tu.output_fullpath not in output_fullpaths, (
            'Multiple compilation database entries with output file '
            f'{tu.output_fullpath}: not supported by this tool.')
        # Record the path; without this the check above could never fire.
        output_fullpaths.add(tu.output_fullpath)


def unescape_compdb_command(command_str):
    """Split an escaped compilation-database `command` string into arguments.

    Args:
        command_str: the `command` value of a compilation database entry.

    Returns:
        The list of unescaped arguments (empty if `command_str` is empty or
        contains only spaces).

    Raises:
        AssertionError: if `command_str` is not properly escaped.
    """
    # See the specification of how `command` is escaped in
    # clang/docs/JSONCompilationDatabase.rst. Clang's actual implementation in
    # `unescapeCommandLine` in clang/lib/Tooling/JSONCompilationDatabase.cpp is
    # somewhat fancier, but this should be good enough for us.
    #
    # Each escaped argument consists of a sequence of one or more of the
    # following units: a double-quoted string of zero or more ordinary
    # characters (not \ or ") or escape sequences (\\ or \"), or a single
    # escape sequence or ordinary character other than a space. Look for the
    # next well-formed escaped argument and ignore any spaces after it. If
    # we're not at the end of the string but we can't match another
    # well-formed escaped argument, that means the command is invalid.
    #
    # We have to compile the regex in order to use the `pos` argument. It is
    # compiled once here rather than on every loop iteration.
    escaped_arg_re = re.compile(
        r' *((?:\"(?:[^\\"]|\\[\\"])*\"|[^\\" ]|\\[\\"])+) *')
    args = []
    # Skip any leading spaces.
    pos = re.match(' *', command_str).end()
    while pos < len(command_str):
        m = escaped_arg_re.match(command_str, pos=pos)
        # Raise explicitly instead of using `assert` so the check still runs
        # under `python -O`.
        if m is None:
            raise AssertionError(
                'Improperly escaped command in compilation database: ' +
                command_str)
        # Now decode escape sequences and remove double quotes that are not
        # part of escape sequences. `re.sub` finds non-overlapping matches
        # from left to right, so it won't start a match in the middle of an
        # escape sequence.
        args.append(re.sub(r'"|\\([\\"])', r'\1', m.group(1)))
        pos = m.end()
    return args


def compdb_entry_from_json(j):
    """Build a TranslationUnitInfo from one decoded compilation database entry.

    `j` must have 'file' and 'directory' keys plus either 'arguments' (used
    as-is) or 'command' (unescaped with unescape_compdb_command). The first
    resulting argument is the compiler path; the rest are its arguments.

    Raises:
        AssertionError: if the entry has neither 'arguments' nor 'command'.
    """
    source_file = j['file']
    directory = j['directory']
    if 'arguments' in j:
        argv = j['arguments']
    elif 'command' in j:
        argv = unescape_compdb_command(j['command'])
    else:
        raise AssertionError('Compilation database entry has no command')
    # TODO: Should we honor j['output'] if it is set?
    return TranslationUnitInfo(
        compiler_path=argv[0],
        compiler_args=argv[1:],
        working_directory=directory,
        input_filename=source_file)


def compdb_from_json(j):
    """Convert each entry of the decoded JSON sequence `j`, preserving order."""
    return list(map(compdb_entry_from_json, j))
27 changes: 17 additions & 10 deletions clang/tools/3c/utils/port_tools/expand_macros.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@
# translation unit using the _original_ compiler options (not using options
# specific to this module such as `undef_macros`) is identical before and after
# our edits.

import argparse
from typing import List, NamedTuple, Dict
import collections
import logging
import os
import re
import subprocess
from common import TranslationUnitInfo, realpath_cached
from convert_project_common import (TranslationUnitInfo, realpath_cached, assert_no_duplicate_outputs)


class ExpandMacrosOptions(NamedTuple):
Expand Down Expand Up @@ -91,8 +91,8 @@ def preprocess(tu: TranslationUnitInfo,
input_filename = (custom_input_filename if custom_input_filename is not None
else tu.input_filename)
subprocess.check_call([tu.compiler_path, '-E', '-o', out_fname] +
tu.compiler_args + [input_filename],
cwd=tu.target_directory)
tu.common_compiler_args + [input_filename],
cwd=tu.working_directory)


def expandMacros(opts: ExpandMacrosOptions, compilation_base_dir: str,
Expand All @@ -103,12 +103,7 @@ def expandMacros(opts: ExpandMacrosOptions, compilation_base_dir: str,
# If this somehow happens (e.g., it happened in one build configuration of
# thttpd), fail up front rather than producing mysterious verification
# failures later.
tu_output_realpaths = set()
for tu in translation_units:
assert tu.output_realpath not in tu_output_realpaths, (
f'Multiple compilation database entries with output file '
f'{tu.output_realpath}: not supported by expand_macros')
tu_output_realpaths.add(tu.output_realpath)
assert_no_duplicate_outputs(translation_units)

compilation_base_dir = realpath_cached(compilation_base_dir)

Expand Down Expand Up @@ -291,3 +286,15 @@ def expandMacros(opts: ExpandMacrosOptions, compilation_base_dir: str,
verification_ok = False
assert verification_ok, (
'Verification of preprocessed output failed: see diffs above.')


if __name__ == '__main__':
    # Standalone command-line use is not implemented yet: running this module
    # as a script always raises NotImplementedError.
    # TODO: Provide a LibTooling-like CLI that takes either `-p` or
    # `FILENAMES -- FIXED_COMPILER_OPTIONS`. Probably check for `--` first and
    # then call argparse, analogous to how LibTooling CommonOptionsParser uses
    # the LLVM CommandLine library.
    # TODO: Factor out the macro-related argparse stuff from convert_project so
    # it can be added to either ArgumentParser.
    #parser = argparse.ArgumentParser()
    # FIXME: Clarify assumptions about base dir versus working dir.
    raise NotImplementedError
Loading