Skip to content

Commit 0ecc226

Browse files
Merge pull request #32 from lwshanbd/baodi/gentoo
2 parents eeb5415 + daae83c commit 0ecc226

File tree

6 files changed

+241
-11
lines changed

6 files changed

+241
-11
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"sources": [],
3+
"folder_name": "boost",
4+
"build_system": "portage",
5+
"package_name": "boost",
6+
"package_spec": "dev-libs/boost",
7+
"license": "GPL-3.0-only"
8+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"sources": [],
3+
"folder_name": "hello",
4+
"build_system": "portage",
5+
"package_name": "hello",
6+
"package_spec": "app-misc/hello",
7+
"license": "GPL-3.0-only"
8+
}

llvm_ir_dataset_utils/builders/builder.py

+19-9
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,10 @@
1111

1212
import ray
1313

14-
from llvm_ir_dataset_utils.builders import (
15-
autoconf_builder,
16-
cargo_builder,
17-
cmake_builder,
18-
julia_builder,
19-
manual_builder,
20-
spack_builder,
21-
swift_builder,
22-
)
14+
from llvm_ir_dataset_utils.builders import (autoconf_builder, cargo_builder,
15+
cmake_builder, julia_builder,
16+
manual_builder, spack_builder,
17+
swift_builder, portage_builder)
2318
from llvm_ir_dataset_utils.sources import source
2419
from llvm_ir_dataset_utils.util import file, licenses
2520

@@ -214,6 +209,21 @@ def parse_and_build_from_description(
214209
build_dir,
215210
cleanup,
216211
)
212+
elif corpus_description["build_system"] == "portage":
213+
if "dependency_futures" in extra_builder_arguments:
214+
dependency_futures = extra_builder_arguments["dependency_futures"]
215+
else:
216+
dependency_futures = []
217+
build_log = portage_builder.build_package(
218+
dependency_futures,
219+
corpus_description["package_name"],
220+
corpus_description["package_spec"],
221+
corpus_dir,
222+
threads,
223+
extra_builder_arguments["buildcache_dir"],
224+
build_dir,
225+
cleanup,
226+
)
217227
elif corpus_description["build_system"] == "julia":
218228
build_log = julia_builder.perform_build(corpus_description["package_name"],
219229
build_dir, corpus_dir, threads)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
"""Module for building and extracting bitcode from applications using Portage"""
2+
3+
import subprocess
4+
import os
5+
import glob
6+
import tempfile
7+
import logging
8+
import pathlib
9+
import shutil
10+
import re
11+
import getpass
12+
import ray
13+
14+
from mlgo.corpus import extract_ir_lib
15+
16+
from llvm_ir_dataset_utils.util import file
17+
from llvm_ir_dataset_utils.util import portage as portage_utils
18+
from llvm_ir_dataset_utils.util import extract_source_lib
19+
20+
BUILD_LOG_NAME = './portage_build.log'
21+
22+
23+
def get_spec_command_vector_section(spec):
  """Splits a package spec string into its whitespace-separated tokens.

  Args:
    spec: The spec string to split.

  Returns:
    A list with the non-empty tokens of spec, in their original order. An
    empty or all-whitespace spec gives an empty list.
  """
  # str.split() without a separator collapses runs of whitespace, so repeated,
  # leading or trailing spaces never turn into empty command-line arguments
  # (which split(' ') would produce).
  return spec.split()
25+
26+
27+
def generate_emerge_command(package_to_build, threads, build_dir):
  """Assembles the emerge command vector used to build a single package.

  Args:
    package_to_build: The package to install, passed to emerge verbatim.
    threads: Value used for both --jobs and --load-average.
    build_dir: Directory passed to emerge as --config-root.

  Returns:
    The command as a list of strings, suitable for subprocess.run.
  """
  # Limit both the number of parallel jobs and the load average.
  parallelism_flags = [
      '--jobs={}'.format(threads),
      '--load-average={}'.format(threads),
  ]
  # Build binary packages (similar to Spack's build cache), reuse them when
  # available, and make sure reused packages respect the USE flag settings.
  binary_package_flags = [
      '--buildpkg',
      '--usepkg',
      '--binpkg-respect-use=y',
  ]

  # Portage does not support setting the build directory directly in the
  # command, but this can be controlled with the PORTAGE_TMPDIR environment
  # variable. That variable needs to be set when calling subprocess, not here.
  return [
      'emerge',  # Portage package management command
      *parallelism_flags,
      '--config-root={}'.format(build_dir),  # Configuration root directory
      *binary_package_flags,
      '--quiet-build=y',  # Reduce output during the build process
      package_to_build,  # The package to install
  ]
47+
48+
49+
def perform_build(package_name, assembled_build_command, corpus_dir, build_dir):
  """Runs a build command and records its output in the corpus directory.

  Args:
    package_name: Name of the package; used only in log messages.
    assembled_build_command: The command vector to execute.
    corpus_dir: Directory in which the build log file is written.
    build_dir: Directory exported to the command as both DISTDIR and
      PORTAGE_TMPDIR.

  Returns:
    True if the command ran and exited with status zero, False if running it
    raised a subprocess.SubprocessError (e.g. a non-zero exit status).
  """
  logging.info(f"Portage building package {package_name}")
  environment = os.environ.copy()
  # Set DISTDIR and PORTAGE_TMPDIR to set the build directory for Portage
  environment['DISTDIR'] = build_dir
  environment['PORTAGE_TMPDIR'] = build_dir
  build_log_path = os.path.join(corpus_dir, BUILD_LOG_NAME)
  try:
    # stdout and stderr share one file so the log keeps their interleaving.
    with open(build_log_path, 'w') as build_log_file:
      subprocess.run(
          assembled_build_command,
          stdout=build_log_file,
          stderr=build_log_file,
          check=True,
          env=environment)
  except subprocess.SubprocessError:
    # logging.warn is a deprecated alias that newer Python versions no longer
    # provide; logging.warning is the supported spelling.
    logging.warning(f"Failed to build portage package {package_name}")
    return False
  logging.info(f"Finished build portage package {package_name}")
  return True
69+
70+
71+
def extract_ir(package_spec, corpus_dir, build_dir, threads):
  """Extracts bitcode and sources from a finished Portage build.

  The build tree is looked up as <build_dir>/portage/<package_spec>*, and the
  directory handed to the extraction helpers is
  <match>/work/<basename of match>.

  Args:
    package_spec: The package spec, used as a glob prefix under
      <build_dir>/portage/.
    corpus_dir: Directory that receives the extracted corpus and sources.
    build_dir: The directory that was used as PORTAGE_TMPDIR for the build.
    threads: Passed through to extract_ir_lib.run_extraction.

  Raises:
    AssertionError: If the spec does not match exactly one build directory.
  """
  portage_build_root = build_dir + "/portage/"
  matches = glob.glob(os.path.join(portage_build_root, package_spec + "*"))
  # Raised explicitly (instead of using an assert statement) so the check is
  # still performed when Python runs with -O; the exception type is unchanged.
  if len(matches) != 1:
    raise AssertionError(
        f"Expected exactly one build directory matching {package_spec}* in "
        f"{portage_build_root}, found {len(matches)}")
  package_build_path = matches[0]
  package_name_with_version = os.path.basename(package_build_path)
  # NOTE(review): assumes the unpacked sources live in a directory named after
  # the matched build directory -- confirm for packages carrying a revision.
  work_directory = os.path.join(package_build_path, "work",
                                package_name_with_version)
  objects = extract_ir_lib.load_from_directory(work_directory, corpus_dir)
  relative_output_paths = extract_ir_lib.run_extraction(
      objects, threads, "llvm-objcopy", None, None, ".llvmcmd", ".llvmbc")
  extract_ir_lib.write_corpus_manifest(None, relative_output_paths, corpus_dir)
  extract_source_lib.copy_source(work_directory, corpus_dir)
85+
86+
87+
def cleanup(package_name, package_spec, corpus_dir, uninstall=True):
  """Placeholder for post-build cleanup; currently does nothing.

  Args:
    package_name: Unused.
    package_spec: Unused.
    corpus_dir: Unused.
    uninstall: Unused.

  Returns:
    None.
  """
  # TODO: Implement cleanup
  return None
90+
91+
92+
def construct_build_log(build_success, package_name):
  """Builds the build-log dictionary describing a single package build.

  Args:
    build_success: Whether the package built successfully.
    package_name: Name of the package that was built.

  Returns:
    A dictionary with one entry under 'targets' holding the package name, the
    build log file name (BUILD_LOG_NAME) and the success flag.
  """
  target_entry = {
      'name': package_name,
      'build_log': BUILD_LOG_NAME,
      'success': build_success,
  }
  return {'targets': [target_entry]}
100+
101+
102+
def build_package(dependency_futures,
                  package_name,
                  package_spec,
                  corpus_dir,
                  threads,
                  buildcache_dir,
                  build_dir,
                  cleanup_build=False):
  """Builds one Portage package and extracts its bitcode on success.

  Args:
    dependency_futures: Ray futures resolving to the build logs of the
      dependencies of this package.
    package_name: Name of the package, used in logs and the build log.
    package_spec: Spec passed to emerge and used to locate the build tree.
    corpus_dir: Directory receiving the build log and the extracted corpus.
    threads: Parallelism used for the build and the extraction.
    buildcache_dir: Accepted for interface parity; not used in this function.
    build_dir: Directory used as the Portage config root and build directory.
    cleanup_build: Whether to call cleanup() once the build is done.

  Returns:
    The build log dictionary produced by construct_build_log.
  """
  dependency_build_logs = ray.get(dependency_futures)
  for dependency_build_log in dependency_build_logs:
    dependency_target = dependency_build_log['targets'][0]
    if not dependency_target['success']:
      logging.warning(
          f"Dependency {dependency_target['name']} failed to build "
          f"for package {package_name}, not building.")
      if cleanup_build:
        cleanup(package_name, package_spec, corpus_dir, uninstall=False)
      # construct_build_log takes exactly two arguments; passing a third one
      # raised a TypeError here instead of reporting the failed build.
      return construct_build_log(False, package_name)

  portage_utils.portage_setup_compiler(build_dir)
  portage_utils.clean_binpkg(package_spec)
  build_command = generate_emerge_command(package_spec, threads, build_dir)
  build_result = perform_build(package_name, build_command, corpus_dir,
                               build_dir)
  if build_result:
    extract_ir(package_spec, corpus_dir, build_dir, threads)
    logging.warning(f'Finished building {package_name}')
  if cleanup_build:
    # Only request an uninstall when the build actually succeeded.
    cleanup(package_name, package_spec, corpus_dir, uninstall=build_result)
  return construct_build_log(build_result, package_name)

llvm_ir_dataset_utils/util/portage.py

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""Utilities related to portage."""
2+
3+
import subprocess
4+
import shutil
5+
import os
6+
7+
8+
def get_portage_compiler_config(filename):
  """Writes the make.conf contents used for bitcode-embedding builds.

  The written configuration sets the common compiler flags, points CC and CXX
  at the compiler wrapper scripts, enables FEATURES="noclean" and sets
  LC_MESSAGES. Any existing file at filename is overwritten.

  Args:
    filename: Path of the file to write.
  """
  make_conf_contents = ('COMMON_FLAGS="-O2 -pipe -Xclang -fembed-bitcode=all"\n'
                        '\n'
                        'CC="/root/ir-dataset/utils/compiler_wrapper"\n'
                        'CXX="/root/ir-dataset/utils/compiler_wrapper++"\n'
                        'CFLAGS="${COMMON_FLAGS}"\n'
                        'CXXFLAGS="${COMMON_FLAGS}"\n'
                        'FCFLAGS="${COMMON_FLAGS}"\n'
                        'FFLAGS="${COMMON_FLAGS}"\n'
                        '\n'
                        'FEATURES="noclean"\n'
                        '\n'
                        'LC_MESSAGES=C.utf8')
  with open(filename, 'w') as make_conf_file:
    make_conf_file.write(make_conf_contents)
23+
24+
25+
def portage_setup_compiler(build_dir):
  """Creates a Portage configuration directory inside build_dir.

  The system configuration in /etc/portage/ is copied to
  <build_dir>/etc/portage (replacing any previous copy), make.profile is
  created as a symlink to the system profile, and make.conf is overwritten
  with the settings written by get_portage_compiler_config.

  Args:
    build_dir: Directory that will be used as the Portage config root.
  """
  # Same as spack, path is variable depending upon the system.
  source_config_folder = '/etc/portage/'
  config_path = os.path.join(build_dir, "etc/portage")
  make_conf_path = os.path.join(config_path, "make.conf")
  make_profile_path = os.path.join(config_path, "make.profile")

  def ignore_top_level_profile(directory, names):
    # make.profile is replaced by a symlink below, so copying it (copytree
    # follows the symlink and copies the whole profile tree) only to delete it
    # again is wasted work, and deleting it fails if it is missing.
    if os.path.normpath(directory) == os.path.normpath(source_config_folder):
      return [name for name in names if name == 'make.profile']
    return []

  if os.path.exists(config_path):
    shutil.rmtree(config_path)
  shutil.copytree(
      source_config_folder, config_path, ignore=ignore_top_level_profile)

  # Link make.profile to the default profile of the system.
  os.symlink('/etc/portage/make.profile', make_profile_path)
  get_portage_compiler_config(make_conf_path)
40+
41+
42+
def clean_binpkg(package_spec):
  """Removes the cached binary packages of a spec and repairs the binhost.

  Runs `rm -rf` on /var/cache/binpkgs/<package_spec> followed by
  `emaint --fix binhost`. The exit status of neither command is checked.

  Args:
    package_spec: The package spec, appended to the binary package cache path.
  """
  binpkg_path = '/var/cache/binpkgs/' + package_spec
  subprocess.run(['rm', '-rf', binpkg_path])
  subprocess.run(['emaint', '--fix', 'binhost'])

utils/compiler_wrapper.py

+28-2
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,37 @@ def save_preprocessed_source(mode, compiler_arguments):
3939
run_compiler_invocation(mode, arguments_copy)
4040

4141

42+
def save_preprocessed_source_multi(mode, source_file, compiler_arguments):
  """Preprocesses one source file of a multi-source compiler invocation.

  The invocation is re-run with the output redirected to
  <source_file>.preprocessed_source, every other argument that ends in a
  recognized source file extension removed, and -E appended.

  Args:
    mode: Passed through to run_compiler_invocation.
    source_file: The source file to preprocess.
    compiler_arguments: The original compiler arguments; must contain '-o'
      followed by the output path. The list itself is not modified.
  """
  # We shouldn't fail to find the output here if the argument parsing
  # succeeded.
  output_index = compiler_arguments.index('-o') + 1
  redirected_arguments = compiler_arguments.copy()
  redirected_arguments[output_index] = source_file + '.preprocessed_source'

  source_extensions = tuple(RECOGNIZED_SOURCE_FILE_EXTENSIONS)
  # Drop empty arguments and every source file other than the requested one.
  preprocess_arguments = [
      argument for argument in redirected_arguments
      if argument and (argument == source_file or
                       not argument.endswith(source_extensions))
  ]
  # Add -E to the compiler invocation to run just the preprocessor.
  preprocess_arguments.append('-E')
  run_compiler_invocation(mode, preprocess_arguments)
58+
59+
4260
def save_source(source_files, output_file, mode, compiler_arguments):
  """Saves the source and preprocessed source of a compiler invocation.

  Each source file is copied to <output_file>.source. A single source file is
  preprocessed with save_preprocessed_source; with any other number of source
  files each one is preprocessed with save_preprocessed_source_multi.

  Args:
    source_files: The source files found in the compiler invocation.
    output_file: The output path of the compiler invocation.
    mode: Passed through to the preprocessing helpers.
    compiler_arguments: The original compiler arguments.
  """
  copied_source_path = output_file + '.source'

  if len(source_files) == 1:
    shutil.copy(source_files[0], copied_source_path)
    save_preprocessed_source(mode, compiler_arguments)
    return

  # NOTE(review): every source file is copied to the same destination, so only
  # the last copy survives -- confirm whether that is intended.
  for source_file in source_files:
    shutil.copy(source_file, copied_source_path)
    save_preprocessed_source_multi(mode, source_file, compiler_arguments)
4973

5074

5175
def parse_args(arguments_split):
@@ -77,6 +101,8 @@ def main(args):
77101
# In this case, don't copy over any files and just run the compiler
78102
# invocation.
79103
mode = parsed_arguments
104+
if len(parsed_arguments) == 1:
105+
mode = parsed_arguments[0]
80106
return_code = run_compiler_invocation(mode, args[1:])
81107
sys.exit(return_code)
82108

0 commit comments

Comments
 (0)