Skip to content

Commit e597ec9

Browse files
Add self contained builder
1 parent 88d2ff3 commit e597ec9

File tree

2 files changed

+49
-1
lines changed

2 files changed

+49
-1
lines changed

llvm_ir_dataset_utils/builders/builder.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
from llvm_ir_dataset_utils.builders import (autoconf_builder, cargo_builder,
1515
cmake_builder, julia_builder,
1616
manual_builder, spack_builder,
17-
swift_builder, portage_builder)
17+
swift_builder, portage_builder,
18+
self_contained_builder)
1819
from llvm_ir_dataset_utils.sources import source
1920
from llvm_ir_dataset_utils.util import file, licenses
2021

@@ -235,6 +236,10 @@ def parse_and_build_from_description(
235236
threads,
236237
corpus_description["package_name"],
237238
)
239+
elif corpus_description["build_system"] == "self_contained":
240+
build_log = self_contained_builder.perform_build(
241+
corpus_description["source_file_list"], build_dir, corpus_dir)
242+
self_contained_builder.extract_ir(build_dir, corpus_dir, threads)
238243
else:
239244
raise ValueError(
240245
f"Build system {corpus_description['build_system']} is not supported")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
"""Module for building and extracting builder from a set of self-contained
2+
C/C++ files."""
3+
4+
import subprocess
5+
import os
6+
7+
from mlgo.corpus import extract_ir_lib
8+
from mlgo.corpus import make_corpus_lib
9+
10+
11+
def compile_file(source_file, object_file):
  """Compile a single source file with clang, embedding bitcode in the output.

  Args:
    source_file: Path of the source file passed to clang.
    object_file: Path passed to clang's -o flag.

  Raises:
    AssertionError: If clang exits with a non-zero return code. The message
      contains the captured compiler output.
  """
  # NOTE(review): there is no '-c' flag, so clang will also run the link step
  # even though the output parameter is called object_file -- confirm that
  # this is intended.
  command_vector = [
      'clang', '-Xclang', '-fembed-bitcode=all', source_file, '-o', object_file
  ]
  # stderr is merged into stdout so a single stream holds every diagnostic.
  compile_process = subprocess.run(
      command_vector, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
  # Raise explicitly instead of using an assert statement: assert statements
  # are removed under `python -O`, which would let a failed compile pass
  # silently. The exception type is kept so existing callers see no change.
  if compile_process.returncode != 0:
    compiler_output = compile_process.stdout.decode('utf-8', errors='replace')
    raise AssertionError(
        f'Compiling {source_file} failed with return code '
        f'{compile_process.returncode}:\n{compiler_output}')
18+
19+
20+
def perform_build(source_file_list, build_dir, corpus_dir):
  """Compile every listed source file into build_dir and report the result.

  Each source file is compiled by compile_file to
  <build_dir>/<basename of the source file>.o.

  Args:
    source_file_list: Iterable of source file paths to compile.
    build_dir: Directory that receives the compiled outputs.
    corpus_dir: Not used by this function.

  Returns:
    A dict with a single 'targets' entry describing one target named
    'self_contained'. It always reports success with no build log; a failed
    compile surfaces as an exception from compile_file before this point.
  """
  # NOTE(review): outputs are named after the basename only, so two sources
  # with the same file name in different directories map to the same output
  # path -- confirm inputs are expected to have unique basenames.
  for source_path in source_file_list:
    output_name = os.path.basename(source_path) + '.o'
    compile_file(source_path, os.path.join(build_dir, output_name))

  target_status = {
      'success': True,
      'build_log': None,
      'name': 'self_contained',
  }
  return {'targets': [target_status]}
32+
33+
34+
# TODO(boomanaiden154): This is duplicated with extract_ir in the manual builder.
35+
# We might want to look into refactoring to consolidate the two functions at some
36+
# point.
37+
def extract_ir(build_dir, corpus_dir, threads):
  """Extract embedded IR from the build outputs and write a corpus manifest.

  Args:
    build_dir: Directory handed to extract_ir_lib.load_from_directory as the
      location of the compiled outputs.
    corpus_dir: Directory handed to load_from_directory and to
      write_corpus_manifest.
    threads: Forwarded as the second argument of run_extraction.
  """
  loaded_objects = extract_ir_lib.load_from_directory(build_dir, corpus_dir)

  # NOTE(review): the argument order is kept exactly as before. The trailing
  # values look like the objcopy tool name, two unset options, and the
  # command-line / bitcode section names -- confirm against the
  # extract_ir_lib.run_extraction signature.
  extraction_args = (
      loaded_objects,
      threads,
      "llvm-objcopy",
      None,
      None,
      ".llvmcmd",
      ".llvmbc",
  )
  output_paths = extract_ir_lib.run_extraction(*extraction_args)

  extract_ir_lib.write_corpus_manifest(None, output_paths, corpus_dir)

0 commit comments

Comments
 (0)