Skip to content

Commit 97b3c81

Browse files
Add build script
1 parent e597ec9 commit 97b3c81

File tree

2 files changed

+68
-2
lines changed

2 files changed

+68
-2
lines changed

llvm_ir_dataset_utils/builders/self_contained_builder.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -3,18 +3,21 @@
33

44
import subprocess
55
import os
6+
import logging
67

78
from mlgo.corpus import extract_ir_lib
89
from mlgo.corpus import make_corpus_lib
910

1011

1112
def compile_file(source_file, object_file):
  """Compiles one source file into an object file with embedded bitcode.

  Invokes `clang -Xclang -fembed-bitcode=all -c`. A failing compilation does
  not raise: it is logged as a warning together with the compiler output, and
  the function returns normally.

  Args:
    source_file: Path of the source file handed to clang.
    object_file: Path passed to clang's `-o` option.
  """
  command_vector = [
      'clang', '-Xclang', '-fembed-bitcode=all', '-c', source_file, '-o',
      object_file
  ]
  # stderr is merged into stdout so all diagnostics end up in one buffer.
  compile_process = subprocess.run(
      command_vector, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
  if compile_process.returncode != 0:
    # Include the file, exit code and captured output; without them the
    # warning cannot be traced back to a specific failure.
    logging.warning(
        'Compiler returned non-zero exit code %d for %s:\n%s',
        compile_process.returncode, source_file,
        compile_process.stdout.decode('utf-8', errors='replace'))
1821

1922

2023
def perform_build(source_file_list, build_dir, corpus_dir):
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,63 @@
1+
"""Tool for building a list of self contained sources."""
2+
3+
import logging
4+
import json
5+
6+
from absl import app
7+
from absl import flags
8+
9+
import ray
10+
11+
from llvm_ir_dataset_utils.builders import builder
12+
13+
# Command-line flags read by main() below.
FLAGS = flags.FLAGS

flags.DEFINE_string('batch_list', None, 'The path to the batch list.')
flags.DEFINE_string('source_dir', '/tmp/source',
                    'The path to the source dir. Not used by this builder.')
flags.DEFINE_string('build_dir', None, 'The path to the build dir.')
flags.DEFINE_string('corpus_dir', None, 'The directory to place the corpus in.')
flags.DEFINE_bool(
    'archive_corpus', False,
    'Whether or not to put the output corpus into an archive to reduce inode '
    'usage')
24+
25+
26+
def main(_):
  """Builds every batch from the batch list as a self-contained corpus.

  Loads the JSON file named by --batch_list, requests one build future per
  entry of its 'batches' list from builder.get_build_future, and then polls
  with ray.wait until no futures remain.

  Args:
    _: Positional command-line arguments supplied by app.run; unused.
  """
  ray.init()

  with open(FLAGS.batch_list) as batch_list_handle:
    batch_list = json.load(batch_list_handle)

  batch_futures = []

  for index, batch_info in enumerate(batch_list['batches']):
    # Folder and package share one interpolated name; a plain (non-f) string
    # here would give every batch the literal name 'batch-{index}'.
    batch_name = f'batch-{index}'
    corpus_description = {
        'sources': [],
        'folder_name': batch_name,
        'build_system': 'self_contained',
        'package_name': batch_name,
        'license': 'UNKNOWN',
        'license_source': None,
        'source_file_list': batch_info
    }

    # NOTE(review): the positional `1, {}` arguments are presumably a thread
    # count and extra build settings -- confirm against
    # builder.get_build_future.
    batch_futures.append(
        builder.get_build_future(
            corpus_description,
            FLAGS.source_dir,
            FLAGS.build_dir,
            FLAGS.corpus_dir,
            1, {},
            cleanup=True,
            archive_corpus=FLAGS.archive_corpus))

  while batch_futures:
    # ray.wait hands back the finished futures and the still-pending ones;
    # the pending list replaces batch_futures, so the loop ends once it is
    # empty.
    finished, batch_futures = ray.wait(batch_futures, timeout=5.0)
    finished_data = ray.get(finished)
    logging.info(
        f'Just finished {len(finished_data)}, {len(batch_futures)} remaining.')
60+
61+
62+
if __name__ == '__main__':
  # main() opens --batch_list unconditionally and the flag defaults to None,
  # so report a missing flag as a usage error rather than a TypeError from
  # open(None).
  flags.mark_flag_as_required('batch_list')
  app.run(main)

0 commit comments

Comments
 (0)