|
| 1 | +"""Tool for building a list of self contained sources.""" |
| 2 | + |
| 3 | +import logging |
| 4 | +import json |
| 5 | + |
| 6 | +from absl import app |
| 7 | +from absl import flags |
| 8 | + |
| 9 | +import ray |
| 10 | + |
| 11 | +from llvm_ir_dataset_utils.builders import builder |
| 12 | + |
# Handle to absl's global flag registry; the flags defined below are read
# through it inside main().
FLAGS = flags.FLAGS

# --batch_list is opened and parsed as JSON by main().
flags.DEFINE_string('batch_list', None, 'The path to the batch list.')
flags.DEFINE_string('source_dir', '/tmp/source',
                    'The path to the source dir. Not used by this builder.')
flags.DEFINE_string('build_dir', None, 'The path to the build dir.')
flags.DEFINE_string('corpus_dir', None, 'The directory to place the corpus in.')
flags.DEFINE_bool(
    'archive_corpus', False,
    'Whether or not to put the output corpus into an archive to reduce inode '
    'usage')
| 24 | + |
| 25 | + |
def main(_):
  """Builds every batch from the batch list as its own corpus.

  Loads the JSON file named by --batch_list, which must have a top-level
  'batches' key holding a list. One build future is requested from
  builder.get_build_future per batch, and the function then blocks until all
  of the futures have completed, logging progress as they finish.

  Args:
    _: Positional command line arguments handed over by absl; unused.
  """
  ray.init()

  with open(FLAGS.batch_list) as batch_list_handle:
    batch_list = json.load(batch_list_handle)

  batch_futures = []

  for index, batch_info in enumerate(batch_list['batches']):
    # The folder and the package share one per-batch name. This has to be
    # interpolated (f-string), otherwise every batch would be given the
    # literal package name 'batch-{index}'.
    batch_name = f'batch-{index}'
    corpus_description = {
        'sources': [],
        'folder_name': batch_name,
        'build_system': 'self_contained',
        'package_name': batch_name,
        'license': 'UNKNOWN',
        'license_source': None,
        'source_file_list': batch_info,
    }

    # NOTE(review): the positional 1 and {} are forwarded unchanged; their
    # meaning is defined by builder.get_build_future, which is not visible
    # from this file.
    batch_futures.append(
        builder.get_build_future(
            corpus_description,
            FLAGS.source_dir,
            FLAGS.build_dir,
            FLAGS.corpus_dir,
            1, {},
            cleanup=True,
            archive_corpus=FLAGS.archive_corpus))

  while batch_futures:
    # ray.wait hands back the futures that are ready and the ones still
    # pending; the pending list replaces batch_futures so the loop ends once
    # nothing is left. With the timeout, an iteration may report zero
    # finished futures.
    finished, batch_futures = ray.wait(batch_futures, timeout=5.0)
    finished_data = ray.get(finished)
    logging.info('Just finished %s, %s remaining.', len(finished_data),
                 len(batch_futures))
| 60 | + |
| 61 | + |
if __name__ == '__main__':
  # --batch_list defaults to None and main() opens it unconditionally, so
  # require it up front: a missing flag then yields a clear usage error
  # rather than a TypeError from open(None).
  flags.mark_flag_as_required('batch_list')
  app.run(main)
0 commit comments