diff --git a/metamapper/instruction_selection/dag_rewrite.py b/metamapper/instruction_selection/dag_rewrite.py
index ac7a984..0097829 100644
--- a/metamapper/instruction_selection/dag_rewrite.py
+++ b/metamapper/instruction_selection/dag_rewrite.py
@@ -64,7 +64,7 @@ def visit_Select(self, node):
         #What this is doing is pointing the matched inputs of the dag to the body of the tile.
         #Then replacing the body of the tile to this node
         #TODO verify and call with the matched dag
-        rr_name = node.children()[0].iname
+        rr_name = str(self.rr.name).replace(".", "_")
         replace_dag_copy = Clone().clone(self.rr.replace(None), iname_prefix=f"{rr_name}_{node.iname}_")
         ReplaceInputs(matched_inputs).run(replace_dag_copy)
         return replace_dag_copy.output.children()[0]
diff --git a/metamapper/irs/coreir/__init__.py b/metamapper/irs/coreir/__init__.py
index db1049b..40049bc 100644
--- a/metamapper/irs/coreir/__init__.py
+++ b/metamapper/irs/coreir/__init__.py
@@ -18,6 +18,11 @@ def gen_CoreIRNodes(width):
     CoreIRNodes = Nodes("CoreIR")
     peak_ir = gen_peak_CoreIR(width)
     c = CoreIRContext()
+    cgralib = True
+    try:
+        c.load_library("cgralib")
+    except Exception:
+        cgralib = False
 
     basic = ("mul", "add", "const", "and_", "or_", "neg")
     other = ("ashr", "eq", "neq", "lshr", "mux", "sub", "slt", "sle", "sgt", "sge", "ult", "ule", "ugt", "uge", "shl")
@@ -45,7 +50,6 @@ def gen_CoreIRNodes(width):
         assert name_ == name
         assert name in CoreIRNodes.coreir_modules
         assert CoreIRNodes.name_from_coreir(cmod) == name
-
     name = f"float_DW.fp_add"
     peak_fc = peak_ir.instructions[name]
     cmod = None
@@ -139,9 +143,39 @@ def gen_CoreIRNodes(width):
     cmod = None
     name_ = load_from_peak(CoreIRNodes, peak_fc, cmod=cmod, name="commonlib.mult_middle", modparams=())
 
+    if cgralib:
+        name = f"cgralib.Mem"
+        peak_fc = peak_ir.instructions[name]
+        cmod = c.get_namespace('cgralib').generators['Mem'](ctrl_width=16, has_chain_en=False, has_external_addrgen=False, has_flush=True, has_read_valid=False, has_reset=False, has_stencil_valid=True, has_valid=False, is_rom=True, num_inputs=2, num_outputs=2, use_prebuilt_mem=True, width=16)
+        name_ = load_from_peak(CoreIRNodes, peak_fc, cmod=cmod, stateful=True, name="cgralib.Mem", modparams=())
+
+        name = f"cgralib.Pond"
+        peak_fc = peak_ir.instructions[name]
+        cmod = c.get_namespace('cgralib').generators['Pond'](num_inputs=2, num_outputs=2, width=16)
+        name_ = load_from_peak(CoreIRNodes, peak_fc, cmod=cmod, stateful=True, name="cgralib.Pond", modparams=())
+
+    CoreIRNodes.custom_nodes = ["coreir.neq", "commonlib.mult_middle", "float.max", "float.min", "float.div", "float_DW.fp_mul", "float_DW.fp_add", "float.sub", "fp_getmant", "fp_addiexp", "fp_subexp", "fp_cnvexp2f", "fp_getfint", "fp_getffrac", "fp_cnvint2f", "fp_gt", "fp_lt", "float.exp", "float.mux"]
+    class Mem_amber(DagNode):
+        def __init__(self, clk_en, data_in_0, data_in_1, wen_in_0, wen_in_1, *, iname):
+            super().__init__(clk_en, data_in_0, data_in_1, wen_in_0, wen_in_1, iname=iname)
+            self.modparams=()
+        @property
+        def attributes(self):
+            return ("iname",)
+
+        #Hack to get correct port name
+        #def select(self, field, original=None):
+        #    self._selects.add("data_out_0")
+        #    return Select(self, field="rdata",type=BitVector[16])
+        nodes = CoreIRNodes
+        static_attributes = {}
+        node_name = "cgralib.Mem_amber"
+        num_children = 3
+        type = Product.from_fields("Output",{"data_out_0":BitVector[16], "data_out_1":BitVector[16], "stencil_valid":BitVector[1]})
+
     class FPRom(DagNode):
         def __init__(self, raddr, ren, *, init, iname):
             super().__init__(raddr, ren, init=init, iname=iname)
diff --git a/metamapper/irs/coreir/ir.py b/metamapper/irs/coreir/ir.py
index b30356a..166b278 100644
--- a/metamapper/irs/coreir/ir.py
+++ b/metamapper/irs/coreir/ir.py
@@ -12,6 +12,30 @@ def gen_peak_CoreIR(width):
     DATAWIDTH = 16
     CoreIR = gen_custom_ops_peak_CoreIR(DATAWIDTH)
 
+    @family_closure
+    def mem_fc(family: AbstractFamily):
+        Data = family.BitVector[width]
+        Bit = family.Bit
+        class mem(Peak):
+            @name_outputs(data_out_0=Data, data_out_1=Data, stencil_valid=Bit)
+            def __call__(self, rst_n: Bit, clk_en: Bit, data_in_0: Data, chain_data_in_0: Data, data_in_1: Data, chain_data_in_1: Data, wen_in_0: Bit, ren_in_0: Bit, addr_in_0: Data, flush: Bit) -> (Data, Data, Bit):
+                return Data(0), Data(0), Bit(0)
+        return mem
+
+    CoreIR.add_instruction("cgralib.Mem", mem_fc)
+
+    @family_closure
+    def pond_fc(family: AbstractFamily):
+        Data = family.BitVector[width]
+        Bit = family.Bit
+        class pond(Peak):
+            @name_outputs(data_out_pond_0=Data, data_out_pond_1=Data, valid_out_pond=Bit)
+            def __call__(self, rst_n: Bit, clk_en: Bit, data_in_pond_0: Data, data_in_pond_1: Data, flush: Bit) -> (Data, Data, Bit):
+                return Data(0), Data(0), Bit(0)
+        return pond
+
+    CoreIR.add_instruction("cgralib.Pond", pond_fc)
+
     @family_closure
     def rom_fc(family: AbstractFamily):
         Data = family.BitVector[width]
diff --git a/metamapper/rewrite_table.py b/metamapper/rewrite_table.py
index cea37b9..e885b50 100644
--- a/metamapper/rewrite_table.py
+++ b/metamapper/rewrite_table.py
@@ -179,6 +179,7 @@ def discover(self, from_name, to_name, path_constraints={}, rr_name=None, solver
 
     def sort_rules(self):
+        self.rules.sort(key=lambda x: x.name)
         rule_nodes = []
         for rule in self.rules:
             dag = rule.tile
@@ -187,3 +188,12 @@ def sort_rules(self):
 
         keydict = dict(zip(self.rules, rule_nodes))
         self.rules.sort(key=keydict.get, reverse=True)
+
+        mul_add_rules = []
+        for idx, rule in enumerate(self.rules):
+            if "mac" in rule.name or "muladd" in rule.name:
+                mul_add_rules.append(idx)
+
+        for idx in mul_add_rules:
+            self.rules.insert(0, self.rules.pop(idx))
+
diff --git a/scripts/map_app.py b/scripts/map_app.py
index 3b98b7b..b6fd035 100755
--- a/scripts/map_app.py
+++ b/scripts/map_app.py
@@ -16,6 +16,7 @@ from metamapper.common_passes import print_dag, gen_dag_img, Constant2CoreIRConstant
 from peak.mapper import read_serialized_bindings
 
+
 class _ArchCycles:
     def get(self, node):
         kind = node.kind()[0]
@@ -25,8 +26,12 @@ def get(self, node):
             return pe_cycles
         return 0
 
+
 lassen_location = os.path.join(Path(__file__).parent.parent.parent.resolve(), "lassen")
-lassen_header = os.path.join(Path(__file__).parent.parent.resolve(), "libs/lassen_header.json")
+lassen_header = os.path.join(
+    Path(__file__).parent.parent.resolve(), "libs/lassen_header.json"
+)
+
 
 def gen_rrules(pipelined=False):
@@ -39,16 +44,30 @@ def gen_rrules(pipelined=False):
     ops = []
 
     if pipelined:
-        rrule_files = glob.glob(f'{lassen_location}/lassen/rewrite_rules/*_pipelined.json')
+        rrule_files = glob.glob(
+            f"{lassen_location}/lassen/rewrite_rules/*_pipelined.json"
+        )
     else:
-        rrule_files = glob.glob(f'{lassen_location}/lassen/rewrite_rules/*.json')
-        rrule_files = [rrule_file for rrule_file in rrule_files if "pipelined" not in rrule_file]
-
-    custom_rule_names = {"mult_middle": "commonlib.mult_middle", "fp_exp": "float.exp", "fp_div": "float.div", "fp_mux": "float.mux", "fp_mul":"float_DW.fp_mul", "fp_add":"float_DW.fp_add", "fp_sub":"float.sub"}
+        rrule_files = glob.glob(f"{lassen_location}/lassen/rewrite_rules/*.json")
+        rrule_files = [
+            rrule_file for rrule_file in rrule_files if "pipelined" not in rrule_file
+        ]
+
+    custom_rule_names = {
+        "mult_middle": "commonlib.mult_middle",
+        "fp_exp": "float.exp",
+        "fp_div": "float.div",
+        "fp_mux": "float.mux",
+        "fp_mul": "float_DW.fp_mul",
+        "fp_add": "float_DW.fp_add",
+        "fp_sub": "float.sub",
+    }
     for idx, rrule in enumerate(rrule_files):
         rule_name = Path(rrule).stem
-        if ("fp" in rule_name and "pipelined" in rule_name) or rule_name.split("_pipelined")[0] in custom_rule_names:
+        if ("fp" in rule_name and "pipelined" in rule_name) or rule_name.split(
+            "_pipelined"
+        )[0] in custom_rule_names:
             rule_name = rule_name.split("_pipelined")[0]
             if rule_name in custom_rule_names:
                 ops.append(custom_rule_names[rule_name])
@@ -69,6 +88,7 @@ def gen_rrules(pipelined=False):
 
     return rrules, ops
 
+
 pe_reg_instrs = {}
 pe_reg_instrs["const"] = 0
 pe_reg_instrs["bypass"] = 2
@@ -80,16 +100,16 @@ def gen_rrules(pipelined=False):
 pe_port_to_reg["data2"] = "regc"
 
 pe_reg_info = {}
-pe_reg_info['instrs'] = pe_reg_instrs
-pe_reg_info['port_to_reg'] = pe_port_to_reg
+pe_reg_info["instrs"] = pe_reg_instrs
+pe_reg_info["port_to_reg"] = pe_port_to_reg
 
 file_name = str(sys.argv[1])
-if len(sys.argv) > 2:
-    pe_cycles = int(sys.argv[2])
+if "PIPELINED" in os.environ and os.environ["PIPELINED"].isnumeric():
+    pe_cycles = int(os.environ["PIPELINED"])
 else:
-    pe_cycles = 0
+    pe_cycles = 1
 
-rrules, ops = gen_rrules(pipelined = pe_cycles != 0)
+rrules, ops = gen_rrules(pipelined=pe_cycles != 0)
 verilog = False
 app = os.path.basename(file_name).split(".json")[0]
 output_dir = os.path.dirname(file_name)
@@ -97,22 +117,23 @@ def gen_rrules(pipelined=False):
 c = CoreIRContext(reset=True)
 cutil.load_libs(["commonlib", "float_DW"])
 CoreIRNodes = gen_CoreIRNodes(16)
-cutil.load_from_json(file_name) #libraries=["lakelib"])
+cutil.load_from_json(file_name)  # libraries=["lakelib"])
 kernels = dict(c.global_namespace.modules)
 
 arch_fc = lassen_fc
 ArchNodes = Nodes("Arch")
-putil.load_and_link_peak(
-    ArchNodes,
-    lassen_header,
-    {"global.PE": arch_fc}
-)
+putil.load_and_link_peak(ArchNodes, lassen_header, {"global.PE": arch_fc})
 
 mr = "memory.fprom2"
-ArchNodes.add(mr, CoreIRNodes.peak_nodes[mr], CoreIRNodes.coreir_modules[mr], CoreIRNodes.dag_nodes[mr])
+ArchNodes.add(
+    mr,
+    CoreIRNodes.peak_nodes[mr],
+    CoreIRNodes.coreir_modules[mr],
+    CoreIRNodes.dag_nodes[mr],
+)
 
-mapper = Mapper(CoreIRNodes, ArchNodes, lazy=False, ops = ops, rrules=rrules)
+mapper = Mapper(CoreIRNodes, ArchNodes, lazy=False, ops=ops, rrules=rrules)
 
 c.run_passes(["rungenerators", "deletedeadinstances"])
 mods = []
@@ -122,8 +143,17 @@ def gen_rrules(pipelined=False):
     dag = cutil.coreir_to_dag(CoreIRNodes, kmod, archnodes=ArchNodes)
     Constant2CoreIRConstant(CoreIRNodes).run(dag)
 
-    mapped_dag = mapper.do_mapping(dag, kname=kname, node_cycles=_ArchCycles(), convert_unbound=False, prove_mapping=False, pe_reg_info=pe_reg_info)
-    mod = cutil.dag_to_coreir(ArchNodes, mapped_dag, f"{kname}_mapped", convert_unbounds=verilog)
+    mapped_dag = mapper.do_mapping(
+        dag,
+        kname=kname,
+        node_cycles=_ArchCycles(),
+        convert_unbound=False,
+        prove_mapping=False,
+        pe_reg_info=pe_reg_info,
+    )
+    mod = cutil.dag_to_coreir(
+        ArchNodes, mapped_dag, f"{kname}_mapped", convert_unbounds=verilog
+    )
     mods.append(mod)
 
 print(f"Total num PEs used: {mapper.num_pes}")
@@ -131,7 +161,5 @@ def gen_rrules(pipelined=False):
 print(f"saving to {output_file}")
 c.serialize_definitions(output_file, mods)
 
-with open(f'{output_dir}/{app}_kernel_latencies.json', 'w') as outfile:
+with open(f"{output_dir}/{app}_kernel_latencies.json", "w") as outfile:
     json.dump(mapper.kernel_cycles, outfile)
-
-
diff --git a/scripts/map_dse.py b/scripts/map_dse.py
index 1d4b966..196b328 100755
--- a/scripts/map_dse.py
+++ b/scripts/map_dse.py
@@ -19,6 +19,7 @@ from peak_gen.arch import read_arch
 from peak_gen.peak_wrapper import wrapped_peak_class
 
+
 class _ArchCycles:
     def get(self, node):
         kind = node.kind()[0]
@@ -28,9 +29,15 @@ def get(self, node):
             return pe_cycles
         return 0
 
-pe_location = os.path.join(Path(__file__).parent.parent.parent.resolve(), "DSEGraphAnalysis/outputs")
+
+pe_location = os.path.join(
+    Path(__file__).parent.parent.parent.resolve(), "DSEGraphAnalysis/outputs"
+)
 pe_header = os.path.join(Path(__file__).parent.parent.resolve(), "libs/pe_header.json")
-metamapper_location = os.path.join(Path(__file__).parent.parent.resolve(), "examples/peak_gen")
+metamapper_location = os.path.join(
+    Path(__file__).parent.parent.resolve(), "examples/peak_gen"
+)
+
 
 def gen_rrules():
@@ -42,37 +49,43 @@ def gen_rrules():
     mapping_funcs = []
     rrules = []
 
-    num_rrules = len(glob.glob(f'{pe_location}/rewrite_rules/*.json'))
+    num_rrules = len(glob.glob(f"{pe_location}/rewrite_rules/*.json"))
 
-    if not os.path.exists(f'{metamapper_location}'):
-        os.makedirs(f'{metamapper_location}')
+    if not os.path.exists(f"{metamapper_location}"):
+        os.makedirs(f"{metamapper_location}")
 
     for ind in range(num_rrules):
         with open(f"{pe_location}/peak_eqs/peak_eq_" + str(ind) + ".py", "r") as file:
-            with open(f"{metamapper_location}/peak_eq_" + str(ind) + ".py", "w") as outfile:
+            with open(
+                f"{metamapper_location}/peak_eq_" + str(ind) + ".py", "w"
+            ) as outfile:
                 for line in file:
-                    outfile.write(line.replace('mapping_function', 'mapping_function_'+str(ind)))
+                    outfile.write(
+                        line.replace("mapping_function", "mapping_function_" + str(ind))
+                    )
 
         peak_eq = importlib.import_module("examples.peak_gen.peak_eq_" + str(ind))
         ir_fc = getattr(peak_eq, "mapping_function_" + str(ind) + "_fc")
         mapping_funcs.append(ir_fc)
 
-        with open(f"{pe_location}/rewrite_rules/rewrite_rule_" + str(ind) + ".json", "r") as json_file:
+        with open(
+            f"{pe_location}/rewrite_rules/rewrite_rule_" + str(ind) + ".json", "r"
+        ) as json_file:
            rewrite_rule_in = json.load(json_file)
 
         rewrite_rule = read_serialized_bindings(rewrite_rule_in, ir_fc, PE_fc)
         counter_example = rewrite_rule.verify()
-
         rrules.append(rewrite_rule)
     return PE_fc, rrules
 
+
 file_name = str(sys.argv[1])
-if len(sys.argv) > 2:
-    pe_cycles = int(sys.argv[2])
+if "PIPELINED" in os.environ and os.environ["PIPELINED"].isnumeric():
+    pe_cycles = int(os.environ["PIPELINED"])
 else:
-    pe_cycles = 0
+    pe_cycles = 1
 
 arch_fc, rrules = gen_rrules()
 verilog = False
@@ -83,15 +96,11 @@ def gen_rrules():
 c = CoreIRContext(reset=True)
 cutil.load_libs(["commonlib", "float_DW"])
 CoreIRNodes = gen_CoreIRNodes(16)
-cutil.load_from_json(file_name) #libraries=["lakelib"])
+cutil.load_from_json(file_name)  # libraries=["lakelib"])
 kernels = dict(c.global_namespace.modules)
 
 ArchNodes = Nodes("Arch")
-putil.load_and_link_peak(
-    ArchNodes,
-    pe_header,
-    {"global.PE": arch_fc}
-)
+putil.load_and_link_peak(ArchNodes, pe_header, {"global.PE": arch_fc})
 
 mapper = Mapper(CoreIRNodes, ArchNodes, lazy=True, rrules=rrules)
@@ -103,8 +112,16 @@ def gen_rrules():
     dag = cutil.coreir_to_dag(CoreIRNodes, kmod, archnodes=ArchNodes)
     Constant2CoreIRConstant(CoreIRNodes).run(dag)
 
-    mapped_dag = mapper.do_mapping(dag, kname=kname, node_cycles=_ArchCycles(), convert_unbound=False, prove_mapping=False)
-    mod = cutil.dag_to_coreir(ArchNodes, mapped_dag, f"{kname}_mapped", convert_unbounds=verilog)
+    mapped_dag = mapper.do_mapping(
+        dag,
+        kname=kname,
+        node_cycles=_ArchCycles(),
+        convert_unbound=False,
+        prove_mapping=False,
+    )
+    mod = cutil.dag_to_coreir(
+        ArchNodes, mapped_dag, f"{kname}_mapped", convert_unbounds=verilog
+    )
     mods.append(mod)
 
 print(f"Num PEs used: {mapper.num_pes}")
@@ -113,5 +130,5 @@ def gen_rrules():
 
 c.serialize_definitions(output_file, mods)
 
-with open(f'{output_dir}/{app}_kernel_latencies.json', 'w') as outfile:
+with open(f"{output_dir}/{app}_kernel_latencies.json", "w") as outfile:
     json.dump(mapper.kernel_cycles, outfile)
diff --git a/tests/test_kernel_mapping.py b/tests/test_kernel_mapping.py
index f70bbcb..44e4a68 100644
--- a/tests/test_kernel_mapping.py
+++ b/tests/test_kernel_mapping.py
@@ -40,9 +40,10 @@ def get(self, node):
 
 def gen_rrules(pipelined=False):
-    c = CoreIRContext(reset=True)
+    c = CoreIRContext()
     cmod = putil.peak_to_coreir(lassen_fc)
     c.serialize_header(lassen_header, [cmod])
+    # c.serialize_definitions(pe_def, [cmod])
 
     mapping_funcs = []
     rrules = []
     ops = []
@@ -53,7 +54,7 @@ def gen_rrules(pipelined=False):
         rrule_files = glob.glob(f'{lassen_location}/lassen/rewrite_rules/*.json')
         rrule_files = [rrule_file for rrule_file in rrule_files if "pipelined" not in rrule_file]
 
-    custom_rule_names = {"mult_middle":"commonlib.mult_middle","fp_exp": "float.exp", "fp_div": "float.div", "fp_mux": "float.mux", "fp_mul":"float_DW.fp_mul", "fp_add":"float_DW.fp_add", "fp_sub":"float.sub"}
+    custom_rule_names = {"mult_middle": "commonlib.mult_middle", "fp_exp": "float.exp", "fp_div": "float.div", "fp_mux": "float.mux", "fp_mul":"float_DW.fp_mul", "fp_add":"float_DW.fp_add", "fp_sub":"float.sub"}
     for idx, rrule in enumerate(rrule_files):
         rule_name = Path(rrule).stem
@@ -71,7 +72,9 @@ def gen_rrules(pipelined=False):
             rewrite_rule_in = json.load(json_file)
 
         rewrite_rule = read_serialized_bindings(rewrite_rule_in, ir_fc, lassen_fc)
-
+        if False:
+            counter_example = rewrite_rule.verify()
+            assert counter_example is None, f"{rule_name} failed"
         rrules.append(rewrite_rule)
     return rrules, ops
 
@@ -103,13 +106,12 @@ def test_kernel_mapping(pipelined, app):
 
     c = CoreIRContext(reset=True)
     cutil.load_libs(["commonlib", "float_DW"])
    CoreIRNodes = gen_CoreIRNodes(16)
-
-    cutil.load_from_json(app_file)
-    c.run_passes(["rungenerators", "deletedeadinstances"])
+    cutil.load_from_json(app_file) #libraries=["lakelib"])
     kernels = dict(c.global_namespace.modules)
     arch_fc = lassen_fc
     ArchNodes = Nodes("Arch")
+
     putil.load_and_link_peak(
         ArchNodes,
         lassen_header,
diff --git a/tests/test_mem_header.py b/tests/test_mem_header.py
deleted file mode 100644
index 42547bc..0000000
--- a/tests/test_mem_header.py
+++ /dev/null
@@ -1,14 +0,0 @@
-from metamapper.lake_mem import gen_MEM_fc
-from peak import family
-from metamapper import peak_util as putil
-from metamapper import CoreIRContext
-
-
-def test_mem_header():
-    MEM_fc = gen_MEM_fc()
-    MEM_py = MEM_fc(family.PyFamily())
-    MEM = MEM_fc(family.MagmaFamily())
-    cmod = putil.magma_to_coreir(MEM)
-    c = CoreIRContext()
-    c.serialize_header("libs/mem_header.json", [cmod])
-
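Usage note (a minimal sketch, not part of the patch): with this change, scripts/map_app.py and scripts/map_dse.py read the PE pipeline depth from the PIPELINED environment variable instead of a second positional argument, and default to pe_cycles = 1 when it is unset or non-numeric. The invocation paths below are hypothetical.

    # Hypothetical invocations:
    #   PIPELINED=0 python scripts/map_app.py path/to/app.json   (unpipelined PE, non-pipelined rewrite rules)
    #   python scripts/map_app.py path/to/app.json               (defaults to pe_cycles = 1, pipelined rules)
    import os

    # Mirrors the logic the scripts now use to pick the pipeline depth.
    if "PIPELINED" in os.environ and os.environ["PIPELINED"].isnumeric():
        pe_cycles = int(os.environ["PIPELINED"])
    else:
        pe_cycles = 1

    # In map_app.py this value then selects which rewrite-rule set is loaded.
    rrules, ops = gen_rrules(pipelined=pe_cycles != 0)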