Dense iter #19

Open · wants to merge 7 commits into main
392 changes: 283 additions & 109 deletions codegen.py

Large diffs are not rendered by default.

12 changes: 8 additions & 4 deletions gold_cgen.py
@@ -16,8 +16,6 @@ def einsum_expr(sub_stmt, op_list, op_dict, dest_dict):
stmt.extend(",")
stmt = stmt[:-1]



dest_keys = list(dest_dict.keys())
dest_list = dest_dict[dest_keys[0]]

@@ -180,11 +178,17 @@ def get_split_factor(split_dict):
return split_factor


def expr_to_df_dict(input_dict):
return {key: {item: 's' for item in items} for key, items in input_dict.items()}



def sparse(expr, op_list, op_dict, dest_dict, split_dict, output_dir_path, scalar, workspace):

schedule = get_schedule(op_dict)
op_map = get_op_map(op_dict)
split_factor = get_split_factor(split_dict)
data_format_dict = expr_to_df_dict(op_dict)
gold_file = open("gold.cpp", "w+")

gold_file.write("#include <stdlib.h>\n")
@@ -194,7 +198,6 @@ def sparse(expr, op_list, op_dict, dest_dict, split_dict, output_dir_path, scala
gold_file.write("#include <fstream>\n")
gold_file.write("#include <vector>\n")
gold_file.write("#include <string>\n")
gold_file.write("#include <boost/format.hpp>\n")
gold_file.write("#include <sys/types.h>\n")
gold_file.write("#include <sys/stat.h>\n")
gold_file.write("using namespace std;\n")
@@ -205,12 +208,13 @@ def sparse(expr, op_list, op_dict, dest_dict, split_dict, output_dir_path, scala
gold_file.write("\n")
gold_file.write("\n")


gold_file.write("int main() {\n")
gold_file.write("\n")
outsize = gold_tensor_decleration(gold_file, op_dict, dest_dict, split_factor, scalar)
gold_file.write("\n")

for element in codegen.lower(expr, op_dict, op_dict, op_list, schedule, 1, "cg", split_factor, dest_dict, "rtl", op_dict, op_map, scalar, workspace, False, False):
for element in codegen.lower(expr, op_dict, op_dict, op_list, schedule, 1, "cg", split_factor, dest_dict, "rtl", op_dict, op_map, scalar, workspace, False, False, "int", data_format_dict, []):
if element != [""]:
gold_file.write(element[0])
gold_file.write("\n")
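
A quick sketch of what the new expr_to_df_dict helper feeds to codegen.lower as data_format_dict (the operand dict here is hypothetical; "s" marks a sparse level):

op_dict = {"B": ["i", "k"], "C": ["k", "j"]}
expr_to_df_dict(op_dict)
# -> {"B": {"i": "s", "k": "s"}, "C": {"k": "s", "j": "s"}}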
17 changes: 10 additions & 7 deletions input/program.txt
@@ -1,8 +1,11 @@
app_name: matmul_ijk_football
stmt: X(i, j)= B(i, k) * C(k, j)
schedule_ap: [ikj]
schedule_cp: [ikj]
schedule_cgra: [ijk]
i:split:2100:2100:30
j:split:2100:2100:30
k:split:2100:2100:30
stmt: X(i) = B(i) * C(i)
schedule_ap: [i]
schedule_cp: [i]
schedule_cgra: [i]
i:split:100:100:20
activation_ap: none
activation_cp: none
activation_cgra: none
B:s:s:s
C:s:s:d
4 changes: 2 additions & 2 deletions input/tensor.txt
@@ -1,2 +1,2 @@
B:ss:football:s:0:60:int
C:ss:football:s:onyx_matmul:60:int
B:gen:football:s:0:60:int
C:gen:football:s:0:60:int
203 changes: 155 additions & 48 deletions main.py

Large diffs are not rendered by default.

155 changes: 155 additions & 0 deletions mem_op_gen.py
@@ -0,0 +1,155 @@

import os

def struct_gen(format_str, file):

format = format_str.split(":")

# Defining the required structs
encoding = "".join(format)
file.write(f"struct tile_{encoding}")
file.write("{\n")
for i in range(len(format)):
if(format[i] == "s"):
file.write(f"std::vector<int> pos{i + 1};\n")
file.write(f"std::vector<int> crd{i + 1};\n")
elif(format[i] == "d"):
file.write(f"std::vector<int> dim{i + 1};\n")
file.write("std::vector<float> vals;\n")
file.write("};\n\n")

def zero_op_gen(format_str, file):

in_format_str, out_format_str = format_str.split("->")
in_format = in_format_str.split(":")
out_format = out_format_str.split(":")

# Defining the required encodings
in_encoding = "".join(in_format)
out_encoding = "".join(out_format)

# Defining the function to generate the memory operation
file.write(f"tile_{out_encoding} zero_op_{out_encoding}(tile_{out_encoding} tile_op1)")
file.write("{\n")

len_in = len(in_format)
len_out = len(out_format)

if out_format != in_format[len_in - len_out:]:
print("Error: Output format must be a suffix of the input format")
return

file.write(f"tile_{out_encoding} tile_op;\n\n")

for i in range(len_out):
if(out_format[i] == "s"):
file.write(f"tile_op.pos{i + 1}.push_back(0);\n")
file.write(f"tile_op.pos{i + 1}.push_back(1);\n")
file.write(f"tile_op.crd{i + 1}.push_back(0);\n")
elif(out_format[i] == "d"):
file.write(f"tile_op.dim{i + 1}.push_back(1);\n")

file.write(f"tile_op.vals.push_back(0);\n")
file.write("return tile_op;\n")
file.write("}\n\n")

def mem_op_gen(format_str, file) :

in_format_str, out_format_str = format_str.split("->")
in_format = in_format_str.split(":")
out_format = out_format_str.split(":")

# Defining the required encodings
in_encoding = "".join(in_format)
out_encoding = "".join(out_format)

# Defining the function to generate the memory operation
file.write(f"tile_{out_encoding} mem_op_{in_encoding}_{out_encoding}(tile_{in_encoding} tensor_op, int index)")
file.write("{\n")

len_in = len(in_format)
len_out = len(out_format)

if out_format != in_format[len_in - len_out:]:
print("Error: Output format must be a suffix of the input format")
return

for i in range(len_out):
if(out_format[i] == "s"):
file.write(f"int *pos{i + 1} = tensor_op.pos{i + 1 + len_in - len_out}.data();\n")
file.write(f"int *crd{i + 1} = tensor_op.crd{i + 1 + len_in - len_out}.data();\n")
elif(out_format[i] == "d"):
file.write(f"int i{i}_dim = tensor_op.dim{i + 1 + len_in - len_out}[0];\n")

file.write("float *vals = tensor_op.vals.data();\n\n")

file.write(f"tile_{out_encoding} tile_op;\n\n")

for i in range(len_out):
# pos1 is written out in full up front for a sparse level 0, so a trailing
# pos marker is only needed for deeper sparse levels
if out_format[i] == "s" and i != 0:
file.write(f"int pos{i+1}_last;\n")

for i in range(len_out):
if i == 0:
# "ndex" makes the f-string "i{prev_i}" render as "index", the function parameter
prev_i = "ndex"
else:
prev_i = i - 1

if(out_format[i] == "d"):
file.write(f"for(int i{i} = i{prev_i} * i{i}_dim; i{i} < (i{prev_i} + 1) * i{i}_dim; i{i}++)")
file.write("{\n")
# file.write(f"i{i}_end = 0;\n")
# file.write(f"if(i{i} == ((i{prev_i} + 1) * i{i}_dim - 1)) i{i}_end = 1;\n")
elif(out_format[i] == "s"):
if i == 0:
file.write(f"tile_op.pos{i+1}.push_back(pos{i+1}[index]);\n")
file.write(f"tile_op.pos{i+1}.push_back(pos{i+1}[index + 1]);\n")
else:
file.write(f"tile_op.pos{i+1}.push_back(pos{i+1}[i{prev_i}]);\n")
file.write(f"pos{i+1}_last = pos{i+1}[i{prev_i} + 1];\n")
# file.write("if(i0_end")
# for j in range(1, i):
# file.write(f" && i{j}_end")
# file.write(") ")
# file.write(f"tile_op.pos{i+1}.push_back(pos{i+1}[i{i-1} + 1]);\n")
file.write(f"for(int i{i} = pos{i+1}[i{prev_i}]; i{i} < pos{i+1}[i{prev_i} + 1]; i{i}++)")
file.write("{\n")
# file.write(f"i{i}_end = 0;\n")
# file.write(f"if(i{i} == (pos{i+1}[i{prev_i} + 1] - 1)) i{i}_end = 1;")
file.write(f"tile_op.crd{i+1}.push_back(crd{i+1}[i{i}]);\n")

if(i == len_out - 1):
file.write(f"tile_op.vals.push_back(vals[i{i}]);\n")
file.write("}" * len_out)
file.write("\n\n")

for i in range(len_out):
if(out_format[i] == "d"):
file.write(f"tile_op.dim{i+1}.push_back(i{i}_dim);\n")

for i in range(len_out):
# level 0's pos array is already closed above; pushing the never-assigned
# pos1_last here would emit a read of an uninitialized int
if(out_format[i] == "s" and i != 0):
file.write(f"tile_op.pos{i+1}.push_back(pos{i+1}_last);\n")

for i in range(len_out):
if(out_format[i] == "s"):
file.write(f"if(tile_op.pos{i+1}.size() > 0)")
file.write("{\n")
file.write(f"int pos{i+1}_start = tile_op.pos{i+1}[0];\n")
file.write(f"std::transform(tile_op.pos{i+1}.begin(), tile_op.pos{i+1}.end(), tile_op.pos{i+1}.begin(), [pos{i+1}_start](int elem)" + "{return elem - pos" + str(i+1) + "_start; });\n")
file.write("}\n")

file.write("return tile_op;\n")
file.write("}\n\n")

if __name__ == "__main__":

in_format = ["s", "s", "s", "s", "d", "s"]
out_format = ["s", "s", "d", "s"]
file_name = "mem_op.cpp"
# struct_gen and mem_op_gen take a "s:..."-style format string and an open
# file object, so build both from the lists before calling
with open(file_name, "w") as file:
struct_gen(":".join(in_format), file)
struct_gen(":".join(out_format), file)
mem_op_gen(":".join(in_format) + "->" + ":".join(out_format), file)
os.system(f"clang-format -i {file_name}")
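
In Python terms, the C++ that mem_op_gen emits for a single trailing "s" output level does roughly the following (a hand-written sketch for illustration, not generated output):

def mem_op_s(pos, crd, vals, index):
    # Copy the fiber rooted at `index` and re-base its pos array to start at
    # zero, mirroring the std::transform the generated code appends.
    start, end = pos[index], pos[index + 1]
    return {"pos": [0, end - start], "crd": crd[start:end], "vals": vals[start:end]}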




164 changes: 157 additions & 7 deletions pre_process.py
@@ -12,6 +12,10 @@
import sparse
import sys
import math
import pytaco as pt
from pytaco import dense, compressed
import re
from typing import Any, Dict, List

from pathlib import Path

@@ -21,7 +25,91 @@
from sam.sim.src.tiling.process_expr import parse_all
from lassen.utils import float2bfbin, bfbin2float

def process_coo(tensor, tile_dims, output_dir_path, format, schedule_dict, positive_only, dtype):
class PydataSparseTensorDumper:
def dump(self, coo_tensor, output_path):
"""Write a COO tensor to a .tns file in coordinate format."""

if not isinstance(coo_tensor, sparse.COO):
raise TypeError("Input tensor must be a pydata/sparse COO tensor")

indices = coo_tensor.coords # shape (ndim, nnz)
data = coo_tensor.data # shape (nnz,)

os.makedirs(os.path.dirname(output_path), exist_ok=True)

with open(output_path, 'w') as f:
for i in range(data.shape[0]):
index_list = ' '.join(str(indices[dim, i] + 1) for dim in range(indices.shape[0])) # 1-based index
f.write(f"{index_list} {data[i]}\n")

print(f"Tensor successfully dumped to {output_path}")

def parse_tiled_tensor(tiled_tensor_str: str) -> Dict[str, Any]:
"""
Parse the tiled-tensor string produced by taco, extracting each compressed
level's pos/crd lists and the trailing vals list.
Args:
tiled_tensor_str (str): The string representation of the tiled tensor.
Returns:
Dict[str, Any]: A dictionary containing:
- 'compressed': a dict keyed by compressed level index, where each value
holds that level's 'pos' and 'crd' lists.
- 'vals': the vals list extracted from the string.
"""
compressed_pattern = r'compressed\s*\((\d+)\):\s*\[\s*([^\]]*?)\s*\]\s*\[\s*([^\]]*?)\s*\]'
compressed_matches = re.findall(compressed_pattern, tiled_tensor_str, re.DOTALL | re.IGNORECASE)

compressed_data = {}

for match in compressed_matches:
index = int(match[0])

pos_i_str = match[1]
pos_i = [int(num.strip()) for num in pos_i_str.replace('\n', '').split(',') if num.strip()]

crd_i_str = match[2]
crd_i = [int(num.strip()) for num in crd_i_str.replace('\n', '').split(',') if num.strip()]

compressed_data[index] = {
'pos': pos_i,
'crd': crd_i
}

# Now, extract the 'vals' list.
# Assume 'vals' is the last list in the string not associated with any compressed(i)
# First, find all lists in the string
list_pattern = r'\[\s*([\d,\s]+?)\s*\]'
all_lists = re.findall(list_pattern, tiled_tensor_str, re.DOTALL)

# Extract lists associated with compressed(i)
associated_lists = []
for match in compressed_matches:
associated_lists.append(match[1]) # pos_i
associated_lists.append(match[2]) # crd_i

# Find lists that are not associated with compressed(i)
vals_candidates = []
for lst in all_lists:
if lst not in associated_lists:
# To ensure it's not part of other sections like 'dense', you might need additional checks
# For simplicity, we'll assume the last unmatched list is 'vals'
vals_candidates.append(lst)

if vals_candidates:
vals_str = vals_candidates[-1] # Assuming the last unmatched list is 'vals'
vals = [int(num.strip()) for num in vals_str.replace('\n', '').split(',') if num.strip()]
else:
vals = []

return {
'compressed': compressed_data,
'vals': vals
}
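# Example (sketch, exercising the regexes above): an input containing
#   compressed (0): [ 0, 2 ] [ 0, 1 ]
# followed by a trailing values list [ 4, 7 ] parses to
#   {'compressed': {0: {'pos': [0, 2], 'crd': [0, 1]}}, 'vals': [4, 7]}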


def process_coo(tensor, tensor_dims, output_dir_path, format, schedule_dict, positive_only, dtype, data_format):

'''
This is the main function that is called to tile and store as CSF
@@ -36,6 +124,8 @@ def process_coo(tensor, tile_dims, output_dir_path, format, schedule_dict, posit
coords = []
data = []

tile_dims = tensor_dims[1:]

if format == "s":
coords = tensor.coords
data = tensor.data
@@ -70,7 +160,7 @@ def process_coo(tensor, tile_dims, output_dir_path, format, schedule_dict, posit

# Creating the COO representation for the tiled tensor at each level
for i in range(num_values):
d_list[i] = data[i]
d_list[i] = abs(data[i])
for level in range(n_levels):
for dim in range(n_dim):

@@ -87,12 +177,70 @@ def process_coo(tensor, tile_dims, output_dir_path, format, schedule_dict, posit
n_lists[idx1][i] = n_lists[idx1][i] // tile_dims[level][crd_dim]
n_lists[idx2][i] = coords[crd_dim][i] % tile_dims[level][crd_dim]


tiled_COO = sparse.COO(n_lists, d_list)

# Write the tiled COO as .tns file
dumper = PydataSparseTensorDumper()
dumper.dump(tiled_COO, output_dir_path + "/tiled_tensor.tns")

# Create the custom tiled tensor
for i in range(len(data_format)):
if data_format[i] == "s":
data_format[i] = compressed
else:
data_format[i] = dense

taco_tensor = pt.read(output_dir_path + "/tiled_tensor.tns", pt.format(data_format))
internal_tensor = taco_tensor._tensor
tiled_dict = parse_tiled_tensor(str(internal_tensor))

tiled_dict['dense'] = {}

for i in range(len(data_format)):
if data_format[i] == dense:
# tiled_dict['dense'][i] = schedule_dict
level = i // n_dim
dim = i % n_dim

if level == 2:
crd_dim = schedule_dict[level][dim]
tiled_dict['dense'][i] = tensor_dims[level][crd_dim]
else:
crd_dim = schedule_dict[level][dim]
nxt_dim = schedule_dict[level + 1].index(crd_dim)
tiled_dict['dense'][i] = math.ceil(tensor_dims[level][crd_dim] / tensor_dims[level + 1][nxt_dim])
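# Worked example (sketch), matching input/program.txt above: with tensor_dims =
# [[100], [100], [20]] and an identity schedule, a dense mode gets
# ceil(100 / 100) = 1 tile at level 0, ceil(100 / 20) = 5 sub-tiles at level 1,
# and the raw tile extent 20 at level 2.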

for key in tiled_dict['compressed'].keys():
if key is not None:
pos_path = output_dir_path + "/tcsf_pos" + str(key + 1) + ".txt"
with open(pos_path, 'w+') as f:
for item in tiled_dict['compressed'][key]['pos']:
f.write("%s\n" % item)
crd_path = output_dir_path + "/tcsf_crd" + str(key + 1) + ".txt"
with open(crd_path, 'w+') as f:
for item in tiled_dict['compressed'][key]['crd']:
f.write("%s\n" % item)

for key in tiled_dict['dense'].keys():
if key is not None:
dense_path = output_dir_path + "/tcsf_dim" + str(key + 1) + ".txt"
with open(dense_path, 'w+') as f:
f.write("%s\n" % tiled_dict['dense'][key])

d_list_path = output_dir_path + "/tcsf_vals" + ".txt"
with open(d_list_path, 'w+') as f:
for item in tiled_dict['vals']:
f.write("%s\n" % item)

# tiled_coo.coords holds the COO coordinates for each level
# tiled_coo.data holds the data for each level

"""
# Create the CSF representation for the tensor at each level
crd_dict = {}
pos_dict = {}
@@ -148,6 +296,8 @@ def process_coo(tensor, tile_dims, output_dir_path, format, schedule_dict, posit
else:
f.write("%s\n" % (tiled_COO.data[val]))
return n_lists, d_list, crd_dict, pos_dict
"""


def write_csf(COO, output_dir_path):

@@ -198,12 +348,12 @@ def write_csf(COO, output_dir_path):
d_list_path = output_dir_path + "/csf_vals" + ".txt"
with open(d_list_path, 'w+') as f:
for val in range(num_values):
f.write("%s\n" % (COO.data[val]))
f.write("%s\n" % int(abs(COO.data[val])))

inputCacheSuiteSparse = InputCacheSuiteSparse()
inputCacheTensor = InputCacheTensor()

def process(tensor_type, input_path, output_dir_path, tensor_size, schedule_dict, format, gen_tensor, density, gold_check, positive_only, dtype):
def process(tensor_type, input_path, output_dir_path, tensor_size, schedule_dict, format, gen_tensor, density, gold_check, positive_only, dtype, data_format):

tensor = None
cwd = os.getcwd()
@@ -373,5 +523,5 @@ def process(tensor_type, input_path, output_dir_path, tensor_size, schedule_dict
size = tensor_size[0]
write_csf(tensor, output_dir_path)

tile_size = tensor_size[1:]
process_coo(tensor, tile_size, output_dir_path, format, schedule_dict, positive_only, dtype)
# tile_size = tensor_size[1:]
process_coo(tensor, tensor_size, output_dir_path, format, schedule_dict, positive_only, dtype, data_format)
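
For reference, a call matching the updated process_coo signature, with illustrative values drawn from input/program.txt and input/tensor.txt (a sketch, not code from this PR):

process_coo(tensor, [[100], [100], [20]], "./lego_scratch/tensor_B", "s", schedule_dict, positive_only, "int", ["s", "s", "s"])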