Dense iter #19

Open · wants to merge 7 commits into main
392 changes: 283 additions & 109 deletions codegen.py

Large diffs are not rendered by default.

12 changes: 8 additions & 4 deletions gold_cgen.py
@@ -16,8 +16,6 @@ def einsum_expr(sub_stmt, op_list, op_dict, dest_dict):
stmt.extend(",")
stmt = stmt[:-1]



dest_keys = list(dest_dict.keys())
dest_list = dest_dict[dest_keys[0]]

@@ -180,11 +178,17 @@ def get_split_factor(split_dict):
return split_factor


def expr_to_df_dict(input_dict):
return {key: {item: 's' for item in items} for key, items in input_dict.items()}



def sparse(expr, op_list, op_dict, dest_dict, split_dict, output_dir_path, scalar, workspace):

schedule = get_schedule(op_dict)
op_map = get_op_map(op_dict)
split_factor = get_split_factor(split_dict)
data_format_dict = expr_to_df_dict(op_dict)
gold_file = open("gold.cpp", "w+")

gold_file.write("#include <stdlib.h>\n")
@@ -194,7 +198,6 @@ def sparse(expr, op_list, op_dict, dest_dict, split_dict, output_dir_path, scala
gold_file.write("#include <fstream>\n")
gold_file.write("#include <vector>\n")
gold_file.write("#include <string>\n")
gold_file.write("#include <boost/format.hpp>\n")
gold_file.write("#include <sys/types.h>\n")
gold_file.write("#include <sys/stat.h>\n")
gold_file.write("using namespace std;\n")
@@ -205,12 +208,13 @@ def sparse(expr, op_list, op_dict, dest_dict, split_dict, output_dir_path, scala
gold_file.write("\n")
gold_file.write("\n")


gold_file.write("int main() {\n")
gold_file.write("\n")
outsize = gold_tensor_decleration(gold_file, op_dict, dest_dict, split_factor, scalar)
gold_file.write("\n")

for element in codegen.lower(expr, op_dict, op_dict, op_list, schedule, 1, "cg", split_factor, dest_dict, "rtl", op_dict, op_map, scalar, workspace, False, False):
for element in codegen.lower(expr, op_dict, op_dict, op_list, schedule, 1, "cg", split_factor, dest_dict, "rtl", op_dict, op_map, scalar, workspace, False, False, "int", data_format_dict, []):
if element != [""]:
gold_file.write(element[0])
gold_file.write("\n")
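
A quick sketch of what the new expr_to_df_dict helper feeds to codegen.lower as data_format_dict (the operand dict here is hypothetical; "s" marks a sparse level):

op_dict = {"B": ["i", "k"], "C": ["k", "j"]}
expr_to_df_dict(op_dict)
# -> {"B": {"i": "s", "k": "s"}, "C": {"k": "s", "j": "s"}}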
17 changes: 10 additions & 7 deletions input/program.txt
@@ -1,8 +1,11 @@
app_name: matmul_ijk_football
stmt: X(i, j)= B(i, k) * C(k, j)
schedule_ap: [ikj]
schedule_cp: [ikj]
schedule_cgra: [ijk]
i:split:2100:2100:30
j:split:2100:2100:30
k:split:2100:2100:30
stmt: X(i) = B(i) * C(i)
schedule_ap: [i]
schedule_cp: [i]
schedule_cgra: [i]
i:split:100:100:20
activation_ap: none
activation_cp: none
activation_cgra: none
B:s:s:s
C:s:s:d
4 changes: 2 additions & 2 deletions input/tensor.txt
@@ -1,2 +1,2 @@
B:ss:football:s:0:60:int
C:ss:football:s:onyx_matmul:60:int
B:gen:football:s:0:60:int
C:gen:football:s:0:60:int
203 changes: 155 additions & 48 deletions main.py

Large diffs are not rendered by default.

155 changes: 155 additions & 0 deletions mem_op_gen.py
@@ -0,0 +1,155 @@

import os

def struct_gen(format_str, file):

format = format_str.split(":")

# Defining the required structs
encoding = "".join(format)
file.write(f"struct tile_{encoding}")
file.write("{\n")
for i in range(len(format)):
if(format[i] == "s"):
file.write(f"std::vector<int> pos{i + 1};\n")
file.write(f"std::vector<int> crd{i + 1};\n")
elif(format[i] == "d"):
file.write(f"std::vector<int> dim{i + 1};\n")
file.write("std::vector<float> vals;\n")
file.write("};\n\n")

def zero_op_gen(format_str, file):

in_format_str, out_format_str = format_str.split("->")
in_format = in_format_str.split(":")
out_format = out_format_str.split(":")

# Defining the required encodings
in_encoding = "".join(in_format)
out_encoding = "".join(out_format)

# Defining the function to generate the memory operation
file.write(f"tile_{out_encoding} zero_op_{out_encoding}(tile_{out_encoding} tile_op1)")
file.write("{\n")

len_in = len(in_format)
len_out = len(out_format)

if out_format != in_format[len_in - len_out:]:
print("Error: Output format must be a suffix of the input format")
return

file.write(f"tile_{out_encoding} tile_op;\n\n")

for i in range(len_out):
if(out_format[i] == "s"):
file.write(f"tile_op.pos{i + 1}.push_back(0);\n")
file.write(f"tile_op.pos{i + 1}.push_back(1);\n")
file.write(f"tile_op.crd{i + 1}.push_back(0);\n")
elif(out_format[i] == "d"):
file.write(f"tile_op.dim{i + 1}.push_back(1);\n")

file.write(f"tile_op.vals.push_back(0);\n")
file.write("return tile_op;\n")
file.write("}\n\n")

def mem_op_gen(format_str, file) :

in_format_str, out_format_str = format_str.split("->")
in_format = in_format_str.split(":")
out_format = out_format_str.split(":")

# Defining the required encodings
in_encoding = "".join(in_format)
out_encoding = "".join(out_format)

# Defining the function to generate the memory operation
file.write(f"tile_{out_encoding} mem_op_{in_encoding}_{out_encoding}(tile_{in_encoding} tensor_op, int index)")
file.write("{\n")

len_in = len(in_format)
len_out = len(out_format)

if out_format != in_format[len_in - len_out:]:
print("Error: Output format must be a suffix of the input format")
return

for i in range(len_out):
if(out_format[i] == "s"):
file.write(f"int *pos{i + 1} = tensor_op.pos{i + 1 + len_in - len_out}.data();\n")
file.write(f"int *crd{i + 1} = tensor_op.crd{i + 1 + len_in - len_out}.data();\n")
elif(out_format[i] == "d"):
file.write(f"int i{i}_dim = tensor_op.dim{i + 1 + len_in - len_out}[0];\n")

file.write("float *vals = tensor_op.vals.data();\n\n")

file.write(f"tile_{out_encoding} tile_op;\n\n")

for i in range(len_out):
# pos1 is written out in full up front for a sparse level 0, so a trailing
# pos marker is only needed for deeper sparse levels
if out_format[i] == "s" and i != 0:
file.write(f"int pos{i+1}_last;\n")

for i in range(len_out):
if i == 0:
# "ndex" makes the f-string "i{prev_i}" render as "index", the function parameter
prev_i = "ndex"
else:
prev_i = i - 1

if(out_format[i] == "d"):
file.write(f"for(int i{i} = i{prev_i} * i{i}_dim; i{i} < (i{prev_i} + 1) * i{i}_dim; i{i}++)")
file.write("{\n")
# file.write(f"i{i}_end = 0;\n")
# file.write(f"if(i{i} == ((i{prev_i} + 1) * i{i}_dim - 1)) i{i}_end = 1;\n")
elif(out_format[i] == "s"):
if i == 0:
file.write(f"tile_op.pos{i+1}.push_back(pos{i+1}[index]);\n")
file.write(f"tile_op.pos{i+1}.push_back(pos{i+1}[index + 1]);\n")
else:
file.write(f"tile_op.pos{i+1}.push_back(pos{i+1}[i{prev_i}]);\n")
file.write(f"pos{i+1}_last = pos{i+1}[i{prev_i} + 1];\n")
# file.write("if(i0_end")
# for j in range(1, i):
# file.write(f" && i{j}_end")
# file.write(") ")
# file.write(f"tile_op.pos{i+1}.push_back(pos{i+1}[i{i-1} + 1]);\n")
file.write(f"for(int i{i} = pos{i+1}[i{prev_i}]; i{i} < pos{i+1}[i{prev_i} + 1]; i{i}++)")
file.write("{\n")
# file.write(f"i{i}_end = 0;\n")
# file.write(f"if(i{i} == (pos{i+1}[i{prev_i} + 1] - 1)) i{i}_end = 1;")
file.write(f"tile_op.crd{i+1}.push_back(crd{i+1}[i{i}]);\n")

if(i == len_out - 1):
file.write(f"tile_op.vals.push_back(vals[i{i}]);\n")
file.write("}" * len_out)
file.write("\n\n")

for i in range(len_out):
if(out_format[i] == "d"):
file.write(f"tile_op.dim{i+1}.push_back(i{i}_dim);\n")

for i in range(len_out):
# level 0's pos array is already closed above; pushing the never-assigned
# pos1_last here would emit a read of an uninitialized int
if(out_format[i] == "s" and i != 0):
file.write(f"tile_op.pos{i+1}.push_back(pos{i+1}_last);\n")

for i in range(len_out):
if(out_format[i] == "s"):
file.write(f"if(tile_op.pos{i+1}.size() > 0)")
file.write("{\n")
file.write(f"int pos{i+1}_start = tile_op.pos{i+1}[0];\n")
file.write(f"std::transform(tile_op.pos{i+1}.begin(), tile_op.pos{i+1}.end(), tile_op.pos{i+1}.begin(), [pos{i+1}_start](int elem)" + "{return elem - pos" + str(i+1) + "_start; });\n")
file.write("}\n")

file.write("return tile_op;\n")
file.write("}\n\n")

if __name__ == "__main__":

in_format = ["s", "s", "s", "s", "d", "s"]
out_format = ["s", "s", "d", "s"]
file_name = "mem_op.cpp"
# struct_gen and mem_op_gen take a "s:..."-style format string and an open
# file object, so build both from the lists before calling
with open(file_name, "w") as file:
struct_gen(":".join(in_format), file)
struct_gen(":".join(out_format), file)
mem_op_gen(":".join(in_format) + "->" + ":".join(out_format), file)
os.system(f"clang-format -i {file_name}")
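
In Python terms, the C++ that mem_op_gen emits for a single trailing "s" output level does roughly the following (a hand-written sketch for illustration, not generated output):

def mem_op_s(pos, crd, vals, index):
    # Copy the fiber rooted at `index` and re-base its pos array to start at
    # zero, mirroring the std::transform the generated code appends.
    start, end = pos[index], pos[index + 1]
    return {"pos": [0, end - start], "crd": crd[start:end], "vals": vals[start:end]}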




164 changes: 157 additions & 7 deletions pre_process.py
@@ -12,6 +12,10 @@
import sparse
import sys
import math
import pytaco as pt
from pytaco import dense, compressed
import re
from typing import Any, Dict, List

from pathlib import Path

@@ -21,7 +25,91 @@
from sam.sim.src.tiling.process_expr import parse_all
from lassen.utils import float2bfbin, bfbin2float

def process_coo(tensor, tile_dims, output_dir_path, format, schedule_dict, positive_only, dtype):
class PydataSparseTensorDumper:
def dump(self, coo_tensor, output_path):
"""Write a COO tensor to a .tns file in coordinate format."""

if not isinstance(coo_tensor, sparse.COO):
raise TypeError("Input tensor must be a pydata/sparse COO tensor")

indices = coo_tensor.coords # shape (ndim, nnz)
data = coo_tensor.data # shape (nnz,)

os.makedirs(os.path.dirname(output_path), exist_ok=True)

with open(output_path, 'w') as f:
for i in range(data.shape[0]):
index_list = ' '.join(str(indices[dim, i] + 1) for dim in range(indices.shape[0])) # 1-based index
f.write(f"{index_list} {data[i]}\n")

print(f"Tensor successfully dumped to {output_path}")

def parse_tiled_tensor(tiled_tensor_str: str) -> Dict[str, Any]:
"""
Parse the tiled-tensor string produced by taco, extracting each compressed
level's pos/crd lists and the trailing vals list.
Args:
tiled_tensor_str (str): The string representation of the tiled tensor.
Returns:
Dict[str, Any]: A dictionary containing:
- 'compressed': a dict keyed by compressed level index, where each value
holds that level's 'pos' and 'crd' lists.
- 'vals': the vals list extracted from the string.
"""
compressed_pattern = r'compressed\s*\((\d+)\):\s*\[\s*([^\]]*?)\s*\]\s*\[\s*([^\]]*?)\s*\]'
compressed_matches = re.findall(compressed_pattern, tiled_tensor_str, re.DOTALL | re.IGNORECASE)

compressed_data = {}

for match in compressed_matches:
index = int(match[0])

pos_i_str = match[1]
pos_i = [int(num.strip()) for num in pos_i_str.replace('\n', '').split(',') if num.strip()]

crd_i_str = match[2]
crd_i = [int(num.strip()) for num in crd_i_str.replace('\n', '').split(',') if num.strip()]

compressed_data[index] = {
'pos': pos_i,
'crd': crd_i
}

# Now, extract the 'vals' list.
# Assume 'vals' is the last list in the string not associated with any compressed(i)
# First, find all lists in the string
list_pattern = r'\[\s*([\d,\s]+?)\s*\]'
all_lists = re.findall(list_pattern, tiled_tensor_str, re.DOTALL)

# Extract lists associated with compressed(i)
associated_lists = []
for match in compressed_matches:
associated_lists.append(match[1]) # pos_i
associated_lists.append(match[2]) # crd_i

# Find lists that are not associated with compressed(i)
vals_candidates = []
for lst in all_lists:
if lst not in associated_lists:
# To ensure it's not part of other sections like 'dense', you might need additional checks
# For simplicity, we'll assume the last unmatched list is 'vals'
vals_candidates.append(lst)

if vals_candidates:
vals_str = vals_candidates[-1] # Assuming the last unmatched list is 'vals'
vals = [int(num.strip()) for num in vals_str.replace('\n', '').split(',') if num.strip()]
else:
vals = []

return {
'compressed': compressed_data,
'vals': vals
}
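# Example (sketch, exercising the regexes above): an input containing
#   compressed (0): [ 0, 2 ] [ 0, 1 ]
# followed by a trailing values list [ 4, 7 ] parses to
#   {'compressed': {0: {'pos': [0, 2], 'crd': [0, 1]}}, 'vals': [4, 7]}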


def process_coo(tensor, tensor_dims, output_dir_path, format, schedule_dict, positive_only, dtype, data_format):

'''
This is the main function that is called to tile and store as CSF
@@ -36,6 +124,8 @@ def process_coo(tensor, tile_dims, output_dir_path, format, schedule_dict, posit
coords = []
data = []

tile_dims = tensor_dims[1:]

if format == "s":
coords = tensor.coords
data = tensor.data
@@ -70,7 +160,7 @@ def process_coo(tensor, tile_dims, output_dir_path, format, schedule_dict, posit

# Creating the COO representation for the tiled tensor at each level
for i in range(num_values):
d_list[i] = data[i]
d_list[i] = abs(data[i])
for level in range(n_levels):
for dim in range(n_dim):

@@ -87,12 +177,70 @@ def process_coo(tensor, tile_dims, output_dir_path, format, schedule_dict, posit
n_lists[idx1][i] = n_lists[idx1][i] // tile_dims[level][crd_dim]
n_lists[idx2][i] = coords[crd_dim][i] % tile_dims[level][crd_dim]


tiled_COO = sparse.COO(n_lists, d_list)

# Write the tiled COO as .tns file
dumper = PydataSparseTensorDumper()
dumper.dump(tiled_COO, output_dir_path + "/tiled_tensor.tns")

# Create the custom tiled tensor
for i in range(len(data_format)):
if data_format[i] == "s":
data_format[i] = compressed
else:
data_format[i] = dense

taco_tensor = pt.read(output_dir_path + "/tiled_tensor.tns", pt.format(data_format))
internal_tensor = taco_tensor._tensor
tiled_dict = parse_tiled_tensor(str(internal_tensor))

tiled_dict['dense'] = {}

for i in range(len(data_format)):
if data_format[i] == dense:
# tiled_dict['dense'][i] = schedule_dict
level = i // n_dim
dim = i % n_dim

if level == 2:
crd_dim = schedule_dict[level][dim]
tiled_dict['dense'][i] = tensor_dims[level][crd_dim]
else:
crd_dim = schedule_dict[level][dim]
nxt_dim = schedule_dict[level + 1].index(crd_dim)
tiled_dict['dense'][i] = math.ceil(tensor_dims[level][crd_dim] / tensor_dims[level + 1][nxt_dim])
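# Worked example (sketch), matching input/program.txt above: with tensor_dims =
# [[100], [100], [20]] and an identity schedule, a dense mode gets
# ceil(100 / 100) = 1 tile at level 0, ceil(100 / 20) = 5 sub-tiles at level 1,
# and the raw tile extent 20 at level 2.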

for key in tiled_dict['compressed'].keys():
if key is not None:
pos_path = output_dir_path + "/tcsf_pos" + str(key + 1) + ".txt"
with open(pos_path, 'w+') as f:
for item in tiled_dict['compressed'][key]['pos']:
f.write("%s\n" % item)
crd_path = output_dir_path + "/tcsf_crd" + str(key + 1) + ".txt"
with open(crd_path, 'w+') as f:
for item in tiled_dict['compressed'][key]['crd']:
f.write("%s\n" % item)

for key in tiled_dict['dense'].keys():
if key is not None:
dense_path = output_dir_path + "/tcsf_dim" + str(key + 1) + ".txt"
with open(dense_path, 'w+') as f:
f.write("%s\n" % tiled_dict['dense'][key])

d_list_path = output_dir_path + "/tcsf_vals" + ".txt"
with open(d_list_path, 'w+') as f:
for item in tiled_dict['vals']:
f.write("%s\n" % item)

# tiled_coo.coords holds the COO coordinates for each level
# tiled_coo.data holds the data for each level

"""
# Create the CSF representation for the tensor at each level
crd_dict = {}
pos_dict = {}
@@ -148,6 +296,8 @@ def process_coo(tensor, tile_dims, output_dir_path, format, schedule_dict, posit
else:
f.write("%s\n" % (tiled_COO.data[val]))
return n_lists, d_list, crd_dict, pos_dict
"""


def write_csf(COO, output_dir_path):

@@ -198,12 +348,12 @@ def write_csf(COO, output_dir_path):
d_list_path = output_dir_path + "/csf_vals" + ".txt"
with open(d_list_path, 'w+') as f:
for val in range(num_values):
f.write("%s\n" % (COO.data[val]))
f.write("%s\n" % int(abs(COO.data[val])))

inputCacheSuiteSparse = InputCacheSuiteSparse()
inputCacheTensor = InputCacheTensor()

def process(tensor_type, input_path, output_dir_path, tensor_size, schedule_dict, format, gen_tensor, density, gold_check, positive_only, dtype):
def process(tensor_type, input_path, output_dir_path, tensor_size, schedule_dict, format, gen_tensor, density, gold_check, positive_only, dtype, data_format):

tensor = None
cwd = os.getcwd()
@@ -373,5 +523,5 @@ def process(tensor_type, input_path, output_dir_path, tensor_size, schedule_dict
size = tensor_size[0]
write_csf(tensor, output_dir_path)

tile_size = tensor_size[1:]
process_coo(tensor, tile_size, output_dir_path, format, schedule_dict, positive_only, dtype)
# tile_size = tensor_size[1:]
process_coo(tensor, tensor_size, output_dir_path, format, schedule_dict, positive_only, dtype, data_format)
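
For reference, a call matching the updated process_coo signature, with illustrative values drawn from input/program.txt and input/tensor.txt (a sketch, not code from this PR):

process_coo(tensor, [[100], [100], [20]], "./lego_scratch/tensor_B", "s", schedule_dict, positive_only, "int", ["s", "s", "s"])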