@@ -92,6 +92,15 @@ def get_nodeattr_types(self):
9292 "PE" : ("i" , False , 1 ),
9393 # FPGA resource type for memories/internal buffers of the operator
9494 "ram_style" : ("s" , False , "auto" , {"auto" , "block" , "distributed" , "ultra" }),
95+ # memory mode for the const value
96+ # internal_embedded -- embedded parameters
97+ # internal_decoupled -- streaming parameters with streamer packaged inside IP
98+ "mem_mode" : (
99+ "s" ,
100+ False ,
101+ "internal_embedded" ,
102+ {"internal_embedded" , "internal_decoupled" },
103+ ),
95104 # Input and output FIFO depths for multi-I/O nodes
96105 # Note: Need to override here as there might be two inputs
97106 "inFIFODepths" : ("ints" , False , [2 , 2 ]),
@@ -266,13 +275,21 @@ def get_folded_output_shape(self, ind=0):
266275 # Folding along the last dimension
267276 return * num_inputs , num_elems // self .pe , self .pe
268277
def calc_wmem(self):
    """Calculates and returns WMEM.

    WMEM is computed as the product of every dimension of the folded shape
    of the input at index 1, except the last one.
    NOTE(review): presumably input 1 is the constant/parameter input and the
    last folded dimension is the PE-parallel one -- confirm against
    get_folded_input_shape.
    """
    # Folded shape of the second input (index 1)
    shape_of_second_input = self.get_folded_input_shape(ind=1)
    # Drop the trailing dimension; everything in front of it counts
    leading_dims = shape_of_second_input[:-1]
    return np.prod(leading_dims)
282+
269283 # Width of the input data stream of the input at index ind
def get_instream_width(self, ind=0):
    """Returns the bit width of the input stream of the input at index ind.

    The width is the number of elements in the last dimension of the folded
    input shape times the bitwidth of the input datatype. If
    self.broadcast_last_axis is set, the element count is additionally
    multiplied by self.pe.
    """
    # Bits per single input element
    bits_per_elem = self.get_input_datatype(ind).bitwidth()
    # Only the last dimension of the folded input matters here: it holds the
    # elements arriving in parallel
    *_, parallel_elems = self.get_folded_input_shape(ind)
    # With a broadcast last axis, scale the element count by the PE
    # parallelism; otherwise use the folded count as is
    stream_elems = (
        parallel_elems * self.pe
        if self.broadcast_last_axis
        else parallel_elems
    )
    # Total stream width in bits
    return stream_elems * bits_per_elem
278295
0 commit comments