1- /*
2- * Monolithic Control Unit
3- * Contains state machine control with data output selection
4- * Data routing is handled directly between memory and systolic array
5- * Simplified to 2-state machine: IDLE and ACTIVE
6- */
7-
81`default_nettype none
92
103module control_unit (
@@ -20,150 +13,88 @@ module control_unit (
2013
2114 // Systolic array control signals (lightweight!)
2215 output wire clear,
23- output reg data_valid,
2416 output reg [1 :0 ] a0_sel, a1_sel, b0_sel, b1_sel,
2517
2618 // Output interface
27- output wire done,
2819 output reg [7 :0 ] data_out
2920);
3021
31- // STATES - Simplified to just IDLE and ACTIVE
32- localparam S_IDLE = 1'b0 ;
33- localparam S_ACTIVE = 1'b1 ;
34-
35- reg state, next_state;
3622 reg [2 :0 ] mmu_cycle; // Counting Systolic Array Stages
3723
3824 // Done signal and clear signal
39- assign done = data_valid && (mmu_cycle >= 3'b010 );
4025 assign clear = (mmu_cycle == 3'b000 );
4126
4227 // Buffer of output after clearing previous
4328 reg [7 :0 ] tail_hold;
4429
45- // Next state logic - very simple now!
46- always @(* ) begin
47- next_state = state;
48-
49- case (state)
50- S_IDLE: begin
51- if (load_en) begin
52- next_state = S_ACTIVE;
53- end
54- end
55-
56- S_ACTIVE: begin
57- next_state = S_ACTIVE; // Stay active, cycles forever
58- end
59-
60- default : begin
61- next_state = S_IDLE;
62- end
63- endcase
64- end
65-
6630 // State machine and control signal generation
6731 always @(posedge clk) begin
6832 if (rst) begin
69- state <= S_IDLE;
7033 mmu_cycle <= 0 ;
71- data_valid <= 0 ;
7234 mem_addr <= 0 ;
7335 tail_hold <= 8'b0 ;
74- a0_sel <= 2'b0 ;
75- a1_sel <= 2'b0 ;
76- b0_sel <= 2'b0 ;
77- b1_sel <= 2'b0 ;
7836 end else begin
79- state <= next_state;
80-
81- case (state)
82- S_IDLE: begin
83- mem_addr <= 0 ;
84- mmu_cycle <= 0 ;
85- data_valid <= 0 ;
86- a0_sel <= 2'b0 ;
87- a1_sel <= 2'b0 ;
88- b0_sel <= 2'b0 ;
89- b1_sel <= 2'b0 ;
90-
91- if (load_en) begin
92- mem_addr <= mem_addr + 1 ;
93- end
94- end
95-
96- S_ACTIVE: begin
97- // Handle memory addressing
98- if (load_en) begin
99- mem_addr <= mem_addr + 1 ;
100- data_valid <= 1 ;
101- end
37+ // Handle memory addressing
38+ if (load_en) begin
39+ mem_addr <= mem_addr + 1 ;
40+ end else begin
41+ mem_addr <= 0 ;
42+ mmu_cycle <= 0 ;
43+ end
10244
103- // The signal data_valid triggers systolic array computation, overlapping load & compute
104- if (mem_addr == 3'b101 ) begin
105- mmu_cycle <= 0 ; // systolic cycling begins at 5th load
106- tail_hold <= c11[7 :0 ];
107- end else begin
108- mmu_cycle <= mmu_cycle + 1 ;
109- if (mem_addr == 3'b111 ) begin
110- mem_addr <= 0 ;
111- end
112- end
113-
114- // Generate mux selects based on mmu_cycle (same for all cycles)
115- case (mmu_cycle)
116- 3'd0 : begin
117- a0_sel <= 2'd0 ; // weight0
118- a1_sel <= 2'd2 ; // not used
119- b0_sel <= 2'd0 ; // input0
120- b1_sel <= 2'd2 ; // not used
121- end
122- 3'd1 : begin
123- a0_sel <= 2'd1 ; // weight1
124- a1_sel <= 2'd0 ; // weight2
125- b0_sel <= 2'd1 ; // input1/input2 (transpose)
126- b1_sel <= 2'd0 ; // input2/input1 (transpose)
127- end
128- 3'd2 : begin
129- a0_sel <= 2'd2 ; // not used
130- a1_sel <= 2'd1 ; // weight3
131- b0_sel <= 2'd2 ; // not used
132- b1_sel <= 2'd1 ; // input3
133- end
134- default : begin // by default turn everything off, i.e. set systolic inputs to 0
135- a0_sel <= 2'd2 ;
136- a1_sel <= 2'd2 ;
137- b0_sel <= 2'd2 ;
138- b1_sel <= 2'd2 ;
139- end
140- endcase
141- end
142-
143- default : begin
144- mmu_cycle <= 0 ;
145- data_valid <= 0 ;
45+ if (mem_addr == 3'b101 ) begin
46+ mmu_cycle <= 0 ; // systolic cycling begins at 5th load
47+ tail_hold <= c11[7 :0 ];
48+ end else begin
49+ mmu_cycle <= mmu_cycle + 1 ;
50+ if (mem_addr == 3'b111 ) begin
14651 mem_addr <= 0 ;
14752 end
148- endcase
53+ end
14954 end
15055 end
15156
15257 // Combinational logic for data_out
15358 always @(* ) begin
15459 data_out = 8'b0 ;
155- if (data_valid) begin
156- case (mem_addr)
157- 3'b000 : data_out = c00[15 :8 ];
158- 3'b001 : data_out = c00[7 :0 ];
159- 3'b010 : data_out = c01[15 :8 ];
160- 3'b011 : data_out = c01[7 :0 ];
161- 3'b100 : data_out = c10[15 :8 ];
162- 3'b101 : data_out = c10[7 :0 ];
163- 3'b110 : data_out = c11[15 :8 ];
164- 3'b111 : data_out = tail_hold;
165- endcase
166- end
60+ case (mem_addr)
61+ 3'b000 : data_out = c00[15 :8 ];
62+ 3'b001 : data_out = c00[7 :0 ];
63+ 3'b010 : data_out = c01[15 :8 ];
64+ 3'b011 : data_out = c01[7 :0 ];
65+ 3'b100 : data_out = c10[15 :8 ];
66+ 3'b101 : data_out = c10[7 :0 ];
67+ 3'b110 : data_out = c11[15 :8 ];
68+ 3'b111 : data_out = tail_hold;
69+ endcase
70+
71+ // Generate mux selects based on mmu_cycle (same for all cycles)
72+ case (mmu_cycle)
73+ 3'd0 : begin
74+ a0_sel = 2'd0 ; // weight0
75+ a1_sel = 2'd2 ; // not used
76+ b0_sel = 2'd0 ; // input0
77+ b1_sel = 2'd2 ; // not used
78+ end
79+ 3'd1 : begin
80+ a0_sel = 2'd1 ; // weight1
81+ a1_sel = 2'd0 ; // weight2
82+ b0_sel = 2'd1 ; // input1/input2 (transpose)
83+ b1_sel = 2'd0 ; // input2/input1 (transpose)
84+ end
85+ 3'd2 : begin
86+ a0_sel = 2'd2 ; // not used
87+ a1_sel = 2'd1 ; // weight3
88+ b0_sel = 2'd2 ; // not used
89+ b1_sel = 2'd1 ; // input3
90+ end
91+ default : begin // by default turn everything off, i.e. set systolic inputs to 0
92+ a0_sel = 2'd2 ;
93+ a1_sel = 2'd2 ;
94+ b0_sel = 2'd2 ;
95+ b1_sel = 2'd2 ;
96+ end
97+ endcase
16798 end
16899
169100endmodule
0 commit comments