Skip to content

Commit 84ea99f

Browse files
Add additional pipeline stages
1 parent c848b6b commit 84ea99f

37 files changed

+1437
-995
lines changed

quartus/SharedPipelinedCacheSynthTop/SharedPipelinedCacheSynthTop.qsf

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,5 @@ set_location_assignment PIN_Y2 -to clock
7070
set_location_assignment PIN_M23 -to reset
7171

7272

73-
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
74-
set_global_assignment -name CYCLONEII_OPTIMIZATION_TECHNIQUE BALANCED
73+
set_global_assignment -name OPTIMIZATION_MODE "HIGH PERFORMANCE EFFORT"
7574
set_instance_assignment -name PARTITION_HIERARCHY root_partition -to | -section_id Top

src/main/scala/caches/hardware/pipelined/MissFifo.scala

Lines changed: 65 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ class MshrInfoIO(nCores: Int, nMshrs: Int, nWays: Int, indexWidth: Int, tagWidth
2626
val currentTags = Output(Vec(nMshrs, UInt(tagWidth.W)))
2727
val replacementWays = Output(Vec(nMshrs, UInt(log2Up(nWays).W)))
2828
val incidentCoreIds = Output(Vec(nMshrs, UInt(log2Up(nCores).W)))
29-
val isCritCores = Output(Vec(nMshrs, Bool()))
30-
val validMSHRs = Output(Vec(nMshrs, Bool()))
29+
val critMshrs = Output(Vec(nMshrs, Bool()))
30+
val validMshrs = Output(Vec(nMshrs, Bool()))
3131
val fullCmds = Output(Vec(nMshrs, Bool()))
3232
val wrPtr = Output(UInt(log2Ceil(nMshrs).W))
3333
val elementCnt = Output(UInt(log2Up(nMshrs + 1).W))
@@ -36,7 +36,6 @@ class MshrInfoIO(nCores: Int, nMshrs: Int, nWays: Int, indexWidth: Int, tagWidth
3636
class MshrPushIO(nCores: Int, nMshrs: Int, nWays: Int, reqIdWidth: Int, tagWidth: Int, indexWidth: Int, blockOffsetWidth: Int, subBlockWidth: Int) extends Bundle() {
3737
// For inserting a new MSHR entry
3838
val pushReq = Input(Bool())
39-
val withCmd = Input(Bool()) // If true, the pushReqEntry will be pushed with a command
4039
val pushReqEntry = new LineRequestIO(nCores, nWays, tagWidth, indexWidth, subBlockWidth)
4140
// For pushing new command into MSHR entry
4241
val pushCmd = Input(Bool())
@@ -234,7 +233,6 @@ class CmdMshrQueue(nCmds: Int, nCores: Int, nMshrs: Int, reqIdWidth: Int, blockO
234233

235234
val io = IO(new Bundle {
236235
val push = Input(Bool())
237-
val withCmd = Input(Bool())
238236
val update = Input(Bool())
239237
val rdPtr = Input(UInt(log2Up(nMshrs).W))
240238
val wrPtr = Input(UInt(log2Up(nMshrs).W))
@@ -249,7 +247,7 @@ class CmdMshrQueue(nCmds: Int, nCores: Int, nMshrs: Int, reqIdWidth: Int, blockO
249247
val cntRegs = RegInit(VecInit(Seq.fill(nMshrs)(0.U((log2Up(nCmds) + 1).W))))
250248

251249
when(io.push) {
252-
cntRegs(io.wrPtr) := Mux(io.withCmd, 1.U, 0.U)
250+
cntRegs(io.wrPtr) := 1.U
253251
}.elsewhen(io.update) {
254252
cntRegs(io.updtPtr) := cntRegs(io.updtPtr) + 1.U
255253
}
@@ -260,7 +258,7 @@ class CmdMshrQueue(nCmds: Int, nCores: Int, nMshrs: Int, reqIdWidth: Int, blockO
260258
cmdBlockQueue.io.update := io.update
261259
cmdBlockQueue.io.rdPtr := io.rdPtr
262260
cmdBlockQueue.io.wrPtr := io.wrPtr
263-
cmdBlockQueue.io.wrData := Mux(io.withCmd, io.wrData, 0.U)
261+
cmdBlockQueue.io.wrData := io.wrData
264262
cmdBlockQueue.io.updtPtr := io.updtPtr
265263
cmdBlockQueue.io.updtBlockIdx := cntRegs(io.updtPtr)
266264
cmdBlockQueue.io.updtData := io.updtData
@@ -295,7 +293,6 @@ class MshrQueue(nCores: Int, nCmds: Int, nMshrs: Int, nWays: Int, reqIdWidth: In
295293
reqQueue.io.pop := io.pop.pop
296294

297295
cmdQueue.io.push := io.push.pushReq
298-
cmdQueue.io.withCmd := io.push.withCmd
299296
cmdQueue.io.update := io.push.pushCmd
300297
cmdQueue.io.rdPtr := reqQueue.io.rdPtr
301298
cmdQueue.io.wrPtr := reqQueue.io.wrPtr
@@ -320,12 +317,12 @@ class MshrQueue(nCores: Int, nCmds: Int, nMshrs: Int, nWays: Int, reqIdWidth: In
320317
io.push.full := reqQueue.io.full
321318

322319
io.info.wrPtr := reqQueue.io.wrPtr
323-
io.info.validMSHRs := validMshrs
320+
io.info.validMshrs := validMshrs
324321
io.info.currentTags := reqQueue.io.currentTags
325322
io.info.currentIndexes := reqQueue.io.currentIndexes
326323
io.info.replacementWays := reqQueue.io.replacementWays
327324
io.info.incidentCoreIds := reqQueue.io.incidentCoreIds
328-
io.info.isCritCores := reqQueue.io.isCritCores
325+
io.info.critMshrs := reqQueue.io.isCritCores
329326
io.info.fullCmds := cmdQueue.io.full
330327
io.info.elementCnt := queueElementCntReg
331328

@@ -335,53 +332,67 @@ class MshrQueue(nCores: Int, nCmds: Int, nMshrs: Int, nWays: Int, reqIdWidth: In
335332
io.pop.cmds := cmdQueue.io.rdCmds
336333
}
337334

338-
class MissFifo(nCores: Int, nCmds: Int, nMshrs: Int, nWays: Int, reqIdWidth: Int, tagWidth: Int, indexWidth: Int, blockOffsetWidth: Int, subBlockWidth: Int, blockWidth: Int) extends Module {
335+
class MissFifo(nCores: Int, nCmds: Int, nMshrs: Int, nWays: Int, reqIdWidth: Int, tagWidth: Int, indexWidth: Int, blockOffsetWidth: Int, subBlockWidth: Int, blockWidth: Int, enCritMisses: Boolean = false) extends Module {
339336
val io = IO(new MissFifoIO(nCores, nMshrs, nCmds, nWays, reqIdWidth, tagWidth, indexWidth, blockOffsetWidth, blockWidth, subBlockWidth))
340337

341-
val critQueue = Module(new MshrQueue(nCores, nCmds, nMshrs, nWays, reqIdWidth, tagWidth, indexWidth, blockOffsetWidth, subBlockWidth, blockWidth))
342-
val nonCritQueue = Module(new MshrQueue(nCores, nCmds, nMshrs, nWays, reqIdWidth, tagWidth, indexWidth, blockOffsetWidth, subBlockWidth, blockWidth))
343-
344-
// De-multiplex the push interface to the two queues
345-
val mshrPushDemux = Module(new MshrPushDemux(nCores, nCmds, nWays, reqIdWidth, tagWidth, indexWidth, blockOffsetWidth, blockWidth))
346-
347-
mshrPushDemux.io.sel := io.pushCrit
348-
mshrPushDemux.io.in <> io.push
349-
nonCritQueue.io.push <> mshrPushDemux.io.out1
350-
critQueue.io.push <> mshrPushDemux.io.out2
351-
352-
// Multiplex between the two queues for popping
353-
val mshrPopDemux = Module(new MshrPopMux(nCores, nCmds, nWays, reqIdWidth, tagWidth, indexWidth, blockOffsetWidth, blockWidth))
354-
355-
// Choose to always pop the critical queue as long as it is not empty
356-
mshrPopDemux.io.sel := io.popQSel
357-
mshrPopDemux.io.in1 <> nonCritQueue.io.pop
358-
mshrPopDemux.io.in2 <> critQueue.io.pop
359-
io.pop <> mshrPopDemux.io.out
360-
361-
io.critInfo <> critQueue.io.info
362-
io.nonCritInfo <> nonCritQueue.io.info
363-
364-
// Since the critical queue is always given priority over non-critical queue, a hazards occurs when a replacement policy
365-
// instructs a non-critical request to evict a way that then a critical request that has reached contention limit is
366-
// told to evict too. For instance, a non-critical request can evict way 2, so it is pushed to non-critical fifo; then
367-
// a critical request whose core has reached contention limit is told to evict the same way: 2 (most likely since any
368-
// other ways are already owned by critical cores); then the critical way evicts this line first followed by a
369-
// non-critical way evicting this line later on. This creates a additional contention, since if this line is later on
370-
// needed by a critical core, it will have to refetch again, thus resulting in two line accesses from the main memory
371-
// for a critical core.
372-
// NOTE: This is a rather rare case.
373-
374-
val anyMatchingReqsInNonCrit = VecInit(Seq.fill(nMshrs)(false.B))
375-
val critPopValid = !critQueue.io.pop.empty
376-
for (mshrIdx <- 0 until nMshrs) {
377-
val nonCritValid = nonCritQueue.io.info.validMSHRs(mshrIdx)
378-
val conflict = critPopValid && nonCritValid && nonCritQueue.io.info.currentIndexes(mshrIdx) === critQueue.io.pop.popEntry.index && nonCritQueue.io.info.replacementWays(mshrIdx) === critQueue.io.pop.popEntry.replaceWay
379-
anyMatchingReqsInNonCrit(mshrIdx) := conflict
380-
}
338+
if (enCritMisses) {
339+
val critQueue = Module(new MshrQueue(nCores, nCmds, nMshrs, nWays, reqIdWidth, tagWidth, indexWidth, blockOffsetWidth, subBlockWidth, blockWidth))
340+
val nonCritQueue = Module(new MshrQueue(nCores, nCmds, nMshrs, nWays, reqIdWidth, tagWidth, indexWidth, blockOffsetWidth, subBlockWidth, blockWidth))
341+
342+
// De-multiplex the push interface to the two queues
343+
val mshrPushDemux = Module(new MshrPushDemux(nCores, nCmds, nWays, reqIdWidth, tagWidth, indexWidth, blockOffsetWidth, blockWidth))
344+
345+
mshrPushDemux.io.sel := io.pushCrit
346+
mshrPushDemux.io.in <> io.push
347+
nonCritQueue.io.push <> mshrPushDemux.io.out1
348+
critQueue.io.push <> mshrPushDemux.io.out2
349+
350+
// Multiplex between the two queues for popping
351+
val mshrPopDemux = Module(new MshrPopMux(nCores, nCmds, nWays, reqIdWidth, tagWidth, indexWidth, blockOffsetWidth, blockWidth))
352+
353+
// Choose to always pop the critical queue as long as it is not empty
354+
mshrPopDemux.io.sel := io.popQSel
355+
mshrPopDemux.io.in1 <> nonCritQueue.io.pop
356+
mshrPopDemux.io.in2 <> critQueue.io.pop
357+
io.pop <> mshrPopDemux.io.out
358+
359+
io.critInfo <> critQueue.io.info
360+
io.nonCritInfo <> nonCritQueue.io.info
361+
362+
// Since the critical queue is always given priority over the non-critical queue, a hazard occurs when a replacement policy
363+
// instructs a non-critical request to evict a way that then a critical request that has reached contention limit is
364+
// told to evict too. For instance, a non-critical request can evict way 2, so it is pushed to non-critical fifo; then
365+
// a critical request whose core has reached contention limit is told to evict the same way: 2 (most likely since any
366+
// other ways are already owned by critical cores); then the critical way evicts this line first followed by a
367+
// non-critical way evicting this line later on. This creates additional contention, since if this line is later on
368+
// needed by a critical core, it will have to refetch again, thus resulting in two line accesses from the main memory
369+
// for a critical core.
370+
// NOTE: This is a rather rare case.
371+
372+
val anyMatchingReqsInNonCrit = VecInit(Seq.fill(nMshrs)(false.B))
373+
val critPopValid = !critQueue.io.pop.empty
374+
for (mshrIdx <- 0 until nMshrs) {
375+
val nonCritValid = nonCritQueue.io.info.validMshrs(mshrIdx)
376+
val conflict = critPopValid && nonCritValid && nonCritQueue.io.info.currentIndexes(mshrIdx) === critQueue.io.pop.popEntry.index && nonCritQueue.io.info.replacementWays(mshrIdx) === critQueue.io.pop.popEntry.replaceWay
377+
anyMatchingReqsInNonCrit(mshrIdx) := conflict
378+
}
379+
380+
val queueConflict = anyMatchingReqsInNonCrit.reduce((x, y) => x || y)
381381

382-
val queueConflict = anyMatchingReqsInNonCrit.reduce((x, y) => x || y)
382+
io.full := critQueue.io.push.full || nonCritQueue.io.push.full
383+
io.critEmpty := critQueue.io.pop.empty || queueConflict
384+
io.nonCritEmpty := nonCritQueue.io.pop.empty
385+
} else {
386+
val nonCritQueue = Module(new MshrQueue(nCores, nCmds, nMshrs, nWays, reqIdWidth, tagWidth, indexWidth, blockOffsetWidth, subBlockWidth, blockWidth))
383387

384-
io.full := critQueue.io.push.full || nonCritQueue.io.push.full
385-
io.critEmpty := critQueue.io.pop.empty || queueConflict
386-
io.nonCritEmpty := nonCritQueue.io.pop.empty
388+
nonCritQueue.io.push <> io.push
389+
io.pop <> nonCritQueue.io.pop
390+
391+
io.critInfo <> 0.U.asTypeOf(io.critInfo)
392+
io.nonCritInfo <> nonCritQueue.io.info
393+
394+
io.full := nonCritQueue.io.push.full
395+
io.critEmpty := true.B
396+
io.nonCritEmpty := nonCritQueue.io.pop.empty
397+
}
387398
}

src/main/scala/caches/hardware/pipelined/SharedPipelinedCache.scala

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -62,21 +62,21 @@ class SharedPipelinedCache(
6262
case None => nWays
6363
}
6464

65-
val missQueue = Module(new MissFifo(nCores, halfMissCmdCnt, mshrCnt, nWays, reqIdWidth, tagWidth, indexWidth, blockOffsetWidth, bytesPerSubBlock * 8, bytesPerBlock * 8))
66-
val wbQueue = Module(new WriteBackFifo(mshrCnt, tagWidth, indexWidth, bytesPerBlock * 8))
67-
val updateLogic = Module(new UpdateUnit(nCores, nWays, reqIdWidth, tagWidth, indexWidth, bytesPerBlock * 8, bytesPerSubBlock * 8))
6865
val repPol = Module(l2RepPolicy())
66+
val missQueue = Module(new MissFifo(nCores, halfMissCmdCnt, mshrCnt, nWays, reqIdWidth, tagWidth, indexWidth, blockOffsetWidth, bytesPerSubBlock * 8, bytesPerBlock * 8, enCritMisses = repPol.includeCriticalMissQ()))
67+
val wbQueue = Module(new WriteBackFifo(mshrCnt, tagWidth, indexWidth, bytesPerBlock * 8, enCritWb = repPol.includeCriticalWbQ()))
68+
val updateLogic = Module(new UpdateUnit(nCores, nWays, reqIdWidth, tagWidth, indexWidth, bytesPerBlock * 8, bytesPerSubBlock * 8))
6969

7070
val schedulerDataWidth = repPol.getSchedulerDataWidth
7171
val l2CacheBytesPerSubBlock = bytesPerSubBlock
7272

7373
val invalidateLine = WireDefault(false.B)
74+
val insertBubble = WireDefault(false.B)
7475
val invalidateWay = WireDefault(0.U(log2Up(nWays).W))
7576
val invalidateIndex = WireDefault(0.U(indexWidth.W))
7677
val missFifoCmdCapacity = WireDefault(false.B)
7778
val repDirtyInvalidStall = WireDefault(false.B)
7879
val writeMissHazard = WireDefault(false.B)
79-
val memIntPopWb = WireDefault(false.B)
8080

8181
println(
8282
s"L2 Cache Configuration: " +
@@ -106,7 +106,7 @@ class SharedPipelinedCache(
106106
val coreReqArbiter = Module(new CoreReqArbiter(nCores = nCores, addrWidth = addressWidth, dataWidth = bytesPerSubBlock * 8, reqIdWidth = reqIdWidth))
107107

108108
val pipeStall = updateLogic.io.stall || missQueue.io.full || missFifoCmdCapacity || repDirtyInvalidStall || writeMissHazard
109-
val reqAccept = !pipeStall
109+
val reqAccept = !pipeStall && !insertBubble
110110

111111
// Connect core request and rejection queue to the core request multiplexer that feeds into the cache pipeline
112112
coreReqArbiter.io.req1 <> io.core.req
@@ -117,10 +117,11 @@ class SharedPipelinedCache(
117117

118118
// Connect replacement policy with the rejection queue
119119
repPol.io.scheduler <> io.scheduler
120+
insertBubble := repPol.io.control.insertBubble
120121

121122
// ---------------- Decode ----------------
122123
val decLogic = Module(new Dec(nCores = nCores, nWays = nWays, reqIdWidth = reqIdWidth, tagWidth = tagWidth, indexWidth = indexWidth, blockOffWidth = blockOffsetWidth, byteOffWidth = byteOffsetWidth, subBlockWidth = bytesPerSubBlock * 8))
123-
decLogic.io.stall := pipeStall
124+
decLogic.io.stall := pipeStall || insertBubble
124125
decLogic.io.dec.coreId := coreReqArbiter.io.outCoreID
125126
decLogic.io.dec.reqValid := coreReqArbiter.io.out.reqId.valid
126127
decLogic.io.dec.reqId := coreReqArbiter.io.out.reqId.bits
@@ -130,9 +131,8 @@ class SharedPipelinedCache(
130131
decLogic.io.dec.byteEn := coreReqArbiter.io.out.byteEn
131132

132133
// ---------------- Tag and Dirty Lookup ----------------
133-
134134
val tagLogic = Module(new Tag(nCores = nCores, nSets = nSets, nWays = nWays, reqIdWidth = reqIdWidth, tagWidth = tagWidth, indexWidth = indexWidth, blockOffWidth = blockOffsetWidth, subBlockWidth = bytesPerSubBlock * 8))
135-
tagLogic.io.stall := pipeStall
135+
tagLogic.io.stall := pipeStall || insertBubble
136136
tagLogic.io.tag <> decLogic.io.tag
137137
tagLogic.io.tagCtrl <> updateLogic.io.tagUpdate
138138
tagLogic.io.invalidate.invalidate := invalidateLine
@@ -141,8 +141,7 @@ class SharedPipelinedCache(
141141
tagLogic.io.setLineValid := updateLogic.io.setValidLine
142142

143143
// ---------------- Replacement ----------------
144-
145-
val repLogic = Module(new Rep(nCores = nCores, nSets = nSets, nWays = nWays, nMshrs = mshrCnt, reqIdWidth = reqIdWidth, tagWidth = tagWidth, indexWidth = indexWidth, blockWidth = bytesPerBlock * 8, subBlockWidth = bytesPerSubBlock * 8))
144+
val repLogic = Module(new Rep(nCores = nCores, nSets = nSets, nWays = nWays, nMshrs = mshrCnt, reqIdWidth = reqIdWidth, tagWidth = tagWidth, indexWidth = indexWidth, blockWidth = bytesPerBlock * 8, subBlockWidth = bytesPerSubBlock * 8, useInvalidate = !repPol.isInstanceOf[ContentionReplacementPolicy]))
146145
repLogic.io.stall := pipeStall
147146
repLogic.io.rep <> tagLogic.io.rep
148147
repLogic.io.missFifoPush <> missQueue.io.push
@@ -151,20 +150,17 @@ class SharedPipelinedCache(
151150
repLogic.io.repPolCtrl <> repPol.io.control
152151
repLogic.io.repPolInfo <> repPol.io.info
153152
repLogic.io.setLineValid := updateLogic.io.setValidLine
154-
repLogic.io.nonCritWbPop := memIntPopWb
155-
repLogic.io.nonCritWbEntryIsCrit := wbQueue.io.isFirstInQCrit
156153
invalidateLine := repLogic.io.invalidate.invalidate
157154
invalidateWay := repLogic.io.invalidate.way
158155
invalidateIndex := repLogic.io.invalidate.index
159156
missQueue.io.pushCrit := repLogic.io.isMissPushCrit
160157
missFifoCmdCapacity := repLogic.io.halfMissCapacity
161-
repDirtyInvalidStall := repLogic.io.dirtyInvalidStall
158+
repDirtyInvalidStall := repLogic.io.evictionLineBusy
162159
rejectionQueue.io.push := repLogic.io.pushReject
163160
rejectionQueue.io.pushEntry := repLogic.io.pushRejectEntry
164-
writeMissHazard := repLogic.io.writeMissHazard
161+
repLogic.io.wbInfo <> wbQueue.io.wbInfo
165162

166163
// ---------------- Read ----------------
167-
168164
val readLogic = Module(new Read(memSizeInBytes = sizeInBytes, nCores = nCores, nWays = nWays, reqIdWidth = reqIdWidth, tagWidth = tagWidth, indexWidth = indexWidth, blockOffWidth = blockOffsetWidth, blockWidth = bytesPerBlock * 8, subBlockWidth = bytesPerSubBlock * 8))
169165
readLogic.io.stall := pipeStall
170166
readLogic.io.read <> repLogic.io.read
@@ -175,7 +171,6 @@ class SharedPipelinedCache(
175171
wbQueue.io.pushCrit := readLogic.io.wbQueuePushCrit
176172

177173
// ---------------- Update ----------------
178-
179174
val memInterface = Module(new MemoryInterface(nCores, nWays, halfMissCmdCnt, reqIdWidth, tagWidth, indexWidth, blockOffsetWidth, bytesPerBlock * 8, bytesPerSubBlock * 8, beatSize = memBeatSize, burstLen = memBurstLen))
180175
memInterface.io.missFifo <> missQueue.io.pop
181176
memInterface.io.missCritEmpty := missQueue.io.critEmpty
@@ -184,7 +179,6 @@ class SharedPipelinedCache(
184179
memInterface.io.wbCritEmpty := wbQueue.io.critEmpty
185180
memInterface.io.wbNonCritEmpty := wbQueue.io.nonCritEmpty
186181
memInterface.io.memController <> io.mem
187-
memIntPopWb := memInterface.io.wbFifo.pop
188182
missQueue.io.popQSel := memInterface.io.popQSel
189183
wbQueue.io.popQSel := memInterface.io.popQSel
190184

0 commit comments

Comments
 (0)