Commit b78cb92

Merge pull request #651 from intel-go/gregory/performance
Implemented performance improvements
2 parents: 7fd91a6 + 3d0edd9

5 files changed: +257 -72 lines changed

.gitignore (+3)

@@ -1,3 +1,6 @@
 # emacs
 *~
 doc
+GPATH
+GTAGS
+GRTAGS

flow/flow.go (+55 -16)

@@ -161,7 +161,7 @@ func addReceiver(portId uint16, out low.Rings, inIndexNumber int32) {
 	par.port = low.GetPort(portId)
 	par.out = out
 	par.status = make([]int32, maxRecv, maxRecv)
-	schedState.addFF("receiver", nil, recvRSS, nil, par, nil, receiveRSS, inIndexNumber, &par.stats)
+	schedState.addFF("receiverPort"+string(portId), nil, recvRSS, nil, par, nil, receiveRSS, inIndexNumber, &par.stats)
 }
 
 type receiveOSParameters struct {
@@ -256,18 +256,23 @@ func addFastGenerator(out low.Rings, generateFunction GenerateFunction,
 }
 
 type sendParameters struct {
-	in     low.Rings
-	port   uint16
-	anyway bool
-	stats  common.RXTXStats
+	in                 low.Rings
+	port               uint16
+	unrestrictedClones bool
+	stats              common.RXTXStats
+	sendThreadIndex    int
 }
 
 func addSender(port uint16, in low.Rings, inIndexNumber int32) {
-	par := new(sendParameters)
-	par.port = port
-	par.in = in
-	par.anyway = schedState.anyway
-	schedState.addFF("sender", nil, send, nil, par, nil, sendReceiveKNI, inIndexNumber, &par.stats)
+	for iii := 0; iii < sendCPUCoresPerPort; iii++ {
+		par := new(sendParameters)
+		par.port = port
+		par.in = in
+		par.unrestrictedClones = schedState.unrestrictedClones
+		par.sendThreadIndex = iii
+		schedState.addFF("senderPort"+string(port)+"Thread"+string(iii),
+			nil, send, nil, par, nil, sendReceiveKNI, inIndexNumber, &par.stats)
+	}
 }
 
 type sendOSParameters struct {
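
addSender now registers sendCPUCoresPerPort send flow functions per port, each tagged with its own sendThreadIndex and all dequeuing from the same rings. How an index maps onto the port's TX queues is decided on the C side in nff_go_send, which is not part of this diff; the standalone sketch below only illustrates one plausible interleaved split under the divisibility constraint that SystemInit enforces — the function and variable names here are hypothetical, not NFF-Go API.

// sendqueues_sketch.go — hypothetical illustration, not code from this commit.
package main

import "fmt"

// queuesForSendThread returns the TX queues one send thread would own under an
// interleaved split: thread i takes queues i, i+N, i+2N, ... It assumes
// txQueuesPerPort is divisible by totalSendThreads, which mirrors the
// TXQueuesNumberPerPort / SendCPUCoresPerPort check added in SystemInit.
func queuesForSendThread(sendThreadIndex, totalSendThreads, txQueuesPerPort int) []int {
	queues := make([]int, 0, txQueuesPerPort/totalSendThreads)
	for q := sendThreadIndex; q < txQueuesPerPort; q += totalSendThreads {
		queues = append(queues, q)
	}
	return queues
}

func main() {
	// With 2 send threads and 4 TX queues per port:
	// send thread 0 -> TX queues [0 2], send thread 1 -> TX queues [1 3].
	for thread := 0; thread < 2; thread++ {
		fmt.Println("send thread", thread, "-> TX queues", queuesForSendThread(thread, 2, 4))
	}
}
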
@@ -468,6 +473,7 @@ var sizeMultiplier uint
 var schedTime uint
 var hwtxchecksum, hwrxpacketstimestamp, setSIGINTHandler bool
 var maxRecv int
+var sendCPUCoresPerPort, tXQueuesNumberPerPort int
 
 type port struct {
 	wasRequested bool // has user requested any send/receive operations at this port
@@ -531,7 +537,7 @@ type Config struct {
 	// Limits parallel instances. 1 for one instance, 1000 for RSS count determine instances
 	MaxInIndex int32
 	// Scheduler should clone functions even if it can lead to reordering.
-	// This option should be switch off for all high level reassembling like TCP or HTTP
+	// This option should be switched off for all high level reassembling like TCP or HTTP
 	RestrictedCloning bool
 	// If application uses EncapsulateHead or DecapsulateHead functions L2 pointers
 	// should be reinit every receving or generating a packet. This can be removed if
@@ -570,6 +576,19 @@ type Config struct {
 	// SystemStartScheduler waits for SIGINT notification and calls
 	// SystemStop after it. It is enabled by default.
 	NoSetSIGINTHandler bool
+	// Number of CPU cores to be occupied by Send routines. It is
+	// necessary to set TXQueuesNumberPerPort to a reasonably big
+	// number which can be divided by SendCPUCoresPerPort.
+	SendCPUCoresPerPort int
+	// Number of transmit queues to use on network card. By default it
+	// is minimum of NIC supported TX queues number and 2. If this
+	// value is specified and NIC doesn't support this number of TX
+	// queues, initialization fails.
+	TXQueuesNumberPerPort int
+	// Controls scheduler interval in milliseconds. Default value is
+	// 500. Lower values allow faster reaction to changing traffic but
+	// increase scheduling overhead.
+	SchedulerInterval uint
 }
 
 // SystemInit is initialization of system. This function should be always called before graph construction.
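
The three new Config fields are plain knobs on flow.SystemInit. A minimal usage sketch, assuming two send cores and four TX queues per port — the concrete numbers are chosen only for illustration, and SystemInit rejects combinations where TXQueuesNumberPerPort is not divisible by SendCPUCoresPerPort:

package main

import "github.com/intel-go/nff-go/flow"

func main() {
	config := flow.Config{
		// Two dedicated send cores per port; TXQueuesNumberPerPort must be
		// divisible by this value.
		SendCPUCoresPerPort: 2,
		// Four TX queues per port, shared between the two send cores.
		// Initialization fails if the NIC cannot provide this many queues.
		TXQueuesNumberPerPort: 4,
		// Run the scheduler every 100 ms instead of the default 500 ms.
		SchedulerInterval: 100,
	}
	flow.CheckFatal(flow.SystemInit(&config))
	// ... build the packet-processing graph here ...
	flow.CheckFatal(flow.SystemStart())
}
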
@@ -588,12 +607,26 @@ func SystemInit(args *Config) error {
 		cpus = common.GetDefaultCPUs(CPUCoresNumber)
 	}
 
+	tXQueuesNumberPerPort = args.TXQueuesNumberPerPort
+	if tXQueuesNumberPerPort == 0 {
+		tXQueuesNumberPerPort = 2
+	}
+
+	sendCPUCoresPerPort = args.SendCPUCoresPerPort
+	if sendCPUCoresPerPort == 0 {
+		sendCPUCoresPerPort = 1
+		if tXQueuesNumberPerPort%sendCPUCoresPerPort != 0 {
+			return common.WrapWithNFError(nil, "TXQueuesNumberPerPort should be divisible by SendCPUCoresPerPort",
+				common.BadArgument)
+		}
+	}
+
 	schedulerOff := args.DisableScheduler
 	schedulerOffRemove := args.PersistentClones
 	stopDedicatedCore := args.StopOnDedicatedCore
 	hwtxchecksum = args.HWTXChecksum
 	hwrxpacketstimestamp = args.HWRXPacketsTimestamp
-	anyway := !args.RestrictedCloning
+	unrestrictedClones := !args.RestrictedCloning
 
 	mbufNumber := uint(8191)
 	if args.MbufNumber != 0 {
@@ -610,7 +643,12 @@ func SystemInit(args *Config) error {
 		sizeMultiplier = args.RingSize
 	}
 
-	schedTime = 500
+	if args.SchedulerInterval != 0 {
+		schedTime = args.SchedulerInterval
+	} else {
+		schedTime = 500
+	}
+
 	if args.ScaleTime != 0 {
 		schedTime = args.ScaleTime
 	}
@@ -702,7 +740,7 @@ func SystemInit(args *Config) error {
 	common.LogTitle(common.Initialization, "------------***------ Initializing scheduler -----***------------")
 	StopRing := low.CreateRings(burstSize*sizeMultiplier, maxInIndex /* Maximum possible rings */)
 	common.LogDebug(common.Initialization, "Scheduler can use cores:", cpus)
-	schedState = newScheduler(cpus, schedulerOff, schedulerOffRemove, stopDedicatedCore, StopRing, checkTime, debugTime, maxPacketsToClone, maxRecv, anyway)
+	schedState = newScheduler(cpus, schedulerOff, schedulerOffRemove, stopDedicatedCore, StopRing, checkTime, debugTime, maxPacketsToClone, maxRecv, unrestrictedClones)
 
 	// Set HW offloading flag in packet package
 	packet.SetHWTXChecksumFlag(hwtxchecksum)
@@ -737,7 +775,7 @@ func SystemInitPortsAndMemory() error {
 	for i := range createdPorts {
 		if createdPorts[i].wasRequested {
 			if err := low.CreatePort(createdPorts[i].port, createdPorts[i].willReceive,
-				true, hwtxchecksum, hwrxpacketstimestamp, createdPorts[i].InIndex); err != nil {
+				true, hwtxchecksum, hwrxpacketstimestamp, createdPorts[i].InIndex, tXQueuesNumberPerPort); err != nil {
 				return err
 			}
 		}
@@ -1690,7 +1728,8 @@ func pcopy(parameters interface{}, inIndex []int32, stopper [2]chan int, report
 
 func send(parameters interface{}, inIndex []int32, flag *int32, coreID int) {
 	srp := parameters.(*sendParameters)
-	low.Send(srp.port, srp.in, srp.anyway, flag, coreID, &srp.stats)
+	low.Send(srp.port, srp.in, srp.unrestrictedClones, flag, coreID, &srp.stats,
+		srp.sendThreadIndex, sendCPUCoresPerPort)
 }
 
 func sendOS(parameters interface{}, inIndex []int32, flag *int32, coreID int) {

flow/scheduler.go (+31 -23)

@@ -36,6 +36,7 @@ const generatePauseStep = 0.1
 const process = 1
 const stopRequest = 2
 const wasStopped = 9
+const printPortStatistics = false
 
 // TODO "5" and "39" constants derived empirically. Need to investigate more elegant thresholds.
 const RSSCloneMin = 5
@@ -156,26 +157,26 @@ func (scheduler *scheduler) addFF(name string, ucfn uncloneFlowFunction, Cfn cFl
 }
 
 type scheduler struct {
-	ff                []*flowFunction
-	cores             []core
-	off               bool
-	offRemove         bool
-	anyway            bool
-	stopDedicatedCore bool
-	StopRing          low.Rings
-	usedCores         uint8
-	checkTime         uint
-	debugTime         uint
-	Dropped           uint
-	maxPacketsToClone uint32
-	stopFlag          int32
-	maxRecv           int
-	Timers            []*Timer
-	nAttempts         []uint64
-	pAttempts         []uint64
-	maxInIndex        int32
-	measureRings      low.Rings
-	coreIndex         int
+	ff                 []*flowFunction
+	cores              []core
+	off                bool
+	offRemove          bool
+	unrestrictedClones bool
+	stopDedicatedCore  bool
+	StopRing           low.Rings
+	usedCores          uint8
+	checkTime          uint
+	debugTime          uint
+	Dropped            uint
+	maxPacketsToClone  uint32
+	stopFlag           int32
+	maxRecv            int
+	Timers             []*Timer
+	nAttempts          []uint64
+	pAttempts          []uint64
+	maxInIndex         int32
+	measureRings       low.Rings
+	coreIndex          int
 }
 
 type core struct {
@@ -184,7 +185,7 @@ type core struct {
 }
 
 func newScheduler(cpus []int, schedulerOff bool, schedulerOffRemove bool, stopDedicatedCore bool,
-	stopRing low.Rings, checkTime uint, debugTime uint, maxPacketsToClone uint32, maxRecv int, anyway bool) *scheduler {
+	stopRing low.Rings, checkTime uint, debugTime uint, maxPacketsToClone uint32, maxRecv int, unrestrictedClones bool) *scheduler {
 	coresNumber := len(cpus)
 	// Init scheduler
 	scheduler := new(scheduler)
@@ -200,7 +201,7 @@ func newScheduler(cpus []int, schedulerOff bool, schedulerOffRemove bool, stopDe
 	scheduler.debugTime = debugTime
 	scheduler.maxPacketsToClone = maxPacketsToClone
 	scheduler.maxRecv = maxRecv
-	scheduler.anyway = anyway
+	scheduler.unrestrictedClones = unrestrictedClones
 	scheduler.pAttempts = make([]uint64, len(scheduler.cores), len(scheduler.cores))
 
 	return scheduler
@@ -405,6 +406,13 @@ func (scheduler *scheduler) schedule(schedTime uint) {
 		common.LogDebug(common.Debug, "---------------")
 		common.LogDebug(common.Debug, "System is using", scheduler.usedCores, "cores now.", uint8(len(scheduler.cores))-scheduler.usedCores, "cores are left available.")
 		low.Statistics(float32(scheduler.debugTime) / 1000)
+		if printPortStatistics {
+			for i := range createdPorts {
+				if createdPorts[i].wasRequested {
+					low.PortStatistics(createdPorts[i].port)
+				}
+			}
+		}
 		for i := range scheduler.ff {
 			scheduler.ff[i].printDebug(schedTime)
 		}
@@ -549,7 +557,7 @@ func (scheduler *scheduler) schedule(schedTime uint) {
 			ffi.removed = false
 			continue
 		}
-		if ffi.inIndex[0] == 1 && scheduler.anyway && ffi.checkInputRingClonable(scheduler.maxPacketsToClone) &&
+		if ffi.inIndex[0] == 1 && scheduler.unrestrictedClones && ffi.checkInputRingClonable(scheduler.maxPacketsToClone) &&
 			ffi.checkOutputRingClonable(scheduler.maxPacketsToClone) &&
 			(ffi.increasedSpeed == 0 || ffi.increasedSpeed > ffi.reportedState.V.Packets) {
 			if scheduler.pAttempts[ffi.cloneNumber+1] == 0 {

internal/low/low.go (+23 -6)

@@ -517,12 +517,19 @@ func SrKNI(port uint16, flag *int32, coreID int, recv bool, OUT Rings, send bool
 }
 
 // Send - dequeue packets and send.
-func Send(port uint16, IN Rings, anyway bool, flag *int32, coreID int, stats *common.RXTXStats) {
+func Send(port uint16, IN Rings, unrestrictedClones bool, flag *int32, coreID int, stats *common.RXTXStats,
+	sendThreadIndex, totalSendTreads int) {
 	if C.rte_eth_dev_socket_id(C.uint16_t(port)) != C.int(C.rte_lcore_to_socket_id(C.uint(coreID))) {
 		common.LogWarning(common.Initialization, "Send port", port, "is on remote NUMA node to polling thread - not optimal performance.")
 	}
-	C.nff_go_send(C.uint16_t(port), C.extractDPDKRings((**C.struct_nff_go_ring)(unsafe.Pointer(&(IN[0]))), C.int32_t(len(IN))), C.int32_t(len(IN)),
-		C.bool(anyway), (*C.int)(unsafe.Pointer(flag)), C.int(coreID), (*C.RXTXStats)(unsafe.Pointer(stats)))
+	C.nff_go_send(C.uint16_t(port),
+		C.extractDPDKRings((**C.struct_nff_go_ring)(unsafe.Pointer(&(IN[0]))), C.int32_t(len(IN))),
+		C.int32_t(len(IN)),
+		C.bool(unrestrictedClones),
+		(*C.int)(unsafe.Pointer(flag)), C.int(coreID),
+		(*C.RXTXStats)(unsafe.Pointer(stats)),
+		C.int32_t(sendThreadIndex),
+		C.int32_t(totalSendTreads))
 }
 
 // Stop - dequeue and free packets.
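
Send forwards its arguments to the C helper nff_go_send by converting every Go value to its cgo counterpart and passing the address of the first ring together with an explicit length. The same marshaling pattern is shown in isolation below with an invented C function; sum_int32 exists only in this sketch and is not part of NFF-Go or DPDK.

// cgo_marshal_sketch.go — standalone illustration of the cgo call pattern.
package main

/*
#include <stdint.h>

// Toy C function standing in for the style of interface used above:
// it sums `count` int32 values starting at `data`.
static int64_t sum_int32(int32_t *data, int32_t count) {
	int64_t total = 0;
	for (int32_t i = 0; i < count; i++) {
		total += data[i];
	}
	return total;
}
*/
import "C"

import (
	"fmt"
	"unsafe"
)

func main() {
	values := []int32{1, 2, 3, 4}
	// Same pattern as the Send wrapper: take the address of the first slice
	// element, reinterpret it for C, and pass the length separately.
	total := C.sum_int32((*C.int32_t)(unsafe.Pointer(&values[0])), C.int32_t(len(values)))
	fmt.Println("sum =", total)
}
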
@@ -590,11 +597,16 @@ func GetPortsNumber() int {
 }
 
 func CheckPortRSS(port uint16) int32 {
-	return int32(C.check_port_rss(C.uint16_t(port)))
+	return int32(C.check_max_port_rx_queues(C.uint16_t(port)))
+}
+
+func CheckPortMaxTXQueues(port uint16) int32 {
+	return int32(C.check_max_port_tx_queues(C.uint16_t(port)))
 }
 
 // CreatePort initializes a new port using global settings and parameters.
-func CreatePort(port uint16, willReceive bool, promiscuous bool, hwtxchecksum, hwrxpacketstimestamp bool, inIndex int32) error {
+func CreatePort(port uint16, willReceive bool, promiscuous bool, hwtxchecksum,
+	hwrxpacketstimestamp bool, inIndex int32, tXQueuesNumberPerPort int) error {
 	var mempools **C.struct_rte_mempool
 	if willReceive {
 		m := CreateMempools("receive", inIndex)
@@ -603,7 +615,7 @@ func CreatePort(port uint16, willReceive bool, promiscuous bool, hwtxchecksum, h
 		mempools = nil
 	}
 	if C.port_init(C.uint16_t(port), C.bool(willReceive), mempools,
-		C._Bool(promiscuous), C._Bool(hwtxchecksum), C._Bool(hwrxpacketstimestamp), C.int32_t(inIndex)) != 0 {
+		C._Bool(promiscuous), C._Bool(hwtxchecksum), C._Bool(hwrxpacketstimestamp), C.int32_t(inIndex), C.int32_t(tXQueuesNumberPerPort)) != 0 {
 		msg := common.LogError(common.Initialization, "Cannot init port ", port, "!")
 		return common.WrapWithNFError(nil, msg, common.FailToInitPort)
 	}
@@ -697,6 +709,11 @@ func Statistics(N float32) {
 	C.statistics(C.float(N))
 }
 
+// PortStatistics print statistics about NIC port.
+func PortStatistics(port uint16) {
+	C.portStatistics(C.uint16_t(port))
+}
+
 // ReportMempoolsState prints used and free space of mempools.
 func ReportMempoolsState() {
 	for _, m := range usedMempools {
