Skip to content

Commit 4c5d450

Browse files
committed
applets.interface.better_la: better performance
This implements dynamic priority-based scheduling in the LAArbiter. It also improves the host-side Python code.
1 parent a5c4a74 commit 4c5d450

File tree

7 files changed

+346
-155
lines changed

7 files changed

+346
-155
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from collections import defaultdict
2+
import io
23
import logging
34
import argparse
45
from vcd import VCDWriter
@@ -9,27 +10,49 @@
910
from ....gateware.analyzer import *
1011
from ... import *
1112
from .signal_compressor import SignalCompressor
12-
from .arbeiter import LAArbeiter
13-
14-
# This LA uses a simple protocol for sending compressed values over the FIFO:
15-
# Each packet starts with a 8 bit size word. The size can be 0, then the word only consists of that
16-
# word. If the size is n != 0, the packet is n*2 bytes long. Each 16bit word is encoded acording
17-
# to the format described in the SignalCompressor value. The packets are round-robin for each pin.
13+
from .arbiter import LAArbiter
14+
15+
# This LA uses a simple protocol for sending compressed values over the FIFO, which is explained
16+
# in the arbiter.py (high level chunks) and signal_compressor.py (low level packets) files.
17+
# The basic architecture is as follows:
18+
# +------------------+ +--------+
19+
# Pin0 --->| SignalCompressor |------>| FIFO |-----+
20+
# +------------------+ +--------+ |
21+
# |
22+
# +------------------+ +--------+ |
23+
# Pin1 --->| SignalCompressor |------>| FIFO |-----+ +-----------+ +----------+
24+
# +------------------+ +--------+ | | | | |
25+
# +---->| LAArbiter |----->| USB-FIFO |
26+
# +------------------+ +--------+ | | | | |
27+
# Pin2 --->| SignalCompressor |------>| FIFO |-----+ +-----------+ +----------+
28+
# +------------------+ +--------+ |
29+
# |
30+
# +------------------+ +--------+ |
31+
# PinN --->| ... |------>| ... |-----+
32+
# +------------------+ +--------+
1833

1934
class BetterLASubtarget(Elaboratable):
    """Gateware top level of the logic analyzer.

    Instantiates an ``LAArbiter`` that writes into ``in_fifo`` and drives its ``input`` either
    from the (synchronized) input pads or, when ``counter_target`` is set, from an internal
    free-running counter that acts as a simulated target.
    """

    def __init__(self, pads, in_fifo, counter_target=False):
        # pads:           pad set; only ``pads.i_t.i`` is read by this class
        # in_fifo:        FIFO handed to the LAArbiter
        # counter_target: feed the analyzer from an internal counter instead of the pads
        self.pads = pads
        self.in_fifo = in_fifo
        self.counter_target = counter_target

        self.la = LAArbiter(in_fifo)

    def elaborate(self, platform):
        """Build the module and connect the arbiter input to the selected signal source."""
        m = Module()
        m.submodules += self.la

        if not self.counter_target:
            print("building bitstream connected to real target")
            # Pass the pad inputs through a synchronizer before handing them to the arbiter.
            pins_i = Signal.like(self.pads.i_t.i)
            m.submodules += FFSynchronizer(self.pads.i_t.i, pins_i)
            m.d.comb += self.la.input.eq(pins_i)
            return m

        print("building bitstream with simulated counter target")
        # The counter is two bits wider than the pin set and only the bits above the lowest
        # two are used, so the fastest simulated pin changes value every four clock cycles.
        counter = Signal(len(self.pads.i_t.i)+2)
        m.d.sync += counter.eq(counter + 1)
        m.d.comb += self.la.input.eq(counter[2:])
        return m
3558

@@ -46,12 +69,17 @@ def add_build_arguments(cls, parser, access):
4669
super().add_build_arguments(parser, access)
4770

4871
access.add_pin_set_argument(parser, "i", width=range(1, 17), default=1)
72+
parser.add_argument(
73+
"--counter-target", default=False, action="store_true",
74+
help="simulate a target with a counter signal",
75+
)
4976

5077
def build(self, target, args):
5178
self.mux_interface = iface = target.multiplexer.claim_interface(self, args)
5279
iface.add_subtarget(BetterLASubtarget(
5380
pads=iface.get_pads(args, pin_sets=("i",)),
54-
in_fifo=iface.get_in_fifo(depth=512*16),
81+
in_fifo=iface.get_in_fifo(depth=512*16, auto_flush=False),
82+
counter_target=args.counter_target
5583
))
5684

5785
self._sample_freq = target.sys_clk_freq
@@ -85,53 +113,76 @@ def add_interact_arguments(cls, parser):
85113
parser.add_argument(
86114
"file", metavar="VCD-FILE", type=argparse.FileType("w"),
87115
help="write VCD waveforms to VCD-FILE")
116+
parser.add_argument("--buffer-size", type=int, default=10,
117+
help="how much data to capture in MB")
88118

89119
async def interact(self, device, args, iface):
    """Capture a fixed-size buffer from the device, decode it and write it to a VCD file.

    The capture size is ``args.buffer_size`` MB, the output goes to ``args.file``. The raw
    buffer is split into chunks with ``LAArbiter.read_chunk``, the packets of each chunk are
    decoded with ``SignalCompressor.decode_pkg`` and the resulting value changes are written
    out in timestamp order. Only the time span for which every channel that delivered data
    has samples is written.

    An interrupted or empty capture no longer raises; it results in a VCD file that only
    contains the signal declarations.
    """
    # Step 1: record a buffer
    # The whole capture is read in one go before any decoding happens, so that host-side
    # processing cannot introduce lag spikes while data is streaming in over USB.
    buffer = b""
    try:
        print(f"starting capture of {args.buffer_size} MB")
        buffer = await iface.read(1024*1024 * args.buffer_size)
    except KeyboardInterrupt:
        # `buffer` keeps its empty default here; without the default, the decoding below
        # would fail with a NameError.
        print("capture interrupted")
    print("captured buffer, converting...")

    # Step 2: parse the packets from the captured buffer and sort them into channels
    ptr = 0

    async def read(size):
        # Serves `size` bytes from the captured buffer, or None once the request can no
        # longer be satisfied completely. A request that ends exactly at the end of the
        # buffer is still complete and therefore served.
        nonlocal ptr
        end = ptr + size
        if end > len(buffer):
            return None
        data = buffer[ptr:end]
        ptr = end
        return data

    channels = defaultdict(list)
    chunks = 0
    while True:
        read_result = await LAArbiter.read_chunk(read)
        if read_result is None:
            break
        channel, chunk = read_result
        if len(chunk) == 255:
            print(f"channel {channel} overrun")
            break
        channels[self._pins[channel]].extend(chunk)
        chunks += 1

    # Step 3: convert each channel's packets into events, attach the cycle at which they
    # occur and sort them by that cycle
    events = []
    channel_cycles = {}
    for p, pkgs in channels.items():
        cycle = 0
        for pkg in pkgs:
            for value, duration in SignalCompressor.decode_pkg(pkg):
                events.append((cycle, p, value))
                cycle += duration
        channel_cycles[p] = cycle
    # Only the span covered by every channel that delivered data is usable; this is 0 when
    # nothing was decoded at all.
    cycles = min(channel_cycles.values(), default=0)
    events.sort(key=lambda e: e[0])

    # Step 3.5: report statistics
    # The byte count follows the original accounting of one byte per chunk plus two bytes
    # per packet. The compression gain relates it to the number of one-bit samples that
    # were actually decoded, summed over all channels.
    total_pkgs = sum(len(pkgs) for pkgs in channels.values())
    total_bytes = chunks + total_pkgs * 2
    total_samples = sum(channel_cycles.values())
    print(f"captured {cycles} samples ({cycles / self._sample_freq * 1000}ms)")
    if total_bytes:
        print(f"chunking overhead: {chunks / total_bytes * 100}%")
    if total_samples:
        print(f"compression gain: {100 - (total_bytes * 8 / total_samples * 100)}%")

    # Step 4: write out VCD file
    vcd_writer = VCDWriter(args.file, timescale="1 ns", check_values=False)
    vcd_signals = {
        p: vcd_writer.register_var(scope="", name="pin[{}]".format(p), var_type="wire",
                                   size=1, init=0)
        for p in self._pins
    }
    # Stays None when no event is written; the writer is then closed without a final
    # timestamp.
    timestamp = None
    for cycle, p, value in events:
        if cycle > cycles:
            # we don't write any timestamps for which we don't have data on all channels
            break
        timestamp = cycle * 1_000_000_000 // self._sample_freq
        vcd_writer.change(vcd_signals[p], timestamp, value)
    vcd_writer.close(timestamp)

software/glasgow/applet/interface/better_la/arbeiter.py

-86
This file was deleted.

0 commit comments

Comments
 (0)