
Commit db82480

Fixes for #212 (#239)
- remove hardcoded pipeline length in PipelinedExecutor
- fix PyTorch iterator for multi-GPU
- adjust PyTorch example to use new nvJPEG API

Signed-off-by: Janusz Lisiecki <[email protected]>
1 parent 2c1c9c8 commit db82480

File tree (5 files changed: +10 −8 lines)

dali/benchmark/resnet50_bench.cc
dali/pipeline/executor/pipelined_executor.h
dali/python/nvidia/dali/pipeline.py
dali/python/nvidia/dali/plugin/pytorch.py
docs/examples/pytorch/main.py

dali/benchmark/resnet50_bench.cc (+3 −3)

@@ -37,7 +37,7 @@ BENCHMARK_DEFINE_F(RN50, C2Pipe)(benchmark::State& st) { // NOLINT
   Pipeline pipe(
       batch_size,
       num_thread,
-      0, -1, pipelined, 2,
+      0, -1, pipelined, 3,
       async);

   TensorList<CPUBackend> data;
@@ -167,7 +167,7 @@ BENCHMARK_DEFINE_F(RN50, HybridPipe)(benchmark::State& st) { // NOLINT
   Pipeline pipe(
       batch_size,
       num_thread,
-      0, -1, pipelined, 2,
+      0, -1, pipelined, 3,
       async);

   TensorList<CPUBackend> data;
@@ -299,7 +299,7 @@ BENCHMARK_DEFINE_F(RN50, nvJPEGPipe)(benchmark::State& st) { // NOLINT
   Pipeline pipe(
       batch_size,
       num_thread,
-      0, -1, pipelined, 2,
+      0, -1, pipelined, 3,
       async);

   TensorList<CPUBackend> data;

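The argument bumped from 2 to 3 is the pipeline's prefetch queue depth: with the hard-coded depth of 3 removed from PipelinedExecutor (see the header change below), the benchmarks now request that depth explicitly instead of relying on the executor to override it. A minimal sketch of the same knob from the Python side, assuming the Python Pipeline exposes it as a prefetch_queue_depth keyword argument (which the pipeline.py change below suggests); BenchPipe and the data path are illustrative only:

# Hedged sketch, not part of the patch: setting the queue depth at pipeline construction.
from nvidia.dali.pipeline import Pipeline
import nvidia.dali.ops as ops
import nvidia.dali.types as types

class BenchPipe(Pipeline):
    def __init__(self, batch_size, num_threads, device_id, data_dir):
        # Request a queue depth of 3 explicitly, mirroring the benchmark change above.
        super(BenchPipe, self).__init__(batch_size, num_threads, device_id,
                                        prefetch_queue_depth=3)
        self.input = ops.FileReader(file_root=data_dir, random_shuffle=True)
        self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)

    def define_graph(self):
        jpegs, labels = self.input()
        return self.decode(jpegs), labels

pipe = BenchPipe(batch_size=32, num_threads=2, device_id=0, data_dir="/data/imagenet/train")
pipe.build()
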
dali/pipeline/executor/pipelined_executor.h (−1)

@@ -42,7 +42,6 @@ class DLL_PUBLIC PipelinedExecutor : public Executor {
       bool set_affinity = false, int max_num_stream = -1, int prefetch_queue_depth = 2) :
     Executor(batch_size, num_thread, device_id, bytes_per_sample_hint,
              set_affinity, max_num_stream, prefetch_queue_depth) {
-    Executor::queue_depth_ = 3;
   }

   DLL_PUBLIC virtual ~PipelinedExecutor() = default;

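The deleted line made PipelinedExecutor silently overwrite the queue depth inherited from Executor, so the prefetch_queue_depth constructor argument had no effect in pipelined mode. A toy illustration (plain Python, not DALI code) of why the override was a problem: whatever depth the caller configures should be the depth actually used to size the in-flight batch queue.

# Toy model of a pipelined executor's prefetch queue; illustrative only, not DALI code.
from collections import deque

class ToyPipelinedExecutor:
    def __init__(self, prefetch_queue_depth=2):
        # The depth now comes solely from the constructor argument. The removed line
        # was the equivalent of `self.queue_depth = 3` here, clobbering whatever the
        # caller asked for.
        self.queue_depth = prefetch_queue_depth
        self._queue = deque()

    def prefetch(self, produce_batch):
        # Keep exactly `queue_depth` batches in flight.
        while len(self._queue) < self.queue_depth:
            self._queue.append(produce_batch())

    def outputs(self):
        # The consumer pops the oldest batch, freeing a slot for the next prefetch.
        return self._queue.popleft()

ex = ToyPipelinedExecutor(prefetch_queue_depth=2)
ex.prefetch(lambda: "batch")
assert len(ex._queue) == 2   # 2 means 2, with no hidden override to 3
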
dali/python/nvidia/dali/pipeline.py (+1)

@@ -313,6 +313,7 @@ def deserialize_and_build(self, serialized_pipeline):
                               self._num_threads,
                               self._device_id,
                               self._exec_pipelined,
+                              self._prefetch_queue_depth,
                               self._exec_async,
                               self._bytes_per_sample,
                               self._set_affinity,

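deserialize_and_build() hands the stored pipeline parameters to the backend when rebuilding from a serialized definition; before this change the prefetch queue depth was omitted, so a rebuilt pipeline fell back to the backend default instead of the depth it was originally configured with. A hedged sketch of the round trip, reusing the illustrative BenchPipe class from the sketch after the benchmark diff above and the same prefetch_queue_depth assumption:

# Hedged sketch of the serialize / rebuild round trip. Pipeline.serialize() and
# Pipeline.deserialize_and_build() are the DALI Python methods involved.
from nvidia.dali.pipeline import Pipeline

pipe = BenchPipe(batch_size=32, num_threads=2, device_id=0,
                 data_dir="/data/imagenet/train")   # configured with prefetch_queue_depth=3
serialized = pipe.serialize()                       # serialized pipeline definition

# Rebuild on a plain Pipeline object; with this fix the backend pipeline is constructed
# with self._prefetch_queue_depth instead of silently dropping it.
rebuilt = Pipeline(batch_size=32, num_threads=2, device_id=0, prefetch_queue_depth=3)
rebuilt.deserialize_and_build(serialized)
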
dali/python/nvidia/dali/plugin/pytorch.py (+3 −3)

@@ -146,10 +146,10 @@ def __next__(self):
                 feed_ndarray(d_arr, pyt_data[j])
             for j, l_arr in enumerate(labels):
                 feed_ndarray(l_arr, pyt_labels[j])
-            for p in self._pipes:
-                p._release_outputs()
-                p._start_run()

+        for p in self._pipes:
+            p._release_outputs()
+            p._start_run()

         copy_db_index = self._current_data_batch
         # Change index for double buffering

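This is the multi-GPU fix from the commit message: previously the iterator released outputs and started the next run for every pipeline inside the per-GPU copy loop, so with more than one pipeline the later GPUs' buffers could be recycled before their data had been copied into the PyTorch tensors. The block now runs once, after all pipelines have been drained. A hedged sketch of driving the iterator with one pipeline per GPU in a single process (the case the fix targets), assuming the plugin's DALIClassificationIterator(pipelines, size) constructor of that era and reusing the HybridTrainPipe class from docs/examples/pytorch/main.py below (which relies on that script's argparse args for sharding):

# Hedged usage sketch, not part of the patch. DALIClassificationIterator, its
# (pipelines, size) signature and the returned batch layout may differ between
# DALI versions; HybridTrainPipe comes from docs/examples/pytorch/main.py.
from nvidia.dali.plugin.pytorch import DALIClassificationIterator

N_GPUS = 2  # one pipeline per GPU, all consumed by a single iterator
pipes = [HybridTrainPipe(batch_size=64, num_threads=4, device_id=i,
                         data_dir="/data/imagenet/train", crop=224)
         for i in range(N_GPUS)]
for p in pipes:
    p.build()

train_loader = DALIClassificationIterator(pipes, size=1281167)  # illustrative dataset size

# With the fix, each __next__ call copies the outputs of *all* pipelines first and only
# then lets every pipeline release its buffers and start the next run, so GPU 1's batch
# is no longer overwritten while it is still being read.
batch = next(train_loader)   # per-GPU list of outputs; exact layout is version-dependent
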
docs/examples/pytorch/main.py (+3 −1)

@@ -80,7 +80,9 @@ class HybridTrainPipe(Pipeline):
     def __init__(self, batch_size, num_threads, device_id, data_dir, crop):
         super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)
         self.input = ops.FileReader(file_root=data_dir, shard_id=args.local_rank, num_shards=args.world_size, random_shuffle=True)
-        self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)
+        # This padding sets the size of the internal nvJPEG buffers to be able to handle all images from full-sized ImageNet
+        # without additional reallocations
+        self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB, device_memory_padding=211025920, host_memory_padding=140544512)
         self.rrc = ops.RandomResizedCrop(device="gpu", size =(crop, crop))
         self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                             output_dtype=types.FLOAT,

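The example now preallocates nvJPEG's scratch buffers through the decoder's padding arguments, so decoding even the largest full-resolution ImageNet JPEGs does not trigger a reallocation mid-training. Purely for readability, a quick conversion of the byte values used above:

# The padding values from the example above, converted to MiB for readability.
device_memory_padding = 211025920   # device-side nvJPEG allocation padding, in bytes
host_memory_padding = 140544512     # host-side nvJPEG allocation padding, in bytes

print(device_memory_padding / 2**20)  # 201.25 MiB
print(host_memory_padding / 2**20)    # ~134.03 MiB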