[Feature] Support unpadded whole-trajectory collector batches #3876
Merged
Conversation
🔗 Helpful Links: 🧪 See artifacts and rendered test results at hud.pytorch.org/pr/pytorch/rl/3876
Note: Links to docs will display an error until the docs builds have been completed. ✅ No Failures as of commit 2b987bf with merge base 6364a19. This comment was automatically generated by Dr. CI and updates every 15 minutes.
This was referenced Jun 17, 2026
This was referenced Jun 17, 2026
Contributor
Benchmark Results: PR
|
| Benchmark | main ops | PR ops | Change |
|---|---|---|---|
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-10000] |
465.89 | 3,054 | +555.51% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictPrioritizedReplayBuffer-ListStorage-None-400] |
50.81 | 190.41 | +274.72% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-400] |
192.98 | 54.57 | -71.72% |
benchmarks/test_objectives_benchmarks.py::test_values[vec_td1_return_estimate-False-False] |
86.58 | 55.78 | -35.57% |
benchmarks/test_objectives_benchmarks.py::test_values[vec_td_lambda_return_estimate-True-False] |
86.33 | 55.85 | -35.31% |
benchmarks/test_objectives_benchmarks.py::test_values[vec_generalized_advantage_estimate-True-True] |
85.05 | 56.08 | -34.07% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-10000] |
2,804 | 3,732 | +33.06% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-10000] |
3,647 | 2,806 | -23.05% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-10000] |
1,914 | 2,336 | +22.03% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-10000] |
3,413 | 2,676 | -21.60% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-10000] |
1,830 | 2,217 | +21.17% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-400] |
447.22 | 532.87 | +19.15% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-False-True-False] |
26,855 | 31,648 | +17.85% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-10000] |
2,315 | 1,949 | -15.81% |
benchmarks/test_envs_benchmark.py::test_cat_frames_functional[16-same] |
18.93 | 21.91 | +15.77% |
benchmarks/test_envs_benchmark.py::test_cat_frames_functional[4-same] |
27.02 | 23.22 | -14.07% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-10000] |
2,259 | 1,980 | -12.35% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-10000] |
3,391 | 2,989 | -11.86% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-10000] |
2,958 | 2,628 | -11.16% |
benchmarks/test_objectives_benchmarks.py::test_dqn_speed[True-backward] |
890.48 | 986.11 | +10.74% |
benchmarks/test_objectives_benchmarks.py::test_td3_speed[True-backward] |
254.90 | 281.46 | +10.42% |
benchmarks/test_objectives_benchmarks.py::test_ddpg_speed[True-backward] |
376.26 | 414.55 | +10.18% |
benchmarks/test_objectives_benchmarks.py::test_gae_speed[vec_generalized_advantage_estimate-True-32-512] |
32.30 | 29.29 | -9.34% |
benchmarks/test_objectives_benchmarks.py::test_reinforce_speed[True-None] |
367.29 | 334.48 | -8.93% |
benchmarks/test_objectives_benchmarks.py::test_sac_speed[True-backward] |
230.88 | 251.32 | +8.85% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-400] |
1,025 | 1,111 | +8.33% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[True-backward] |
65.25 | 59.93 | -8.15% |
benchmarks/test_objectives_benchmarks.py::test_reinforce_speed[reduce-overhead-None] |
366.66 | 338.86 | -7.58% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[reduce-overhead-None] |
127.51 | 118.17 | -7.33% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[True-None] |
127.78 | 118.50 | -7.26% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-True-False-True] |
40,089 | 37,258 | -7.06% |
benchmarks/test_objectives_benchmarks.py::test_reinforce_speed[False-None] |
227.05 | 212.71 | -6.32% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-sampler6-10000] |
771.57 | 726.30 | -5.87% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-400] |
863.65 | 913.37 | +5.76% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-1000000-10000-100-True] |
22.39 | 23.66 | +5.67% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-scan-False-0-gru] |
2.9059 | 3.0552 | +5.14% |
benchmarks/test_objectives_benchmarks.py::test_ppo_speed[reduce-overhead-None] |
287.05 | 272.38 | -5.11% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-False-False-True] |
38,485 | 36,523 | -5.10% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[False-None] |
51.75 | 49.22 | -4.90% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-cudnn-True-0-gru] |
1.4479 | 1.3808 | -4.64% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-1000000-10000-100-False] |
47.10 | 49.19 | +4.44% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-cudnn-False-0-gru] |
1.3566 | 1.2982 | -4.30% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-False-True-True] |
20,159 | 19,327 | -4.13% |
benchmarks/test_envs_benchmark.py::test_transformed |
0.8795 | 0.9140 | +3.92% |
benchmarks/test_objectives_benchmarks.py::test_cql_speed[True-backward] |
60.25 | 57.94 | -3.83% |
benchmarks/test_objectives_benchmarks.py::test_gae_speed[vec_generalized_advantage_estimate-True-1-512] |
650.58 | 674.81 | +3.72% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_contiguous[100-img_shape1-atari] |
5,132 | 4,941 | -3.72% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-400] |
501.40 | 483.00 | -3.67% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-scan-False-0-lstm] |
2.0100 | 2.0773 | +3.35% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-False-True-True] |
19,938 | 19,277 | -3.32% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-True-False-True] |
42,159 | 40,830 | -3.15% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_lazystack_then_write[100-img_shape2-large_img] |
401.60 | 413.80 | +3.04% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-True-True-True] |
21,214 | 20,570 | -3.03% |
benchmarks/test_compressed_storage_benchmark.py::TestCompressedStorageBenchmark::test_tensor_to_bytestream_speed[pickle] |
12,151 | 11,783 | -3.03% |
benchmarks/test_objectives_benchmarks.py::test_redq_speed[True-None] |
218.78 | 225.25 | +2.96% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-True-True-True] |
23,901 | 23,197 | -2.94% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-True-True-False] |
28,735 | 29,560 | +2.87% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_lazystack[100-img_shape2-large_img] |
423.83 | 435.98 | +2.87% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-cudnn-True-0-lstm] |
0.9397 | 0.9131 | -2.83% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-True-False-True] |
32,910 | 31,985 | -2.81% |
benchmarks/test_objectives_benchmarks.py::test_redq_speed[reduce-overhead-None] |
223.58 | 229.39 | +2.60% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_lazystack_then_write[100-img_shape1-atari] |
633.96 | 650.34 | +2.58% |
benchmarks/test_objectives_benchmarks.py::test_gae_speed[vec_generalized_advantage_estimate-False-1-512] |
2,159 | 2,214 | +2.56% |
benchmarks/test_objectives_benchmarks.py::test_reinforce_speed[True-backward] |
130.08 | 126.78 | -2.54% |
benchmarks/test_envs_benchmark.py::test_serial |
0.5694 | 0.5834 | +2.47% |
benchmarks/test_compressed_storage_benchmark.py::TestCompressedStorageBenchmark::test_tensor_to_bytestream_speed[numpy] |
343,696 | 352,175 | +2.47% |
benchmarks/test_collectors_benchmark.py::test_single_with_rb |
8.4693 | 8.6744 | +2.42% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_lazystack_then_write[50-img_shape0-small] |
3,428 | 3,509 | +2.37% |
benchmarks/test_objectives_benchmarks.py::test_ddpg_speed[reduce-overhead-None] |
699.98 | 716.43 | +2.35% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyTensorStorage-sampler7-10000] |
827.30 | 808.16 | -2.31% |
benchmarks/test_objectives_benchmarks.py::test_redq_deprec_speed[reduce-overhead-None] |
279.42 | 285.87 | +2.31% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_lazystack[200-img_shape3-large_batch] |
332.29 | 339.90 | +2.29% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-10000-10000-100-True] |
24.92 | 25.49 | +2.28% |
benchmarks/test_replaybuffer_benchmark.py::TestPrioritizedReplayBufferBenchmark::test_sampler_sample_scale[1000000-cpu] |
97.09 | 99.30 | +2.28% |
benchmarks/test_envs_benchmark.py::test_parallel |
0.9703 | 0.9484 | -2.25% |
benchmarks/test_objectives_benchmarks.py::test_ddpg_speed[True-None] |
693.69 | 708.82 | +2.18% |
benchmarks/test_rnn_reset_backends_benchmark.py::test_rnn_rollout_with_intermediate_resets[b256-t128-i32-h512-cudnn-False-0-lstm] |
0.8514 | 0.8334 | -2.11% |
benchmarks/test_objectives_benchmarks.py::test_td3_speed[True-None] |
545.75 | 557.27 | +2.11% |
benchmarks/test_objectives_benchmarks.py::test_values[generalized_advantage_estimate-True-True] |
91.91 | 93.83 | +2.09% |
benchmarks/test_objectives_benchmarks.py::test_redq_deprec_speed[True-backward] |
133.47 | 136.13 | +1.99% |
benchmarks/test_replaybuffer_benchmark.py::TestPrioritizedReplayBufferBenchmark::test_sample_mixed_devices[1000000-memmap_cpu_storage_cpu... |
82.08 | 83.71 | +1.99% |
benchmarks/test_objectives_benchmarks.py::test_redq_speed[False-backward] |
56.52 | 55.41 | -1.96% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_lazystack_then_write[200-img_shape3-large_batch] |
310.18 | 316.22 | +1.95% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_with_rb[100-img_shape0-atari] |
25.51 | 26.00 | +1.89% |
benchmarks/test_objectives_benchmarks.py::test_reinforce_speed[False-backward] |
135.34 | 132.82 | -1.86% |
benchmarks/test_objectives_benchmarks.py::test_dqn_speed[False-backward] |
507.53 | 516.96 | +1.86% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-False-False-True] |
35,373 | 34,721 | -1.84% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-parallel-no-buffers-True] |
0.2120 | 0.2159 | +1.84% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_stack_then_write[100-img_shape2-large_img] |
175.92 | 172.71 | -1.82% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[False-backward] |
33.52 | 32.91 | -1.82% |
benchmarks/test_objectives_benchmarks.py::test_sac_speed[True-None] |
469.12 | 477.58 | +1.80% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-True-True-False] |
34,963 | 34,338 | -1.79% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-100000-10000-100-False] |
53.09 | 52.15 | -1.78% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-parallel-buffers-False] |
0.5985 | 0.6090 | +1.74% |
benchmarks/test_objectives_benchmarks.py::test_redq_deprec_speed[False-None] |
88.25 | 89.78 | +1.73% |
benchmarks/test_objectives_benchmarks.py::test_values[td0_return_estimate-False-False] |
7,793 | 7,924 | +1.68% |
benchmarks/test_compressed_storage_benchmark.py::TestCompressedStorageBenchmark::test_tensor_to_bytestream_speed[safetensors] |
23,362 | 22,982 | -1.62% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-ListStorage-RandomSampler-400] |
194.56 | 197.70 | +1.61% |
benchmarks/test_objectives_benchmarks.py::test_dqn_speed[reduce-overhead-None] |
1,820 | 1,849 | +1.61% |
benchmarks/test_objectives_benchmarks.py::test_a2c_speed[False-None] |
176.84 | 179.66 | +1.59% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-False-True-True] |
22,033 | 21,684 | -1.59% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_contiguous[100-img_shape2-large_img] |
569.47 | 578.18 | +1.53% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-100000-10000-100-True] |
24.32 | 24.69 | +1.50% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_contiguous[200-img_shape3-large_batch] |
776.60 | 788.05 | +1.48% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-True-True-True] |
20,314 | 20,613 | +1.47% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-400] |
507.24 | 514.63 | +1.46% |
benchmarks/test_objectives_benchmarks.py::test_cql_speed[True-None] |
86.45 | 85.20 | -1.44% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-parallel-no-buffers-False] |
0.2223 | 0.2253 | +1.31% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_stack_then_write[200-img_shape3-large_batch] |
140.88 | 139.05 | -1.30% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-serial-no-buffers-True] |
0.5956 | 0.6033 | +1.29% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-True-False-True] |
30,280 | 29,896 | -1.27% |
benchmarks/test_objectives_benchmarks.py::test_td3_speed[False-None] |
121.68 | 123.19 | +1.24% |
benchmarks/test_compressed_storage_benchmark.py::TestCompressedStorageBenchmark::test_tensor_to_bytestream_speed[torch.save] |
6,986 | 6,900 | -1.24% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_lazystack[50-img_shape0-small] |
4,348 | 4,401 | +1.23% |
benchmarks/test_collectors_benchmark.py::test_single |
8.7479 | 8.8553 | +1.23% |
benchmarks/test_objectives_benchmarks.py::test_dqn_speed[False-None] |
697.01 | 705.50 | +1.22% |
benchmarks/test_objectives_benchmarks.py::test_sac_speed[False-None] |
123.16 | 124.66 | +1.22% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-False-False-False] |
43,545 | 43,020 | -1.20% |
benchmarks/test_objectives_benchmarks.py::test_ppo_speed[True-backward] |
114.84 | 116.17 | +1.15% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_contiguous[50-img_shape0-small] |
7,190 | 7,108 | -1.14% |
| ... | ... | ... | Showing 120 of 192 comparisons, sorted by absolute change. |
GPU
Compared 202 benchmarks. Regressions over 5%: 14. Improvements over 5%: 12.
| Benchmark | main ops | PR ops | Change |
|---|---|---|---|
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-400] |
44.09 | 196.00 | +344.53% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-ListStorage-RandomSampler-400] |
194.53 | 39.31 | -79.79% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[False-None] |
98.17 | 59.27 | -39.62% |
benchmarks/test_objectives_benchmarks.py::test_reinforce_speed[reduce-overhead-None] |
97.61 | 123.73 | +26.76% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-10000] |
3,109 | 2,446 | -21.33% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-LazyTensorStorage-RandomSampler-10000] |
3,290 | 2,748 | -16.49% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-sampler6-10000] |
690.09 | 780.43 | +13.09% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-10000] |
1,958 | 2,197 | +12.18% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-10000] |
1,972 | 2,212 | +12.17% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-10000] |
1,983 | 2,223 | +12.08% |
benchmarks/test_collectors_benchmark.py::test_single_with_rb_pixels |
5.3687 | 4.7458 | -11.60% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-10000] |
2,897 | 2,570 | -11.31% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-10000] |
3,280 | 2,967 | -9.54% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-10000] |
3,120 | 3,399 | +8.96% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-10000] |
3,160 | 2,887 | -8.64% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-LazyMemmapStorage-SamplerWithoutReplacement-400] |
496.82 | 454.74 | -8.47% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-400] |
509.89 | 467.55 | -8.30% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-400] |
776.09 | 713.52 | -8.06% |
benchmarks/test_objectives_benchmarks.py::test_ppo_speed[True-backward] |
328.41 | 353.89 | +7.76% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_contiguous[100-img_shape2-large_img] |
575.42 | 536.94 | -6.69% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-single-True] |
1.3143 | 1.3969 | +6.28% |
benchmarks/test_objectives_benchmarks.py::test_ddpg_speed[True-backward] |
444.47 | 471.53 | +6.09% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_stack_then_write[100-img_shape1-atari] |
276.13 | 259.55 | -6.00% |
benchmarks/test_objectives_benchmarks.py::test_values[vec_generalized_advantage_estimate-True-True] |
313.16 | 330.86 | +5.65% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictPrioritizedReplayBuffer-LazyMemmapStorage-None-400] |
497.37 | 469.33 | -5.64% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictPrioritizedReplayBuffer-LazyTensorStorage-None-10000] |
2,198 | 2,321 | +5.59% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[True-backward] |
239.44 | 251.11 | +4.87% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[reduce-overhead-None] |
107.47 | 102.53 | -4.60% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyTensorStorage-sampler7-10000] |
737.26 | 770.88 | +4.56% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_contiguous[200-img_shape3-large_batch] |
744.49 | 711.49 | -4.43% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-True-False-True] |
36,897 | 38,502 | +4.35% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-False-True-True] |
21,652 | 22,575 | +4.26% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-True-False-False] |
56,408 | 58,810 | +4.26% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_lazystack[50-img_shape0-small] |
4,517 | 4,325 | -4.25% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-LazyMemmapStorage-RandomSampler-10000] |
2,547 | 2,449 | -3.86% |
benchmarks/test_objectives_benchmarks.py::test_reinforce_speed[True-backward] |
361.81 | 375.72 | +3.84% |
benchmarks/test_envs_benchmark.py::test_simple |
1.2425 | 1.1948 | -3.84% |
benchmarks/test_objectives_benchmarks.py::test_ppo_speed[True-None] |
669.69 | 692.85 | +3.46% |
benchmarks/test_objectives_benchmarks.py::test_ddpg_speed[False-None] |
349.30 | 337.34 | -3.42% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-True-False-True] |
41,524 | 42,924 | +3.37% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-False-True-False] |
31,456 | 32,495 | +3.30% |
benchmarks/test_replaybuffer_benchmark.py::TestPrioritizedReplayBufferBenchmark::test_sample_mixed_devices[1000000-cuda_storage_cpu_sampler] |
90.17 | 87.28 | -3.21% |
benchmarks/test_objectives_benchmarks.py::test_sac_speed[False-None] |
113.50 | 109.96 | -3.12% |
benchmarks/test_objectives_benchmarks.py::test_reinforce_speed[False-backward] |
265.99 | 274.15 | +3.07% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_without_rb_cuda[200-img_shape1-large_batch] |
8.9826 | 8.7076 | -3.06% |
benchmarks/test_objectives_benchmarks.py::test_redq_deprec_speed[reduce-overhead-None] |
111.84 | 108.48 | -3.01% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_with_rb_cuda[100-img_shape0-atari] |
17.23 | 16.73 | -2.90% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-True-False-True] |
29,692 | 30,549 | +2.89% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictPrioritizedReplayBuffer-ListStorage-None-4000] |
159.12 | 163.32 | +2.64% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-False-True-True] |
19,372 | 19,880 | +2.62% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-False-False-True] |
37,206 | 38,149 | +2.53% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-False-False-True] |
28,076 | 28,785 | +2.53% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-False-True-True] |
19,522 | 20,013 | +2.52% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-True-False-False] |
63,632 | 65,208 | +2.48% |
benchmarks/test_replaybuffer_benchmark.py::TestPrioritizedReplayBufferBenchmark::test_sample_mixed_devices[1000000-cuda_storage_cuda_samp... |
1,487 | 1,451 | -2.47% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-True-False-True] |
32,140 | 32,918 | +2.42% |
benchmarks/test_objectives_benchmarks.py::test_sac_speed[reduce-overhead-None] |
103.32 | 100.84 | -2.40% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_lazystack_then_write[100-img_shape1-atari] |
653.61 | 638.21 | -2.36% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[False-backward] |
68.48 | 70.06 | +2.31% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-False-True-False] |
31,765 | 32,492 | +2.29% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictReplayBuffer-LazyTensorStorage-SamplerWithoutReplacement-400] |
966.98 | 989.10 | +2.29% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_populate[TensorDictPrioritizedReplayBuffer-ListStorage-None-400] |
184.83 | 188.98 | +2.25% |
benchmarks/test_replaybuffer_benchmark.py::TestPrioritizedReplayBufferBenchmark::test_sample_mixed_devices[1000000-memmap_cpu_storage_cud... |
979.41 | 957.49 | -2.24% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_lazystack_then_write[100-img_shape2-large_img] |
412.86 | 403.62 | -2.24% |
benchmarks/test_envs_benchmark.py::test_transformed |
0.7142 | 0.6985 | -2.19% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_sample[TensorDictReplayBuffer-ListStorage-RandomSampler-4000] |
161.56 | 158.06 | -2.16% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_with_rb_cuda[200-img_shape1-large_batch] |
8.6019 | 8.4185 | -2.13% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-False-False-False] |
44,357 | 45,271 | +2.06% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-serial-no-buffers-True] |
0.5918 | 0.6039 | +2.05% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-single-False] |
1.5962 | 1.6280 | +2.00% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-10000-10000-100-False] |
53.69 | 52.62 | -1.99% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_stack_then_write[100-img_shape2-large_img] |
171.34 | 167.93 | -1.99% |
benchmarks/test_objectives_benchmarks.py::test_values[td0_return_estimate-False-False] |
11,431 | 11,208 | -1.95% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-True-True-True] |
20,538 | 20,937 | +1.94% |
benchmarks/test_compressed_storage_benchmark.py::TestCompressedStorageBenchmark::test_tensor_to_bytestream_speed[numpy] |
346,309 | 339,906 | -1.85% |
benchmarks/test_objectives_benchmarks.py::test_iql_speed[True-None] |
507.46 | 516.76 | +1.83% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-True-True-True] |
18,430 | 18,764 | +1.82% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-True-True-True] |
23,130 | 23,549 | +1.81% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-True-True-True] |
20,988 | 21,369 | +1.81% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_iterate[TensorDictReplayBuffer-ListStorage-SamplerWithoutReplacement-4000] |
168.23 | 165.19 | -1.80% |
benchmarks/test_replaybuffer_benchmark.py::TestPrioritizedReplayBufferBenchmark::test_sampler_sample_scale[1000000-cuda] |
2,247 | 2,208 | -1.71% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-True-False-False] |
77,681 | 79,006 | +1.71% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-parallel-no-buffers-False] |
0.2289 | 0.2250 | -1.69% |
benchmarks/test_objectives_benchmarks.py::test_reinforce_speed[True-None] |
757.69 | 769.71 | +1.59% |
benchmarks/test_objectives_benchmarks.py::test_values[td_lambda_return_estimate-True-False] |
12.12 | 11.93 | -1.58% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-100000-10000-100-True] |
23.06 | 22.69 | -1.58% |
benchmarks/test_objectives_benchmarks.py::test_values[td1_return_estimate-False-False] |
20.04 | 19.72 | -1.58% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-True-False-False-True] |
29,871 | 30,337 | +1.56% |
benchmarks/test_envs_benchmark.py::test_serial |
0.4172 | 0.4236 | +1.55% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_without_rb[100-img_shape0-atari] |
30.33 | 29.86 | -1.55% |
benchmarks/test_objectives_benchmarks.py::test_a2c_speed[False-None] |
272.96 | 277.08 | +1.51% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_lazystack[100-img_shape2-large_img] |
421.27 | 427.61 | +1.50% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_with_rb[200-img_shape1-large_batch] |
13.32 | 13.12 | -1.50% |
benchmarks/test_replaybuffer_benchmark.py::test_rb_extend_sample[ReplayBuffer-LazyTensorStorage-RandomSampler-10000-10000-100-True] |
23.72 | 24.08 | +1.50% |
benchmarks/test_objectives_benchmarks.py::test_cql_speed[reduce-overhead-None] |
89.83 | 91.16 | +1.49% |
benchmarks/test_objectives_benchmarks.py::test_td3_speed[True-None] |
741.90 | 731.07 | -1.46% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-serial-no-buffers-False] |
0.6845 | 0.6942 | +1.43% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_contiguous[100-img_shape1-atari] |
4,024 | 4,081 | +1.41% |
benchmarks/test_objectives_benchmarks.py::test_dqn_speed[True-backward] |
888.45 | 876.24 | -1.37% |
benchmarks/test_objectives_benchmarks.py::test_dqn_speed[reduce-overhead-None] |
1,898 | 1,873 | -1.35% |
benchmarks/test_objectives_benchmarks.py::test_sac_speed[True-None] |
618.62 | 610.31 | -1.34% |
benchmarks/test_storage_write_benchmark.py::TestCollectorIntegrationBenchmark::test_collector_without_rb_cuda[100-img_shape0-atari] |
17.85 | 17.62 | -1.31% |
benchmarks/test_objectives_benchmarks.py::test_redq_deprec_speed[False-None] |
99.52 | 98.22 | -1.31% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_storage_write_lazystack[100-img_shape1-atari] |
683.97 | 675.06 | -1.30% |
benchmarks/test_objectives_benchmarks.py::test_gae_speed[generalized_advantage_estimate-False-1-512] |
47.76 | 47.15 | -1.29% |
benchmarks/test_objectives_benchmarks.py::test_td3_speed[reduce-overhead-None] |
46.27 | 45.68 | -1.27% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-True-True-False] |
34,785 | 35,219 | +1.25% |
benchmarks/test_non_tensor_env_benchmark.py::test_non_tensor_env_rollout_speed[1000-parallel-buffers-False] |
0.5911 | 0.5839 | -1.23% |
benchmarks/test_objectives_benchmarks.py::test_redq_deprec_speed[False-backward] |
71.66 | 70.80 | -1.21% |
benchmarks/test_compressed_storage_benchmark.py::TestCompressedStorageBenchmark::test_tensor_to_bytestream_speed[torch.save] |
7,177 | 7,092 | -1.18% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-True-True-True-False] |
42,270 | 42,764 | +1.17% |
benchmarks/test_storage_write_benchmark.py::TestStorageWriteBenchmark::test_collector_lazystack_then_write[50-img_shape0-small] |
3,554 | 3,595 | +1.16% |
benchmarks/test_envs_benchmark.py::test_cat_frames_functional[16-constant] |
4,685 | 4,739 | +1.16% |
benchmarks/test_objectives_benchmarks.py::test_ddpg_speed[False-backward] |
233.16 | 235.74 | +1.10% |
benchmarks/test_objectives_benchmarks.py::test_ppo_speed[reduce-overhead-None] |
799.51 | 808.26 | +1.09% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[True-False-False-False-False] |
55,321 | 55,923 | +1.09% |
benchmarks/test_collectors_benchmark.py::test_sync |
10.53 | 10.41 | -1.09% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-True-False-False] |
49,992 | 50,529 | +1.08% |
benchmarks/test_envs_benchmark.py::test_step_mdp_speed[False-False-False-True-False] |
27,354 | 27,638 | +1.04% |
benchmarks/test_collectors_benchmark.py::test_single |
6.6970 | 6.7660 | +1.03% |
| ... | ... | ... | Showing 120 of 202 comparisons, sorted by absolute change. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Stack from ghstack (oldest at bottom):