Skip to content
This repository was archived by the owner on Mar 21, 2024. It is now read-only.

Commit 68a50fa

Browse files
committed
Fix DeviceHistogram null-pointer exception bug for iterator inputs - Update device histogram testing to include iterator-based samples - Prevent a few macro redefinitions - Update doc for 1.7.2
1 parent 16c2f87 commit 68a50fa

File tree

5 files changed

+193
-76
lines changed

5 files changed

+193
-76
lines changed

CHANGE_LOG.TXT

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
1.7.3 08/28/2017
2+
- Bug fixes:
3+
- Issue #110: DeviceHistogram null-pointer exception bug for iterator inputs
4+
5+
//-----------------------------------------------------------------------------
6+
17
1.7.2 08/26/2017
28
- Bug fixes:
39
- Issue #104: Device-wide reduction is now "run-to-run" deterministic for

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<hr>
22
<h3>About CUB</h3>
33

4-
Current release: v1.7.2 (08/26/2017)
4+
Current release: v1.7.3 (08/28/2017)
55

66
We recommend the [CUB Project Website](http://nvlabs.github.com/cub) and the [cub-users discussion forum](http://groups.google.com/group/cub-users) for further information and examples.
77

cub/agent/agent_histogram.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,7 @@ struct AgentHistogram
746746
((row_bytes & pixel_mask) == 0); // number of row-samples is a multiple of the alignment of the pixel
747747

748748
// Whether rows are aligned and can be vectorized
749-
if ((d_native_samples != nullptr) && (quad_aligned_rows || pixel_aligned_rows))
749+
if ((d_native_samples != NULL) && (quad_aligned_rows || pixel_aligned_rows))
750750
ConsumeTiles<true>(num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue, Int2Type<IS_WORK_STEALING>());
751751
else
752752
ConsumeTiles<false>(num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue, Int2Type<IS_WORK_STEALING>());

cub/util_arch.cuh

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ namespace cub {
4343

4444
#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
4545

46-
#if (__CUDACC_VER_MAJOR__ >= 9)
47-
#define CUB_USE_COOPERATIVE_GROUPS
46+
#if (__CUDACC_VER_MAJOR__ >= 9) && !defined(CUB_USE_COOPERATIVE_GROUPS)
47+
#define CUB_USE_COOPERATIVE_GROUPS
4848
#endif
4949

5050
/// CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host pass).
@@ -117,25 +117,32 @@ namespace cub {
117117

118118

119119
/// Scale down the number of warps to keep same amount of "tile" storage as the nominal configuration for 4B data. Minimum of two warps.
120-
#define CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \
121-
(CUB_MIN( \
122-
NOMINAL_4B_BLOCK_THREADS * 2, \
123-
CUB_WARP_THREADS(PTX_ARCH) * CUB_MAX( \
124-
(NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH)) * 3 / 4, \
125-
(NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH)) * 4 / sizeof(T))))
120+
#ifndef CUB_BLOCK_THREADS
121+
#define CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \
122+
(CUB_MIN( \
123+
NOMINAL_4B_BLOCK_THREADS * 2, \
124+
CUB_WARP_THREADS(PTX_ARCH) * CUB_MAX( \
125+
(NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH)) * 3 / 4, \
126+
(NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH)) * 4 / sizeof(T))))
127+
#endif
126128

127129
/// Scale up/down number of items per thread to keep the same amount of "tile" storage as the nominal configuration for 4B data. Minimum 1 item per thread
128-
#define CUB_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \
129-
(CUB_MIN( \
130-
NOMINAL_4B_ITEMS_PER_THREAD * 2, \
131-
CUB_MAX( \
132-
1, \
133-
(NOMINAL_4B_ITEMS_PER_THREAD * NOMINAL_4B_BLOCK_THREADS * 4 / sizeof(T)) / CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH))))
130+
#ifndef CUB_ITEMS_PER_THREAD
131+
#define CUB_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \
132+
(CUB_MIN( \
133+
NOMINAL_4B_ITEMS_PER_THREAD * 2, \
134+
CUB_MAX( \
135+
1, \
136+
(NOMINAL_4B_ITEMS_PER_THREAD * NOMINAL_4B_BLOCK_THREADS * 4 / sizeof(T)) / CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH))))
137+
#endif
134138

139+
/// Define both nominal threads-per-block and items-per-thread
140+
#ifndef CUB_NOMINAL_CONFIG
141+
#define CUB_NOMINAL_CONFIG(NOMINAL_4B_BLOCK_THREADS, NOMINAL_4B_ITEMS_PER_THREAD, T) \
142+
CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, 200), \
143+
CUB_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, 200)
144+
#endif
135145

136-
#define CUB_NOMINAL_CONFIG(NOMINAL_4B_BLOCK_THREADS, NOMINAL_4B_ITEMS_PER_THREAD, T) \
137-
CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, 200), \
138-
CUB_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, 200)
139146

140147

141148
#endif // Do not document

0 commit comments

Comments
 (0)