Skip to content

Commit 9906d4c

Browse files
committed
Remove block-wide search code
Always use binary search code
1 parent 3c6b399 commit 9906d4c

2 files changed

Lines changed: 0 additions & 38 deletions

File tree

src/stream/TRIAD_PARTED_FUSED-Cuda.cpp

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,6 @@ __launch_bounds__(block_size)
105105
__global__ void triad_parted_fused_scan_aos(scan_index_type* first_blocks, scan_index_type num_fused,
106106
triad_holder* triad_holders)
107107
{
108-
#if 0
109108
scan_index_type min_j = 0;
110109
scan_index_type max_j = num_fused-1;
111110
scan_index_type j = (min_j + max_j + 1) / 2;
@@ -119,24 +118,6 @@ __global__ void triad_parted_fused_scan_aos(scan_index_type* first_blocks, scan_
119118
j = (min_j + max_j + 1) / 2;
120119
first_block = first_blocks[j];
121120
}
122-
#elif 1
123-
__shared__ scan_index_type s_j;
124-
__shared__ scan_index_type s_first_block;
125-
for (scan_index_type j = threadIdx.x; j < num_fused; j += block_size) {
126-
scan_index_type first_block = first_blocks[j];
127-
if (first_block <= blockIdx.x) {
128-
if (j+1 == num_fused || first_blocks[j+1] > blockIdx.x) {
129-
s_j = j;
130-
s_first_block = first_block;
131-
}
132-
} else {
133-
break;
134-
}
135-
}
136-
__syncthreads();
137-
scan_index_type j = s_j;
138-
scan_index_type first_block = s_first_block;
139-
#endif
140121

141122
Index_type len = triad_holders[j].len;
142123
Real_ptr a = triad_holders[j].a;

src/stream/TRIAD_PARTED_FUSED-Hip.cpp

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,6 @@ __launch_bounds__(block_size)
105105
__global__ void triad_parted_fused_scan_aos(scan_index_type* first_blocks, scan_index_type num_fused,
106106
triad_holder* triad_holders)
107107
{
108-
#if 1
109108
scan_index_type min_j = 0;
110109
scan_index_type max_j = num_fused-1;
111110
scan_index_type j = (min_j + max_j + 1) / 2;
@@ -119,24 +118,6 @@ __global__ void triad_parted_fused_scan_aos(scan_index_type* first_blocks, scan_
119118
j = (min_j + max_j + 1) / 2;
120119
first_block = first_blocks[j];
121120
}
122-
#elif 0
123-
__shared__ scan_index_type s_j;
124-
__shared__ scan_index_type s_first_block;
125-
for (scan_index_type j = threadIdx.x; j < num_fused; j += block_size) {
126-
scan_index_type first_block = first_blocks[j];
127-
if (first_block <= blockIdx.x) {
128-
if (j+1 == num_fused || first_blocks[j+1] > blockIdx.x) {
129-
s_j = j;
130-
s_first_block = first_block;
131-
}
132-
} else {
133-
break;
134-
}
135-
}
136-
__syncthreads();
137-
scan_index_type j = s_j;
138-
scan_index_type first_block = s_first_block;
139-
#endif
140121

141122
Index_type len = triad_holders[j].len;
142123
Real_ptr a = triad_holders[j].a;

0 commit comments

Comments
 (0)