Skip to content

Commit bdc718d

Browse files
committed
Enable -Xptxas compile flag
1 parent b12a079 commit bdc718d

4 files changed

Lines changed: 7 additions & 6 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ set(CMAKE_CXX_STANDARD 20)
1010
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
1111

1212
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
13+
set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -Xptxas -O3")
1314

1415
################
1516
# Core Library #

genmetaballs/src/cuda/core/camera.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ CUDA_CALLABLE PixelCoordRange::Iterator& PixelCoordRange::Iterator::operator++()
2626
return *this;
2727
}
2828

29-
CUDA_CALLABLE bool PixelCoordRange::Sentinel::operator==(const Iterator& it) const {
29+
CUDA_CALLABLE bool operator!=(const PixelCoordRange::Iterator& it, const PixelCoordRange::Sentinel& sentinel) {
3030
// stop if we reach the end of rows, or if the range is empty
31-
return it.py >= py_end || it.px_start >= it.px_end || it.py_start >= py_end;
31+
return it.py < sentinel.py_end && it.px_start < it.px_end && it.py_start < sentinel.py_end;
3232
}
3333

3434
CUDA_CALLABLE PixelCoordRange::Iterator PixelCoordRange::begin() const {

genmetaballs/src/cuda/core/camera.cuh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,11 @@ struct PixelCoordRange {
4949
// the Sentinel class only needs to hold the stop value (i.e. final row)
5050
struct Sentinel {
5151
uint32_t py_end;
52-
53-
// stopping criterion: true if current row (py) reaches py_end
54-
CUDA_CALLABLE bool operator==(const Iterator& it) const;
5552
};
5653

54+
// continuation criterion: true while the current row (py) is still below py_end and the range is non-empty
55+
friend CUDA_CALLABLE bool operator!=(const Iterator& it, const Sentinel& sentinel);
56+
5757
// range methods
5858
CUDA_CALLABLE Iterator begin() const;
5959
CUDA_CALLABLE Sentinel end() const;

tests/cpp_tests/test_confidence.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ TEST(GpuConfidenceTest, ConfidenceMultipleValuesGPU_AllTypes) {
101101

102102
std::vector<float> actual;
103103
if (conf_case.is_two_param) {
104-
TwoParameterConfidence conf(conf_case.beta4, conf_case.beta5);
104+
TwoParameterConfidence conf{conf_case.beta4, conf_case.beta5};
105105
actual = gpu_get_confidence(sumexpd_vec, conf);
106106
} else {
107107
ZeroParameterConfidence conf;

0 commit comments

Comments
 (0)