Skip to content

Commit 801830a

Browse files
author
bragadeesh
committed
Merge branch 'develop'
Conflicts: src/CMakeLists.txt. Updated the revision number to 2.4 to resolve the conflict.
2 parents d0394b1 + d6dcb18 commit 801830a

File tree

5 files changed

+57
-27
lines changed

5 files changed

+57
-27
lines changed

README.md

+4-5
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,12 @@ clFFT
22
=====
33
[![Build Status](https://travis-ci.org/clMathLibraries/clFFT.png)](https://travis-ci.org/clMathLibraries/clFFT)
44

5-
clMath is a software library containing FFT and BLAS functions written
5+
clFFT is a software library containing FFT functions written
66
in OpenCL. In addition to GPU devices, the library also supports
7-
running on CPU devices to facilitate debugging and multicore
7+
running on CPU devices to facilitate debugging and heterogeneous
88
programming.
99

10-
clMath 2.1 is the latest version and is available as source only.
11-
clMath's predecessor APPML 1.10 has pre-built binaries available for
12-
download on both Linux and Windows platforms.
10+
Pre-built binaries are available [here][binary_release].
1311

1412
## Introduction to clFFT
1513

@@ -210,3 +208,4 @@ int main( void )
210208
[[email protected]]: https://github.com/clMathLibraries/clFFT/wiki/Build
211209
[Contributing]: CONTRIBUTING.md
212210
[Apache License, Version 2.0]: http://www.apache.org/licenses/LICENSE-2.0
211+
[binary_release]: https://github.com/clMathLibraries/clFFT/releases

src/CMakeLists.txt

+20-15
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# ########################################################################
22
# Copyright 2013 Advanced Micro Devices, Inc.
3-
#
3+
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
66
# You may obtain a copy of the License at
7-
#
7+
#
88
# http://www.apache.org/licenses/LICENSE-2.0
9-
#
9+
#
1010
# Unless required by applicable law or agreed to in writing, software
1111
# distributed under the License is distributed on an "AS IS" BASIS,
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -40,7 +40,7 @@ if( NOT DEFINED CLFFT_VERSION_MAJOR )
4040
endif( )
4141

4242
if( NOT DEFINED CLFFT_VERSION_MINOR )
43-
set( CLFFT_VERSION_MINOR 2 )
43+
set( CLFFT_VERSION_MINOR 4 )
4444
endif( )
4545

4646
if( NOT DEFINED CLFFT_VERSION_PATCH )
@@ -72,7 +72,7 @@ option( BUILD_TEST "Build the library testing suite (dependency on google test,
7272
option( BUILD_LOADLIBRARIES "Build the optional dynamic load libraries that the FFT runtime will search for" ON )
7373
option( BUILD_SHARED_LIBRARY "Build shared libraries." ON)
7474

75-
# If BOOST_ROOT is defined as an environment value, use that value and cache it so it's visible in the cmake-gui.
75+
# If BOOST_ROOT is defined as an environment value, use that value and cache it so it's visible in the cmake-gui.
7676
# Otherwise, create a sensible default that the user can change
7777
if( DEFINED ENV{BOOST_ROOT} )
7878
set( BOOST_ROOT $ENV{BOOST_ROOT} CACHE PATH "Environment variable defining the root of the Boost installation" )
@@ -105,8 +105,8 @@ else()
105105
endif()
106106
endif()
107107

108-
# These variables are meant to contain string which should be appended to the installation paths
109-
# of library and executable binaries, respectively. They are meant to be user configurable/overridable.
108+
# These variables are meant to contain strings which should be appended to the installation paths
109+
# of library and executable binaries, respectively. They are meant to be user configurable/overridable.
110110
set( SUFFIX_LIB_DEFAULT "" )
111111
set( SUFFIX_BIN_DEFAULT "" )
112112

@@ -155,7 +155,7 @@ find_package( OpenCL )
155155
# This will define FFTW_FOUND
156156
find_package( FFTW )
157157

158-
if( (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 2.8) )
158+
if( (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 2.8) )
159159
message( STATUS "Cmake version 2.8 or greater needed to use GTest" )
160160
else()
161161
# This will define GTEST_FOUND
@@ -195,7 +195,7 @@ if( MSVC )
195195
# CMake sets huge stack frames for windows, for whatever reason. We go with compiler default.
196196
string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}" )
197197
string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}" )
198-
string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS}" )
198+
string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS}" )
199199

200200
elseif( CMAKE_COMPILER_IS_GNUCXX )
201201
message( STATUS "Detected GNU fortran compiler." )
@@ -208,7 +208,12 @@ elseif( CMAKE_COMPILER_IS_GNUCXX )
208208

209209
set( CMAKE_CXX_FLAGS "-pthread ${CMAKE_CXX_FLAGS}" )
210210
set( CMAKE_C_FLAGS "-pthread ${CMAKE_C_FLAGS}" )
211-
211+
212+
# For Linux debug builds, define the same preprocessor symbols as on Windows to keep it simple
213+
if( CMAKE_BUILD_TYPE MATCHES "Debug" )
214+
add_definitions( "/D_DEBUG" )
215+
endif( )
216+
212217
if( BUILD64 )
213218
set( CMAKE_CXX_FLAGS "-m64 ${CMAKE_CXX_FLAGS}" )
214219
set( CMAKE_C_FLAGS "-m64 ${CMAKE_C_FLAGS}" )
@@ -237,12 +242,12 @@ message( STATUS "CMAKE_EXE_LINKER link flags: " ${CMAKE_EXE_LINKER_FLAGS} )
237242

238243
# configure a header file to pass the CMake version settings to the source, and package the header files in the output archive
239244
configure_file( "${PROJECT_SOURCE_DIR}/include/clFFT.version.h.in" "${PROJECT_BINARY_DIR}/include/clFFT.version.h" )
240-
install( FILES
241-
"${PROJECT_BINARY_DIR}/include/clFFT.version.h"
245+
install( FILES
246+
"${PROJECT_BINARY_DIR}/include/clFFT.version.h"
242247
"include/clFFT.h"
243248
"include/clAmdFft.h"
244-
"include/clAmdFft.version.h"
245-
DESTINATION
249+
"include/clAmdFft.version.h"
250+
DESTINATION
246251
"./include" )
247252

248253

@@ -278,7 +283,7 @@ else( )
278283
message( "GoogleTest unit tests will NOT be built" )
279284
endif( )
280285

281-
# The following code is setting variables to control the behavior of CPack to generate our
286+
# The following code sets variables that control how CPack generates our packages
282287
if( WIN32 )
283288
set( CPACK_SOURCE_GENERATOR "ZIP" )
284289
set( CPACK_GENERATOR "ZIP" )

src/FindOpenCL.cmake

+6
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ find_path(OPENCL_INCLUDE_DIRS
5656
PATHS
5757
/usr/include
5858
/usr/local/include
59+
/usr/local/cuda/include
60+
/opt/cuda/include
5961
DOC "OpenCL header file path"
6062
)
6163
mark_as_advanced( OPENCL_INCLUDE_DIRS )
@@ -70,6 +72,8 @@ if( LIB64 )
7072
${OPENCL_ROOT}/lib
7173
$ENV{AMDAPPSDKROOT}/lib
7274
$ENV{CUDA_PATH}/lib
75+
/usr/local/cuda/lib
76+
/opt/cuda/lib
7377
DOC "OpenCL dynamic library path"
7478
PATH_SUFFIXES x86_64 x64
7579
PATHS
@@ -82,6 +86,8 @@ else( )
8286
${OPENCL_ROOT}/lib
8387
$ENV{AMDAPPSDKROOT}/lib
8488
$ENV{CUDA_PATH}/lib
89+
/usr/local/cuda/lib
90+
/opt/cuda/lib
8591
DOC "OpenCL dynamic library path"
8692
PATH_SUFFIXES x86 Win32
8793
PATHS

src/library/private.h

+19
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,8 @@ inline tstring clfftErrorStatusAsString( const cl_int& status )
270270

271271
// This is used to either wrap an OpenCL function call, or to explicitly check a variable for an OpenCL error condition.
272272
// If an error occurs, we issue a return statement to exit the calling function.
273+
#if defined( _DEBUG )
274+
273275
#define OPENCL_V( fn, msg ) \
274276
{ \
275277
clfftStatus vclStatus = static_cast< clfftStatus >( fn ); \
@@ -288,6 +290,23 @@ inline tstring clfftErrorStatusAsString( const cl_int& status )
288290
} \
289291
}
290292

293+
#else
294+
295+
#define OPENCL_V( fn, msg ) \
296+
{ \
297+
clfftStatus vclStatus = static_cast< clfftStatus >( fn ); \
298+
switch( vclStatus ) \
299+
{ \
300+
case CL_SUCCESS: /**< No error */ \
301+
break; \
302+
default: \
303+
{ \
304+
return vclStatus; \
305+
} \
306+
} \
307+
}
308+
#endif
309+
291310
static inline bool IsPo2 (size_t u) {
292311
return (u != 0) && (0 == (u & (u-1)));
293312
}

src/tests/cl_transform.h

+8-7
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,6 @@ class clfft {
151151
// OpenCL resources that need to be carefully managed
152152
std::unique_ptr< _cl_context, clContext_deleter > context;
153153
std::unique_ptr< _cl_command_queue, clCommQueue_deleter > queue;
154-
std::unique_ptr< _cl_event, clEvent_deleter > an_event;
155154
std::vector< std::unique_ptr< _cl_mem, clMem_deleter > > cl_mem_input;
156155
std::vector< std::unique_ptr< _cl_mem, clMem_deleter > > cl_mem_output;
157156
std::vector< cl_device_id > device_id;
@@ -630,7 +629,6 @@ class clfft {
630629

631630
// In order to call clfftEnqueueTransform, we need to pass naked pointers
632631
cl_command_queue tempQueue = queue.get( );
633-
cl_event tempEvent = an_event.get( );
634632
size_t buffer_size = 0;
635633

636634
EXPECT_EQ( CLFFT_SUCCESS, clfftBakePlan(*plan_handle, 1, &tempQueue, NULL, NULL ));
@@ -683,7 +681,7 @@ class clfft {
683681

684682
// In order to call clfftEnqueueTransform, we need to pass naked pointers
685683
cl_command_queue tempQueue = queue.get( );
686-
cl_event tempEvent = an_event.get( );
684+
std::unique_ptr< _cl_event, clEvent_deleter > tempEvent;
687685
std::unique_ptr< _cl_mem, clMem_deleter > intermediate_buffer;
688686

689687
throw_if_total_memory_footprint_is_too_large_for_device();
@@ -726,6 +724,7 @@ class clfft {
726724
for( cl_uint i = 0; i < cl_mem_output.size( ); ++i )
727725
tempOutput[ i ] = cl_mem_output[ i ].get( );
728726

727+
cl_event tevent = NULL;
729728
if( buffer_size )
730729
{
731730
status = clfftEnqueueTransform(*plan_handle,
@@ -734,7 +733,7 @@ class clfft {
734733
&tempQueue,
735734
0,
736735
NULL,
737-
&tempEvent,
736+
&tevent,
738737
&tempInput[ 0 ],
739738
&tempOutput[ 0 ],
740739
intermediate_buffer.get() );
@@ -747,24 +746,26 @@ class clfft {
747746
&tempQueue,
748747
0,
749748
NULL,
750-
&tempEvent,
749+
&tevent,
751750
&tempInput[ 0 ],
752751
&tempOutput[ 0 ],
753752
NULL );
754753
}
755754
clFinish(tempQueue);
755+
tempEvent.reset(tevent); tevent = NULL;
756756

757757
if( status != CLFFT_SUCCESS )
758758
{
759759
throw std::runtime_error(prettyPrintclFFTStatus(status).c_str());
760760
}
761761

762762
// wait for the kernel call to finish execution
763-
cl_int wait_status = clWaitForEvents(1, &tempEvent);
763+
const cl_event revent = tempEvent.get();
764+
cl_int wait_status = clWaitForEvents(1, &revent);
764765
if( wait_status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST )
765766
{
766767
cl_int error_code;
767-
clGetEventInfo( tempEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &error_code, NULL );
768+
clGetEventInfo( revent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &error_code, NULL );
768769
throw std::runtime_error(prettyPrintclFFTStatus(error_code).c_str());
769770
}
770771
else if( wait_status != CL_SUCCESS )

0 commit comments

Comments
 (0)