diff --git a/CMakeLists.txt b/CMakeLists.txt index 459f11b3b3..a12cb60548 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,7 +51,7 @@ string(REGEX REPLACE ".*#define TBB_VERSION_MAJOR ([0-9]+).*" "\\1" _tbb_ver_maj string(REGEX REPLACE ".*#define TBB_VERSION_MINOR ([0-9]+).*" "\\1" _tbb_ver_minor "${_tbb_version_info}") string(REGEX REPLACE ".*#define TBB_INTERFACE_VERSION ([0-9]+).*" "\\1" TBB_INTERFACE_VERSION "${_tbb_version_info}") string(REGEX REPLACE ".*#define __TBB_BINARY_VERSION ([0-9]+).*" "\\1" TBB_BINARY_VERSION "${_tbb_version_info}") -set(TBB_BINARY_MINOR_VERSION 0) +set(TBB_BINARY_MINOR_VERSION 1) set(TBBMALLOC_BINARY_VERSION 2) set(TBBBIND_BINARY_VERSION 3) @@ -203,18 +203,18 @@ else() install(DIRECTORY include DESTINATION .) - install(EXPORT ${CMAKE_PROJECT_NAME}Targets + install(EXPORT ${PROJECT_NAME}Targets NAMESPACE TBB:: - DESTINATION lib/cmake/${CMAKE_PROJECT_NAME}) - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}Config.cmake - "include(\${CMAKE_CURRENT_LIST_DIR}/${CMAKE_PROJECT_NAME}Targets.cmake)\n") + DESTINATION lib/cmake/${PROJECT_NAME}) + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake + "include(\${CMAKE_CURRENT_LIST_DIR}/${PROJECT_NAME}Targets.cmake)\n") - write_basic_package_version_file("${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}ConfigVersion.cmake" + write_basic_package_version_file("${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" COMPATIBILITY AnyNewerVersion) - install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}Config.cmake" - "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}ConfigVersion.cmake" - DESTINATION lib/cmake/${CMAKE_PROJECT_NAME}) + install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" + DESTINATION lib/cmake/${PROJECT_NAME}) # ------------------------------------------------------------------- endif() diff --git a/README.md b/README.md index d6c3470976..386a7070bf 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # oneAPI Threading Building Blocks (Beta) -[![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE) +[![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE.txt) oneAPI Threading Building Blocks (oneTBB) lets you easily write parallel C++ programs that take full advantage of multicore performance, that are portable, composable and have future-proof scalability. @@ -19,7 +19,7 @@ Please report issues and suggestions via ## How to Contribute To contribute to oneTBB, please open a GitHub pull request (preferred) or send us a patch by e-mail. -oneAPI Threading Building Blocks is licensed under [Apache License, Version 2.0](LICENSE). +oneAPI Threading Building Blocks is licensed under [Apache License, Version 2.0](LICENSE.txt). By its terms, contributions submitted to the project are also done under that license. 
## Engineering team contacts diff --git a/cmake/compilers/GNU.cmake b/cmake/compilers/GNU.cmake index f86e6e8084..9bff049f47 100644 --- a/cmake/compilers/GNU.cmake +++ b/cmake/compilers/GNU.cmake @@ -45,4 +45,4 @@ endif() # TBB malloc settings set(TBBMALLOC_LIB_COMPILE_FLAGS -fno-rtti -fno-exceptions) - +set(TBB_OPENMP_FLAG -fopenmp) diff --git a/cmake/compilers/Intel.cmake b/cmake/compilers/Intel.cmake index 9817b50eb0..871dc4b9d8 100644 --- a/cmake/compilers/Intel.cmake +++ b/cmake/compilers/Intel.cmake @@ -15,14 +15,17 @@ if (MSVC) include(${CMAKE_CURRENT_LIST_DIR}/MSVC.cmake) set(TBB_WARNING_LEVEL ${TBB_WARNING_LEVEL} /W3) + set(TBB_OPENMP_FLAG /Qopenmp) elseif (APPLE) include(${CMAKE_CURRENT_LIST_DIR}/AppleClang.cmake) set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} -fstack-protector -Wformat -Wformat-security $<$>:-fno-omit-frame-pointer -qno-opt-report-embed -D_FORTIFY_SOURCE=2>) + set(TBB_OPENMP_FLAG -qopenmp) else() include(${CMAKE_CURRENT_LIST_DIR}/GNU.cmake) set(TBB_LIB_LINK_FLAGS ${TBB_LIB_LINK_FLAGS} -static-intel -Wl,-z,relro,-z,now,) set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} -fstack-protector -Wformat -Wformat-security $<$>:-qno-opt-report-embed -D_FORTIFY_SOURCE=2> $<$:-falign-stack=maintain-16-byte>) + set(TBB_OPENMP_FLAG -qopenmp) endif() diff --git a/cmake/compilers/MSVC.cmake b/cmake/compilers/MSVC.cmake index a0962402d1..54a0fb8801 100644 --- a/cmake/compilers/MSVC.cmake +++ b/cmake/compilers/MSVC.cmake @@ -57,3 +57,5 @@ if (TBB_WINDOWS_DRIVER) set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} /D _UNICODE /DUNICODE /DWINAPI_FAMILY=WINAPI_FAMILY_APP /D__WRL_NO_DEFAULT_LIB__) endif() + +set(TBB_OPENMP_FLAG /openmp) diff --git a/cmake/packaging.cmake b/cmake/packaging.cmake index 52eca45247..10702fc065 100644 --- a/cmake/packaging.cmake +++ b/cmake/packaging.cmake @@ -14,7 +14,7 @@ # Note: current implementation uses CMAKE_BUILD_TYPE, # this parameter is not defined for multi-config generators. 
-set(CPACK_PACKAGE_NAME "${CMAKE_PROJECT_NAME}") +set(CPACK_PACKAGE_NAME "${PROJECT_NAME}") set(CPACK_PACKAGE_VERSION "${TBB_VERSION}") string(TOLOWER ${CPACK_PACKAGE_NAME}-${PROJECT_VERSION}-${CMAKE_SYSTEM_NAME}_${TBB_OUTPUT_DIR_BASE}_${CMAKE_BUILD_TYPE} CPACK_PACKAGE_FILE_NAME) set(CPACK_GENERATOR ZIP) diff --git a/cmake/vars_utils.cmake b/cmake/vars_utils.cmake index 252ea17e05..a7b2fbe97d 100644 --- a/cmake/vars_utils.cmake +++ b/cmake/vars_utils.cmake @@ -22,19 +22,19 @@ else() set(TBB_VARS_TEMPLATE "linux/env/vars.sh.in") endif() -get_filename_component(TBB_VARS_TEMPLATE_NAME ${CMAKE_SOURCE_DIR}/integration/${TBB_VARS_TEMPLATE} NAME) +get_filename_component(TBB_VARS_TEMPLATE_NAME ${PROJECT_SOURCE_DIR}/integration/${TBB_VARS_TEMPLATE} NAME) string(REPLACE ".in" "" TBB_VARS_NAME ${TBB_VARS_TEMPLATE_NAME}) macro(tbb_gen_vars target) add_custom_command(TARGET ${target} POST_BUILD COMMAND ${CMAKE_COMMAND} -DBINARY_DIR=${CMAKE_BINARY_DIR} - -DSOURCE_DIR=${CMAKE_SOURCE_DIR} + -DSOURCE_DIR=${PROJECT_SOURCE_DIR} -DBIN_PATH=$ -DVARS_TEMPLATE=${TBB_VARS_TEMPLATE} -DVARS_NAME=${TBB_VARS_NAME} -DTBB_INSTALL_VARS=${TBB_INSTALL_VARS} - -P ${CMAKE_SOURCE_DIR}/integration/cmake/generate_vars.cmake + -P ${PROJECT_SOURCE_DIR}/integration/cmake/generate_vars.cmake ) endmacro(tbb_gen_vars) diff --git a/doc/DoxygenLayout.xml b/doc/DoxygenLayout.xml index 30f060df81..cb906b0031 100644 --- a/doc/DoxygenLayout.xml +++ b/doc/DoxygenLayout.xml @@ -19,6 +19,7 @@ + diff --git a/include/tbb/concurrent_map.h b/include/tbb/concurrent_map.h index 40f73cb982..74d23db6ca 100644 --- a/include/tbb/concurrent_map.h +++ b/include/tbb/concurrent_map.h @@ -91,6 +91,7 @@ class concurrent_map : public concurrent_skip_list typename std::enable_if::value, diff --git a/include/tbb/concurrent_priority_queue.h b/include/tbb/concurrent_priority_queue.h index 77ef2b0c1d..b1d48256aa 100644 --- a/include/tbb/concurrent_priority_queue.h +++ b/include/tbb/concurrent_priority_queue.h @@ -234,8 +234,6 @@ class concurrent_priority_queue { }; cpq_operation( const value_type& value, operation_type t ) : type(t), elem(const_cast(&value)) {} - - cpq_operation( operation_type t ) : type(t) {} }; // class cpq_operation class functor { @@ -477,7 +475,7 @@ void swap( concurrent_priority_queue& lhs, inline namespace v1 { using detail::d1::concurrent_priority_queue; -}; // inline namespace v1 +} // inline namespace v1 } // namespace tbb #endif // __TBB_concurrent_priority_queue_H diff --git a/include/tbb/concurrent_set.h b/include/tbb/concurrent_set.h index e62cce936c..adf8bba4d1 100644 --- a/include/tbb/concurrent_set.h +++ b/include/tbb/concurrent_set.h @@ -74,6 +74,7 @@ class concurrent_set : public concurrent_skip_list void merge(concurrent_set& source) { @@ -148,6 +149,7 @@ class concurrent_multiset : public concurrent_skip_list void merge(concurrent_set& source) { diff --git a/include/tbb/concurrent_unordered_map.h b/include/tbb/concurrent_unordered_map.h index 309df0cc23..80fb662426 100644 --- a/include/tbb/concurrent_unordered_map.h +++ b/include/tbb/concurrent_unordered_map.h @@ -69,6 +69,7 @@ class concurrent_unordered_map // Include constructors of base type using base_type::base_type; + using base_type::operator=; // Observers mapped_type& operator[]( const key_type& key ) { @@ -220,7 +221,7 @@ class concurrent_unordered_multimap // Include constructors of base type using base_type::base_type; - + using base_type::operator=; using base_type::insert; template diff --git a/include/tbb/concurrent_unordered_set.h 
b/include/tbb/concurrent_unordered_set.h index 4c0b51215c..d7932a55b0 100644 --- a/include/tbb/concurrent_unordered_set.h +++ b/include/tbb/concurrent_unordered_set.h @@ -67,6 +67,7 @@ class concurrent_unordered_set // Include constructors of base_type; using base_type::base_type; + using base_type::operator=; template void merge( concurrent_unordered_set& source ) { @@ -162,6 +163,7 @@ class concurrent_unordered_multiset // Include constructors of base_type; using base_type::base_type; + using base_type::operator=; template void merge( concurrent_unordered_set& source ) { diff --git a/include/tbb/detail/_aggregator.h b/include/tbb/detail/_aggregator.h index 247be3107e..9b846915da 100644 --- a/include/tbb/detail/_aggregator.h +++ b/include/tbb/detail/_aggregator.h @@ -143,7 +143,6 @@ class aggregator : public aggregator_generic { HandlerType handle_operations; public: aggregator() = default; - explicit aggregator( HandlerType h ) : handle_operations(h) {} void initialize_handler( HandlerType h ) { handle_operations = h; } @@ -167,8 +166,8 @@ class aggregating_functor { }; // class aggregating_functor -}; // namespace d1 -}; // namespace detail -}; // namespace tbb +} // namespace d1 +} // namespace detail +} // namespace tbb #endif // __TBB_detail__aggregator_H diff --git a/include/tbb/detail/_concurrent_unordered_base.h b/include/tbb/detail/_concurrent_unordered_base.h index 94a8a78618..93911bd6fa 100644 --- a/include/tbb/detail/_concurrent_unordered_base.h +++ b/include/tbb/detail/_concurrent_unordered_base.h @@ -708,6 +708,8 @@ class concurrent_unordered_base { return my_midpoint_node != my_end_node; } + size_type grainsize() const { return 1; } + const_range_type( const_range_type& range, split ) : my_instance(range.my_instance), my_begin_node(range.my_midpoint_node), diff --git a/include/tbb/detail/_config.h b/include/tbb/detail/_config.h index 3e7489aef4..953732ed82 100644 --- a/include/tbb/detail/_config.h +++ b/include/tbb/detail/_config.h @@ -296,6 +296,8 @@ #define __TBB_TSX_INTRINSICS_PRESENT ((__RTM__ || _MSC_VER>=1700 || __INTEL_COMPILER) && !__ANDROID__) +#define __TBB_WAITPKG_INTRINSICS_PRESENT ((__INTEL_COMPILER >= 1900 || __TBB_GCC_VERSION >= 110000 || __TBB_CLANG_VERSION >= 120000) && !__ANDROID__) + /** Internal TBB features & modes **/ /** __TBB_SOURCE_DIRECTLY_INCLUDED is a mode used in whitebox testing when @@ -334,7 +336,7 @@ #define __TBB_ARENA_OBSERVER __TBB_SCHEDULER_OBSERVER #endif /* __TBB_ARENA_OBSERVER */ -#if TBB_PREVIEW_NUMA_SUPPORT || __TBB_BUILD +#ifndef __TBB_NUMA_SUPPORT #define __TBB_NUMA_SUPPORT 1 #endif diff --git a/include/tbb/detail/_flow_graph_body_impl.h b/include/tbb/detail/_flow_graph_body_impl.h index a66a42a683..eb4e59934c 100644 --- a/include/tbb/detail/_flow_graph_body_impl.h +++ b/include/tbb/detail/_flow_graph_body_impl.h @@ -218,7 +218,6 @@ class type_to_key_function_body_leaf : public type_to_key_function_body(body); } @@ -233,7 +232,6 @@ class type_to_key_function_body_leaf : public type_to_key_funct const Output& operator()(const Input &i) override { return body(i); } - B get_body() { return body; } type_to_key_function_body_leaf* clone() override { return new type_to_key_function_body_leaf< Input, Output&, B>(body); } @@ -313,12 +311,6 @@ class input_node_task_bypass : public graph_task { // ------------------------ end of node task bodies ----------------------------------- -//! 
An empty functor that takes an Input and returns a default constructed Output -template< typename Input, typename Output > -struct empty_body { - Output operator()( const Input & ) const { return Output(); } -}; - template class decrementer; @@ -342,15 +334,12 @@ class decrementer friend class limiter_node; - void reset_receiver( reset_flags ) override { - } + void reset_receiver( reset_flags ) {} public: - // Since decrementer does not make use of possibly unconstructed owner inside its - // constructor, my_node can be directly initialized with 'this' pointer passed from the - // owner, hence making method 'set_owner' needless. - decrementer() : my_node(NULL) {} - void set_owner( T *node ) { my_node = node; } + decrementer(T* owner) : my_node(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } }; template @@ -372,13 +361,11 @@ class decrementer : public continue_receiver, no_copy { typedef continue_msg input_type; typedef continue_msg output_type; - decrementer() : continue_receiver( /*number_of_predecessors=*/0, no_priority ) - // Since decrementer does not make use of possibly unconstructed owner inside its - // constructor, my_node can be directly initialized with 'this' pointer passed from the - // owner, hence making method 'set_owner' needless. - , my_node(NULL) - {} - void set_owner( T *node ) { my_node = node; } + decrementer(T* owner) + : continue_receiver( /*number_of_predecessors=*/0, no_priority ), my_node(owner) + { + // Do not work with the passed pointer here as it may not be fully initialized yet + } }; #endif // __TBB__flow_graph_body_impl_H diff --git a/include/tbb/detail/_flow_graph_cache_impl.h b/include/tbb/detail/_flow_graph_cache_impl.h index e93b16c69a..688ef944d8 100644 --- a/include/tbb/detail/_flow_graph_cache_impl.h +++ b/include/tbb/detail/_flow_graph_cache_impl.h @@ -44,7 +44,8 @@ class node_cache { typename mutex_type::scoped_lock lock( my_mutex ); for ( size_t i = internal_size(); i != 0; --i ) { T &s = internal_pop(); - if ( &s == &n ) return; // only remove one predecessor per request + if ( &s == &n ) + break; // only remove one predecessor per request internal_push(s); } } @@ -92,11 +93,12 @@ class predecessor_cache : public node_cache< sender, M > { typedef sender predecessor_type; typedef receiver successor_type; - predecessor_cache( ) : my_owner( NULL ) { } - - void set_owner( successor_type *owner ) { my_owner = owner; } + predecessor_cache( successor_type* owner ) : my_owner( owner ) { + __TBB_ASSERT( my_owner, "predecessor_cache should have an owner." ); + // Do not work with the passed pointer here as it may not be fully initialized yet + } - bool get_item( output_type &v ) { + bool get_item( output_type& v ) { bool msg = false; @@ -115,8 +117,7 @@ class predecessor_cache : public node_cache< sender, M > { if (msg == false) { // Relinquish ownership of the edge - if (my_owner) - register_successor(*src, *my_owner); + register_successor(*src, *my_owner); } else { // Retain ownership of the edge this->add(*src); @@ -127,20 +128,18 @@ class predecessor_cache : public node_cache< sender, M > { // If we are removing arcs (rf_clear_edges), call clear() rather than reset(). 
void reset() { - if (my_owner) { - for(;;) { - predecessor_type *src; - { - if (this->internal_empty()) break; - src = &this->internal_pop(); - } - register_successor(*src, *my_owner); + for(;;) { + predecessor_type *src; + { + if (this->internal_empty()) break; + src = &this->internal_pop(); } + register_successor(*src, *my_owner); } } protected: - successor_type *my_owner; + successor_type* my_owner; }; //! A cache of predecessors that supports requests and reservations template class reservable_predecessor_cache : public predecessor_cache< T, M > { public: typedef sender predecessor_type; typedef receiver successor_type; - reservable_predecessor_cache( ) : reserved_src(NULL) { } + reservable_predecessor_cache( successor_type* owner ) + : predecessor_cache(owner), reserved_src(NULL) + { + // Do not work with the passed pointer here as it may not be fully initialized yet + } bool try_reserve( output_type &v ) { @@ -222,22 +225,22 @@ class successor_cache : no_copy { mutex_type my_mutex; typedef receiver successor_type; - typedef receiver *pointer_type; + typedef receiver* pointer_type; typedef sender owner_type; // TODO revamp: introduce heapified collection of successors for strict priorities typedef std::list< pointer_type > successors_type; successors_type my_successors; - owner_type *my_owner; + owner_type* my_owner; public: - successor_cache( ) : my_owner(NULL) {} - - void set_owner( owner_type *owner ) { my_owner = owner; } + successor_cache( owner_type* owner ) : my_owner(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } virtual ~successor_cache() {} - void register_successor( successor_type &r ) { + void register_successor( successor_type& r ) { typename mutex_type::scoped_lock l(my_mutex, true); if( r.priority() != no_priority ) my_successors.push_front( &r ); @@ -245,7 +248,7 @@ class successor_cache : no_copy { my_successors.push_back( &r ); } - void remove_successor( successor_type &r ) { + void remove_successor( successor_type& r ) { typename mutex_type::scoped_lock l(my_mutex, true); for ( typename successors_type::iterator i = my_successors.begin(); i != my_successors.end(); ++i ) { @@ -265,8 +268,8 @@ class successor_cache : no_copy { my_successors.clear(); } - virtual graph_task* try_put_task( const T &t ) = 0; - }; // successor_cache + virtual graph_task* try_put_task( const T& t ) = 0; +}; // successor_cache //! An abstract cache of successors, specialized to continue_msg template class successor_cache< continue_msg, M > : no_copy { mutex_type my_mutex; typedef receiver successor_type; - typedef receiver *pointer_type; + typedef receiver* pointer_type; + typedef sender owner_type; typedef std::list< pointer_type > successors_type; successors_type my_successors; - sender *my_owner; + owner_type* my_owner; public: - successor_cache( ) : my_owner(NULL) {} - - void set_owner( sender *owner ) { my_owner = owner; } + successor_cache( sender* owner ) : my_owner(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } virtual ~successor_cache() {} - void register_successor( successor_type &r ) { + void register_successor( successor_type& r ) { typename mutex_type::scoped_lock l(my_mutex, true); if( r.priority() != no_priority ) my_successors.push_front( &r ); else my_successors.push_back( &r ); - if ( my_owner && r.is_continue_receiver() ) { + __TBB_ASSERT( my_owner, "Cache of successors must have an owner." 
); + if ( r.is_continue_receiver() ) { r.register_predecessor( *my_owner ); } } - void remove_successor( successor_type &r ) { + void remove_successor( successor_type& r ) { typename mutex_type::scoped_lock l(my_mutex, true); - for ( successors_type::iterator i = my_successors.begin(); - i != my_successors.end(); ++i ) { - if ( *i == & r ) { - // TODO: Check if we need to test for continue_receiver before - // removing from r. - if ( my_owner ) - r.remove_predecessor( *my_owner ); + for ( successors_type::iterator i = my_successors.begin(); i != my_successors.end(); ++i ) { + if ( *i == &r ) { + __TBB_ASSERT(my_owner, "Cache of successors must have an owner."); + // TODO: check if we need to test for continue_receiver before removing from r. + r.remove_predecessor( *my_owner ); my_successors.erase(i); break; } @@ -324,24 +327,26 @@ class successor_cache< continue_msg, M > : no_copy { my_successors.clear(); } - virtual graph_task* try_put_task( const continue_msg &t ) = 0; + virtual graph_task* try_put_task( const continue_msg& t ) = 0; }; // successor_cache< continue_msg > //! A cache of successors that are broadcast to template class broadcast_cache : public successor_cache { + typedef successor_cache base_type; typedef M mutex_type; typedef typename successor_cache::successors_type successors_type; public: - broadcast_cache( ) {} + broadcast_cache( typename base_type::owner_type* owner ): base_type(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } // as above, but call try_put_task instead, and return the last task we received (if any) graph_task* try_put_task( const T &t ) override { graph_task * last_task = nullptr; - bool upgraded = true; - typename mutex_type::scoped_lock l(this->my_mutex, upgraded); + typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); typename successors_type::iterator i = this->my_successors.begin(); while ( i != this->my_successors.end() ) { graph_task *new_task = (*i)->try_put_task(t); @@ -353,10 +358,6 @@ class broadcast_cache : public successor_cache { } else { // failed if ( (*i)->register_predecessor(*this->my_owner) ) { - if (!upgraded) { - l.upgrade_to_writer(); - upgraded = true; - } i = this->my_successors.erase(i); } else { ++i; @@ -368,9 +369,8 @@ class broadcast_cache : public successor_cache { // call try_put_task and return list of received tasks bool gather_successful_try_puts( const T &t, graph_task_list& tasks ) { - bool upgraded = true; bool is_at_least_one_put_successful = false; - typename mutex_type::scoped_lock l(this->my_mutex, upgraded); + typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); typename successors_type::iterator i = this->my_successors.begin(); while ( i != this->my_successors.end() ) { graph_task * new_task = (*i)->try_put_task(t); @@ -383,10 +383,6 @@ class broadcast_cache : public successor_cache { } else { // failed if ( (*i)->register_predecessor(*this->my_owner) ) { - if (!upgraded) { - l.upgrade_to_writer(); - upgraded = true; - } i = this->my_successors.erase(i); } else { ++i; @@ -400,13 +396,16 @@ class broadcast_cache : public successor_cache { //! 
A cache of successors that are put in a round-robin fashion template class round_robin_cache : public successor_cache { + typedef successor_cache base_type; typedef size_t size_type; typedef M mutex_type; typedef typename successor_cache::successors_type successors_type; public: - round_robin_cache( ) {} + round_robin_cache( typename base_type::owner_type* owner ): base_type(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } size_type size() { typename mutex_type::scoped_lock l(this->my_mutex, false); @@ -414,8 +413,7 @@ class round_robin_cache : public successor_cache { } graph_task* try_put_task( const T &t ) override { - bool upgraded = true; - typename mutex_type::scoped_lock l(this->my_mutex, upgraded); + typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); typename successors_type::iterator i = this->my_successors.begin(); while ( i != this->my_successors.end() ) { graph_task* new_task = (*i)->try_put_task(t); @@ -423,10 +421,6 @@ class round_robin_cache : public successor_cache { return new_task; } else { if ( (*i)->register_predecessor(*this->my_owner) ) { - if (!upgraded) { - l.upgrade_to_writer(); - upgraded = true; - } i = this->my_successors.erase(i); } else { diff --git a/include/tbb/detail/_flow_graph_impl.h b/include/tbb/detail/_flow_graph_impl.h index b11c7a02de..e489494f3c 100644 --- a/include/tbb/detail/_flow_graph_impl.h +++ b/include/tbb/detail/_flow_graph_impl.h @@ -52,9 +52,6 @@ class graph_iterator { typedef const GraphNodeType& const_reference; typedef std::forward_iterator_tag iterator_category; - //! Default constructor - graph_iterator() : my_graph(NULL), current_node(NULL) {} - //! Copy constructor graph_iterator(const graph_iterator& other) : my_graph(other.my_graph), current_node(other.current_node) @@ -137,9 +134,9 @@ class graph_task : public task { // TODO revamp: rename to my_priority node_priority_t priority; void destruct_and_deallocate(const execution_data& ed); + task* cancel(execution_data& ed) override; protected: void finalize(const execution_data& ed); - task* cancel(execution_data& ed) override; private: // To organize task_list graph_task* my_next{ nullptr }; @@ -160,29 +157,38 @@ typedef tbb::concurrent_priority_queue graph class priority_task_selector : public task { public: priority_task_selector(graph_task_priority_queue_t& priority_queue, small_object_allocator& allocator) - : my_priority_queue(priority_queue), my_allocator(allocator) {} + : my_priority_queue(priority_queue), my_allocator(allocator), my_task() {} task* execute(execution_data& ed) override { - graph_task* t = nullptr; - // TODO revamp: hold functors in priority queue instead of real tasks - bool result = my_priority_queue.try_pop(t); - __TBB_ASSERT_EX( result, "Number of critical tasks for scheduler and tasks" - " in graph's priority queue mismatched" ); - __TBB_ASSERT( t && t != SUCCESSFULLY_ENQUEUED, - "Incorrect task submitted to graph priority queue" ); - __TBB_ASSERT( t->priority != no_priority, - "Tasks from graph's priority queue must have priority" ); - // TODO revamp: consider cancellation and exception handling for the task t. 
- task* t_next = t->execute(ed); + next_task(); + __TBB_ASSERT(my_task, nullptr); + task* t_next = my_task->execute(ed); my_allocator.delete_object(this, ed); return t_next; } task* cancel(execution_data& ed) override { + if (!my_task) { + next_task(); + } + __TBB_ASSERT(my_task, nullptr); + task* t_next = my_task->cancel(ed); my_allocator.delete_object(this, ed); - return nullptr; + return t_next; } private: + void next_task() { + // TODO revamp: hold functors in priority queue instead of real tasks + bool result = my_priority_queue.try_pop(my_task); + __TBB_ASSERT_EX(result, "Number of critical tasks for scheduler and tasks" + " in graph's priority queue mismatched"); + __TBB_ASSERT(my_task && my_task != SUCCESSFULLY_ENQUEUED, + "Incorrect task submitted to graph priority queue"); + __TBB_ASSERT(my_task->priority != no_priority, + "Tasks from graph's priority queue must have priority"); + } + graph_task_priority_queue_t& my_priority_queue; small_object_allocator my_allocator; + graph_task* my_task; }; template class run_and_put_task; diff --git a/include/tbb/detail/_flow_graph_indexer_impl.h b/include/tbb/detail/_flow_graph_indexer_impl.h index 33ef530265..c4d33328b5 100644 --- a/include/tbb/detail/_flow_graph_indexer_impl.h +++ b/include/tbb/detail/_flow_graph_indexer_impl.h @@ -45,11 +45,6 @@ std::get(my_input).set_up(p, indexer_node_put_task, g); indexer_helper::template set_indexer_node_pointer(my_input, p, g); } - template - static inline void reset_inputs(InputTuple &my_input, reset_flags f) { - indexer_helper::reset_inputs(my_input, f); - std::get(my_input).reset_receiver(f); - } }; template @@ -60,10 +55,6 @@ graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put; std::get<0>(my_input).set_up(p, indexer_node_put_task, g); } - template - static inline void reset_inputs(InputTuple &my_input, reset_flags f) { - std::get<0>(my_input).reset_receiver(f); - } }; template @@ -91,9 +82,6 @@ graph& graph_reference() const override { return *my_graph; } - - public: - void reset_receiver(reset_flags /*f*/) override { } }; template @@ -142,7 +130,6 @@ type(char(t)), my_arg(e) {} indexer_node_base_operation(const successor_type &s, op_type t) : type(char(t)), my_succ(const_cast(&s)) {} - indexer_node_base_operation(op_type t) : type(char(t)) {} }; typedef aggregating_functor handler_type; @@ -175,15 +162,15 @@ } // ---------- end aggregator ----------- public: - indexer_node_base(graph& g) : graph_node(g), input_ports_type() { + indexer_node_base(graph& g) : graph_node(g), input_ports_type(), my_successors(this) { indexer_helper::set_indexer_node_pointer(this->my_inputs, this, g); - my_successors.set_owner(this); my_aggregator.initialize_handler(handler_type(this)); } - indexer_node_base(const indexer_node_base& other) : graph_node(other.my_graph), input_ports_type(), sender() { + indexer_node_base(const indexer_node_base& other) + : graph_node(other.my_graph), input_ports_type(), sender(), my_successors(this) + { indexer_helper::set_indexer_node_pointer(this->my_inputs, this, other.my_graph); - my_successors.set_owner(this); my_aggregator.initialize_handler(handler_type(this)); } @@ -209,7 +196,6 @@ void reset_node(reset_flags f) override { if(f & rf_clear_edges) { my_successors.clear(); - indexer_helper::reset_inputs(this->my_inputs,f); } } diff --git a/include/tbb/detail/_flow_graph_item_buffer_impl.h b/include/tbb/detail/_flow_graph_item_buffer_impl.h index 15d63c07e1..47186a9df8 100644 --- a/include/tbb/detail/_flow_graph_item_buffer_impl.h +++ 
b/include/tbb/detail/_flow_graph_item_buffer_impl.h @@ -62,7 +62,9 @@ class item_buffer { } bool my_item_valid(size_type i) const { return (i < my_tail) && (i >= my_head) && (item(i).second != no_item); } +#if TBB_USE_ASSERT bool my_item_reserved(size_type i) const { return item(i).second == reserved_item; } +#endif // object management in buffer const item_type &get_my_item(size_t i) const { diff --git a/include/tbb/detail/_flow_graph_join_impl.h b/include/tbb/detail/_flow_graph_join_impl.h index e0763acc37..cf889d785c 100644 --- a/include/tbb/detail/_flow_graph_join_impl.h +++ b/include/tbb/detail/_flow_graph_join_impl.h @@ -26,12 +26,22 @@ struct forwarding_base : no_assign { forwarding_base(graph &g) : graph_ref(g) {} virtual ~forwarding_base() {} + graph& graph_ref; + }; + + struct queueing_forwarding_base : forwarding_base { + using forwarding_base::forwarding_base; // decrement_port_count may create a forwarding task. If we cannot handle the task // ourselves, ask decrement_port_count to deal with it. virtual graph_task* decrement_port_count(bool handle_task) = 0; + }; + + struct reserving_forwarding_base : forwarding_base { + using forwarding_base::forwarding_base; + // decrement_port_count may create a forwarding task. If we cannot handle the task + // ourselves, ask decrement_port_count to deal with it. + virtual graph_task* decrement_port_count() = 0; virtual void increment_port_count() = 0; - // moved here so input ports can queue tasks - graph& graph_ref; }; // specialization that lets us keep a copy of the current_key for building results. @@ -40,7 +50,7 @@ struct matching_forwarding_base : public forwarding_base { typedef typename std::decay::type current_key_type; matching_forwarding_base(graph &g) : forwarding_base(g) { } - virtual graph_task* increment_key_count(current_key_type const & /*t*/, bool /*handle_task*/) = 0; // {return NULL;} + virtual graph_task* increment_key_count(current_key_type const & /*t*/) = 0; current_key_type current_key; // so ports can refer to FE's desired items }; @@ -104,15 +114,17 @@ template static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { std::get(my_input).set_my_key_func(std::get(my_key_funcs)); - std::get(my_key_funcs) = NULL; + std::get(my_key_funcs) = nullptr; join_helper::set_key_functors(my_input, my_key_funcs); } template< typename KeyFuncTuple> static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { - if(std::get(other_inputs).get_my_key_func()) { - std::get(my_inputs).set_my_key_func(std::get(other_inputs).get_my_key_func()->clone()); - } + __TBB_ASSERT( + std::get(other_inputs).get_my_key_func(), + "key matching join node should not be instantiated without functors." 
+ ); + std::get(my_inputs).set_my_key_func(std::get(other_inputs).get_my_key_func()->clone()); join_helper::copy_key_functors(my_inputs, other_inputs); } @@ -174,14 +186,16 @@ template static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { std::get<0>(my_input).set_my_key_func(std::get<0>(my_key_funcs)); - std::get<0>(my_key_funcs) = NULL; + std::get<0>(my_key_funcs) = nullptr; } template< typename KeyFuncTuple> static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { - if(std::get<0>(other_inputs).get_my_key_func()) { - std::get<0>(my_inputs).set_my_key_func(std::get<0>(other_inputs).get_my_key_func()->clone()); - } + __TBB_ASSERT( + std::get<0>(other_inputs).get_my_key_func(), + "key matching join node should not be instantiated without functors." + ); + std::get<0>(my_inputs).set_my_key_func(std::get<0>(other_inputs).get_my_key_func()->clone()); } template static inline void reset_inputs(InputTuple &my_input, reset_flags f) { @@ -222,23 +236,27 @@ void handle_operations(reserving_port_operation* op_list) { reserving_port_operation *current; - bool no_predecessors; + bool was_missing_predecessors = false; while(op_list) { current = op_list; op_list = op_list->next; switch(current->type) { case reg_pred: - no_predecessors = my_predecessors.empty(); + was_missing_predecessors = my_predecessors.empty(); my_predecessors.add(*(current->my_pred)); - if ( no_predecessors ) { - (void) my_join->decrement_port_count(true); // may try to forward + if ( was_missing_predecessors ) { + (void) my_join->decrement_port_count(); // may try to forward } current->status.store( SUCCEEDED, std::memory_order_release); break; case rem_pred: - my_predecessors.remove(*(current->my_pred)); - if(my_predecessors.empty()) my_join->increment_port_count(); - current->status.store( SUCCEEDED, std::memory_order_release); + if ( !my_predecessors.empty() ) { + my_predecessors.remove(*(current->my_pred)); + if ( my_predecessors.empty() ) // was the last predecessor + my_join->increment_port_count(); + } + // TODO: consider returning failure if there were no predecessors to remove + current->status.store( SUCCEEDED, std::memory_order_release ); break; case res_item: if ( reserved ) { @@ -273,7 +291,7 @@ template friend class broadcast_cache; template friend class round_robin_cache; graph_task* try_put_task( const T & ) override { - return NULL; + return nullptr; } graph& graph_reference() const override { @@ -283,21 +301,14 @@ public: //! 
Constructor - reserving_port() : reserved(false) { - my_join = NULL; - my_predecessors.set_owner( this ); + reserving_port() : my_join(nullptr), my_predecessors(this), reserved(false) { my_aggregator.initialize_handler(handler_type(this)); } // copy constructor - reserving_port(const reserving_port& /* other */) : receiver() { - reserved = false; - my_join = NULL; - my_predecessors.set_owner( this ); - my_aggregator.initialize_handler(handler_type(this)); - } + reserving_port(const reserving_port& /* other */) = delete; - void set_join_node_pointer(forwarding_base *join) { + void set_join_node_pointer(reserving_forwarding_base *join) { my_join = join; } @@ -334,7 +345,7 @@ my_aggregator.execute(&op_data); } - void reset_receiver( reset_flags f) override { + void reset_receiver( reset_flags f) { if(f & rf_clear_edges) my_predecessors.clear(); else my_predecessors.reset(); @@ -347,7 +358,7 @@ friend class get_graph_helper; #endif - forwarding_base *my_join; + reserving_forwarding_base *my_join; reservable_predecessor_cache< T, null_mutex > my_predecessors; bool reserved; }; // reserving_port @@ -374,16 +385,16 @@ // constructor for value parameter queueing_port_operation(const T& e, op_type t) : type(char(t)), my_val(e) - , bypass_t(NULL) + , bypass_t(nullptr) {} // constructor for pointer parameter queueing_port_operation(const T* p, op_type t) : type(char(t)), my_arg(const_cast(p)) - , bypass_t(NULL) + , bypass_t(nullptr) {} // constructor with no parameter queueing_port_operation(op_type t) : type(char(t)) - , bypass_t(NULL) + , bypass_t(nullptr) {} }; @@ -399,7 +410,7 @@ op_list = op_list->next; switch(current->type) { case try__put_task: { - graph_task* rtask = NULL; + graph_task* rtask = nullptr; was_empty = this->buffer_empty(); this->push_back(current->my_val); if (was_empty) rtask = my_join->decrement_port_count(false); @@ -451,18 +462,15 @@ //! Constructor queueing_port() : item_buffer() { - my_join = NULL; + my_join = nullptr; my_aggregator.initialize_handler(handler_type(this)); } //! copy constructor - queueing_port(const queueing_port& /* other */) : receiver(), item_buffer() { - my_join = NULL; - my_aggregator.initialize_handler(handler_type(this)); - } + queueing_port(const queueing_port& /* other */) = delete; //! 
record parent for tallying available items - void set_join_node_pointer(forwarding_base *join) { + void set_join_node_pointer(queueing_forwarding_base *join) { my_join = join; } @@ -480,7 +488,7 @@ return; } - void reset_receiver(reset_flags) override { + void reset_receiver(reset_flags) { item_buffer::reset(); } @@ -489,7 +497,7 @@ friend class get_graph_helper; #endif - forwarding_base *my_join; + queueing_forwarding_base *my_join; }; // queueing_port #include "_flow_graph_tagged_buffer_impl.h" @@ -585,10 +593,10 @@ template friend class round_robin_cache; graph_task* try_put_task(const input_type& v) override { key_matching_port_operation op_data(v, try__put); - graph_task* rtask = NULL; + graph_task* rtask = nullptr; my_aggregator.execute(&op_data); if(op_data.status == SUCCEEDED) { - rtask = my_join->increment_key_count((*(this->get_key_func()))(v), false); // may spawn + rtask = my_join->increment_key_count((*(this->get_key_func()))(v)); // may spawn // rtask has to reflect the return status of the try_put if(!rtask) rtask = SUCCESSFULLY_ENQUEUED; } @@ -602,16 +610,17 @@ public: key_matching_port() : receiver(), buffer_type() { - my_join = NULL; + my_join = nullptr; my_aggregator.initialize_handler(handler_type(this)); } // copy constructor - key_matching_port(const key_matching_port& /*other*/) : receiver(), buffer_type() { - my_join = NULL; - my_aggregator.initialize_handler(handler_type(this)); - } - + key_matching_port(const key_matching_port& /*other*/) = delete; +#if __INTEL_COMPILER <= 2021 + // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited + // class while the parent class has the virtual keyword for the destructor. + virtual +#endif ~key_matching_port() { } void set_join_node_pointer(forwarding_base *join) { @@ -637,7 +646,7 @@ return; } - void reset_receiver(reset_flags ) override { + void reset_receiver(reset_flags ) { buffer_type::reset(); } @@ -657,19 +666,19 @@ class join_node_FE; template - class join_node_FE : public forwarding_base { + class join_node_FE : public reserving_forwarding_base { public: static const int N = std::tuple_size::value; typedef OutputTuple output_type; typedef InputTuple input_type; typedef join_node_base base_node_type; // for forwarding - join_node_FE(graph &g) : forwarding_base(g), my_node(NULL) { + join_node_FE(graph &g) : reserving_forwarding_base(g), my_node(nullptr) { ports_with_no_inputs = N; join_helper::set_join_node_pointer(my_inputs, this); } - join_node_FE(const join_node_FE& other) : forwarding_base((other.forwarding_base::graph_ref)), my_node(NULL) { + join_node_FE(const join_node_FE& other) : reserving_forwarding_base((other.reserving_forwarding_base::graph_ref)), my_node(nullptr) { ports_with_no_inputs = N; join_helper::set_join_node_pointer(my_inputs, this); } @@ -681,19 +690,17 @@ } // if all input_ports have predecessors, spawn forward to try and consume tuples - graph_task* decrement_port_count(bool handle_task) override { + graph_task* decrement_port_count() override { if(ports_with_no_inputs.fetch_sub(1) == 1) { if(is_graph_active(this->graph_ref)) { small_object_allocator allocator{}; typedef forward_task_bypass task_type; graph_task* t = allocator.new_object(graph_ref, allocator, *my_node); graph_ref.reserve_wait(); - if( !handle_task ) - return t; spawn_in_graph_arena(this->graph_ref, *t); } } - return NULL; + return nullptr; } input_type &input_ports() { return my_inputs; } @@ -730,19 +737,19 @@ }; // join_node_FE template - class join_node_FE : public forwarding_base { 
+ class join_node_FE : public queueing_forwarding_base { public: static const int N = std::tuple_size::value; typedef OutputTuple output_type; typedef InputTuple input_type; typedef join_node_base base_node_type; // for forwarding - join_node_FE(graph &g) : forwarding_base(g), my_node(NULL) { + join_node_FE(graph &g) : queueing_forwarding_base(g), my_node(nullptr) { ports_with_no_items = N; join_helper::set_join_node_pointer(my_inputs, this); } - join_node_FE(const join_node_FE& other) : forwarding_base((other.forwarding_base::graph_ref)), my_node(NULL) { + join_node_FE(const join_node_FE& other) : queueing_forwarding_base((other.queueing_forwarding_base::graph_ref)), my_node(nullptr) { ports_with_no_items = N; join_helper::set_join_node_pointer(my_inputs, this); } @@ -771,8 +778,6 @@ return nullptr; } - void increment_port_count() override { __TBB_ASSERT(false, NULL); } // should never be called - input_type &input_ports() { return my_inputs; } protected: @@ -853,14 +858,12 @@ unref_key_type my_val; output_type* my_output; graph_task* bypass_t; - bool enqueue_task; // constructor for value parameter - key_matching_FE_operation(const unref_key_type& e , bool q_task , op_type t) : type(char(t)), my_val(e), - my_output(NULL), bypass_t(NULL), enqueue_task(q_task) {} - key_matching_FE_operation(output_type *p, op_type t) : type(char(t)), my_output(p), bypass_t(NULL), - enqueue_task(true) {} + key_matching_FE_operation(const unref_key_type& e , op_type t) : type(char(t)), my_val(e), + my_output(nullptr), bypass_t(nullptr) {} + key_matching_FE_operation(output_type *p, op_type t) : type(char(t)), my_output(p), bypass_t(nullptr) {} // constructor with no parameter - key_matching_FE_operation(op_type t) : type(char(t)), my_output(NULL), bypass_t(NULL), enqueue_task(true) {} + key_matching_FE_operation(op_type t) : type(char(t)), my_output(nullptr), bypass_t(nullptr) {} }; typedef aggregating_functor handler_type; @@ -869,11 +872,11 @@ // called from aggregator, so serialized // returns a task pointer if the a task would have been enqueued but we asked that - // it be returned. Otherwise returns NULL. - graph_task* fill_output_buffer(unref_key_type &t, bool should_enqueue, bool handle_task) { + // it be returned. Otherwise returns nullptr. 
+ graph_task* fill_output_buffer(unref_key_type &t) { output_type l_out; - graph_task* rtask = NULL; - bool do_fwd = should_enqueue && this->buffer_empty() && is_graph_active(this->graph_ref); + graph_task* rtask = nullptr; + bool do_fwd = this->buffer_empty() && is_graph_active(this->graph_ref); this->current_key = t; this->delete_with_key(this->current_key); // remove the key if(join_helper::get_items(my_inputs, l_out)) { // <== call back @@ -883,12 +886,6 @@ typedef forward_task_bypass task_type; rtask = allocator.new_object(this->graph_ref, allocator, *my_node); this->graph_ref.reserve_wait(); - if( handle_task ) { - // TODO revamp: make spawn_in_graph_arena extract reference to the graph - // from the passed task - spawn_in_graph_arena(this->graph_ref, *rtask); - rtask = NULL; - } do_fwd = false; } // retire the input values @@ -915,20 +912,16 @@ case inc_count: { // called from input ports count_element_type *p = 0; unref_key_type &t = current->my_val; - bool do_enqueue = current->enqueue_task; if(!(this->find_ref_with_key(t,p))) { count_element_type ev; ev.my_key = t; ev.my_value = 0; this->insert_with_key(ev); - if(!(this->find_ref_with_key(t,p))) { - __TBB_ASSERT(false, "should find key after inserting it"); - } + bool found = this->find_ref_with_key(t, p); + __TBB_ASSERT_EX(found, "should find key after inserting it"); } if(++(p->my_value) == size_t(N)) { - graph_task* rtask = fill_output_buffer(t, true, do_enqueue); - __TBB_ASSERT(!rtask || !do_enqueue, "task should not be returned"); - current->bypass_t = rtask; + current->bypass_t = fill_output_buffer(t); } } current->status.store( SUCCEEDED, std::memory_order_release); @@ -952,7 +945,7 @@ public: template - join_node_FE(graph &g, FunctionTuple &TtoK_funcs) : forwarding_base_type(g), my_node(NULL) { + join_node_FE(graph &g, FunctionTuple &TtoK_funcs) : forwarding_base_type(g), my_node(nullptr) { join_helper::set_join_node_pointer(my_inputs, this); join_helper::set_key_functors(my_inputs, TtoK_funcs); my_aggregator.initialize_handler(handler_type(this)); @@ -962,7 +955,7 @@ join_node_FE(const join_node_FE& other) : forwarding_base_type((other.forwarding_base_type::graph_ref)), key_to_count_buffer_type(), output_buffer_type() { - my_node = NULL; + my_node = nullptr; join_helper::set_join_node_pointer(my_inputs, this); join_helper::copy_key_functors(my_inputs, const_cast(other.my_inputs)); my_aggregator.initialize_handler(handler_type(this)); @@ -981,16 +974,12 @@ // if all input_ports have items, spawn forward to try and consume tuples // return a task if we are asked and did create one. 
- graph_task *increment_key_count(unref_key_type const & t, bool handle_task) override { // called from input_ports - key_matching_FE_operation op_data(t, handle_task, inc_count); + graph_task *increment_key_count(unref_key_type const & t) override { // called from input_ports + key_matching_FE_operation op_data(t, inc_count); my_aggregator.execute(&op_data); return op_data.bypass_t; } - graph_task *decrement_port_count(bool /*handle_task*/) override { __TBB_ASSERT(false, NULL); return NULL; } - - void increment_port_count() override { __TBB_ASSERT(false, NULL); } // should never be called - input_type &input_ports() { return my_inputs; } protected: @@ -1062,10 +1051,10 @@ }; graph_task* bypass_t; join_node_base_operation(const output_type& e, op_type t) : type(char(t)), - my_arg(const_cast(&e)), bypass_t(NULL) {} + my_arg(const_cast(&e)), bypass_t(nullptr) {} join_node_base_operation(const successor_type &s, op_type t) : type(char(t)), - my_succ(const_cast(&s)), bypass_t(NULL) {} - join_node_base_operation(op_type t) : type(char(t)), bypass_t(NULL) {} + my_succ(const_cast(&s)), bypass_t(nullptr) {} + join_node_base_operation(op_type t) : type(char(t)), bypass_t(nullptr) {} }; typedef aggregating_functor handler_type; @@ -1108,7 +1097,7 @@ break; case do_fwrd_bypass: { bool build_succeeded; - graph_task *last_task = NULL; + graph_task *last_task = nullptr; output_type out; // forwarding must be exclusive, because try_to_make_tuple and tuple_accepted // are separate locked methods in the FE. We could conceivably fetch the front @@ -1143,23 +1132,25 @@ } // ---------- end aggregator ----------- public: - join_node_base(graph &g) : graph_node(g), input_ports_type(g), forwarder_busy(false) { - my_successors.set_owner(this); + join_node_base(graph &g) + : graph_node(g), input_ports_type(g), forwarder_busy(false), my_successors(this) + { input_ports_type::set_my_node(this); my_aggregator.initialize_handler(handler_type(this)); } join_node_base(const join_node_base& other) : graph_node(other.graph_node::my_graph), input_ports_type(other), - sender(), forwarder_busy(false), my_successors() { - my_successors.set_owner(this); + sender(), forwarder_busy(false), my_successors(this) + { input_ports_type::set_my_node(this); my_aggregator.initialize_handler(handler_type(this)); } template - join_node_base(graph &g, FunctionTuple f) : graph_node(g), input_ports_type(g, f), forwarder_busy(false) { - my_successors.set_owner(this); + join_node_base(graph &g, FunctionTuple f) + : graph_node(g), input_ports_type(g, f), forwarder_busy(false), my_successors(this) + { input_ports_type::set_my_node(this); my_aggregator.initialize_handler(handler_type(this)); } diff --git a/include/tbb/detail/_flow_graph_node_impl.h b/include/tbb/detail/_flow_graph_node_impl.h index b16872c311..b57405e991 100644 --- a/include/tbb/detail/_flow_graph_node_impl.h +++ b/include/tbb/detail/_flow_graph_node_impl.h @@ -34,10 +34,6 @@ class function_input_queue : public item_buffer { return this->item_buffer::front(); } - bool pop( T& t ) { - return this->pop_front( t ); - } - void pop() { this->destroy_front(); } @@ -67,27 +63,19 @@ class function_input_base : public receiver, no_assign { static_assert(!has_policy::value || !has_policy::value, ""); //! Constructor for function_input_base - function_input_base( - graph &g, size_t max_concurrency, node_priority_t a_priority) - : my_graph_ref(g), my_max_concurrency(max_concurrency) - , my_concurrency(0), my_priority(a_priority) - , my_queue(!has_policy::value ? 
new input_queue_type() : NULL) - , forwarder_busy(false) + function_input_base( graph &g, size_t max_concurrency, node_priority_t a_priority ) + : my_graph_ref(g), my_max_concurrency(max_concurrency) + , my_concurrency(0), my_priority(a_priority) + , my_queue(!has_policy::value ? new input_queue_type() : NULL) + , my_predecessors(this) + , forwarder_busy(false) { - my_predecessors.set_owner(this); my_aggregator.initialize_handler(handler_type(this)); } //! Copy constructor - function_input_base( const function_input_base& src) - : receiver(), no_assign() - , my_graph_ref(src.my_graph_ref), my_max_concurrency(src.my_max_concurrency) - , my_concurrency(0), my_priority(src.my_priority) - , my_queue(src.my_queue ? new input_queue_type() : NULL), forwarder_busy(false) - { - my_predecessors.set_owner(this); - my_aggregator.initialize_handler(handler_type(this)); - } + function_input_base( const function_input_base& src ) + : function_input_base(src.my_graph_ref, src.my_max_concurrency, src.my_priority) {} //! Destructor // The queue is allocated by the constructor for {multi}function_node. @@ -135,7 +123,7 @@ class function_input_base : public receiver, no_assign { input_queue_type *my_queue; predecessor_cache my_predecessors; - void reset_receiver( reset_flags f) override { + void reset_receiver( reset_flags f) { if( f & rf_clear_edges) my_predecessors.clear(); else my_predecessors.reset(); @@ -255,7 +243,7 @@ class function_input_base : public receiver, no_assign { //! Creates tasks for postponed messages if available and if concurrency allows void internal_forward(operation_type *op) { op->bypass_t = NULL; - if (my_concurrency < my_max_concurrency || !my_max_concurrency) + if (my_concurrency < my_max_concurrency) op->bypass_t = perform_queued_requests(); if(op->bypass_t) op->status.store(SUCCEEDED, std::memory_order_release); @@ -379,7 +367,7 @@ class function_input : public function_input_baseclone() ), my_init_body(src.my_init_body->clone() ) { } -#if __INTEL_COMPILER == 2021 +#if __INTEL_COMPILER <= 2021 // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited // class while the parent class has the virtual keyword for the destrocutor. virtual @@ -459,20 +447,24 @@ template struct clear_element { (void)std::get(p).successors().clear(); clear_element::clear_this(p); } +#if TBB_USE_ASSERT template static bool this_empty(P &p) { if(std::get(p).successors().empty()) return clear_element::this_empty(p); return false; } +#endif }; template<> struct clear_element<1> { template static void clear_this(P &p) { (void)std::get<0>(p).successors().clear(); } +#if TBB_USE_ASSERT template static bool this_empty(P &p) { return std::get<0>(p).successors().empty(); } +#endif }; template @@ -706,10 +698,8 @@ class function_output : public sender { typedef typename sender::successor_type successor_type; typedef broadcast_cache broadcast_cache_type; - function_output( graph& g) : my_graph_ref(g) { my_successors.set_owner(this); } - function_output(const function_output & other) : sender(), my_graph_ref(other.my_graph_ref) { - my_successors.set_owner(this); - } + function_output(graph& g) : my_successors(this), my_graph_ref(g) {} + function_output(const function_output& other) = delete; //! Adds a new successor to this node bool register_successor( successor_type &r ) override { @@ -723,18 +713,6 @@ class function_output : public sender { return true; } - // for multifunction_node. 
The function_body that implements - // the node will have an input and an output tuple of ports. To put - // an item to a successor, the body should - // - // get(output_ports).try_put(output_value); - // - // if task pointer is returned will always spawn and return true, else - // return value will be bool returned from successors.try_put. - graph_task *try_put_task(const output_type &i) { // not a virtual method in this class - return my_successors.try_put_task(i); - } - broadcast_cache_type &successors() { return my_successors; } graph& graph_reference() const { return my_graph_ref; } @@ -750,8 +728,8 @@ class multifunction_output : public function_output { typedef function_output base_type; using base_type::my_successors; - multifunction_output(graph& g) : base_type(g) {my_successors.set_owner(this);} - multifunction_output( const multifunction_output& other) : base_type(other.my_graph_ref) { my_successors.set_owner(this); } + multifunction_output(graph& g) : base_type(g) {} + multifunction_output(const multifunction_output& other) : base_type(other.my_graph_ref) {} bool try_put(const output_type &i) { graph_task *res = try_put_task(i); diff --git a/include/tbb/detail/_flow_graph_trace_impl.h b/include/tbb/detail/_flow_graph_trace_impl.h index e7709a5b01..d214cee4e4 100644 --- a/include/tbb/detail/_flow_graph_trace_impl.h +++ b/include/tbb/detail/_flow_graph_trace_impl.h @@ -312,8 +312,6 @@ static inline void fgt_node_desc( const NodeType * /*node*/, const char * /*desc static inline void fgt_graph_desc( const void * /*g*/, const char * /*desc*/ ) { } -static inline void fgt_body( void * /*node*/, void * /*body*/ ) { } - template< int N, typename PortsTuple > static inline void fgt_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/ ) { } @@ -325,7 +323,6 @@ static inline void fgt_multiinput_node( void* /*codeptr*/, string_resource_index static inline void fgt_multiinput_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*node*/, void * /*graph*/ ) { } -static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*output_port*/ ) { } static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/ ) { } static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*decrement_port*/, void * /*output_port*/ ) { } diff --git a/include/tbb/detail/_flow_graph_types_impl.h b/include/tbb/detail/_flow_graph_types_impl.h index d374321fe7..a4f15a5618 100644 --- a/include/tbb/detail/_flow_graph_types_impl.h +++ b/include/tbb/detail/_flow_graph_types_impl.h @@ -73,50 +73,6 @@ struct make_sequence < 0, S... > { typedef sequence type; }; -// Until C++14 std::initializer_list does not guarantee life time of contained objects. 
-template -class initializer_list_wrapper { -public: - typedef T value_type; - typedef const T& reference; - typedef const T& const_reference; - typedef size_t size_type; - - typedef T* iterator; - typedef const T* const_iterator; - - initializer_list_wrapper( std::initializer_list il ) noexcept : my_begin( static_cast(malloc( il.size()*sizeof( T ) )) ) { - iterator dst = my_begin; - for ( typename std::initializer_list::const_iterator src = il.begin(); src != il.end(); ++src ) - new (dst++) T( *src ); - my_end = dst; - } - - initializer_list_wrapper( const initializer_list_wrapper& ilw ) noexcept : my_begin( static_cast(malloc( ilw.size()*sizeof( T ) )) ) { - iterator dst = my_begin; - for ( typename std::initializer_list::const_iterator src = ilw.begin(); src != ilw.end(); ++src ) - new (dst++) T( *src ); - my_end = dst; - } - - initializer_list_wrapper( initializer_list_wrapper&& ilw ) noexcept : my_begin( ilw.my_begin ), my_end( ilw.my_end ) { - ilw.my_begin = ilw.my_end = NULL; - } - - ~initializer_list_wrapper() { - if ( my_begin ) - free( my_begin ); - } - - const_iterator begin() const noexcept { return my_begin; } - const_iterator end() const noexcept { return my_end; } - size_t size() const noexcept { return (size_t)(my_end - my_begin); } - -private: - iterator my_begin; - iterator my_end; -}; - //! type mimicking std::pair but with trailing fill to ensure each element of an array //* will have the correct alignment template @@ -163,7 +119,7 @@ struct default_constructed { }; // struct to allow us to copy and test the type of objects struct WrapperBase { virtual ~WrapperBase() {} - virtual void CopyTo(void* /*newSpace*/) const { } + virtual void CopyTo(void* /*newSpace*/) const = 0; }; // Wrapper contains a T, with the ability to test what T is. 
The Wrapper can be @@ -191,7 +147,7 @@ struct Wrapper: public WrapperBase { }; public: explicit Wrapper( const T& other ) : value_space(other) { } - explicit Wrapper(const Wrapper& other) : value_space(other.value_space) { } + explicit Wrapper(const Wrapper& other) = delete; void CopyTo(void* newSpace) const override { _unwind_space guard((pointer_type)newSpace); diff --git a/include/tbb/detail/_pipeline_filters.h b/include/tbb/detail/_pipeline_filters.h index 949610cb51..1a5d402725 100644 --- a/include/tbb/detail/_pipeline_filters.h +++ b/include/tbb/detail/_pipeline_filters.h @@ -318,7 +318,6 @@ class filter_node_ptr { void operator=(filter_node *); void operator=(const filter_node_ptr &); void operator=(filter_node_ptr &&); - filter_node* operator->() const; filter_node& operator*() const; operator bool() const; }; @@ -418,10 +417,6 @@ inline void filter_node_ptr::operator=(filter_node_ptr && rhs) { } } -inline filter_node* filter_node_ptr::operator->() const { - return my_node; -} - inline filter_node& filter_node_ptr::operator*() const{ __TBB_ASSERT(my_node,"NULL node is used"); return *my_node; diff --git a/include/tbb/detail/_range_common.h b/include/tbb/detail/_range_common.h index 0ebbc1579f..71ffd8d862 100644 --- a/include/tbb/detail/_range_common.h +++ b/include/tbb/detail/_range_common.h @@ -44,12 +44,31 @@ class proportional_split : no_assign { size_t right() const { return my_right; } // used when range does not support proportional split - operator split() const { return split(); } + explicit operator split() const { return split(); } private: size_t my_left, my_right; }; +template +struct range_split_object_provider { + template + static split get( PartitionerSplitType& ) { return split(); } +}; + +template +struct range_split_object_provider::value>::type> { + template + static PartitionerSplitType& get( PartitionerSplitType& split_obj ) { return split_obj; } +}; + +template +auto get_range_split_object( PartitionerSplitType& split_obj ) +-> decltype(range_split_object_provider::get(split_obj)) { + return range_split_object_provider::get(split_obj); +} + } // namespace d0 } // namespace detail } // namespace tbb diff --git a/include/tbb/detail/_task.h b/include/tbb/detail/_task.h index 63e0e9f9b2..556bc27146 100644 --- a/include/tbb/detail/_task.h +++ b/include/tbb/detail/_task.h @@ -29,6 +29,7 @@ #include #include #include +#include namespace tbb { namespace detail { @@ -60,12 +61,14 @@ using suspend_callback_type = void(*)(void*, suspend_point_type*); void __TBB_EXPORTED_FUNC suspend(suspend_callback_type suspend_callback, void* user_callback); void __TBB_EXPORTED_FUNC resume(suspend_point_type* tag); suspend_point_type* __TBB_EXPORTED_FUNC current_suspend_point(); +void __TBB_EXPORTED_FUNC notify_waiters(d1::wait_context& wc); class thread_data; class task_dispatcher; class external_waiter; struct task_accessor; struct task_arena_impl; +struct wait_node; } // namespace r1 namespace d1 { @@ -92,36 +95,64 @@ inline void resume(suspend_point tag) { } #endif /* __TBB_RESUMABLE_TASKS */ +// TODO align wait_context on cache line class wait_context { - static constexpr std::uint64_t abandon_wait_flag = 1LLU << 33; - static constexpr std::uint64_t overflow_mask = ~((1LLU << 32) - 1) & ~abandon_wait_flag; - - std::uint64_t m_version_and_traits{}; + // The flag works as a lock for the wait_context + // All functions use this lock to work with the wait list + static constexpr std::uint64_t lock_flag = 1LLU << 34; + // The flag signals to the last decrementing thread to proceed with the 
diff --git a/include/tbb/detail/_task.h b/include/tbb/detail/_task.h
index 63e0e9f9b2..556bc27146 100644
--- a/include/tbb/detail/_task.h
+++ b/include/tbb/detail/_task.h
@@ -29,6 +29,7 @@
 #include
 #include
 #include
+#include <mutex>
 
 namespace tbb {
 namespace detail {
@@ -60,12 +61,14 @@ using suspend_callback_type = void(*)(void*, suspend_point_type*);
 void __TBB_EXPORTED_FUNC suspend(suspend_callback_type suspend_callback, void* user_callback);
 void __TBB_EXPORTED_FUNC resume(suspend_point_type* tag);
 suspend_point_type* __TBB_EXPORTED_FUNC current_suspend_point();
+void __TBB_EXPORTED_FUNC notify_waiters(d1::wait_context& wc);
 
 class thread_data;
 class task_dispatcher;
 class external_waiter;
 struct task_accessor;
 struct task_arena_impl;
+struct wait_node;
 } // namespace r1
 
 namespace d1 {
@@ -92,36 +95,64 @@ inline void resume(suspend_point tag) {
 }
 #endif /* __TBB_RESUMABLE_TASKS */
 
+// TODO: align wait_context on a cache line
 class wait_context {
-    static constexpr std::uint64_t abandon_wait_flag = 1LLU << 33;
-    static constexpr std::uint64_t overflow_mask = ~((1LLU << 32) - 1) & ~abandon_wait_flag;
-
-    std::uint64_t m_version_and_traits{};
+    // The flag works as a lock for the wait_context;
+    // all functions take this lock to work with the wait list.
+    static constexpr std::uint64_t lock_flag = 1LLU << 34;
+    // The flag signals the last decrementing thread to process the wait list.
+    static constexpr std::uint64_t waiter_flag = 1LLU << 33;
+    static constexpr std::uint64_t overflow_mask = ~((1LLU << 32) - 1) & ~(lock_flag | waiter_flag);
+    using lock_guard = std::lock_guard<wait_context>;
+
+    std::uint64_t m_version_and_traits{1};
     std::atomic<std::uint64_t> m_ref_count{};
-    suspend_point m_waiting_coroutine{};
+    // Pointer to the head of the wait list
+    r1::wait_node* m_wait_head{nullptr};
 
-    void abandon_wait() {
-        __TBB_ASSERT((m_ref_count.load(std::memory_order_relaxed) & abandon_wait_flag) == 0, "The wait object can be abandoned only once");
-        add_reference(abandon_wait_flag);
-    }
+    bool is_locked();
+    void lock();
+    void unlock();
+
+    bool publish_wait_list();
+
+    template <typename F>
+    bool try_register_waiter(r1::wait_node& waiter, F&& condition);
+
+    void unregister_waiter(r1::wait_node& waiter);
+
+    void notify_waiters();
 
     void add_reference(std::int64_t delta) {
         call_itt_task_notify(releasing, this);
         std::uint64_t r = m_ref_count.fetch_add(delta) + delta;
+
         __TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected");
-        if (r == abandon_wait_flag) {
-            // There is no any more references but the waiting stack is
-            // suspended (abandoned) so resume it.
-            __TBB_ASSERT(m_waiting_coroutine != nullptr, nullptr);
-            m_ref_count.store(0, std::memory_order_relaxed);
-            r1::resume(m_waiting_coroutine);
+
+        if ((r & ~lock_flag) == waiter_flag) {
+            // Some external waiters or coroutine waiters sleep in the wait list;
+            // notify them that the work is done.
+            r1::notify_waiters(*this);
         }
     }
 
     bool continue_execution() const {
         std::uint64_t r = m_ref_count.load(std::memory_order_acquire);
         __TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected");
-        return (r & ~abandon_wait_flag) > 0;
+        return (r & ~(waiter_flag | lock_flag)) > 0;
+    }
+
+    void wait_for_notification_completion() {
+        // This function prevents a couple of races:
+        // - the wait_context might still be locked by a notifying thread when a notified thread
+        //   destroys the wait_context, e.g. lock or unlock might access a destroyed object;
+        // - the wait_context might reach a zero ref_count again while a notifying thread is still
+        //   in progress, i.e. the current notifying thread might remove the lock and waiter flags
+        //   and the next notifying thread might face the first issue.
+        atomic_backoff backoff;
+        while (!continue_execution() && m_ref_count.load(std::memory_order_relaxed) & (lock_flag | waiter_flag)) {
+            backoff.pause();
+        }
     }
 
     friend class r1::thread_data;
@@ -129,15 +160,26 @@ class wait_context {
     friend class r1::external_waiter;
     friend class task_group;
     friend class task_group_base;
+    friend class std::lock_guard<wait_context>;
     friend struct r1::task_arena_impl;
     friend struct r1::suspend_point_type;
+    friend void r1::notify_waiters(d1::wait_context& wc);
 public:
     // Although the internal reference count is uint64_t, we limit the user interface to uint32_t
     // to preserve a part of the internal reference count for special needs.
     wait_context(std::uint32_t ref_count) : m_ref_count{ref_count} { suppress_unused_warning(m_version_and_traits); }
     wait_context(const wait_context&) = delete;
 
+    ~wait_context() {
+        __TBB_ASSERT(!continue_execution(), NULL);
+        // Wait until all notifying threads leave the wait_context
+        wait_for_notification_completion();
+    }
+
     void reserve(std::uint32_t delta = 1) {
+        // Wait until the notifying thread completes notifications,
+        // so that notify_waiters is not called by more than one thread.
+        wait_for_notification_completion();
         add_reference(delta);
     }
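The lock_flag and waiter_flag bits live above the 32-bit user-visible count, so a single fetch-and-add both releases a reference and tells the releasing thread whether parked waiters must be woken. A toy model of that test, assuming a single 64-bit atomic and no real wait list:

    // Toy model of packing bookkeeping flags above a 32-bit reference count.
    #include <atomic>
    #include <cassert>
    #include <cstdint>

    constexpr std::uint64_t waiter_flag = 1ULL << 33;
    constexpr std::uint64_t lock_flag   = 1ULL << 34;

    std::atomic<std::uint64_t> ref_count{0};

    // Returns true when the caller released the last real reference while
    // waiters are registered, i.e. it must notify them.
    bool release_reference() {
        std::uint64_t r = ref_count.fetch_sub(1) - 1;
        // Ignore the lock bit; if only the waiter bit remains,
        // the work is done and someone is sleeping on it.
        return (r & ~lock_flag) == waiter_flag;
    }

    int main() {
        ref_count.store(2 + waiter_flag); // two tasks outstanding, one waiter parked
        assert(!release_reference());     // first release: work remains
        assert(release_reference());      // last release: notify the waiter
    }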
@@ -223,9 +265,6 @@ class alignas(task_alignment) task : public task_traits {
     virtual task* execute(execution_data&) = 0;
     virtual task* cancel(execution_data&) = 0;
 
-    // TODO: remove in Gold release
-    static task_group_context* current_execute_data() { return current_context(); }
-
 private:
     std::uint64_t m_reserved[5]{};
diff --git a/include/tbb/detail/_template_helpers.h b/include/tbb/detail/_template_helpers.h
index 9f213956ce..7967df6b26 100644
--- a/include/tbb/detail/_template_helpers.h
+++ b/include/tbb/detail/_template_helpers.h
@@ -313,13 +313,7 @@ struct stored_pack : stored_pack<Types...>
             std::forward<Preceding>(params)... , pack.leftmost_value
         );
     }
-    template< typename Ret, typename F, typename... Preceding >
-    static Ret call(F&& f, const pack_type& pack, Preceding&&... params) {
-        return pack_remainder::template call<Ret>(
-            std::forward<F>(f), static_cast<const pack_remainder&>(pack),
-            std::forward<Preceding>(params)... , pack.leftmost_value
-        );
-    }
+
     template< typename Ret, typename F, typename... Preceding >
     static Ret call(F&& f, pack_type&& pack, Preceding&&... params) {
         return pack_remainder::template call<Ret>(
diff --git a/include/tbb/detail/_utils.h b/include/tbb/detail/_utils.h
index c78a65d395..7b7bb9291c 100644
--- a/include/tbb/detail/_utils.h
+++ b/include/tbb/detail/_utils.h
@@ -272,11 +272,6 @@ void run_initializer(const Functor& f, std::atomic<do_once_state>& state ) {
     state.store(do_once_state::executed, std::memory_order_release);
 }
 
-// Run the initializer which can require repeated call
-inline void run_initializer( bool (*f)(), std::atomic<do_once_state>& state ) {
-    state.store(f() ? do_once_state::executed : do_once_state::uninitialized, std::memory_order_release);
-}
-
 } // namespace d0
 
 namespace d1 {
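With the bool-returning overload gone, run_initializer can assume initializers never need to be retried. A minimal sketch of the one-shot protocol that remains; do_once_state and atomic_do_once are simplified here from the surrounding headers:

    // Sketch of the remaining one-shot initialization protocol.
    #include <atomic>
    #include <cassert>

    enum class do_once_state { uninitialized, pending, executed };

    std::atomic<do_once_state> state{do_once_state::uninitialized};

    template <typename Functor>
    void run_initializer(const Functor& f, std::atomic<do_once_state>& st) {
        f();  // initializers are now assumed to always succeed
        st.store(do_once_state::executed, std::memory_order_release);
    }

    template <typename Functor>
    void atomic_do_once(const Functor& f, std::atomic<do_once_state>& st) {
        if (st.load(std::memory_order_acquire) == do_once_state::executed) return;
        do_once_state expected = do_once_state::uninitialized;
        if (st.compare_exchange_strong(expected, do_once_state::pending))
            run_initializer(f, st);          // we won the race; initialize once
        else
            while (st.load(std::memory_order_acquire) != do_once_state::executed) {}
    }

    int main() {
        int hits = 0;
        atomic_do_once([&] { ++hits; }, state);
        atomic_do_once([&] { ++hits; }, state);
        assert(hits == 1);
    }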
diff --git a/include/tbb/enumerable_thread_specific.h b/include/tbb/enumerable_thread_specific.h
index 3f947ac1f2..20227af466 100644
--- a/include/tbb/enumerable_thread_specific.h
+++ b/include/tbb/enumerable_thread_specific.h
@@ -529,11 +529,9 @@ class segmented_iterator
     // assignment
     template<typename U>
     segmented_iterator& operator=( const segmented_iterator<SegmentedContainer, U>& other) {
-        if(this != &other) {
-            my_segcont = other.my_segcont;
-            outer_iter = other.outer_iter;
-            if(outer_iter != my_segcont->end()) inner_iter = other.inner_iter;
-        }
+        my_segcont = other.my_segcont;
+        outer_iter = other.outer_iter;
+        if(outer_iter != my_segcont->end()) inner_iter = other.inner_iter;
         return *this;
     }
 
@@ -638,7 +636,6 @@ template<typename T, typename Finit>
 struct construct_by_finit: no_assign {
     Finit f;
     void construct(void* where) {new(where) T(f());}
-    construct_by_finit( const Finit& f_ ) : f(f_) {}
     construct_by_finit( Finit&& f_ ) : f(std::move(f_)) {}
 };
diff --git a/include/tbb/flow_graph.h b/include/tbb/flow_graph.h
index f71bfabaea..86602affa1 100644
--- a/include/tbb/flow_graph.h
+++ b/include/tbb/flow_graph.h
@@ -209,9 +209,6 @@ class receiver {
     virtual graph_task *try_put_task(const T& t) = 0;
     virtual graph& graph_reference() const = 0;
 
-    //! put receiver back in initial state
-    virtual void reset_receiver(reset_flags f = rf_reset_protocol) = 0;
-
     template<typename TT, typename M> friend class successor_cache;
     virtual bool is_continue_receiver() { return false; }
@@ -307,7 +304,7 @@ class continue_receiver : public receiver< continue_msg > {
 
     // error in gcc 4.1.2
     template<typename U, typename V> friend class limiter_node;
 
-    void reset_receiver( reset_flags f ) override {
+    virtual void reset_receiver( reset_flags f ) {
         my_current_count = 0;
         if (f & rf_clear_edges) {
             my_predecessor_count = my_initial_predecessor_count;
@@ -438,7 +435,7 @@ inline void graph::reset( reset_flags f ) {
     // reset context
     deactivate_graph(*this);
 
-    if(my_context) my_context->reset();
+    my_context->reset();
     cancelled = false;
     caught_exception = false;
     // reset all the nodes comprising the graph
@@ -492,34 +489,33 @@ class input_node : public graph_node, public sender< Output > {
     //! Constructor for a node with a successor
     template< typename Body >
     __TBB_NOINLINE_SYM input_node( graph &g, Body body )
-        : graph_node(g), my_active(false),
-          my_body( new input_body_leaf< output_type, Body>(body) ),
-          my_init_body( new input_body_leaf< output_type, Body>(body) ),
-          my_reserved(false), my_has_cached_item(false)
+        : graph_node(g), my_active(false)
+        , my_body( new input_body_leaf< output_type, Body>(body) )
+        , my_init_body( new input_body_leaf< output_type, Body>(body) )
+        , my_successors(this), my_reserved(false), my_has_cached_item(false)
     {
-        my_successors.set_owner(this);
-        fgt_node_with_body( CODEPTR(), FLOW_INPUT_NODE, &this->my_graph,
-                            static_cast<sender<output_type> *>(this), this->my_body );
+        fgt_node_with_body(CODEPTR(), FLOW_INPUT_NODE, &this->my_graph,
+                           static_cast<sender<output_type> *>(this), this->my_body);
     }
 
 #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
     template
     input_node( const node_set& successors, Body body )
-        : input_node(successors.graph_reference(), body) {
+        : input_node(successors.graph_reference(), body)
+    {
         make_edges(*this, successors);
     }
 #endif
 
     //! Copy constructor
-    __TBB_NOINLINE_SYM input_node( const input_node& src ) :
-        graph_node(src.my_graph), sender<Output>(),
-        my_active(false),
-        my_body( src.my_init_body->clone() ), my_init_body(src.my_init_body->clone() ),
-        my_reserved(false), my_has_cached_item(false)
+    __TBB_NOINLINE_SYM input_node( const input_node& src )
+        : graph_node(src.my_graph), sender<Output>()
+        , my_active(false)
+        , my_body(src.my_init_body->clone()), my_init_body(src.my_init_body->clone())
+        , my_successors(this), my_reserved(false), my_has_cached_item(false)
    {
-        my_successors.set_owner(this);
         fgt_node_with_body(CODEPTR(), FLOW_INPUT_NODE, &this->my_graph,
-                            static_cast<sender<output_type> *>(this), this->my_body );
+                           static_cast<sender<output_type> *>(this), this->my_body);
     }
 
     //!
The destructor @@ -897,7 +893,6 @@ class split_node : public graph_node, public receiver { __TBB_ASSERT(!(f & rf_clear_edges) || clear_element::this_empty(my_output_ports), "split_node reset failed"); } - void reset_receiver(reset_flags /*f*/) override {} graph& graph_reference() const override { return my_graph; } @@ -1013,10 +1008,9 @@ class broadcast_node : public graph_node, public receiver, public sender { broadcast_cache my_successors; public: - __TBB_NOINLINE_SYM explicit broadcast_node(graph& g) : graph_node(g) { - my_successors.set_owner( this ); + __TBB_NOINLINE_SYM explicit broadcast_node(graph& g) : graph_node(g), my_successors(this) { fgt_node( CODEPTR(), FLOW_BROADCAST_NODE, &this->my_graph, - static_cast *>(this), static_cast *>(this) ); + static_cast *>(this), static_cast *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET @@ -1027,13 +1021,7 @@ class broadcast_node : public graph_node, public receiver, public sender { #endif // Copy constructor - __TBB_NOINLINE_SYM broadcast_node( const broadcast_node& src ) : - graph_node(src.my_graph), receiver(), sender() - { - my_successors.set_owner( this ); - fgt_node( CODEPTR(), FLOW_BROADCAST_NODE, &this->my_graph, - static_cast *>(this), static_cast *>(this) ); - } + __TBB_NOINLINE_SYM broadcast_node( const broadcast_node& src ) : broadcast_node(src.my_graph) {} //! Adds a successor bool register_successor( successor_type &r ) override { @@ -1062,8 +1050,6 @@ class broadcast_node : public graph_node, public receiver, public sender { return my_graph; } - void reset_receiver(reset_flags /*f*/) override {} - void reset_node(reset_flags f) override { if (f&rf_clear_edges) { my_successors.clear(); @@ -1291,9 +1277,8 @@ class buffer_node //! Constructor __TBB_NOINLINE_SYM explicit buffer_node( graph &g ) : graph_node(g), reservable_item_buffer(), receiver(), - sender(), forwarder_busy(false) + sender(), my_successors(this), forwarder_busy(false) { - my_successors.set_owner(this); my_aggregator.initialize_handler(handler_type(this)); fgt_node( CODEPTR(), FLOW_BUFFER_NODE, &this->my_graph, static_cast *>(this), static_cast *>(this) ); @@ -1307,15 +1292,7 @@ class buffer_node #endif //! 
Copy constructor - __TBB_NOINLINE_SYM buffer_node( const buffer_node& src ) - : graph_node(src.my_graph), reservable_item_buffer(), - receiver(), sender(), forwarder_busy(false) - { - my_successors.set_owner(this); - my_aggregator.initialize_handler(handler_type(this)); - fgt_node( CODEPTR(), FLOW_BUFFER_NODE, &this->my_graph, - static_cast *>(this), static_cast *>(this) ); - } + __TBB_NOINLINE_SYM buffer_node( const buffer_node& src ) : buffer_node(src.my_graph) {} // // message sender implementation @@ -1416,8 +1393,6 @@ class buffer_node return my_graph; } - void reset_receiver(reset_flags /*f*/) override { } - protected: void reset_node( reset_flags f) override { reservable_item_buffer::reset(); @@ -1889,11 +1864,6 @@ class limiter_node : public graph_node, public receiver< T >, public sender< T > } } - void forward() { - __TBB_ASSERT(false, "Should never be called"); - return; - } - graph_task* decrement_counter( long long delta ) { { spin_mutex::scoped_lock lock(my_mutex); @@ -1908,9 +1878,6 @@ class limiter_node : public graph_node, public receiver< T >, public sender< T > } void initialize() { - my_predecessors.set_owner(this); - my_successors.set_owner(this); - decrement.set_owner(this); fgt_node( CODEPTR(), FLOW_LIMITER_NODE, &this->my_graph, static_cast *>(this), static_cast *>(&decrement), @@ -1923,7 +1890,8 @@ class limiter_node : public graph_node, public receiver< T >, public sender< T > //! Constructor limiter_node(graph &g, size_t threshold) - : graph_node(g), my_threshold(threshold), my_count(0), my_tries(0), decrement() + : graph_node(g), my_threshold(threshold), my_count(0), my_tries(0), my_predecessors(this) + , my_successors(this), decrement(this) { initialize(); } @@ -1937,12 +1905,7 @@ class limiter_node : public graph_node, public receiver< T >, public sender< T > #endif //! Copy constructor - limiter_node( const limiter_node& src ) : - graph_node(src.my_graph), receiver(), sender(), - my_threshold(src.my_threshold), my_count(0), my_tries(0), decrement() - { - initialize(); - } + limiter_node( const limiter_node& src ) : limiter_node(src.my_graph, src.my_threshold) {} //! 
Replace the current successor with this new successor bool register_successor( successor_type &r ) override { @@ -2028,10 +1991,6 @@ class limiter_node : public graph_node, public receiver< T >, public sender< T > graph& graph_reference() const override { return my_graph; } - void reset_receiver(reset_flags /*f*/) override { - __TBB_ASSERT(false,NULL); // should never be called - } - void reset_node( reset_flags f) override { my_count = 0; if(f & rf_clear_edges) { @@ -2118,15 +2077,6 @@ class join_node > : public unfolded_join_nod #if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING join_node(graph &g) : unfolded_type(g) {} - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - template - join_node(const node_set& nodes, key_matching = key_matching()) - : join_node(nodes.graph_reference()) { - make_edges_in_order(nodes, *this); - } -#endif - #endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ template @@ -2768,25 +2718,13 @@ class async_node typedef Input input_type; typedef Output output_type; typedef receiver receiver_type; - typedef typename receiver_type::predecessor_type predecessor_type; - typedef typename sender::successor_type successor_type; + typedef receiver successor_type; + typedef sender predecessor_type; typedef receiver_gateway gateway_type; typedef async_body_base async_body_base_type; typedef typename base_type::output_ports_type output_ports_type; private: - struct try_put_functor { - typedef multifunction_output output_port_type; - output_port_type *port; - // TODO: pass value by copy since we do not want to block asynchronous thread. - const Output *value; - bool result; - try_put_functor(output_port_type &p, const Output &v) : port(&p), value(&v), result(false) { } - void operator()() { - result = port->try_put(*value); - } - }; - class receiver_gateway_impl: public receiver_gateway { public: receiver_gateway_impl(async_node* node): my_node(node) {} @@ -2880,13 +2818,15 @@ class async_node // Define sender< Output > //! Add a new successor to this node - bool register_successor( successor_type &r ) override { - return output_port<0>(*this).register_successor(r); + bool register_successor(successor_type&) override { + __TBB_ASSERT(false, "Successors must be registered only via ports"); + return false; } //! Removes a successor from this node - bool remove_successor( successor_type &r ) override { - return output_port<0>(*this).remove_successor(r); + bool remove_successor(successor_type&) override { + __TBB_ASSERT(false, "Successors must be removed only via ports"); + return false; } template @@ -2915,10 +2855,11 @@ class overwrite_node : public graph_node, public receiver, public sender { typedef typename receiver::predecessor_type predecessor_type; typedef typename sender::successor_type successor_type; - __TBB_NOINLINE_SYM explicit overwrite_node(graph &g) : graph_node(g), my_buffer_is_valid(false) { - my_successors.set_owner( this ); + __TBB_NOINLINE_SYM explicit overwrite_node(graph &g) + : graph_node(g), my_successors(this), my_buffer_is_valid(false) + { fgt_node( CODEPTR(), FLOW_OVERWRITE_NODE, &this->my_graph, - static_cast *>(this), static_cast *>(this) ); + static_cast *>(this), static_cast *>(this) ); } #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET @@ -2929,17 +2870,11 @@ class overwrite_node : public graph_node, public receiver, public sender { #endif //! 
Copy constructor; doesn't take anything from src; default won't work - __TBB_NOINLINE_SYM overwrite_node( const overwrite_node& src ) : - graph_node(src.my_graph), receiver(), sender(), my_buffer_is_valid(false) - { - my_successors.set_owner( this ); - fgt_node( CODEPTR(), FLOW_OVERWRITE_NODE, &this->my_graph, - static_cast *>(this), static_cast *>(this) ); - } + __TBB_NOINLINE_SYM overwrite_node( const overwrite_node& src ) : overwrite_node(src.my_graph) {} ~overwrite_node() {} - bool register_successor( successor_type &s ) override { + bool register_successor( successor_type &s ) override { spin_mutex::scoped_lock l( my_mutex ); if (my_buffer_is_valid && is_graph_active( my_graph )) { // We have a valid value that must be forwarded immediately. @@ -3048,7 +2983,6 @@ class overwrite_node : public graph_node, public receiver, public sender { broadcast_cache< input_type, null_rw_mutex > my_successors; input_type my_buffer; bool my_buffer_is_valid; - void reset_receiver(reset_flags /*f*/) override {} void reset_node( reset_flags f) override { my_buffer_is_valid = false; diff --git a/include/tbb/memory_pool.h b/include/tbb/memory_pool.h index 7c3e57c99e..6d47d5ec1d 100644 --- a/include/tbb/memory_pool.h +++ b/include/tbb/memory_pool.h @@ -213,8 +213,15 @@ void *memory_pool::allocate_request(intptr_t pool_id, size_t & bytes) { const size_t unit_size = sizeof(typename Alloc::value_type); __TBBMALLOC_ASSERT( 0 == bytes%unit_size, NULL); void *ptr; - __TBB_TRY { ptr = self.my_alloc.allocate( bytes/unit_size ); } - __TBB_CATCH(...) { return 0; } +#if TBB_USE_EXCEPTIONS + try { +#endif + ptr = self.my_alloc.allocate( bytes/unit_size ); +#if TBB_USE_EXCEPTIONS + } catch(...) { + return 0; + } +#endif return ptr; } #if __TBB_MSVC_UNREACHABLE_CODE_IGNORED diff --git a/include/tbb/parallel_for.h b/include/tbb/parallel_for.h index 9a8301986a..1a294327bf 100644 --- a/include/tbb/parallel_for.h +++ b/include/tbb/parallel_for.h @@ -58,7 +58,7 @@ struct start_for : public task { //! Splitting constructor used to generate children. /** parent_ becomes left child. Newly constructed object is right child. */ start_for( start_for& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : - my_range(parent_.my_range, split_obj), + my_range(parent_.my_range, get_range_split_object(split_obj)), my_body(parent_.my_body), my_partition(parent_.my_partition, split_obj), my_allocator(alloc) {} diff --git a/include/tbb/parallel_reduce.h b/include/tbb/parallel_reduce.h index 526d7952f7..25a9a8d2a0 100644 --- a/include/tbb/parallel_reduce.h +++ b/include/tbb/parallel_reduce.h @@ -83,7 +83,7 @@ struct start_reduce : public task { //! Splitting constructor used to generate children. /** parent_ becomes left child. Newly constructed object is right child. */ start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : - my_range(parent_.my_range, split_obj), + my_range(parent_.my_range, get_range_split_object(split_obj)), my_body(parent_.my_body), my_partition(parent_.my_partition, split_obj), my_allocator(alloc), @@ -238,21 +238,10 @@ struct start_deterministic_reduce : public task { /** parent_ becomes left child. Newly constructed object is right child. 
*/ start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body, small_object_allocator& alloc ) : - my_range(parent_.my_range, split_obj), + my_range(parent_.my_range, get_range_split_object(split_obj)), my_body(body), my_partition(parent_.my_partition, split_obj), my_allocator(alloc) {} - //! Construct right child from the given range as response to the demand. - /** parent_ remains left child. Newly constructed object is right child. */ - start_deterministic_reduce( start_deterministic_reduce& parent_, const Range& r, depth_t d, Body& body, - small_object_allocator& alloc ) : - my_range(r), - my_body(body), - my_partition(parent_.my_partition, split()), - my_allocator(alloc) - { - my_partition.align_depth( d ); - } static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { if ( !range.empty() ) { wait_node wn; @@ -274,16 +263,10 @@ struct start_deterministic_reduce : public task { void run_body( Range &r ) { my_body( r ); } - //! Spawn right task, serves as callback for partitioner void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { offer_work_impl(ed, *this, split_obj); } - //! Spawn right task, serves as callback for partitioner - void offer_work(const Range& r, depth_t d, execution_data& ed) { - offer_work_impl(ed, *this, r, d); - } - private: template void offer_work_impl(execution_data& ed, Args&&... args) { diff --git a/include/tbb/partitioner.h b/include/tbb/partitioner.h index b625398a24..10905106bf 100644 --- a/include/tbb/partitioner.h +++ b/include/tbb/partitioner.h @@ -133,7 +133,6 @@ struct tree_node : public node { small_object_allocator m_allocator; std::atomic m_child_stolen{false}; - tree_node(small_object_allocator& alloc) : m_allocator{alloc} {} tree_node(node* parent, int ref_count, small_object_allocator& alloc) : node{parent, ref_count} , m_allocator{alloc} {} @@ -260,11 +259,6 @@ struct partition_type_base { void note_affinity( slot_id ) {} template bool check_being_stolen(Task&, const execution_data&) { return false; } // part of old should_execute_range() - template - bool check_for_demand(Task& ) { return false; } - bool is_divisible() { return true; } // part of old should_execute_range() - depth_t max_depth() { return 0; } - void align_depth(depth_t) { } template split_type get_split() { return split(); } Partition& self() { return *static_cast(this); } // CRTP helper @@ -532,8 +526,6 @@ class static_partition_type : public linear_affinity_mode typedef detail::proportional_split split_type; static_partition_type( const static_partitioner& ) : linear_affinity_mode() {} - static_partition_type( static_partition_type& p, split ) - : linear_affinity_mode(p, split()) {} static_partition_type( static_partition_type& p, const proportional_split& split_obj ) : linear_affinity_mode(p, split_obj) {} }; @@ -597,7 +589,6 @@ class simple_partitioner { public: bool should_execute_range(const execution_data& ) {return false;} partition_type( const simple_partitioner& ) {} - partition_type( const partition_type& ) {} partition_type( const partition_type&, split ) {} }; }; @@ -632,8 +623,6 @@ class auto_partitioner { } partition_type( const auto_partitioner& ) : num_chunks(get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {} - partition_type( const partition_type& ) - : num_chunks(get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {} partition_type( partition_type& pt, split ) { num_chunks = pt.num_chunks = 
(pt.num_chunks+1u) / 2u; } diff --git a/include/tbb/profiling.h b/include/tbb/profiling.h index 7367cf679d..eb8217fef7 100644 --- a/include/tbb/profiling.h +++ b/include/tbb/profiling.h @@ -126,7 +126,7 @@ namespace d1 { } #else inline void call_itt_task_notify(d1::notify_type, void *) {} -#endif // TBB_USE_PROFILING_TOOLS +#endif // TBB_USE_PROFILING_TOOLS inline void call_itt_notify(d1::notify_type t, void *ptr) { r1::call_itt_notify((int)t, ptr); @@ -192,46 +192,8 @@ namespace d1 { inline void call_itt_notify(notify_type /*t*/, void* /*ptr*/) {} inline void call_itt_task_notify(notify_type /*t*/, void* /*ptr*/) {} - - inline void itt_make_task_group( itt_domain_enum /*domain*/, void* /*group*/, unsigned long long /*group_extra*/, - void* /*parent*/, unsigned long long /*parent_extra*/, string_resource_index /*name_index*/ ) {} - - inline void itt_metadata_str_add( itt_domain_enum /*domain*/, void* /*addr*/, unsigned long long /*addr_extra*/, - string_resource_index /*key*/, const char* /*value*/ ) {} - - inline void register_node_addr( itt_domain_enum /*domain*/, void* /*addr*/, unsigned long long /*addr_extra*/, string_resource_index /*key*/, void* /*value*/ ) {} - - inline void itt_relation_add( itt_domain_enum /*domain*/, void* /*addr0*/, unsigned long long /*addr0_extra*/, - itt_relation /*relation*/, void* /*addr1*/, unsigned long long /*addr1_extra*/ ) {} - - inline void itt_task_begin( itt_domain_enum /*domain*/, void* /*task*/, unsigned long long /*task_extra*/, - void* /*parent*/, unsigned long long /*parent_extra*/, string_resource_index /*name_index*/ ) {} - - inline void itt_task_end( itt_domain_enum /*domain*/ ) {} - - inline void itt_region_begin( itt_domain_enum /*domain*/, void* /*region*/, unsigned long long /*region_extra*/, - void* /*parent*/, unsigned long long /*parent_extra*/, string_resource_index /*name_index*/ ) {} - - inline void itt_region_end( itt_domain_enum /*domain*/, void* /*region*/, unsigned long long /*region_extra*/ ) {} - #endif // TBB_USE_PROFILING_TOOLS - template - inline void store_with_release_itt(std::atomic& dst, T src) { - call_itt_task_notify(releasing, &dst); - dst.store(src, std::memory_order_release); - } - - template - inline T load_with_acquire_itt(const std::atomic& src) { - call_itt_task_notify(acquired, &src); - return src.load(std::memory_order_acquire); - } -} // namespace d1 -} // namespace detail - -namespace detail { -namespace d1 { #if TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) class event { /** This class supports user event traces through itt. 
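The next file drops the __TBB_TRY/__TBB_CATCH/__TBB_THROW macros and spells out TBB_USE_EXCEPTIONS conditionals directly, as memory_pool.h already did above. A standalone sketch of the resulting style; allocate_or_null is an illustrative name, not a library function:

    // Sketch: explicit conditional compilation instead of try/catch macros.
    #include <cstddef>
    #include <memory>
    #ifndef TBB_USE_EXCEPTIONS
    #define TBB_USE_EXCEPTIONS 1   // assumption for this standalone sketch
    #endif

    template <typename Alloc>
    void* allocate_or_null(Alloc& a, std::size_t n) {
    #if TBB_USE_EXCEPTIONS
        try {
    #endif
            return a.allocate(n);
    #if TBB_USE_EXCEPTIONS
        } catch (...) {
            return nullptr;        // allocation failure reported as null, not thrown
        }
    #endif
    }

    int main() {
        std::allocator<int> a;
        int* p = static_cast<int*>(allocate_or_null(a, 4));
        a.deallocate(p, 4);
    }

The macro version compiled the catch away by guarding it with an always-false if; writing the #if blocks inline keeps the control flow visible at the use site.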
diff --git a/include/tbb/scalable_allocator.h b/include/tbb/scalable_allocator.h index 6a8c69972e..2f36117800 100644 --- a/include/tbb/scalable_allocator.h +++ b/include/tbb/scalable_allocator.h @@ -191,19 +191,6 @@ std::size_t pool_msize(MemoryPool *memPool, void *object); } // namespace rml -#if TBB_USE_EXCEPTIONS - #define __TBB_TRY try - #define __TBB_CATCH(e) catch(e) - #define __TBB_THROW(e) throw e - #define __TBB_RETHROW() throw -#else /* !TBB_USE_EXCEPTIONS */ - inline bool __TBB_false() { return false; } - #define __TBB_TRY - #define __TBB_CATCH(e) if ( __TBB_false() ) - #define __TBB_THROW(e) tbb::detail::suppress_unused_warning(e) - #define __TBB_RETHROW() ((void)0) -#endif /* !TBB_USE_EXCEPTIONS */ - namespace tbb { namespace detail { namespace d1 { @@ -211,7 +198,11 @@ namespace d1 { // keep throw in a separate function to prevent code bloat template void throw_exception(const E &e) { - __TBB_THROW(e); +#if TBB_USE_EXCEPTIONS + throw e; +#else + suppress_unused_warning(e); +#endif } template diff --git a/include/tbb/task_arena.h b/include/tbb/task_arena.h index 5fedf9078d..35c370b133 100644 --- a/include/tbb/task_arena.h +++ b/include/tbb/task_arena.h @@ -131,44 +131,24 @@ class task_arena_base { #if __TBB_NUMA_SUPPORT //! The NUMA node index to which the arena will be attached numa_node_id my_numa_id; - - // Do not access my_numa_id without the following runtime check. - // Despite my_numa_id is accessible, it does not exist in task_arena_base on user side - // if TBB_PREVIEW_NUMA_SUPPORT macro is not defined by the user. To be sure that - // my_numa_id exists in task_arena_base layout we check the traits. - // TODO: Consider increasing interface version for task_arena_base instead of this runtime check. - numa_node_id numa_id() { - return (my_version_and_traits & numa_support_flag) == numa_support_flag ? my_numa_id : automatic; - } #endif - enum { - default_flags = 0 -#if __TBB_NUMA_SUPPORT - , numa_support_flag = 1 -#endif - }; + enum { default_flags = 0 }; task_arena_base(int max_concurrency, unsigned reserved_for_masters, priority a_priority) : -#if __TBB_NUMA_SUPPORT - my_version_and_traits(default_flags | numa_support_flag) -#else my_version_and_traits(default_flags) -#endif , my_initialization_state(do_once_state::uninitialized) , my_arena(nullptr) , my_max_concurrency(max_concurrency) , my_master_slots(reserved_for_masters) , my_priority(a_priority) -#if __TBB_NUMA_SUPPORT , my_numa_id(automatic) -#endif {} #if __TBB_NUMA_SUPPORT task_arena_base(const constraints& constraints_, unsigned reserved_for_masters, priority a_priority) - : my_version_and_traits(default_flags | numa_support_flag) + : my_version_and_traits(default_flags) , my_initialization_state(do_once_state::uninitialized) , my_arena(nullptr) , my_max_concurrency(constraints_.max_concurrency) diff --git a/include/tbb/task_group.h b/include/tbb/task_group.h index 14ec4c365a..70a6d6dd9f 100644 --- a/include/tbb/task_group.h +++ b/include/tbb/task_group.h @@ -360,13 +360,15 @@ class task_group_base : no_copy { task_group_status internal_run_and_wait(const F& f) { function_stack_task t{ f, m_wait_ctx }; m_wait_ctx.reserve(); + bool cancellation_status = false; try_call([&] { execute_and_wait(t, m_context, m_wait_ctx, m_context); }).on_completion([&] { // TODO: the reset method is not thread-safe. Ensure the correct behavior. + cancellation_status = m_context.is_group_execution_cancelled(); m_context.reset(); }); - return m_context.is_group_execution_cancelled() ? 
canceled : complete; + return cancellation_status ? canceled : complete; } template @@ -401,13 +403,15 @@ class task_group_base : no_copy { } task_group_status wait() { + bool cancellation_status = false; try_call([&] { d1::wait(m_wait_ctx, m_context); }).on_completion([&] { // TODO: the reset method is not thread-safe. Ensure the correct behavior. + cancellation_status = m_context.is_group_execution_cancelled(); m_context.reset(); }); - return m_context.is_group_execution_cancelled() ? canceled : complete; + return cancellation_status ? canceled : complete; } bool is_canceling() { diff --git a/include/tbb/version.h b/include/tbb/version.h index b5bd318b9a..38c2607feb 100644 --- a/include/tbb/version.h +++ b/include/tbb/version.h @@ -26,12 +26,12 @@ // "Patch" version for custom releases #define TBB_VERSION_PATCH 0 // Suffix string -#define __TBB_VERSION_SUFFIX "-beta09" +#define __TBB_VERSION_SUFFIX "-beta10" // Full official version string #define TBB_VERSION_STRING __TBB_STRING(TBB_VERSION_MAJOR) "." __TBB_STRING(TBB_VERSION_MINOR) __TBB_VERSION_SUFFIX // Full interface version -#define TBB_INTERFACE_VERSION 12003 +#define TBB_INTERFACE_VERSION 12004 // Major interface version #define TBB_INTERFACE_VERSION_MAJOR (TBB_INTERFACE_VERSION/1000) // Minor interface version diff --git a/python/rml/ipc_server.cpp b/python/rml/ipc_server.cpp index 5728dc40bb..bf860484ce 100644 --- a/python/rml/ipc_server.cpp +++ b/python/rml/ipc_server.cpp @@ -772,7 +772,9 @@ void ipc_stopper::run() { if( my_state.load(std::memory_order_acquire)!=st_quit ) { if( !my_server.stop_one() ) { my_server.add_stop_thread(); - tbb::detail::r1::prolonged_pause(); + // Workaround for prolonged_pause. + // Reconsider this for new updates. + tbb::detail::r1::prolonged_pause_impl(); } } } diff --git a/src/tbb/CMakeLists.txt b/src/tbb/CMakeLists.txt index b814ee3ac7..b145667b27 100644 --- a/src/tbb/CMakeLists.txt +++ b/src/tbb/CMakeLists.txt @@ -70,10 +70,11 @@ target_compile_options(tbb # Avoid use of target_link_libraries here as it changes /DEF option to \DEF on Windows. set_target_properties(tbb PROPERTIES - LINK_FLAGS ${TBB_LINK_DEF_FILE_FLAG}${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbb.def DEFINE_SYMBOL "" VERSION ${TBB_BINARY_VERSION}.${TBB_BINARY_MINOR_VERSION} SOVERSION ${TBB_BINARY_VERSION} + LINK_FLAGS ${TBB_LINK_DEF_FILE_FLAG}${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbb.def + LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbb.def ) # Prefer using target_link_options instead of target_link_libraries to specify link options because diff --git a/src/tbb/allocator.cpp b/src/tbb/allocator.cpp index 056faebaa1..438493c867 100644 --- a/src/tbb/allocator.cpp +++ b/src/tbb/allocator.cpp @@ -54,21 +54,15 @@ namespace r1 { //! Initialization routine used for first indirect call via allocate_handler. static void* initialize_allocate_handler(std::size_t size); -//! Initialization routine used for first indirect call via deallocate_handler. -static void initialize_deallocate_handler(void* ptr); - //! Handler for memory allocation static void* (*allocate_handler)(std::size_t size) = &initialize_allocate_handler; //! Handler for memory deallocation -static void (*deallocate_handler)(void* pointer) = &initialize_deallocate_handler; +static void (*deallocate_handler)(void* pointer) = nullptr; //! Initialization routine used for first indirect call via cache_aligned_allocate_handler. static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment); -//! 
Initialization routine used for first indirect call via cache_aligned_deallocate_handler.
-static void initialize_cache_aligned_deallocate_handler(void* ptr);
-
 //! Allocates memory using standard malloc. It is used when scalable_allocator is not available
 static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment);
 
@@ -79,7 +73,7 @@ static void std_cache_aligned_deallocate(void* p);
 static void* (*cache_aligned_allocate_handler)(std::size_t n, std::size_t alignment) = &initialize_cache_aligned_allocate_handler;
 
 //! Handler for padded memory deallocation
-static void (*cache_aligned_deallocate_handler)(void* p) = &initialize_cache_aligned_deallocate_handler;
+static void (*cache_aligned_deallocate_handler)(void* p) = nullptr;
 
 //! Table describing how to link the handlers.
 static const dynamic_link_descriptor MallocLinkTable[] = {
@@ -142,13 +136,6 @@ static void* initialize_allocate_handler(std::size_t size) {
     return (*allocate_handler)(size);
 }
 
-//! Executed on very first call through deallocate_handler
-static void initialize_deallocate_handler(void* ptr) {
-    initialize_cache_aligned_allocator();
-    __TBB_ASSERT(deallocate_handler != &initialize_deallocate_handler, NULL);
-    (*deallocate_handler)(ptr);
-}
-
 //! Executed on very first call through cache_aligned_allocate_handler
 static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) {
     initialize_cache_aligned_allocator();
@@ -156,13 +143,6 @@ static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) {
     return (*cache_aligned_allocate_handler)(bytes, alignment);
 }
 
-//! Executed on very first call through cache_aligned_deallocate_handler
-static void initialize_cache_aligned_deallocate_handler(void* ptr) {
-    initialize_cache_aligned_allocator();
-    __TBB_ASSERT(cache_aligned_deallocate_handler != &initialize_cache_aligned_deallocate_handler, NULL);
-    (*cache_aligned_deallocate_handler)(ptr);
-}
-
 // TODO: use CPUID to find actual line size, though consider backward compatibility
 // nfs - no false sharing
 static constexpr std::size_t nfs_size = 128;
@@ -191,6 +171,7 @@ void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) {
 }
 
 void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) {
+    __TBB_ASSERT(cache_aligned_deallocate_handler, "Initialization has not been done yet.");
     (*cache_aligned_deallocate_handler)(p);
 }
 
@@ -231,6 +212,7 @@ void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) {
 
 void __TBB_EXPORTED_FUNC deallocate_memory(void* p) {
     if (p) {
+        __TBB_ASSERT(deallocate_handler, "Initialization has not been done yet.");
        (*deallocate_handler)(p);
     }
 }
@@ -240,7 +222,7 @@ bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() {
         void* void_ptr = allocate_handler(1);
         deallocate_handler(void_ptr);
     }
-    __TBB_ASSERT(allocate_handler != &initialize_allocate_handler && deallocate_handler != &initialize_deallocate_handler, NULL);
+    __TBB_ASSERT(allocate_handler != &initialize_allocate_handler && deallocate_handler != nullptr, NULL);
     // Cast to void avoids type mismatch errors on some compilers (e.g.
__IBMCPP__) __TBB_ASSERT((reinterpret_cast(allocate_handler) == reinterpret_cast(&std::malloc)) == (reinterpret_cast(deallocate_handler) == reinterpret_cast(&std::free)), "Both shim pointers must refer to routines from the same package (either TBB or CRT)"); diff --git a/src/tbb/arena.cpp b/src/tbb/arena.cpp index 68a4c9c6f1..49e381224c 100644 --- a/src/tbb/arena.cpp +++ b/src/tbb/arena.cpp @@ -19,6 +19,7 @@ #include "arena.h" #include "itt_notify.h" #include "semaphore.h" +#include "waiters.h" #include "tbb/detail/_task.h" #include "tbb/tbb_allocator.h" @@ -103,40 +104,6 @@ std::size_t arena::occupy_free_slot(thread_data& tls) { return index; } -class outermost_worker_waiter { - arena& my_arena; - stealing_loop_backoff my_backoff; -public: - outermost_worker_waiter(arena& a) : my_arena( a ), my_backoff( int(a.my_num_slots) ) {} - - bool continue_execution(arena_slot& slot, d1::task*& t) const { - __TBB_ASSERT(t == nullptr, nullptr); - if (my_arena.is_recall_requested()) { - return false; - } - t = get_self_recall_task(slot); - return true; - } - - void pause() { - if (my_backoff.pause()) { - my_arena.is_out_of_work(); - } - } - - void reset_wait() { - my_backoff.reset_wait(); - } - - d1::wait_context* wait_ctx() { - return nullptr; - } - - static bool postpone_execution(d1::task&) { - return false; - } -}; - std::uintptr_t arena::calculate_stealing_threshold() { stack_anchor_type anchor; return r1::calculate_stealing_threshold(reinterpret_cast(&anchor), my_market->worker_stack_size()); @@ -251,7 +218,7 @@ void arena::free_arena () { __TBB_ASSERT( is_alive(my_guard), NULL ); __TBB_ASSERT( !my_references.load(std::memory_order_relaxed), "There are threads in the dying arena" ); __TBB_ASSERT( !my_num_workers_requested && !my_num_workers_allotted, "Dying arena requests workers" ); - __TBB_ASSERT( my_pool_state.load(std::memory_order_relaxed) == SNAPSHOT_EMPTY || !my_max_num_workers, + __TBB_ASSERT( my_pool_state.load(std::memory_order_relaxed) == SNAPSHOT_EMPTY || !my_max_num_workers, "Inconsistent state of a dying arena" ); #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY __TBB_ASSERT( !my_global_concurrency_mode, NULL ); @@ -447,7 +414,7 @@ void task_arena_impl::initialize(d1::task_arena_base& ta) { governor::one_time_init(); if (ta.my_max_concurrency < 1) { #if __TBB_NUMA_SUPPORT - ta.my_max_concurrency = numa_default_concurrency(ta.numa_id()); + ta.my_max_concurrency = numa_default_concurrency(ta.my_numa_id); #else /*__TBB_NUMA_SUPPORT*/ ta.my_max_concurrency = (int)governor::default_num_threads(); #endif /*__TBB_NUMA_SUPPORT*/ @@ -462,7 +429,7 @@ void task_arena_impl::initialize(d1::task_arena_base& ta) { market::global_market( /*is_public=*/false); #if __TBB_NUMA_SUPPORT ta.my_arena->my_numa_binding_observer = construct_binding_observer( - static_cast(&ta), ta.numa_id(), ta.my_arena->my_num_slots); + static_cast(&ta), ta.my_numa_id, ta.my_arena->my_num_slots); #endif /*__TBB_NUMA_SUPPORT*/ } @@ -685,14 +652,15 @@ void task_arena_impl::execute(d1::task_arena_base& ta, d1::delegate_base& d) { context_guard.set_ctx(ta.my_arena->my_default_ctx); nested_arena_context scope(*td, *ta.my_arena, index1); #if _WIN64 - try_call([&] { + try { #endif d(); __TBB_ASSERT(same_arena || governor::is_thread_data_set(td), nullptr); #if _WIN64 - }).on_exception([&] { + } catch (...) 
{ context_guard.restore_default(); - }); + throw; + } #endif } @@ -701,8 +669,10 @@ void task_arena_impl::wait(d1::task_arena_base& ta) { thread_data* td = governor::get_thread_data(); __TBB_ASSERT_EX(td, "Scheduler is not initialized"); __TBB_ASSERT(td->my_arena != ta.my_arena || td->my_arena_index == 0, "internal_wait is not supported within a worker context" ); - while (ta.my_arena->num_workers_active() || ta.my_arena->my_pool_state.load(std::memory_order_acquire) != arena::SNAPSHOT_EMPTY) { - yield(); + if (ta.my_arena->my_max_num_workers != 0) { + while (ta.my_arena->num_workers_active() || ta.my_arena->my_pool_state.load(std::memory_order_acquire) != arena::SNAPSHOT_EMPTY) { + yield(); + } } } @@ -716,26 +686,32 @@ int task_arena_impl::max_concurrency(const d1::task_arena_base *ta) { if( a ) { // Get parameters from the arena __TBB_ASSERT( !ta || ta->my_max_concurrency==1, NULL ); return a->my_num_reserved_slots + a->my_max_num_workers; - } else { - __TBB_ASSERT( !ta || ta->my_max_concurrency==d1::task_arena_base::automatic, NULL ); - return int(governor::default_num_threads()); } + + if (ta && ta->my_max_concurrency == 1) { + return 1; + } + + __TBB_ASSERT(!ta || ta->my_max_concurrency==d1::task_arena_base::automatic, NULL ); + return int(governor::default_num_threads()); } void isolate_within_arena(d1::delegate_base& d, std::intptr_t isolation) { // TODO: Decide what to do if the scheduler is not initialized. Is there a use case for it? thread_data* tls = governor::get_thread_data(); assert_pointers_valid(tls, tls->my_task_dispatcher); - isolation_type previous_isolation = tls->my_task_dispatcher->m_execute_data_ext.isolation; + task_dispatcher* dispatcher = tls->my_task_dispatcher; + isolation_type previous_isolation = dispatcher->m_execute_data_ext.isolation; try_call([&] { // We temporarily change the isolation tag of the currently running task. It will be restored in the destructor of the guard. isolation_type current_isolation = isolation ? isolation : reinterpret_cast(&d); // Save the current isolation value and set new one - previous_isolation = tls->my_task_dispatcher->set_isolation(current_isolation); + previous_isolation = dispatcher->set_isolation(current_isolation); // Isolation within this callable d(); }).on_completion([&] { - tls->my_task_dispatcher->set_isolation(previous_isolation); + __TBB_ASSERT(governor::get_thread_data()->my_task_dispatcher == dispatcher, NULL); + dispatcher->set_isolation(previous_isolation); }); } diff --git a/src/tbb/arena.h b/src/tbb/arena.h index f1165036e7..b43556de3e 100644 --- a/src/tbb/arena.h +++ b/src/tbb/arena.h @@ -138,7 +138,7 @@ struct stack_anchor_type { Intrusive list node base class is used by market to form a list of arenas. **/ struct arena_base : padded { //! The number of workers that have been marked out by the resource manager to service the arena. - unsigned my_num_workers_allotted; // heavy use in stealing loop + std::atomic my_num_workers_allotted; // heavy use in stealing loop //! Reference counter for the arena. /** Worker and master references are counted separately: first several bits are for references @@ -171,12 +171,19 @@ struct arena_base : padded { //! The number of workers requested by the master thread owning the arena. unsigned my_max_num_workers; - //! The number of workers that are currently requested from the resource manager. + //! The total number of workers that are requested from the resource manager. + int my_total_num_workers_requested; + + //! 
The number of workers that are actually requested from the resource manager.
+    //! Possible values are in [0, my_max_num_workers]
     int my_num_workers_requested;
 
     //! The index in the array of per priority lists of arenas this object is in.
     /*const*/ unsigned my_priority_level;
 
+    //! Whether the arena is at the top priority level in the market.
+    std::atomic<bool> my_is_top_priority{false};
+
     //! Current task pool state and estimate of available tasks amount.
     /** The estimate is either 0 (SNAPSHOT_EMPTY) or infinity (SNAPSHOT_FULL).
         Special state is "busy" (any other unsigned value).
@@ -217,6 +224,9 @@
     std::atomic<bool> my_global_concurrency_mode;
 #endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */
 
+    //! Waiting object for external and coroutine waiters.
+    concurrent_monitor my_sleep_monitors;
+
     //! Waiting object for master threads that cannot join the arena.
     concurrent_monitor my_exit_monitors;
 
@@ -277,7 +287,7 @@ class arena: public padded<arena_base>
     //! Reference increment values for externals and workers
     static const unsigned ref_external = 1;
-    static const unsigned ref_worker = 1<
 
     //! Check if the recall is requested by the market.
     bool is_recall_requested() const {
-        return num_workers_active() > my_num_workers_allotted;
+        return num_workers_active() > my_num_workers_allotted.load(std::memory_order_relaxed);
     }
 
     //! If necessary, raise a flag that there is new job in arena.
@@ -394,6 +404,7 @@ inline void arena::on_thread_leaving ( ) {
     // state (including the fact if it is alive) under the lock.
     //
     std::uintptr_t aba_epoch = my_aba_epoch;
+    unsigned priority_level = my_priority_level;
     market* m = my_market;
     __TBB_ASSERT(my_references.load(std::memory_order_relaxed) >= ref_param, "broken arena reference counter");
 #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
@@ -415,15 +426,11 @@ inline void arena::on_thread_leaving ( ) {
     }
 #endif
     if ( (my_references -= ref_param ) == 0 )
-        m->try_destroy_arena( this, aba_epoch );
+        m->try_destroy_arena( this, aba_epoch, priority_level );
 }
 
 template<arena::new_work_type work_type>
 void arena::advertise_new_work() {
-    if (my_max_num_workers == 0 && my_num_reserved_slots > 1) {
-        // No workers are available. It is the worker-less arena.
-        return;
-    }
     if( work_type == work_enqueued ) {
 #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
         if ( my_market->my_num_workers_soft_limit.load(std::memory_order_acquire) == 0 &&
@@ -437,6 +444,9 @@ void arena::advertise_new_work() {
             my_pool_state.store(SNAPSHOT_FULL, std::memory_order_release);
             my_max_num_workers = 1;
             my_market->adjust_demand(*this, my_max_num_workers);
+
+            // Notify all sleeping threads that work has appeared in the arena.
+ my_sleep_monitors.notify_all(); } } } diff --git a/src/tbb/arena_slot.h b/src/tbb/arena_slot.h index cd42ddb4ec..ca74a92e0e 100644 --- a/src/tbb/arena_slot.h +++ b/src/tbb/arena_slot.h @@ -87,6 +87,7 @@ struct alignas(max_nfs_size) arena_slot_private_state { class arena_slot : private arena_slot_shared_state, private arena_slot_private_state { friend class arena; + friend class outermost_worker_waiter; friend class task_dispatcher; friend class thread_data; friend class nested_arena_context; @@ -350,6 +351,7 @@ class arena_slot : private arena_slot_shared_state, private arena_slot_private_s task_pool.store(victim_task_pool, std::memory_order_release); } +#if TBB_USE_ASSERT bool is_local_task_pool_quiescent() const { d1::task** tp = task_pool.load(std::memory_order_relaxed); return tp == EmptyTaskPool || tp == LockedTaskPool; @@ -364,6 +366,7 @@ class arena_slot : private arena_slot_shared_state, private arena_slot_private_s __TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool is not quiescent"); return head.load(std::memory_order_relaxed) == 0 && tail.load(std::memory_order_relaxed) == 0; } +#endif // TBB_USE_ASSERT //! Leave the task pool /** Leaving task pool automatically releases the task pool if it is locked. **/ diff --git a/src/tbb/concurrent_monitor.cpp b/src/tbb/concurrent_monitor.cpp index b02fb05022..b3675c1846 100644 --- a/src/tbb/concurrent_monitor.cpp +++ b/src/tbb/concurrent_monitor.cpp @@ -82,6 +82,29 @@ void concurrent_monitor::notify_one_relaxed() { to_thread_context(n)->semaphore().V(); } +void concurrent_monitor::notify_all_relaxed() { + if( waitset_ec.empty() ) + return; + waitset_t temp; + const waitset_node_t* end; + { + tbb::spin_mutex::scoped_lock l( mutex_ec ); + epoch.store( epoch.load( std::memory_order_relaxed ) + 1, std::memory_order_relaxed ); + waitset_ec.flush_to( temp ); + end = temp.end(); + for( waitset_node_t* n=temp.front(); n!=end; n=n->next ) + to_thread_context(n)->in_waitset = false; + } + waitset_node_t* nxt; + for( waitset_node_t* n=temp.front(); n!=end; n=nxt ) { + nxt = n->next; + to_thread_context(n)->semaphore().V(); + } +#if TBB_USE_ASSERT + temp.clear(); +#endif +} + void concurrent_monitor::abort_all_relaxed() { if( waitset_ec.empty() ) return; diff --git a/src/tbb/concurrent_monitor.h b/src/tbb/concurrent_monitor.h index 363fb0716e..241ee06666 100644 --- a/src/tbb/concurrent_monitor.h +++ b/src/tbb/concurrent_monitor.h @@ -48,7 +48,6 @@ class circular_doubly_linked_list_with_sentinel : no_copy { inline bool empty() const {return size()==0;} inline node_t* front() const {return head.next;} inline node_t* last() const {return head.prev;} - inline node_t* begin() const {return front();} inline const node_t* end() const {return &head;} //! add to the back of the list @@ -157,6 +156,12 @@ class concurrent_monitor : no_copy { //! Notify one thread about the event. Relaxed version. void notify_one_relaxed(); + //! Notify all waiting threads of the event + void notify_all() {atomic_fence( std::memory_order_seq_cst ); notify_all_relaxed();} + + // ! Notify all waiting threads of the event; Relaxed version + void notify_all_relaxed(); + //! Notify waiting threads of the event that satisfies the given predicate template void notify( const P& predicate ) { atomic_fence( std::memory_order_seq_cst ); @@ -210,35 +215,6 @@ void concurrent_monitor::notify_relaxed( const P& predicate ) { #endif } -// Additional possible methods that are not required right now -// //! 
Notify all waiting threads of the event -// void notify_all() {atomic_fence( std::memory_order_seq_cst ); notify_all_relaxed();} - -// Additional possible methods that are not required right now -//! Notify all waiting threads of the event; Relaxed version -// void concurrent_monitor::notify_all_relaxed() { -// if( waitset_ec.empty() ) -// return; -// waitset_t temp; -// const waitset_node_t* end; -// { -// tbb::spin_mutex::scoped_lock l( mutex_ec ); -// epoch.store( epoch.load( std::memory_order_relaxed ) + 1, std::memory_order_relaxed ); -// waitset_ec.flush_to( temp ); -// end = temp.end(); -// for( waitset_node_t* n=temp.front(); n!=end; n=n->next ) -// to_thread_context(n)->in_waitset = false; -// } -// waitset_node_t* nxt; -// for( waitset_node_t* n=temp.front(); n!=end; n=nxt ) { -// nxt = n->next; -// to_thread_context(n)->semaphore().V(); -// } -// #if TBB_USE_ASSERT -// temp.clear(); -// #endif -// } - // Additional possible methods that are not required right now //! Wait for a condition to be satisfied with waiting-on context // template diff --git a/src/tbb/def/lin32-tbb.def b/src/tbb/def/lin32-tbb.def index efe60e0597..2218e8eb28 100644 --- a/src/tbb/def/lin32-tbb.def +++ b/src/tbb/def/lin32-tbb.def @@ -76,6 +76,7 @@ _ZN3tbb6detail2r19downgradeERNS0_2d112rtm_rw_mutex11scoped_lockE; _ZN3tbb6detail2r17suspendEPFvPvPNS1_18suspend_point_typeEES2_; _ZN3tbb6detail2r16resumeEPNS1_18suspend_point_typeE; _ZN3tbb6detail2r121current_suspend_pointEv; +_ZN3tbb6detail2r114notify_waitersERNS0_2d112wait_contextE; /* Task dispatcher (task_dispatcher.cpp) */ _ZN3tbb6detail2r114execution_slotEPKNS0_2d114execution_dataE; diff --git a/src/tbb/def/lin64-tbb.def b/src/tbb/def/lin64-tbb.def index a2fdf49d51..f7685094d9 100644 --- a/src/tbb/def/lin64-tbb.def +++ b/src/tbb/def/lin64-tbb.def @@ -76,6 +76,7 @@ _ZN3tbb6detail2r19downgradeERNS0_2d112rtm_rw_mutex11scoped_lockE; _ZN3tbb6detail2r17suspendEPFvPvPNS1_18suspend_point_typeEES2_; _ZN3tbb6detail2r16resumeEPNS1_18suspend_point_typeE; _ZN3tbb6detail2r121current_suspend_pointEv; +_ZN3tbb6detail2r114notify_waitersERNS0_2d112wait_contextE; /* Task dispatcher (task_dispatcher.cpp) */ _ZN3tbb6detail2r114execution_slotEPKNS0_2d114execution_dataE; diff --git a/src/tbb/def/mac64-tbb.def b/src/tbb/def/mac64-tbb.def index c907873684..78d1d64942 100644 --- a/src/tbb/def/mac64-tbb.def +++ b/src/tbb/def/mac64-tbb.def @@ -78,6 +78,7 @@ __ZN3tbb6detail2r19downgradeERNS0_2d112rtm_rw_mutex11scoped_lockE __ZN3tbb6detail2r17suspendEPFvPvPNS1_18suspend_point_typeEES2_ __ZN3tbb6detail2r16resumeEPNS1_18suspend_point_typeE __ZN3tbb6detail2r121current_suspend_pointEv +__ZN3tbb6detail2r114notify_waitersERNS0_2d112wait_contextE # Task dispatcher (task_dispatcher.cpp) __ZN3tbb6detail2r114execution_slotEPKNS0_2d114execution_dataE diff --git a/src/tbb/def/win32-tbb.def b/src/tbb/def/win32-tbb.def index 6d15ee64ba..b2c93a486b 100644 --- a/src/tbb/def/win32-tbb.def +++ b/src/tbb/def/win32-tbb.def @@ -70,6 +70,7 @@ EXPORTS ?current_suspend_point@r1@detail@tbb@@YAPAUsuspend_point_type@123@XZ ?resume@r1@detail@tbb@@YAXPAUsuspend_point_type@123@@Z ?suspend@r1@detail@tbb@@YAXP6AXPAXPAUsuspend_point_type@123@@Z0@Z +?notify_waiters@r1@detail@tbb@@YAXAAVwait_context@d1@23@@Z ; Task dispatcher (task_dispatcher.cpp) ?spawn@r1@detail@tbb@@YAXAAVtask@d1@23@AAVtask_group_context@523@G@Z diff --git a/src/tbb/def/win64-tbb.def b/src/tbb/def/win64-tbb.def index 7c248f93ae..89f27b98c9 100644 --- a/src/tbb/def/win64-tbb.def +++ b/src/tbb/def/win64-tbb.def @@ -70,6 +70,7 @@ EXPORTS 
?suspend@r1@detail@tbb@@YAXP6AXPEAXPEAUsuspend_point_type@123@@Z0@Z ?resume@r1@detail@tbb@@YAXPEAUsuspend_point_type@123@@Z ?current_suspend_point@r1@detail@tbb@@YAPEAUsuspend_point_type@123@XZ +?notify_waiters@r1@detail@tbb@@YAXAEAVwait_context@d1@23@@Z ; Task dispatcher (task_dispatcher.cpp) ?spawn@r1@detail@tbb@@YAXAEAVtask@d1@23@AEAVtask_group_context@523@@Z diff --git a/src/tbb/global_control.cpp b/src/tbb/global_control.cpp index 7a94e971f4..b642dce370 100644 --- a/src/tbb/global_control.cpp +++ b/src/tbb/global_control.cpp @@ -235,7 +235,7 @@ struct global_control_impl { erase_if_present(c, gc); return c->my_list.empty(); } - +#if TBB_USE_ASSERT static bool is_present(d1::global_control& gc) { __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); control_storage* const c = controls[gc.my_param]; @@ -247,6 +247,7 @@ struct global_control_impl { } return false; } +#endif // TBB_USE_ASSERT }; void __TBB_EXPORTED_FUNC create(d1::global_control& gc) { @@ -259,10 +260,11 @@ void __TBB_EXPORTED_FUNC destroy(d1::global_control& gc) { bool remove_and_check_if_empty(d1::global_control& gc) { return global_control_impl::remove_and_check_if_empty(gc); } +#if TBB_USE_ASSERT bool is_present(d1::global_control& gc) { return global_control_impl::is_present(gc); } - +#endif // TBB_USE_ASSERT std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int param) { __TBB_ASSERT_RELEASE(param < global_control::parameter_max, NULL); return controls[param]->active_value(); diff --git a/src/tbb/governor.cpp b/src/tbb/governor.cpp index 54dfc071a1..df5708dc90 100644 --- a/src/tbb/governor.cpp +++ b/src/tbb/governor.cpp @@ -58,7 +58,7 @@ void governor::acquire_resources () { #endif if( status ) handle_perror(status, "TBB failed to initialize task scheduler TLS\n"); - is_speculation_enabled = cpu_has_speculation(); + detect_cpu_features(cpu_features); is_rethrow_broken = gcc_rethrow_exception_broken(); } @@ -241,7 +241,7 @@ bool finalize_impl(d1::task_scheduler_handle& handle) { if (td) { task_dispatcher* task_disp = td->my_task_dispatcher; __TBB_ASSERT(task_disp, nullptr); - if (task_disp->m_properties.outermost && !td->my_is_worker) { // is not inside a tbb parallel region + if (task_disp->m_properties.outermost && !td->my_is_worker) { // is not inside a parallel region governor::auto_terminate(td); } } diff --git a/src/tbb/governor.h b/src/tbb/governor.h index dc58630633..f0443244c1 100644 --- a/src/tbb/governor.h +++ b/src/tbb/governor.h @@ -65,7 +65,7 @@ class governor { static bool UsePrivateRML; // Flags for runtime-specific conditions - static bool is_speculation_enabled; + static cpu_features_type cpu_features; static bool is_rethrow_broken; //! Create key for thread-local storage and initialize RML. 
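governor now probes CPU features once during acquire_resources and serves later queries from a cached struct, so speculation_enabled and wait_package_enabled become trivial reads. A rough sketch of the pattern; the CPUID probe is stubbed out here, while the real bit tests appear in misc.cpp further below:

    // Sketch: probe once at startup, cache results, expose cheap predicates.
    #include <cstdio>

    struct cpu_features_type {
        bool rtm_enabled{false};
        bool waitpkg_enabled{false};
    };

    static cpu_features_type cpu_features;

    void detect_cpu_features(cpu_features_type& f) {
        // Placeholder: the real code executes CPUID leaf 7 and tests
        // EBX bit 11 (RTM) and ECX bit 5 (WAITPKG).
        f.rtm_enabled = false;
        f.waitpkg_enabled = false;
    }

    bool speculation_enabled()  { return cpu_features.rtm_enabled; }
    bool wait_package_enabled() { return cpu_features.waitpkg_enabled; }

    int main() {
        detect_cpu_features(cpu_features);  // done once, as in governor::acquire_resources
        std::printf("rtm=%d waitpkg=%d\n", speculation_enabled(), wait_package_enabled());
    }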
@@ -136,7 +136,9 @@ class governor { static bool does_client_join_workers (const rml::tbb_client &client); - static bool speculation_enabled() { return is_speculation_enabled; } + static bool speculation_enabled() { return cpu_features.rtm_enabled; } + + static bool wait_package_enabled() { return cpu_features.waitpkg_enabled; } static bool rethrow_exception_broken() { return is_rethrow_broken; } diff --git a/src/tbb/intrusive_list.h b/src/tbb/intrusive_list.h index ccaeec8840..1aebff8797 100644 --- a/src/tbb/intrusive_list.h +++ b/src/tbb/intrusive_list.h @@ -65,11 +65,6 @@ class intrusive_list_base { iterator_impl( pointer_type pos ) : my_pos(pos) {} - iterator_impl& operator=( const T& val ) { - my_pos = &node(val); - return *this; - } - iterator_impl& operator++() { my_pos = my_pos->my_next_node; return *this; diff --git a/src/tbb/main.cpp b/src/tbb/main.cpp index e524475de6..1968b5c216 100644 --- a/src/tbb/main.cpp +++ b/src/tbb/main.cpp @@ -39,8 +39,8 @@ unsigned governor::DefaultNumberOfThreads; size_t governor::DefaultPageSize; rml::tbb_factory governor::theRMLServerFactory; bool governor::UsePrivateRML; -bool governor::is_speculation_enabled; bool governor::is_rethrow_broken; +cpu_features_type governor::cpu_features; //------------------------------------------------------------------------ // market data diff --git a/src/tbb/market.cpp b/src/tbb/market.cpp index 8f44c29343..78757d2029 100644 --- a/src/tbb/market.cpp +++ b/src/tbb/market.cpp @@ -313,7 +313,7 @@ void market::detach_arena ( arena& a ) { ++my_arenas_aba_epoch; } -void market::try_destroy_arena ( arena* a, uintptr_t aba_epoch ) { +void market::try_destroy_arena ( arena* a, uintptr_t aba_epoch, unsigned priority_level ) { bool locked = true; __TBB_ASSERT( a, NULL ); // we hold reference to the market, so it cannot be destroyed at any moment here @@ -321,14 +321,14 @@ void market::try_destroy_arena ( arena* a, uintptr_t aba_epoch ) { __TBB_ASSERT( my_ref_count!=0, NULL ); my_arenas_list_mutex.lock(); assert_market_valid(); - arena_list_type::iterator it = my_arenas[a->my_priority_level].begin(); - for ( ; it != my_arenas[a->my_priority_level].end(); ++it ) { + arena_list_type::iterator it = my_arenas[priority_level].begin(); + for ( ; it != my_arenas[priority_level].end(); ++it ) { if ( a == &*it ) { if ( it->my_aba_epoch == aba_epoch ) { // Arena is alive if ( !a->my_num_workers_requested && !a->my_references.load(std::memory_order_relaxed) ) { __TBB_ASSERT( - !a->my_num_workers_allotted && + !a->my_num_workers_allotted.load(std::memory_order_relaxed) && (a->my_pool_state == arena::SNAPSHOT_EMPTY || !a->my_max_num_workers), "Inconsistent arena state" ); @@ -364,7 +364,7 @@ arena* market::arena_in_need ( arena_list_type* arenas, arena* hint ) { } while ( arenas[curr_priority_level].empty() ); it = arenas[curr_priority_level].begin(); } - if( a.num_workers_active() < a.my_num_workers_allotted ) { + if( a.num_workers_active() < a.my_num_workers_allotted.load(std::memory_order_relaxed) ) { a.my_references += arena::ref_worker; return &a; } @@ -389,14 +389,21 @@ int market::update_allotment ( arena_list_type* arenas, int workers_demand, int int unassigned_workers = max_workers; int assigned = 0; int carry = 0; + unsigned max_priority_level = num_priority_levels; for (unsigned list_idx = 0; list_idx < num_priority_levels; ++list_idx ) { int assigned_per_priority = 0; for (arena_list_type::iterator it = arenas[list_idx].begin(); it != arenas[list_idx].end(); ++it) { arena& a = *it; - if (a.my_num_workers_requested <= 0) { - 
__TBB_ASSERT(!a.my_num_workers_allotted, nullptr); + __TBB_ASSERT(a.my_num_workers_requested >= 0 && a.my_num_workers_requested <= int(a.my_max_num_workers), nullptr); + if (a.my_num_workers_requested == 0) { + __TBB_ASSERT(!a.my_num_workers_allotted.load(std::memory_order_relaxed), nullptr); + continue; + } + + if (max_priority_level == num_priority_levels) { + max_priority_level = list_idx; + } + + int allotted = 0; #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY if (my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0) { @@ -412,7 +419,8 @@ int market::update_allotment ( arena_list_type* arenas, int workers_demand, int // a.my_num_workers_requested may temporarily exceed a.my_max_num_workers allotted = min(allotted, (int)a.my_max_num_workers); } - a.my_num_workers_allotted = allotted; + a.my_num_workers_allotted.store(allotted, std::memory_order_relaxed); + a.my_is_top_priority.store(list_idx == max_priority_level, std::memory_order_relaxed); assigned += allotted; assigned_per_priority += allotted; } @@ -505,19 +513,26 @@ void market::adjust_demand ( arena& a, int delta ) { if ( !delta ) return; my_arenas_list_mutex.lock(); - int prev_req = a.my_num_workers_requested; - a.my_num_workers_requested += delta; - if ( a.my_num_workers_requested <= 0 ) { - a.my_num_workers_allotted = 0; - if ( prev_req <= 0 ) { - my_arenas_list_mutex.unlock(); - return; - } - delta = -prev_req; + a.my_total_num_workers_requested += delta; + int target_workers = 0; + // Clamp target_workers to the interval [0, a.my_max_num_workers] + if (a.my_total_num_workers_requested > 0) { + target_workers = a.my_total_num_workers_requested < int(a.my_max_num_workers) ? + a.my_total_num_workers_requested : a.my_max_num_workers; } - else if ( prev_req < 0 ) { - delta = a.my_num_workers_requested; + + delta = target_workers - a.my_num_workers_requested; + + if (delta == 0) { + my_arenas_list_mutex.unlock(); + return; + } + + a.my_num_workers_requested += delta; + if (a.my_num_workers_requested == 0) { + a.my_num_workers_allotted.store(0, std::memory_order_relaxed); } + my_total_demand += delta; my_priority_level_demand[a.my_priority_level] += delta; unsigned effective_soft_limit = my_num_workers_soft_limit.load(std::memory_order_relaxed); @@ -540,9 +555,14 @@ void market::adjust_demand ( arena& a, int delta ) { my_num_workers_requested += delta; __TBB_ASSERT( my_num_workers_requested <= (int)effective_soft_limit, NULL ); + int target_epoch = my_adjust_demand_target_epoch++; + my_arenas_list_mutex.unlock(); + + spin_wait_until_eq(my_adjust_demand_current_epoch, target_epoch); // Must be called outside of any locks my_server->adjust_job_count_estimate( delta ); + my_adjust_demand_current_epoch.store(target_epoch + 1, std::memory_order_release); } void market::process( job& j ) { diff --git a/src/tbb/market.h b/src/tbb/market.h index b4d9f1152c..3572641659 100644 --- a/src/tbb/market.h +++ b/src/tbb/market.h @@ -100,6 +100,12 @@ class market : no_copy, rml::tbb_client { //! Number of workers currently requested from RML int my_num_workers_requested; + //! The target serialization epoch for callers of adjust_job_count_estimate + int my_adjust_demand_target_epoch; + + //! The current serialization epoch for callers of adjust_job_count_estimate + std::atomic<int> my_adjust_demand_current_epoch; + //! First unused index of worker /** Used to assign indices to the new workers coming from RML, and busy part of my_workers array. **/ @@ -215,7 +221,7 @@ class market : no_copy, rml::tbb_client { unsigned arena_index, std::size_t stack_size ); //!
Removes the arena from the market's list - void try_destroy_arena ( arena*, uintptr_t aba_epoch ); + void try_destroy_arena ( arena*, uintptr_t aba_epoch, unsigned priority_level ); //! Removes the arena from the market's list void detach_arena ( arena& ); diff --git a/src/tbb/misc.cpp b/src/tbb/misc.cpp index 34a150d7bf..261ae147ec 100644 --- a/src/tbb/misc.cpp +++ b/src/tbb/misc.cpp @@ -74,50 +74,57 @@ void PrintExtraVersionInfo( const char* category, const char* format, ... ) { } } -void PrintRMLVersionInfo( void* arg, const char* server_info ) { - PrintExtraVersionInfo( server_info, (const char *)arg ); } - //! check for transaction support. #if _MSC_VER #include <intrin.h> // for __cpuid #endif -bool cpu_has_speculation() { -#if (__TBB_x86_32 || __TBB_x86_64) -#if (__INTEL_COMPILER || __GNUC__ || _MSC_VER || __SUNPRO_CC) - bool result = false; - const int rtm_ebx_mask = 1<<11; + +#if __TBB_x86_32 || __TBB_x86_64 +void check_cpuid(int leaf, int sub_leaf, int registers[4]) { #if _MSC_VER - int info[4] = {0,0,0,0}; - const int reg_ebx = 1; - __cpuidex(info, 7, 0); - result = (info[reg_ebx] & rtm_ebx_mask)!=0; -#elif __GNUC__ || __SUNPRO_CC - int32_t reg_ebx = 0; - int32_t reg_eax = 7; - int32_t reg_ecx = 0; - __asm__ __volatile__ ( "movl %%ebx, %%esi\n" - "cpuid\n" - "movl %%ebx, %0\n" - "movl %%esi, %%ebx\n" - : "=a"(reg_ebx) : "0" (reg_eax), "c" (reg_ecx) : "esi", -#if __TBB_x86_64 - "ebx", + __cpuidex(registers, leaf, sub_leaf); +#else + int reg_eax = 0; + int reg_ebx = 0; + int reg_ecx = 0; + int reg_edx = 0; +#if __TBB_x86_32 && __PIC__ + // On 32-bit systems with position-independent code, GCC cannot freely clobber the EBX + // register (it holds the GOT pointer), so we back it up and restore it around CPUID. + __asm__("mov %%ebx, %%esi\n\t" + "cpuid\n\t" + "xchg %%ebx, %%esi" + : "=a"(reg_eax), "=S"(reg_ebx), "=c"(reg_ecx), "=d"(reg_edx) + : "0"(leaf), "2"(sub_leaf) // pass leaf in EAX and sub_leaf in ECX + ); +#else + __asm__("cpuid" + : "=a"(reg_eax), "=b"(reg_ebx), "=c"(reg_ecx), "=d"(reg_edx) + : "0"(leaf), "2"(sub_leaf) // pass leaf in EAX and sub_leaf in ECX + ); #endif - "edx" - ); - result = (reg_ebx & rtm_ebx_mask)!=0 ; + registers[0] = reg_eax; + registers[1] = reg_ebx; + registers[2] = reg_ecx; + registers[3] = reg_edx; #endif - return result; -#else - #error Speculation detection not enabled for compiler -#endif /* __INTEL_COMPILER || __GNUC__ || _MSC_VER */ -#else /* (__TBB_x86_32 || __TBB_x86_64) */ - return false; +} +#endif + +void detect_cpu_features(cpu_features_type& cpu_features) { + suppress_unused_warning(cpu_features); +#if __TBB_x86_32 || __TBB_x86_64 + const int rtm_ebx_mask = 1 << 11; + const int waitpkg_ecx_mask = 1 << 5; + int registers[4] = {0}; + + // Check RTM and WAITPKG + check_cpuid(7, 0, registers); + cpu_features.rtm_enabled = (registers[1] & rtm_ebx_mask) != 0; + cpu_features.waitpkg_enabled = (registers[2] & waitpkg_ecx_mask) != 0; #endif /* (__TBB_x86_32 || __TBB_x86_64) */ } } // namespace r1 } // namespace detail } // namespace tbb - diff --git a/src/tbb/misc.h b/src/tbb/misc.h index 9773e9ba17..ba31e50a66 100644 --- a/src/tbb/misc.h +++ b/src/tbb/misc.h @@ -211,7 +211,12 @@ T1 atomic_update(std::atomic<T1>& dst, T1 newValue, Pred compare) { inline void destroy_process_mask(){} #endif /* __TBB_USE_OS_AFFINITY_SYSCALL */ -bool cpu_has_speculation(); +struct cpu_features_type { + bool rtm_enabled{false}; + bool waitpkg_enabled{false}; +}; + +void detect_cpu_features(cpu_features_type& cpu_features); #if __TBB_NUMA_SUPPORT class binding_handler; @@ -273,6 +278,7 @@ static inline void abort_transaction() 
{ #endif } +#if TBB_USE_ASSERT static inline unsigned char is_in_transaction() { #if __TBB_TSX_INTRINSICS_PRESENT return _xtest(); @@ -280,6 +286,7 @@ static inline unsigned char is_in_transaction() { return 0; #endif } +#endif // TBB_USE_ASSERT } // namespace r1 } // namespace detail diff --git a/src/tbb/observer_proxy.cpp b/src/tbb/observer_proxy.cpp index 66f4acb63b..6cae72438f 100644 --- a/src/tbb/observer_proxy.cpp +++ b/src/tbb/observer_proxy.cpp @@ -40,7 +40,7 @@ observer_proxy::observer_proxy( d1::task_scheduler_observer& tso ) #endif /* TBB_USE_ASSERT */ } -observer_proxy::~observer_proxy () { +observer_proxy::~observer_proxy() { __TBB_ASSERT( !my_ref_count, "Attempt to destroy proxy still in use" ); poison_value(my_ref_count); poison_pointer(my_prev); @@ -50,7 +50,7 @@ observer_proxy::~observer_proxy () { #endif /* TBB_USE_ASSERT */ } -void observer_list::clear () { +void observer_list::clear() { // Though the method will work fine for the empty list, we require the caller // to check for the list emptiness before invoking it to avoid extra overhead. __TBB_ASSERT( !empty(), NULL ); @@ -72,15 +72,25 @@ void observer_list::clear () { __TBB_ASSERT(is_alive(p->my_ref_count), "Observer's proxy died prematurely"); __TBB_ASSERT(p->my_ref_count.load(std::memory_order_relaxed) == 1, "Reference for observer is missing"); poison_pointer(p->my_observer); - poison_value(p->my_ref_count); remove(p); + --p->my_ref_count; delete p; } - __TBB_ASSERT(my_head == nullptr && my_tail == nullptr, nullptr); } + + // If observe(false) is called concurrently with the destruction of the arena, + // we need to wait until all proxies are removed. + for (atomic_backoff backoff; ; backoff.pause()) { + scoped_lock lock(mutex(), /*is_writer=*/false); + if (my_head == nullptr) { + break; + } + } + + __TBB_ASSERT(my_head == nullptr && my_tail == nullptr, nullptr); } -void observer_list::insert ( observer_proxy* p ) { +void observer_list::insert( observer_proxy* p ) { scoped_lock lock(mutex(), /*is_writer=*/true); if (my_head) { p->my_prev = my_tail; @@ -217,6 +227,9 @@ void observer_list::do_notify_exit_observers(observer_proxy* last, bool worker) remove_ref_fast(p); if (p) { lock.release(); + if (p != prev && prev) { + remove_ref(prev); + } remove_ref(p); } return; diff --git a/src/tbb/observer_proxy.h b/src/tbb/observer_proxy.h index ca0e60e39a..33ec92b7e6 100644 --- a/src/tbb/observer_proxy.h +++ b/src/tbb/observer_proxy.h @@ -35,10 +35,10 @@ class observer_list { typedef aligned_space<spin_rw_mutex> my_mutex_type; //! Pointer to the head of this list. - observer_proxy* my_head; + observer_proxy* my_head{nullptr}; //! Pointer to the tail of this list. - observer_proxy* my_tail; + observer_proxy* my_tail{nullptr}; //! Mutex protecting this list. my_mutex_type my_mutex; @@ -57,7 +57,7 @@ class observer_list { void do_notify_exit_observers( observer_proxy* last, bool worker ); public: - observer_list () : my_head(NULL), my_tail(NULL) {} + observer_list () = default; //! Removes and destroys all observer proxies from the list. /** Cannot be used concurrently with other methods. **/ @@ -79,11 +79,11 @@ class observer_list { //! Accessor to the reader-writer mutex associated with the list. spin_rw_mutex& mutex () { return my_mutex.begin()[0]; } - bool empty () const { return my_head == NULL; } + bool empty () const { return my_head == nullptr; } //! Call entry notifications on observers added after last was notified. /** Updates last to become the last notified observer proxy (in the global list) - or leaves it to be NULL. 
The proxy has its refcount incremented. **/ + or leaves it to be nullptr. The proxy has its refcount incremented. **/ inline void notify_entry_observers( observer_proxy*& last, bool worker ); //! Call exit notifications on last and observers added before it. @@ -120,7 +120,7 @@ class observer_proxy { ~observer_proxy(); }; // class observer_proxy -inline void observer_list::remove_ref_fast( observer_proxy*& p ) { +void observer_list::remove_ref_fast( observer_proxy*& p ) { if( p->my_observer ) { // Can decrement refcount quickly, as it cannot drop to zero while under the lock. std::uintptr_t r = --p->my_ref_count; @@ -131,13 +131,13 @@ inline void observer_list::remove_ref_fast( observer_proxy*& p ) { } } -inline void observer_list::notify_entry_observers(observer_proxy*& last, bool worker) { +void observer_list::notify_entry_observers(observer_proxy*& last, bool worker) { if (last == my_tail) return; do_notify_entry_observers(last, worker); } -inline void observer_list::notify_exit_observers( observer_proxy*& last, bool worker ) { +void observer_list::notify_exit_observers( observer_proxy*& last, bool worker ) { if (last == nullptr) { return; } diff --git a/src/tbb/queuing_rw_mutex.cpp b/src/tbb/queuing_rw_mutex.cpp index 5d34909514..24c2d11516 100644 --- a/src/tbb/queuing_rw_mutex.cpp +++ b/src/tbb/queuing_rw_mutex.cpp @@ -532,18 +532,23 @@ struct queuing_rw_mutex_impl { void __TBB_EXPORTED_FUNC acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) { queuing_rw_mutex_impl::acquire(m, s, write); } + bool __TBB_EXPORTED_FUNC try_acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) { return queuing_rw_mutex_impl::try_acquire(m, s, write); } + void __TBB_EXPORTED_FUNC release(d1::queuing_rw_mutex::scoped_lock& s) { queuing_rw_mutex_impl::release(s); } + bool __TBB_EXPORTED_FUNC upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock& s) { return queuing_rw_mutex_impl::upgrade_to_writer(s); } + bool __TBB_EXPORTED_FUNC downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock& s) { return queuing_rw_mutex_impl::downgrade_to_reader(s); } + void __TBB_EXPORTED_FUNC construct(d1::queuing_rw_mutex& m) { queuing_rw_mutex_impl::construct(m); } diff --git a/src/tbb/rml_tbb.cpp b/src/tbb/rml_tbb.cpp index bd9a051646..48edd7f6ca 100644 --- a/src/tbb/rml_tbb.cpp +++ b/src/tbb/rml_tbb.cpp @@ -106,12 +106,6 @@ ::rml::factory::status_type FACTORY::make_server( SERVER*& s, CLIENT& c) { return (*my_make_server_routine)(*this,s,c); } -void FACTORY::call_with_server_info( ::rml::server_info_callback_t cb, void* arg ) const { - // Failure of following assertion means that factory was not successfully opened. - __TBB_ASSERT_EX( my_call_with_server_info_routine, NULL ); - (*my_call_with_server_info_routine)( cb, arg ); -} - } // namespace rml } // namespace r1 } // namespace detail diff --git a/src/tbb/rml_tbb.h b/src/tbb/rml_tbb.h index f873085c9d..5dfce10d4e 100644 --- a/src/tbb/rml_tbb.h +++ b/src/tbb/rml_tbb.h @@ -84,9 +84,6 @@ class tbb_factory: public ::rml::factory { //! Close factory void close(); - - //! Call the callback with the server build info - void call_with_server_info( ::rml::server_info_callback_t cb, void* arg ) const; }; } // namespace rml diff --git a/src/tbb/rtm_rw_mutex.cpp b/src/tbb/rtm_rw_mutex.cpp index 3563840f31..4450d3cfb1 100644 --- a/src/tbb/rtm_rw_mutex.cpp +++ b/src/tbb/rtm_rw_mutex.cpp @@ -60,7 +60,7 @@ struct rtm_rw_mutex_impl { } s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex; } - + //! 
Acquire write lock on the given mutex. static void acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) { __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, "scoped_lock already in transaction"); @@ -89,7 +89,7 @@ struct rtm_rw_mutex_impl { ++num_retries; } while((abort_code & speculation_retry) != 0 && (num_retries < retry_threshold_write)); } - + if(only_speculate) return; s.m_mutex = &m; // should apply a real try_lock... s.m_mutex->lock(); // kill transactional writers @@ -98,7 +98,7 @@ struct rtm_rw_mutex_impl { s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_writer; return; } - + //! Acquire read lock on given mutex. // only_speculate : true if we are doing a try_acquire. If true and we fail to speculate, don't // really acquire the lock, return and do a try_acquire on the contained spin_rw_mutex. If @@ -129,8 +129,8 @@ struct rtm_rw_mutex_impl { } // fallback path // retry only if there is any hope of getting into a transaction soon - // Retry in the following cases (from Section 8.3.5 of Intel(R) - // Architecture Instruction Set Extensions Programming Reference): + // Retry in the following cases (from Section 8.3.5 of + // Intel(R) Architecture Instruction Set Extensions Programming Reference): // 1. abort caused by XABORT instruction (bit 0 of EAX register is set) // 2. the transaction may succeed on a retry (bit 1 of EAX register is set) // 3. if another logical processor conflicted with a memory address @@ -139,13 +139,13 @@ struct rtm_rw_mutex_impl { ++num_retries; } while((abort_code & speculation_retry) != 0 && (num_retries < retry_threshold_read)); } - + if(only_speculate) return; s.m_mutex = &m; s.m_mutex->lock_shared(); s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_reader; } - + //! Upgrade reader to become a writer. /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ static bool upgrade(d1::rtm_rw_mutex::scoped_lock& s) { @@ -175,7 +175,7 @@ struct rtm_rw_mutex_impl { return false; } } - + //! Downgrade writer to a reader. static bool downgrade(d1::rtm_rw_mutex::scoped_lock& s) { switch (s.m_transaction_state) { @@ -193,7 +193,7 @@ struct rtm_rw_mutex_impl { return false; } } - + //! Try to acquire write lock on the given mutex. // There may be reader(s) which acquired the spin_rw_mutex, as well as possibly // transactional reader(s). If this is the case, the acquire will fail, and assigning @@ -216,7 +216,7 @@ struct rtm_rw_mutex_impl { } return false; } - + //! Try to acquire read lock on the given mutex. static bool try_acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) { // speculatively acquire the lock. If this fails, do try_lock_shared on the spin_rw_mutex. diff --git a/src/tbb/scheduler_common.h b/src/tbb/scheduler_common.h index 0f1eedd779..5581569607 100644 --- a/src/tbb/scheduler_common.h +++ b/src/tbb/scheduler_common.h @@ -189,7 +189,8 @@ inline std::uint64_t machine_time_stamp() { return (std::uint64_t(hi) << 32) | lo; #endif } -inline void prolonged_pause() { + +inline void prolonged_pause_impl() { // Assumption based on practice: 1000-2000 ticks seems to be a suitable invariant for the // majority of platforms. Currently, skip platforms that define __TBB_STEALING_PAUSE // because these platforms require very careful tuning. 
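For quick reference, the hunks around here split the old prolonged_pause() into prolonged_pause_impl() (the calibrated pause loop) plus a thin dispatcher that prefers the new WAITPKG path when governor::wait_package_enabled() reports hardware support. A condensed sketch of the resulting control flow, with the patch's if/else restructured as an early return for readability (an illustration, not the patch text):

inline void prolonged_pause() {
#if __TBB_WAITPKG_INTRINSICS_PRESENT && (_WIN32 || _WIN64 || __linux__) && (__TBB_x86_32 || __TBB_x86_64)
    if (governor::wait_package_enabled()) {
        // Enter a low-power optimized state until the TSC passes "now + 700" ticks.
        _tpause(0, machine_time_stamp() + 700);
        return;
    }
#endif
    prolonged_pause_impl(); // classic calibrated pause loop
}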
@@ -206,7 +207,7 @@ inline void prolonged_pause() { } while (prev < finish); } #else -inline void prolonged_pause() { +inline void prolonged_pause_impl() { #ifdef __TBB_ipf static const long PauseTime = 1500; #else @@ -215,7 +216,22 @@ inline void prolonged_pause() { // TODO IDEA: Update PauseTime adaptively? machine_pause(PauseTime); } -#endif // (_WIN32 || _WIN64 || __linux__) && (__TBB_x86_32 || __TBB_x86_64) +#endif + +inline void prolonged_pause() { +#if __TBB_WAITPKG_INTRINSICS_PRESENT && (_WIN32 || _WIN64 || __linux__) && (__TBB_x86_32 || __TBB_x86_64) + if (governor::wait_package_enabled()) { + std::uint64_t time_stamp = machine_time_stamp(); + // The _tpause function directs the processor to enter an implementation-dependent optimized state + // until the Time Stamp Counter reaches or exceeds the value specified in the second parameter. + // The constant 700 is the number of ticks to wait. + // The first parameter selects between a lower-power (0) and a faster-wakeup (1) optimized state. + _tpause(0, time_stamp + 700); + } + else +#endif + prolonged_pause_impl(); +} class stealing_loop_backoff { const int my_pause_threshold; diff --git a/src/tbb/task.cpp b/src/tbb/task.cpp index 91bbc68a18..f2ce477cd8 100644 --- a/src/tbb/task.cpp +++ b/src/tbb/task.cpp @@ -20,6 +20,7 @@ #include "arena.h" #include "thread_data.h" #include "task_dispatcher.h" +#include "waiters.h" #include "itt_notify.h" #include "tbb/detail/_task.h" @@ -30,6 +31,72 @@ namespace tbb { namespace detail { + +namespace d1 { + +bool wait_context::is_locked() { + return m_ref_count.load(std::memory_order_relaxed) & lock_flag; +} + +void wait_context::lock() { + atomic_backoff backoff; + + auto try_lock = [&] { return !(m_ref_count.fetch_or(lock_flag) & lock_flag); }; + + // While is_locked() returns true, try_lock is not invoked + while (is_locked() || !try_lock()) { + backoff.pause(); + } +} + +void wait_context::unlock() { + __TBB_ASSERT(is_locked(), NULL); + m_ref_count.fetch_and(~lock_flag); +} + +bool wait_context::publish_wait_list() { + // Try to add waiter_flag to the ref counter. + // Important: this function must never add waiter_flag once the work is done; otherwise the flag would never be removed + + auto expected = m_ref_count.load(std::memory_order_relaxed); + __TBB_ASSERT(is_locked() || m_version_and_traits == 0, NULL); + + while (!(expected & waiter_flag) && continue_execution()) { + if (m_ref_count.compare_exchange_strong(expected, expected | waiter_flag)) { + __TBB_ASSERT(!(expected & waiter_flag), NULL); + expected |= waiter_flag; + break; + } + } + + // Returns whether waiter_flag is set in ref_count + return expected & waiter_flag; +} + +void wait_context::unregister_waiter(r1::wait_node& node) { + lock_guard lock(*this); + + if (m_wait_head != nullptr) { + if (m_wait_head == &node) { + m_wait_head = node.my_next; + } + node.unlink(); + } +} + +void wait_context::notify_waiters() { + lock_guard lock(*this); + + if (m_wait_head != nullptr) { + m_wait_head->notify_all(*this); + m_wait_head = nullptr; + } + + m_ref_count.store(m_ref_count.load(std::memory_order_relaxed) & ~waiter_flag, std::memory_order_relaxed); +} + +} // namespace d1 + namespace r1 { //------------------------------------------------------------------------ @@ -153,10 +220,24 @@ void thread_data::do_post_resume_action() { __TBB_ASSERT(my_post_resume_arg, "The post resume action must have an argument"); switch (my_post_resume_action) { - case post_resume_action::abandon: + case post_resume_action::register_waiter: { - d1::wait_context& wo =
*static_cast<d1::wait_context*>(my_post_resume_arg); - wo.abandon_wait(); + auto& data = *static_cast<register_waiter_data*>(my_post_resume_arg); + + // Backward compatibility support + if (data.wo.m_version_and_traits == 0) { + data.wo.m_wait_head = reinterpret_cast<r1::wait_node*>(data.node.my_suspend_point); + if (!data.wo.publish_wait_list()) { + r1::resume(data.node.my_suspend_point); + } + break; + } + + auto wait_condition = [&data] { return data.wo.continue_execution(); }; + if (!data.wo.try_register_waiter(data.node, wait_condition)) { + r1::resume(data.node.my_suspend_point); + } + break; } case post_resume_action::callback: @@ -206,6 +287,12 @@ suspend_point_type* current_suspend_point() { #endif /* __TBB_RESUMABLE_TASKS */ +void notify_waiters(d1::wait_context& wc) { + __TBB_ASSERT(wc.m_version_and_traits > 0, NULL); + + wc.notify_waiters(); +} + } // namespace r1 } // namespace detail } // namespace tbb diff --git a/src/tbb/task_dispatcher.cpp b/src/tbb/task_dispatcher.cpp index 773cca6fd7..45d7883c73 100644 --- a/src/tbb/task_dispatcher.cpp +++ b/src/tbb/task_dispatcher.cpp @@ -15,6 +15,7 @@ */ #include "task_dispatcher.h" +#include "waiters.h" namespace tbb { namespace detail { @@ -150,37 +151,6 @@ d1::task_group_context* __TBB_EXPORTED_FUNC current_context() { } } -class external_waiter { - d1::wait_context& m_wait_ctx; - stealing_loop_backoff my_backoff; -public: - external_waiter(d1::wait_context& wo, int num_workers) : m_wait_ctx( wo ), my_backoff( num_workers ) {} - - bool continue_execution(arena_slot& slot, d1::task*& t) const { - __TBB_ASSERT(t == nullptr, nullptr); - if (!m_wait_ctx.continue_execution()) - return false; - t = get_self_recall_task(slot); - return true; - } - - void pause() { - my_backoff.pause(); - } - - void reset_wait() { - my_backoff.reset_wait(); - } - - d1::wait_context* wait_ctx() { - return &m_wait_ctx; - } - - static bool postpone_execution(d1::task&) { - return false; - } -}; - void task_dispatcher::execute_and_wait(d1::task* t, d1::wait_context& wait_ctx, d1::task_group_context& w_ctx) { // Get an associated task dispatcher thread_data* tls = governor::get_thread_data(); @@ -195,10 +165,9 @@ void task_dispatcher::execute_and_wait(d1::task* t, d1::wait_context& wait_ctx, }
// Waiting on special object tied to a waiting thread. - external_waiter waiter{ wait_ctx, int(tls->my_arena->my_num_slots) }; + external_waiter waiter{ *tls->my_arena, wait_ctx }; t = local_td.local_wait_for_all(t, waiter); __TBB_ASSERT_EX(t == nullptr, "External waiter must not leave dispatch loop with a task"); - __TBB_ASSERT(wait_ctx.continue_execution() == false, "Thread can only leave dispatch loop when waiting object allowed this"); // Master (external) thread couldn't exit the dispatch loop in an idle state if (local_td.m_thread_data->my_inbox.is_idle_state(true)) { @@ -213,34 +182,6 @@ void task_dispatcher::execute_and_wait(d1::task* t, d1::wait_context& wait_ctx, #if __TBB_RESUMABLE_TASKS -class coroutine_waiter { - stealing_loop_backoff my_backoff; -public: - coroutine_waiter(int num_workers) : my_backoff(num_workers) {} - - bool continue_execution(arena_slot& slot, d1::task*& t) const { - __TBB_ASSERT(t == nullptr, nullptr); - t = get_self_recall_task(slot); - return true; - } - - void pause() { - my_backoff.pause(); - } - - void reset_wait() { - my_backoff.reset_wait(); - } - - d1::wait_context* wait_ctx() { - return nullptr; - } - - static bool postpone_execution(d1::task& t) { - return task_accessor::is_resume_task(t); - } -}; - #if _WIN32 /* [[noreturn]] */ void __stdcall co_local_wait_for_all(void* arg) noexcept #else @@ -267,7 +208,8 @@ class coroutine_waiter { // Endless loop here because coroutine could be reused for (;;) { - coroutine_waiter waiter(m_thread_data->my_arena->my_num_slots); + arena* a = m_thread_data->my_arena; + coroutine_waiter waiter(*a); d1::task* resume_task = local_wait_for_all(nullptr, waiter); assert_task_valid(resume_task); __TBB_ASSERT(this == m_thread_data->my_task_dispatcher, nullptr); diff --git a/src/tbb/task_dispatcher.h b/src/tbb/task_dispatcher.h index f9ac45a28c..89d99fbc21 100644 --- a/src/tbb/task_dispatcher.h +++ b/src/tbb/task_dispatcher.h @@ -22,6 +22,7 @@ #include "tbb/global_control.h" #include "scheduler_common.h" +#include "waiters.h" #include "arena_slot.h" #include "arena.h" #include "thread_data.h" @@ -61,11 +62,13 @@ inline d1::task* get_self_recall_task(arena_slot& slot) { inline d1::task* suspend_point_type::resume_task::execute(d1::execution_data& ed) { execution_data_ext& ed_ext = static_cast<execution_data_ext&>(ed); + resume_node node{ed_ext.task_disp->get_suspend_point()}; + thread_data::register_waiter_data wait_data{*ed_ext.wait_ctx, node}; + if (ed_ext.wait_ctx) { // The wait_ctx is present only in external_waiter. In that case we leave the current stack // in the abandoned state to resume when waiting completes. - ed_ext.wait_ctx->m_waiting_coroutine = ed_ext.task_disp->get_suspend_point(); - ed_ext.task_disp->m_thread_data->set_post_resume_action(thread_data::post_resume_action::abandon, ed_ext.wait_ctx); + ed_ext.task_disp->m_thread_data->set_post_resume_action(thread_data::post_resume_action::register_waiter, &wait_data); } else { // If wait_ctx is null, it can only be a worker thread on the outermost level, because // coroutine_waiter interrupts the bypass loop before the resume_task executes. @@ -215,7 +218,7 @@ d1::task* task_dispatcher::receive_or_steal_task( break; // Stealing success, end of stealing attempt } // Nothing to do, pause a little.
- waiter.pause(); + waiter.pause(slot); } // end of nonlocal task retrieval loop if (inbox.is_idle_state(true)) { inbox.set_is_idle(false); @@ -362,7 +365,11 @@ inline void task_dispatcher::recall_point() { __TBB_ASSERT(m_suspend_point->m_is_owner_recalled.load(std::memory_order_relaxed) == false, nullptr); d1::suspend([](suspend_point_type* sp) { sp->m_is_owner_recalled.store(true, std::memory_order_release); + sp->m_arena->my_sleep_monitors.notify([sp] (std::uintptr_t tag) { + return tag == std::uintptr_t(sp); + }); }); + if (m_thread_data->my_inbox.is_idle_state(true)) { m_thread_data->my_inbox.set_is_idle(false); } @@ -370,15 +377,6 @@ inline void task_dispatcher::recall_point() { } #endif /* __TBB_RESUMABLE_TASKS */ -template -d1::task* local_wait_for_all(d1::task* t, Waiter& waiter) { - if (governor::is_itt_present()) { - return local_wait_for_all(t, waiter); - } else { - return local_wait_for_all(t, waiter); - } -} - #if __TBB_PREVIEW_CRITICAL_TASKS inline d1::task* task_dispatcher::get_critical_task(d1::task* t, execution_data_ext& ed, isolation_type isolation, bool critical_allowed) { __TBB_ASSERT( critical_allowed || !m_properties.critical_task_allowed, nullptr ); diff --git a/src/tbb/thread_data.h b/src/tbb/thread_data.h index 86d61eeb38..ceda3e1fc9 100644 --- a/src/tbb/thread_data.h +++ b/src/tbb/thread_data.h @@ -38,6 +38,7 @@ class task; class arena_slot; class task_group_context; class task_dispatcher; +struct resume_node; //------------------------------------------------------------------------ // Thread Data @@ -150,7 +151,7 @@ class thread_data : public ::rml::job //! The list of possible post resume actions. enum class post_resume_action { invalid, - abandon, + register_waiter, callback, cleanup, notify, @@ -169,6 +170,11 @@ class thread_data : public ::rml::job } }; + struct register_waiter_data { + d1::wait_context& wo; + resume_node& node; + }; + //! Suspends the current coroutine (task_dispatcher). void suspend(void* suspend_callback, void* user_callback); diff --git a/src/tbb/waiters.h b/src/tbb/waiters.h new file mode 100644 index 0000000000..d1882fa260 --- /dev/null +++ b/src/tbb/waiters.h @@ -0,0 +1,302 @@ +/* + Copyright (c) 2005-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef _TBB_waiters_H +#define _TBB_waiters_H + +#include "tbb/detail/_task.h" +#include "scheduler_common.h" +#include "arena.h" + +namespace tbb { +namespace detail { +namespace r1 { + +// Organizes the wait list stored in wait_context +struct wait_node { + virtual void notify(d1::wait_context& wo) = 0; + + void notify_all(d1::wait_context& wo) { + wait_node* curr_node = this; + wait_node* next_node = nullptr; + + for (; curr_node != nullptr; curr_node = next_node) { + next_node = curr_node->my_next; + curr_node->notify(wo); + } + } + + void link(wait_node* next_node) { + my_next = next_node; + my_prev = nullptr; + + if (next_node != nullptr) { + next_node->my_prev = this; + } + } + + void unlink() { + if (my_prev) { + my_prev->my_next = my_next; + } + + if (my_next) { + my_next->my_prev = my_prev; + } + } + + wait_node* my_next{nullptr}; + wait_node* my_prev{nullptr}; +}; + +struct sleep_node : public wait_node { + sleep_node(arena& a) : my_arena(a) + {} + + void notify(d1::wait_context& wo) override { + std::uintptr_t wait_tag = reinterpret_cast<std::uintptr_t>(&wo); + my_arena.my_sleep_monitors.notify( + [&wait_tag] (std::uintptr_t tag) { + return tag == wait_tag; + } + ); + } + + arena& my_arena; +}; + +struct resume_node : public wait_node { + resume_node(suspend_point_type* sp) : my_suspend_point(sp) + {} + + void notify(d1::wait_context&) override { + r1::resume(my_suspend_point); + } + + suspend_point_type* my_suspend_point; +}; + +inline d1::task* get_self_recall_task(arena_slot& slot); + +class waiter_base { +public: + waiter_base(arena& a) : my_arena(a), my_backoff(int(a.my_num_slots)) {} + + bool pause() { + if (my_backoff.pause()) { + my_arena.is_out_of_work(); + return true; + } + + return false; + } + + void reset_wait() { + my_backoff.reset_wait(); + } + +protected: + arena& my_arena; + stealing_loop_backoff my_backoff; +}; + +class outermost_worker_waiter : public waiter_base { +public: + using waiter_base::waiter_base; + + bool continue_execution(arena_slot& slot, d1::task*& t) const { + __TBB_ASSERT(t == nullptr, nullptr); + + if (is_worker_should_leave(slot)) { + // Leave dispatch loop + return false; + } + + t = get_self_recall_task(slot); + return true; + } + + void pause(arena_slot&) { + waiter_base::pause(); + } + + + d1::wait_context* wait_ctx() { + return nullptr; + } + + static bool postpone_execution(d1::task&) { + return false; + } + +private: + using base_type = waiter_base; + + bool is_worker_should_leave(arena_slot& slot) const { + bool is_top_priority_arena = my_arena.my_is_top_priority.load(std::memory_order_relaxed); + bool is_task_pool_empty = slot.task_pool.load(std::memory_order_relaxed) == EmptyTaskPool; + + if (is_top_priority_arena) { + // Workers in the top-priority arena do not leave it until all work in the task pool is done + if (is_task_pool_empty && my_arena.is_recall_requested()) { + return true; + } + } else { + if (my_arena.is_recall_requested()) { + // If the worker still has work in its task pool, we must notify other threads; + // otherwise their wakeups may be missed + if (!is_task_pool_empty) { + my_arena.advertise_new_work(); + } + return true; + } + } + + return false; + } +}; + +class sleep_waiter : public waiter_base { +protected: + using waiter_base::waiter_base; + + bool is_arena_empty() { + return my_arena.my_pool_state.load(std::memory_order_relaxed) == arena::SNAPSHOT_EMPTY; + } +}; + +class external_waiter : public sleep_waiter { +public: + external_waiter(arena& a, d1::wait_context& wo) + : sleep_waiter(a), my_wait_ctx(wo) + {} + + bool
continue_execution(arena_slot& slot, d1::task*& t) const { + __TBB_ASSERT(t == nullptr, nullptr); + if (!my_wait_ctx.continue_execution()) + return false; + t = get_self_recall_task(slot); + return true; + } + + void pause(arena_slot&) { + if (!sleep_waiter::pause()) { + return; + } + + // Backward compatibility support + if (my_wait_ctx.m_version_and_traits == 0) { + return; + } + + concurrent_monitor::thread_context thr_ctx; + auto sleep_condition = [&] { return is_arena_empty() && my_wait_ctx.continue_execution(); }; + + if (sleep_condition()) { + my_arena.my_sleep_monitors.prepare_wait(thr_ctx, reinterpret_cast<std::uintptr_t>(&my_wait_ctx)); + sleep_node node{my_arena}; + + if (my_wait_ctx.try_register_waiter(node, sleep_condition)) { + my_arena.my_sleep_monitors.commit_wait(thr_ctx); + my_wait_ctx.unregister_waiter(node); + } else { + my_arena.my_sleep_monitors.cancel_wait(thr_ctx); + } + } + } + + d1::wait_context* wait_ctx() { + return &my_wait_ctx; + } + + static bool postpone_execution(d1::task&) { + return false; + } + +private: + d1::wait_context& my_wait_ctx; +}; + +#if __TBB_RESUMABLE_TASKS + +class coroutine_waiter : public sleep_waiter { +public: + using sleep_waiter::sleep_waiter; + + bool continue_execution(arena_slot& slot, d1::task*& t) const { + __TBB_ASSERT(t == nullptr, nullptr); + t = get_self_recall_task(slot); + return true; + } + + void pause(arena_slot& slot) { + if (!sleep_waiter::pause()) { + return; + } + + concurrent_monitor::thread_context thr_ctx; + suspend_point_type* sp = slot.default_task_dispatcher().m_suspend_point; + + auto sleep_condition = [&] { return is_arena_empty() && !sp->m_is_owner_recalled.load(std::memory_order_relaxed); }; + + if (sleep_condition()) { + my_arena.my_sleep_monitors.prepare_wait(thr_ctx, (std::uintptr_t)sp); + if (sleep_condition()) { + my_arena.my_sleep_monitors.commit_wait(thr_ctx); + } else { + my_arena.my_sleep_monitors.cancel_wait(thr_ctx); + } + } + } + + void reset_wait() { + my_backoff.reset_wait(); + } + + d1::wait_context* wait_ctx() { + return nullptr; + } + + static bool postpone_execution(d1::task& t) { + return task_accessor::is_resume_task(t); + } +}; + +#endif // __TBB_RESUMABLE_TASKS + +} // namespace r1 + +namespace d1 { +template <typename F> +bool wait_context::try_register_waiter(r1::wait_node& waiter, F&& condition) { + bool result = condition(); + if (result) { + lock_guard lock(*this); + + result = result && publish_wait_list(); + if (result) { + waiter.link(m_wait_head); + m_wait_head = &waiter; + } + } + return result; +} +} // namespace d1 + +} // namespace detail +} // namespace tbb + +#endif // _TBB_waiters_H diff --git a/src/tbbbind/CMakeLists.txt b/src/tbbbind/CMakeLists.txt index 9f5485be72..cb0a95fd03 100644 --- a/src/tbbbind/CMakeLists.txt +++ b/src/tbbbind/CMakeLists.txt @@ -37,10 +37,12 @@ target_compile_options(tbbbind # Avoid use of target_link_libraries here as it changes /DEF option to \DEF on Windows.
set_target_properties(tbbbind PROPERTIES - LINK_FLAGS ${TBB_LINK_DEF_FILE_FLAG}${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbbbind.def DEFINE_SYMBOL "" VERSION ${TBBBIND_BINARY_VERSION}.${TBB_BINARY_MINOR_VERSION} - SOVERSION ${TBBBIND_BINARY_VERSION}) + SOVERSION ${TBBBIND_BINARY_VERSION} + LINK_FLAGS ${TBB_LINK_DEF_FILE_FLAG}${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbbbind.def + LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbbbind.def +) # Prefer using target_link_options instead of target_link_libraries to specify link options because # target_link_libraries may incorrectly handle some options (on Windows, for example). diff --git a/src/tbbmalloc/CMakeLists.txt b/src/tbbmalloc/CMakeLists.txt index b3270a130e..4887a04f33 100644 --- a/src/tbbmalloc/CMakeLists.txt +++ b/src/tbbmalloc/CMakeLists.txt @@ -53,10 +53,12 @@ target_compile_options(tbbmalloc # Avoid use of target_link_libraries here as it changes /DEF option to \DEF on Windows. set_target_properties(tbbmalloc PROPERTIES - LINK_FLAGS ${TBB_LINK_DEF_FILE_FLAG}${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbbmalloc.def DEFINE_SYMBOL "" VERSION ${TBBMALLOC_BINARY_VERSION}.${TBB_BINARY_MINOR_VERSION} - SOVERSION ${TBBMALLOC_BINARY_VERSION}) + SOVERSION ${TBBMALLOC_BINARY_VERSION} + LINK_FLAGS ${TBB_LINK_DEF_FILE_FLAG}${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbbmalloc.def + LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-tbbmalloc.def +) # Prefer using target_link_options instead of target_link_libraries to specify link options because # target_link_libraries may incorrectly handle some options (on Windows, for example). diff --git a/src/tbbmalloc/shared_utils.h b/src/tbbmalloc/shared_utils.h index 6904663e85..4dc82a04d5 100644 --- a/src/tbbmalloc/shared_utils.h +++ b/src/tbbmalloc/shared_utils.h @@ -51,11 +51,6 @@ static inline T alignUpGeneric(T arg, uintptr_t alignment) { return arg; } -template<typename T, size_t N> // generic function to find length of array -inline size_t arrayLength(const T(&)[N]) { - return N; -} - /* * Compile time Log2 calculation */ diff --git a/src/tbbmalloc_proxy/CMakeLists.txt b/src/tbbmalloc_proxy/CMakeLists.txt index 6ebc39ac2c..21dcda9516 100644 --- a/src/tbbmalloc_proxy/CMakeLists.txt +++ b/src/tbbmalloc_proxy/CMakeLists.txt @@ -44,6 +44,7 @@ if (UNIX AND NOT APPLE) # Avoid use of target_link_libraries here as it changes /DEF option to \DEF on Windows.
set_target_properties(tbbmalloc_proxy PROPERTIES LINK_FLAGS ${TBB_LINK_DEF_FILE_FLAG}${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-proxy.def + LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/def/${TBB_DEF_FILE_PREFIX}-proxy.def DEFINE_SYMBOL "") endif() diff --git a/src/tbbmalloc_proxy/proxy.cpp b/src/tbbmalloc_proxy/proxy.cpp index 514ff71398..21f3b8f71d 100644 --- a/src/tbbmalloc_proxy/proxy.cpp +++ b/src/tbbmalloc_proxy/proxy.cpp @@ -347,7 +347,10 @@ void operator delete[](void* ptr, const std::nothrow_t&) noexcept { #include "function_replacement.h" -#include "../tbbmalloc/shared_utils.h" +template<typename T, size_t N> // generic function to find length of array +inline size_t arrayLength(const T(&)[N]) { + return N; +} void __TBB_malloc_safer_delete( void *ptr) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8fcfb2b444..82a23de7b3 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -248,6 +248,13 @@ endif() tbb_add_test(SUBDIR tbb NAME test_tbb_header DEPENDENCIES TBB::tbb) target_sources(test_tbb_header PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/tbb/test_tbb_header_secondary.cpp) +if (NOT "${TBB_OPENMP_FLAG}" STREQUAL "" AND NOT "${CMAKE_SYSTEM_PROCESSOR}" MATCHES "mips") + tbb_add_test(SUBDIR tbb NAME test_openmp DEPENDENCIES TBB::tbb) + set_target_properties(test_openmp PROPERTIES + COMPILE_FLAGS ${TBB_OPENMP_FLAG} + LINK_FLAGS ${TBB_OPENMP_FLAG} + ) +endif() # Define the conformance tests tbb_add_test(SUBDIR conformance NAME conformance_tick_count DEPENDENCIES TBB::tbb) diff --git a/test/common/common_arena_constraints.h b/test/common/common_arena_constraints.h index 502dcd2126..561a9cd505 100644 --- a/test/common/common_arena_constraints.h +++ b/test/common/common_arena_constraints.h @@ -14,7 +14,6 @@ limitations under the License. */ -#define TBB_PREVIEW_NUMA_SUPPORT 1 #include "tbb/detail/_config.h" #include "common/test.h" diff --git a/test/common/concurrent_associative_common.h b/test/common/concurrent_associative_common.h index 1dc1446eff..560f8af4e4 100644 --- a/test/common/concurrent_associative_common.h +++ b/test/common/concurrent_associative_common.h @@ -606,6 +606,8 @@ void test_basic_common() REQUIRE(!ccont.range().empty()); REQUIRE((256 == CheckRecursiveRange(cont.range()).first)); REQUIRE((256 == CheckRecursiveRange(ccont.range()).first)); + REQUIRE(cont.range().grainsize() > 0); + REQUIRE(ccont.range().grainsize() > 0); // void swap(T&); cont.swap(newcont); @@ -1486,4 +1488,58 @@ void test_insert_by_generic_pair() { CountingKey::reset(); } +template <typename Container> +void test_swap_not_always_equal_allocator() { + static_assert(std::is_same>::value, + "Incorrect allocator in not always equal test"); + Container c1{}; + Container c2{Value::make(1), Value::make(2)}; + + Container c1_copy = c1; + Container c2_copy = c2; + + c1.swap(c2); + + REQUIRE_MESSAGE(c1 == c2_copy, "Incorrect swap with not always equal allocator"); + REQUIRE_MESSAGE(c2 == c1_copy, "Incorrect swap with not always equal allocator"); +} + +#if TBB_USE_EXCEPTIONS +template <typename Container> +void test_exception_on_copy_ctor() { + Container c1; + c1.emplace(Value::make(ThrowOnCopy{})); + + using container_allocator_type = std::allocator<Container>; + using alloc_traits = std::allocator_traits<container_allocator_type>; + container_allocator_type container_allocator; + Container* c2_ptr = alloc_traits::allocate(container_allocator, 1); + + ThrowOnCopy::activate(); + // Test copy ctor + try { + alloc_traits::construct(container_allocator, c2_ptr, c1); + } catch ( int error_code ) { + REQUIRE_MESSAGE(error_code == ThrowOnCopy::error_code(), "Incorrect code was thrown"); + }
+ REQUIRE_MESSAGE(c2_ptr->empty(), "Incorrect container state after throwing copy constructor"); + + alloc_traits::deallocate(container_allocator, c2_ptr, 1); + c2_ptr = alloc_traits::allocate(container_allocator, 1); + + // Test copy ctor with allocator + try { + auto value_allocator = c1.get_allocator(); + alloc_traits::construct(container_allocator, c2_ptr, c1, value_allocator); + } catch( int error_code ) { + REQUIRE_MESSAGE(error_code == ThrowOnCopy::error_code(), "Incorrect code was thrown"); + } + + REQUIRE_MESSAGE(c2_ptr->empty(), "Incorrect container state after throwing copy ctor with allocator"); + alloc_traits::deallocate(container_allocator, c2_ptr, 1); + ThrowOnCopy::deactivate(); +} +#endif // TBB_USE_EXCEPTIONS + #endif // __TBB_test_common_concurrent_associative_common_H diff --git a/test/common/concurrent_ordered_common.h b/test/common/concurrent_ordered_common.h index b1eab37b53..36a122e2a0 100644 --- a/test/common/concurrent_ordered_common.h +++ b/test/common/concurrent_ordered_common.h @@ -65,6 +65,7 @@ void check_container_order( const Container& cont ) { template void test_ordered_methods() { Container cont; + const Container& ccont = cont; int r, random_threshold = 10, uncontained_key = random_threshold / 2; for (int i = 0; i < 100; ++i) { @@ -97,12 +98,24 @@ void test_ordered_methods() { } } + typename Container::range_type cont_range = cont.range(); + typename Container::const_range_type ccont_range = ccont.range(); + REQUIRE_MESSAGE(cont_range.size() == ccont_range.size(), "Incorrect ordered container range size"); + REQUIRE_MESSAGE(cont_range.size() == cont.size(), "Incorrect ordered container range size"); + typename Container::iterator l_bound = cont.lower_bound(key); typename Container::iterator u_bound = cont.upper_bound(key); REQUIRE_MESSAGE(l_bound == l_bound_check, "lower_bound() returned wrong iterator"); REQUIRE_MESSAGE(u_bound == u_bound_check, "upper_bound() returned wrong iterator"); + using const_iterator = typename Container::const_iterator; + const_iterator cl_bound = ccont.lower_bound(key); + const_iterator cu_bound = ccont.upper_bound(key); + + REQUIRE_MESSAGE(cl_bound == const_iterator(l_bound), "lower_bound() const returned wrong iterator"); + REQUIRE_MESSAGE(cu_bound == const_iterator(u_bound), "upper_bound() const returned wrong iterator"); + REQUIRE((l_bound == eq_range.first && u_bound == eq_range.second)); } } @@ -284,6 +297,8 @@ void check_heterogeneous_bound_functions() { "incorrect key_type for heterogeneous bounds test"); // Initialization Container c; + const Container& cc = c; + int size = 10; for (int i = 0; i < size; ++i) { c.insert(Value::make(i)); @@ -300,6 +315,8 @@ void check_heterogeneous_bound_functions() { REQUIRE_MESSAGE(c.lower_bound(k) == c.lower_bound(key), "Incorrect heterogeneous lower_bound return value"); REQUIRE_MESSAGE(c.upper_bound(k) == c.upper_bound(key), "Incorrect heterogeneous upper_bound return value"); + REQUIRE_MESSAGE(cc.lower_bound(k) == cc.lower_bound(key), "Incorrect const heterogeneous lower_bound return value"); + REQUIRE_MESSAGE(cc.upper_bound(k) == cc.upper_bound(key), "Incorrect const heterogeneous upper_bound return value"); } } diff --git a/test/common/concurrent_priority_queue_common.h b/test/common/concurrent_priority_queue_common.h index a65206bae9..e7bcac2edd 100644 --- a/test/common/concurrent_priority_queue_common.h +++ b/test/common/concurrent_priority_queue_common.h @@ -125,12 +125,16 @@ void type_tester( const std::vector& vec, Compare comp ) { queue_type q4(q1); examine(q4, 
vec_sorted); + // Copy ctor with allocator + auto alloc = q1.get_allocator(); + queue_type q4_alloc(q1, alloc); + examine(q4_alloc, vec_sorted); + // Constructor from the half-open interval queue_type q5(vec.begin(), vec.end()); examine(q5, vec_sorted); // Constructor from the allocator object - auto alloc = q1.get_allocator(); queue_type q6(alloc); q6.assign(vec.begin(), vec.end()); examine(q6, vec_sorted); diff --git a/test/common/config.h b/test/common/config.h index cec90e9db3..aa0dcf8852 100644 --- a/test/common/config.h +++ b/test/common/config.h @@ -45,13 +45,6 @@ #define MAX_TUPLE_TEST_SIZE 5 #endif #else - #if _MSC_VER -// test sizes <= 8 don't get "decorated name length exceeded" errors. (disable : 4503) - #if MAX_TUPLE_TEST_SIZE > 8 - #undef MAX_TUPLE_TEST_SIZE - #define MAX_TUPLE_TEST_SIZE 8 - #endif - #endif #if MAX_TUPLE_TEST_SIZE > __TBB_VARIADIC_MAX #undef MAX_TUPLE_TEST_SIZE #define MAX_TUPLE_TEST_SIZE __TBB_VARIADIC_MAX diff --git a/test/common/containers_common.h b/test/common/containers_common.h index 4c97a80aae..3374c7bb45 100644 --- a/test/common/containers_common.h +++ b/test/common/containers_common.h @@ -128,6 +128,27 @@ void test_allocator_traits_support() { test_is_always_equal(); } +#if TBB_USE_EXCEPTIONS +struct ThrowOnCopy { + static int error_code() { return 8; }; + static bool is_active; + ThrowOnCopy() = default; + ThrowOnCopy( const ThrowOnCopy& ) { + if (is_active) { + throw error_code(); + } + } + static void activate() { is_active = true; } + static void deactivate() { is_active = false; } + + bool operator<( const ThrowOnCopy& ) const { return true; } + bool operator==( const ThrowOnCopy& ) const { return true; } +}; // struct ThrowOnCopy + +bool ThrowOnCopy::is_active = false; + +#endif + namespace std { template <typename T> struct hash<std::weak_ptr<T>> { @@ -144,6 +165,15 @@ struct hash<std::weak_ptr<T>> { } }; +#if TBB_USE_EXCEPTIONS +template <> +struct hash<ThrowOnCopy> { + std::size_t operator()( const ThrowOnCopy& ) const { + return 1; + } +}; +#endif + template <typename T> struct equal_to<std::weak_ptr<T>> { std::size_t operator()( const std::weak_ptr<T>& rhs, const std::weak_ptr<T>& lhs ) const { diff --git a/test/common/graph_utils.h b/test/common/graph_utils.h index f337e4f593..c50e177b80 100644 --- a/test/common/graph_utils.h +++ b/test/common/graph_utils.h @@ -314,9 +314,7 @@ struct harness_counting_receiver : public tbb::flow::receiver<T> { size_t n = my_count; CHECK( n == num_copies*max_value ); } - - void reset_receiver(tbb::flow::reset_flags /*f*/) override { my_count = 0; } -}; + }; //! Counts the number of puts received template< typename T > struct harness_mapped_receiver : public tbb::flow::receiver<T> { @@ -335,6 +333,11 @@ struct harness_mapped_receiver : public tbb::flow::receiver<T> { my_count = 0; } +#if __INTEL_COMPILER <= 2021 + // Suppress a superfluous diagnostic about the missing virtual keyword on the destructor of + // a derived class whose parent class declares its destructor virtual.
+ virtual +#endif ~harness_mapped_receiver() { if ( my_multiset ) delete my_multiset; } @@ -374,7 +377,7 @@ struct harness_mapped_receiver : public tbb::flow::receiver<T> { } } - void reset_receiver(tbb::flow::reset_flags /*f*/) override { + void reset_receiver(tbb::flow::reset_flags /*f*/) { my_count = 0; if(my_multiset) delete my_multiset; my_multiset = new multiset_type; diff --git a/test/common/memory_usage.h b/test/common/memory_usage.h index 594491c2ce..88b5f2d7b6 100644 --- a/test/common/memory_usage.h +++ b/test/common/memory_usage.h @@ -30,7 +30,7 @@ #include <sys/utsname.h> /* for use in LinuxKernelVersion() */ // Parse file utility for THP info -#include "../../src/tbbmalloc/shared_utils.h" +#include "src/tbbmalloc/shared_utils.h" #elif __APPLE__ && !__ARM_ARCH #include <mach/mach.h> diff --git a/test/common/node_handling_support.h b/test/common/node_handling_support.h index 0241c207c9..81dedb63bb 100644 --- a/test/common/node_handling_support.h +++ b/test/common/node_handling_support.h @@ -83,6 +83,7 @@ void test_node_handle( Container test_table ) { nh = test_table.unsafe_extract(test_table.begin()); REQUIRE_MESSAGE(!nh.empty(), "Node handle: node_type object is empty after valid move assignment"); + REQUIRE_MESSAGE(nh.get_allocator() == test_table.get_allocator(), "Node handle: node_type object allocator is incorrect"); REQUIRE_MESSAGE(compare_handle_getters(nh, expected_value), "Node handle: node_type object does not contains expected value after valid move assignment"); diff --git a/test/common/utils.h b/test/common/utils.h index 972f754b5e..f9745b1758 100644 --- a/test/common/utils.h +++ b/test/common/utils.h @@ -397,6 +397,17 @@ tbb::blocked_range<T*> make_blocked_range( T(& array)[N] ) { return tbb::blocked_range<T*>(array, array + N); } +template <typename T> +void check_range_bounds_after_splitting( const tbb::blocked_range<T>& original, const tbb::blocked_range<T>& first, + const tbb::blocked_range<T>& second, const T& expected_first_end ) +{ + REQUIRE(first.begin() == original.begin()); + REQUIRE(first.end() == expected_first_end); + REQUIRE(second.begin() == expected_first_end); + REQUIRE(second.end() == original.end()); + REQUIRE(first.size() + second.size() == original.size()); +} + //! Functor with N dummy iterations in its body class DummyBody { int m_numIters; diff --git a/test/conformance/conformance_blocked_range.cpp b/test/conformance/conformance_blocked_range.cpp index fbaf629eb7..a6273a6b6d 100644 --- a/test/conformance/conformance_blocked_range.cpp +++ b/test/conformance/conformance_blocked_range.cpp @@ -108,7 +108,6 @@ void ParallelTest() { } } - //! Testing blocked_range interface //! \brief \ref interface \ref requirement TEST_CASE("Basic serial") { @@ -124,6 +123,26 @@ TEST_CASE("Basic parallel") { } } +//! Testing blocked_range with proportional splitting +//!
\brief \ref interface \ref requirement +TEST_CASE("blocked_range proportional splitting") { + tbb::blocked_range<int> original(0, 100); + tbb::blocked_range<int> first(original); + tbb::proportional_split ps(3, 1); + tbb::blocked_range<int> second(first, ps); + + // Test proportional_split -> split conversion + tbb::blocked_range<int> copy(original); + tbb::split s = tbb::split(ps); + tbb::blocked_range<int> splitted_copy(copy, s); + CHECK(copy.size() == original.size() / 2); + CHECK(splitted_copy.size() == copy.size()); + + + int expected_first_end = original.begin() + ps.left() * (original.end() - original.begin()) / (ps.left() + ps.right()); + utils::check_range_bounds_after_splitting(original, first, second, expected_first_end); +} + #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT //! Testing blocked_range deduction guides //! \brief \ref interface diff --git a/test/conformance/conformance_blocked_range2d.cpp b/test/conformance/conformance_blocked_range2d.cpp index 2c56e471ba..d041b4ea2f 100644 --- a/test/conformance/conformance_blocked_range2d.cpp +++ b/test/conformance/conformance_blocked_range2d.cpp @@ -150,6 +150,24 @@ TEST_CASE("Parallel test") { } } +//! Testing blocked_range2d with proportional splitting +//! \brief \ref interface \ref requirement +TEST_CASE("blocked_range2d proportional splitting") { + tbb::blocked_range2d<int> original(0, 100, 0, 100); + tbb::blocked_range2d<int> first(original); + tbb::proportional_split ps(3, 1); + tbb::blocked_range2d<int> second(first, ps); + + int expected_first_end = original.rows().begin() + ps.left() * (original.rows().end() - original.rows().begin()) / (ps.left() + ps.right()); + if (first.rows().size() == second.rows().size()) { + // Splitting was done by cols + utils::check_range_bounds_after_splitting(original.cols(), first.cols(), second.cols(), expected_first_end); + } else { + // Splitting was done by rows + utils::check_range_bounds_after_splitting(original.rows(), first.rows(), second.rows(), expected_first_end); + } +} + #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT //! Testing blocked_range2d deduction guides //! \brief \ref interface diff --git a/test/conformance/conformance_blocked_range3d.cpp b/test/conformance/conformance_blocked_range3d.cpp index 5aa63479d2..d2780b9696 100644 --- a/test/conformance/conformance_blocked_range3d.cpp +++ b/test/conformance/conformance_blocked_range3d.cpp @@ -183,6 +183,29 @@ TEST_CASE("Parallel test") { } } +//! Testing blocked_range3d with proportional splitting +//! \brief \ref interface \ref requirement +TEST_CASE("blocked_range3d proportional splitting") { + tbb::blocked_range3d<int> original(0, 100, 0, 100, 0, 100); + tbb::blocked_range3d<int> first(original); + tbb::proportional_split ps(3, 1); + tbb::blocked_range3d<int> second(first, ps); + + int expected_first_end = original.rows().begin() + ps.left() * (original.rows().end() - original.rows().begin()) / (ps.left() + ps.right()); + if (first.rows().size() == second.rows().size()) { + if (first.cols().size() == second.cols().size()) { + // Splitting was done by pages + utils::check_range_bounds_after_splitting(original.pages(), first.pages(), second.pages(), expected_first_end); + } else { + // Splitting was done by cols + utils::check_range_bounds_after_splitting(original.cols(), first.cols(), second.cols(), expected_first_end); + } + } else { + // Splitting was done by rows + utils::check_range_bounds_after_splitting(original.rows(), first.rows(), second.rows(), expected_first_end); + } +} + #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT //! Testing blocked_range3d deduction guides //!
\brief \ref interface diff --git a/test/conformance/conformance_blocked_rangeNd.cpp b/test/conformance/conformance_blocked_rangeNd.cpp index 99f065276a..552bcfbd9f 100644 --- a/test/conformance/conformance_blocked_rangeNd.cpp +++ b/test/conformance/conformance_blocked_rangeNd.cpp @@ -254,3 +254,20 @@ TEST_CASE("Parallel test") { } } +//! Testing blocked_rangeNd with proportional splitting +//! \brief \ref interface \ref requirement +TEST_CASE("blocked_rangeNd proportional splitting") { + tbb::blocked_rangeNd<int, 2> original{{0, 100}, {0, 100}}; + tbb::blocked_rangeNd<int, 2> first(original); + tbb::proportional_split ps(3, 1); + tbb::blocked_rangeNd<int, 2> second(first, ps); + + int expected_first_end = original.dim(0).begin() + ps.left() * (original.dim(0).end() - original.dim(0).begin()) / (ps.left() + ps.right()); + if (first.dim(0).size() == second.dim(0).size()) { + // Splitting was done by cols + utils::check_range_bounds_after_splitting(original.dim(1), first.dim(1), second.dim(1), expected_first_end); + } else { + // Splitting was done by rows + utils::check_range_bounds_after_splitting(original.dim(0), first.dim(0), second.dim(0), expected_first_end); + } +} diff --git a/test/conformance/conformance_combinable.cpp b/test/conformance/conformance_combinable.cpp index 7232a1b9eb..4d54fcc9ab 100644 --- a/test/conformance/conformance_combinable.cpp +++ b/test/conformance/conformance_combinable.cpp @@ -203,6 +203,12 @@ void RunParallelScalarTests(const char* /* test_name */) { combine_sum += sums.combine(my_combine); combine_ref_sum += sums.combine(my_combine_ref); + // test combinable::clear() + tbb::combinable<int> sums_to_clear; + tbb::parallel_for( tbb::blocked_range<int>(0, N, 10000), ParallelScalarBody(sums_to_clear) ); + sums_to_clear.clear(); + CHECK_MESSAGE(sums_to_clear.combine(my_combine) == 0, "Failed combinable::clear test"); + // test parallel combinable preinitialized with a functor that returns 0 FunctorAddFinit my_finit_decl; tbb::combinable<int> finit_combinable(my_finit_decl); diff --git a/test/conformance/conformance_concurrent_priority_queue.cpp b/test/conformance/conformance_concurrent_priority_queue.cpp index 193e7d42c1..2f190c5d07 100644 --- a/test/conformance/conformance_concurrent_priority_queue.cpp +++ b/test/conformance/conformance_concurrent_priority_queue.cpp @@ -171,6 +171,7 @@ void test_steal_move_ctor() { container_type dst{std::move(fixture.cpq_src)}; REQUIRE_MESSAGE(previous == MoveOperationTracker::special_member_calls(), "Steal move ctor should not create any new elements"); REQUIRE_MESSAGE(dst == src_copy, "cpq content changed during steal move"); + REQUIRE_MESSAGE(!(dst != src_copy), "cpq content changed during steal move"); } void test_steal_move_ctor_with_allocator() { @@ -185,6 +186,7 @@ void test_steal_move_ctor_with_allocator() { fixture_type::cpq_type dst(std::move(fixture.cpq_src), arena_fixture.source_allocator); REQUIRE_MESSAGE(previous == MoveOperationTracker::special_member_calls(), "Steal move ctor should not create any new elements"); REQUIRE_MESSAGE(dst == src_copy, "cpq content changed during steal move"); + REQUIRE_MESSAGE(!(dst != src_copy), "cpq content changed during steal move"); } void test_per_element_move_ctor_with_allocator() { @@ -202,6 +204,7 @@ void test_per_element_move_ctor_with_allocator() { REQUIRE_MESSAGE(move_ctor_called_cpq_size_times == MoveOperationTracker::special_member_calls(), "Per element move ctor should move initialize all new elements"); REQUIRE_MESSAGE(dst == src_copy, "cpq content changed during move"); + REQUIRE_MESSAGE(!(dst !=
src_copy), "cpq content changed during move"); } void test_steal_move_assign_operator() { @@ -216,6 +219,7 @@ void test_steal_move_assign_operator() { REQUIRE_MESSAGE(previous == MoveOperationTracker::special_member_calls(), "Steal move assign operator should not create any new elements"); REQUIRE_MESSAGE(dst == src_copy, "cpq content changed during steal move assignment"); + REQUIRE_MESSAGE(!(dst != src_copy), "cpq content changed during steal move assignment"); } void test_steal_move_assign_operator_with_stateful_allocator() { @@ -232,6 +236,7 @@ void test_steal_move_assign_operator_with_stateful_allocator() { dst = std::move(fixture.cpq_src); REQUIRE_MESSAGE(previous == MoveOperationTracker::special_member_calls(), "Steal move assign operator should not create any new elements"); REQUIRE_MESSAGE(dst == src_copy, "cpq content changed during steal move assignment"); + REQUIRE_MESSAGE(!(dst != src_copy), "cpq content changed during steal move assignment"); } void test_per_element_move_assign_operator() { @@ -250,6 +255,7 @@ void test_per_element_move_assign_operator() { REQUIRE_MESSAGE(move_ctor_called_cpq_size_times == MoveOperationTracker::special_member_calls(), "Per element move assignment should move initialize all new elements"); REQUIRE_MESSAGE(dst == src_copy, "cpq content changed during per element move assignment"); + REQUIRE_MESSAGE(!(dst != src_copy), "cpq content changed during per element move assignment"); } void test_cpq_move_constructor() { @@ -450,6 +456,7 @@ void test_assignment_clear_swap() { REQUIRE_MESSAGE(!qo.empty(), "Failed assignment empty test"); REQUIRE_MESSAGE(v == toVector(qo), "Failed assignment equality test"); REQUIRE_MESSAGE(qo == q, "Failed assignment equality test"); + REQUIRE_MESSAGE(!(qo != q), "Failed assignment inequality test"); cpq_type assigned_q; // Testing assign member function diff --git a/test/conformance/conformance_enumerable_thread_specific.cpp b/test/conformance/conformance_enumerable_thread_specific.cpp index bbfd18f969..5660591447 100644 --- a/test/conformance/conformance_enumerable_thread_specific.cpp +++ b/test/conformance/conformance_enumerable_thread_specific.cpp @@ -86,6 +86,8 @@ class minimal: utils::NoAssign { ~minimal() { ++destruction_counter; REQUIRE(is_constructed); is_constructed = false; } void set_value( const int i ) { REQUIRE(is_constructed); my_value = i; } int value( ) const { REQUIRE(is_constructed); return my_value; } + + bool operator==( const minimal& other ) { return my_value == other.my_value; } }; static size_t AlignMask = 0; // set to cache-line-size - 1 @@ -135,6 +137,8 @@ class ThrowingConstructor { public: int m_cnt; ThrowingConstructor() : m_checktype(), m_throwing_field() { m_cnt = 0;} + + bool operator==( const ThrowingConstructor& other ) { return m_cnt == other.m_cnt; } private: }; @@ -400,7 +404,8 @@ void run_parallel_scalar_tests_nocombine(const char* /* test_name */, const char } // use const_range_type - typename ets_type::const_range_type cr = sums.range(); + const ets_type& csums = sums; + typename ets_type::const_range_type cr = csums.range(); for ( typename ets_type::const_range_type::iterator i = cr.begin(); i != cr.end(); ++i ) { test_helper::sum(const_range_sum, *i); } @@ -409,6 +414,15 @@ void run_parallel_scalar_tests_nocombine(const char* /* test_name */, const char typedef typename tbb::enumerable_thread_specific, tbb::ets_key_per_instance> cached_ets_type; cached_ets_type cconst(sums); + tbb::parallel_for( tbb::blocked_range(0, N, RANGE_MIN), [&]( const tbb::blocked_range& ) { + bool 
exists = false; + T& ref = cconst.local(exists); + CHECK((exists || ref == T())); + } ); + cached_ets_type cconst_to_assign1 = cconst; + cached_ets_type cconst_to_assign2; + cconst_to_assign2 = std::move(cconst_to_assign1); + REQUIRE(cconst_to_assign2.size() == cconst.size()); for ( typename cached_ets_type::const_iterator i = cconst.begin(); i != cconst.end(); ++i ) { test_helper::sum(cconst_sum, *i); @@ -668,6 +682,11 @@ void run_parallel_vector_tests(const char* /* test_name */, const char *allocato auto it2(it); it = fvs.begin(); REQUIRE(it != it2); + typename tbb::flattened2d::iterator it3; + typename tbb::flattened2d::const_iterator cit = fvs.begin(); + it3 = cit; + REQUIRE(it3 == cit); + REQUIRE(it3.operator->() == &(*it3)); for(typename tbb::flattened2d::const_iterator i = fvs.begin(); i != fvs.end(); ++i) { ++elem_cnt; diff --git a/test/conformance/conformance_graph.cpp b/test/conformance/conformance_graph.cpp index 426411d0b1..5784cc1471 100644 --- a/test/conformance/conformance_graph.cpp +++ b/test/conformance/conformance_graph.cpp @@ -32,16 +32,6 @@ using namespace tbb::flow; using namespace std; -//! const graph -//! \brief \ref error_guessing -TEST_CASE("const graph"){ - const graph g; - CHECK_MESSAGE((g.cbegin() == g.cend()), "Starting graph is empty"); - - graph g2; - CHECK_MESSAGE((g2.begin() == g2.end()), "Starting graph is empty"); -} - //! Graph reset //! \brief \ref requirement TEST_CASE("graph reset") { @@ -144,4 +134,3 @@ TEST_CASE("graph reset") { // TODO: Add check that default invocaiton is the same as with rf_reset_protocol // TODO: See if specification for broadcast_node and other service nodes is sufficient for reset checks } - diff --git a/test/conformance/conformance_mutex.cpp b/test/conformance/conformance_mutex.cpp index 3beb1d137a..a7fc350c23 100644 --- a/test/conformance/conformance_mutex.cpp +++ b/test/conformance/conformance_mutex.cpp @@ -86,6 +86,15 @@ void TestTryAcquire(const char* mutex_name) { } } +template <> +void TestTryAcquire( const char* mutex_name ) { + tbb::null_mutex tested_mutex; + typename tbb::null_mutex::scoped_lock lock(tested_mutex); + CHECK_MESSAGE(lock.try_acquire(tested_mutex), "ERROR for " << mutex_name << ": try_acquire failed though it should not"); + lock.release(); + CHECK_MESSAGE(lock.try_acquire(tested_mutex), "ERROR for " << mutex_name << ": try_acquire failed though it should not"); +} + //! 
Test try_acquire functionality of a non-reenterable mutex template void TestTryAcquireReader(const char* mutex_name) { @@ -112,6 +121,17 @@ void TestTryAcquireReader(const char* mutex_name) { } } +template <> +void TestTryAcquireReader( const char* mutex_name ) { + tbb::null_rw_mutex tested_mutex; + typename tbb::null_rw_mutex::scoped_lock lock(tested_mutex, false); + CHECK_MESSAGE(lock.try_acquire(tested_mutex, false), "Error for " << mutex_name << ": try_acquire on read failed though it should not"); + CHECK_MESSAGE(lock.try_acquire(tested_mutex, true), "Error for " << mutex_name << ": try_acquire on write failed though it should not"); + lock.release(); + CHECK_MESSAGE(lock.try_acquire(tested_mutex, false), "Error for " << mutex_name << ": try_acquire on read failed though it should not"); + CHECK_MESSAGE(lock.try_acquire(tested_mutex, true), "Error for " << mutex_name << ": try_acquire on write failed though it should not"); +} + template struct ArrayCounter { using mutex_type = M; @@ -417,6 +437,7 @@ TEST_CASE("Lockable requirement test") { TestTryAcquire("Queuing RW Mutex"); TestTryAcquire("Speculative Spin Mutex"); TestTryAcquire("Speculative Spin RW Mutex"); + TestTryAcquire("Null Mutex"); } //! Testing ReaderWriterMutex requirements @@ -434,6 +455,7 @@ TEST_CASE("Shared mutexes (reader/writer) test") { TestRWStateMultipleChange("Queuing RW Mutex"); TestTryAcquireReader("Speculative Spin RW Mutex"); TestRWStateMultipleChange("Speculative Spin RW Mutex"); + TestTryAcquireReader("Null RW Mutex"); } //! Testing ISO C++ Mutex and Shared Mutex requirements. diff --git a/test/conformance/conformance_parallel_for.cpp b/test/conformance/conformance_parallel_for.cpp index 7797cffba5..150a47ea0e 100644 --- a/test/conformance/conformance_parallel_for.cpp +++ b/test/conformance/conformance_parallel_for.cpp @@ -232,6 +232,7 @@ void TestParallelForWithStepSupport() { static tbb::affinity_partitioner affinity_p; tbb::auto_partitioner auto_p; tbb::simple_partitioner simple_p; + tbb::static_partitioner static_p; empty_partitioner_tag p; // Try out all partitioner combinations @@ -239,6 +240,7 @@ void TestParallelForWithStepSupport() { TestParallelForWithStepSupportHelper< Flavor,T,const tbb::auto_partitioner >(auto_p); TestParallelForWithStepSupportHelper< Flavor,T,const tbb::simple_partitioner >(simple_p); TestParallelForWithStepSupportHelper< Flavor,T,tbb::affinity_partitioner >(affinity_p); + TestParallelForWithStepSupportHelper< Flavor,T,tbb::static_partitioner >(static_p); // Testing some corner cases tbb::parallel_for(static_cast(2), static_cast(1), static_cast(1), TestFunctor()); @@ -309,4 +311,7 @@ TEST_CASE("Testing parallel_for with partitioners") { parallel_for(Range1(false, true), b, tbb::auto_partitioner()); parallel_for(Range6(false, true), b, tbb::auto_partitioner()); + + parallel_for(Range1(true, false), b, tbb::static_partitioner()); + parallel_for(Range6(false, true), b, tbb::static_partitioner()); } diff --git a/test/conformance/conformance_task_arena.cpp b/test/conformance/conformance_task_arena.cpp index 15b0c42823..466a622cb9 100644 --- a/test/conformance/conformance_task_arena.cpp +++ b/test/conformance/conformance_task_arena.cpp @@ -96,3 +96,12 @@ TEST_CASE("Task arena observer") { REQUIRE(observer.is_callbacks_called()); } +//! Test task arena copy constructor +//! 
\brief \ref interface \ref requirement +TEST_CASE("Task arena copy constructor") { + tbb::task_arena arena(1); + tbb::task_arena copy = arena; + + REQUIRE(arena.max_concurrency() == copy.max_concurrency()); + REQUIRE(arena.is_active() == copy.is_active()); +} diff --git a/test/tbb/test_arena_priorities.cpp b/test/tbb/test_arena_priorities.cpp index 405346dd37..5095e094dd 100644 --- a/test/tbb/test_arena_priorities.cpp +++ b/test/tbb/test_arena_priorities.cpp @@ -14,9 +14,6 @@ limitations under the License. */ - -#define TBB_PREVIEW_NUMA_SUPPORT __TBB_CPF_BUILD - #include "common/test.h" #include "tbb/task_group.h" @@ -88,9 +85,12 @@ tbb::task_arena* do_allocate_and_construct( const ArenaArgs&... arena_args ) break; case explicit_initialize_with_different_constructor_parameters: - result_arena = new tbb::task_arena( dummy_max_concurrency, dummy_reserved_for_masters ); - result_arena->initialize( arena_args... ); + { + tbb::task_arena tmp(dummy_max_concurrency, dummy_reserved_for_masters); + result_arena = new tbb::task_arena(tmp); + result_arena->initialize(arena_args...); break; + } default: REQUIRE_MESSAGE( false, "Not implemented method of initialization." ); @@ -124,7 +124,6 @@ tbb::task_arena* allocate_and_construct_arena( { const int reserved_for_masters = 0; -#if TBB_PREVIEW_NUMA_SUPPORT static bool use_constraints = false; use_constraints = !use_constraints; @@ -132,7 +131,6 @@ tbb::task_arena* allocate_and_construct_arena( tbb::task_arena::constraints properties{tbb::task_arena::automatic, arena_max_concurrency}; return decide_on_arguments( properties, reserved_for_masters, a_priority ); } -#endif return decide_on_arguments( arena_max_concurrency, reserved_for_masters, a_priority ); } diff --git a/test/tbb/test_async_node.cpp b/test/tbb/test_async_node.cpp index 3346fc8f20..bcbb751bda 100644 --- a/test/tbb/test_async_node.cpp +++ b/test/tbb/test_async_node.cpp @@ -725,7 +725,7 @@ void test_follows() { node_t node(follows(preds[0], preds[1], preds[2]), unlimited, [&](int input, node_t::gateway_type& gtw) { async_activity.submit(input, &gtw); - }); + }, no_priority); buffer_node buf(g); make_edge(node, buf); @@ -759,7 +759,7 @@ void test_precedes() { node_t node(precedes(successors[0]), unlimited, [&](int input, node_t::gateway_type& gtw) { async_activity.submit(input, &gtw); - }); + }, no_priority); make_edge(start, node); diff --git a/test/tbb/test_broadcast_node.cpp b/test/tbb/test_broadcast_node.cpp index f7dadeb0e3..626ac151f7 100644 --- a/test/tbb/test_broadcast_node.cpp +++ b/test/tbb/test_broadcast_node.cpp @@ -76,9 +76,6 @@ class counting_array_receiver : public tbb::flow::receiver { tbb::flow::graph& graph_reference() const override { return my_graph; } - - void reset_receiver(tbb::flow::reset_flags /*f*/) override { } - }; template< typename T > diff --git a/test/tbb/test_composite_node.cpp b/test/tbb/test_composite_node.cpp index e7bfedd15e..671fc87418 100644 --- a/test/tbb/test_composite_node.cpp +++ b/test/tbb/test_composite_node.cpp @@ -159,6 +159,9 @@ void add_all_nodes (){ output_only_type c_node(g); c_node.set_external_ports(output_tuple); + // Reset is not supposed to do anything. Check that it can be called.
+ g.reset(); + c_node.add_visible_nodes(src, fxn, m_fxn, bc, lim, ind, s, ct, j, q, bf, pq, wo, ovw, seq); c_node.add_nodes(src, fxn, m_fxn, bc, lim, ind, s, ct, j, q, bf, pq, wo, ovw, seq); @@ -318,10 +321,10 @@ int test_adder(bool hidden = false) { int sum_total=0; int result=0; for ( int i = 1; i < 4; ++i ) { - s.try_put(i); - c.try_put(i); - sum_total += adder_sum(i); - g.wait_for_all(); + s.try_put(i); + c.try_put(i); + sum_total += adder_sum(i); + g.wait_for_all(); } int j; @@ -342,10 +345,10 @@ int test_adder(bool hidden = false) { sum_total=0; result=0; for ( int i = 10; i < 20; ++i ) { - s.try_put(i); - c.try_put(i); - sum_total += adder_sum(i); - g.wait_for_all(); + s.try_put(i); + c.try_put(i); + sum_total += adder_sum(i); + g.wait_for_all(); } for ( int i = 10; i < 20; ++i ) { diff --git a/test/tbb/test_concurrent_hash_map.cpp b/test/tbb/test_concurrent_hash_map.cpp index a28f9cdee3..e263fc4754 100644 --- a/test/tbb/test_concurrent_hash_map.cpp +++ b/test/tbb/test_concurrent_hash_map.cpp @@ -557,7 +557,8 @@ TEST_CASE("Test exception in constructors") { using allocator_type = StaticSharedCountingAllocator>>; using map_type = tbb::concurrent_hash_map, allocator_type>; - auto init_list = {std::pair(1, 42), std::pair(2, 42), std::pair(3, 42)}; + auto init_list = {std::pair(1, 42), std::pair(2, 42), std::pair(3, 42), + std::pair(4, 42), std::pair(5, 42), std::pair(6, 42)}; map_type map(init_list); allocator_type::set_limits(1); @@ -582,6 +583,24 @@ TEST_CASE("Test exception in constructors") { map_type map4(init_list, test_hash); utils::suppress_unused_warning(map4); }(), const std::bad_alloc); + + REQUIRE_THROWS_AS( [&] { + map_type map5(init_list); + utils::suppress_unused_warning(map5); + }(), const std::bad_alloc); + + allocator_type::set_limits(0); + map_type big_map{}; + for (std::size_t i = 0; i < 1000; ++i) { + big_map.insert(std::pair(i, 42)); + } + + allocator_type::init_counters(); + allocator_type::set_limits(300); + REQUIRE_THROWS_AS( [&] { + map_type map6(big_map); + utils::suppress_unused_warning(map6); + }(), const std::bad_alloc); } #endif // TBB_USE_EXCEPTIONS diff --git a/test/tbb/test_concurrent_map.cpp b/test/tbb/test_concurrent_map.cpp index 1f48d9d27a..f2dd9ab688 100644 --- a/test/tbb/test_concurrent_map.cpp +++ b/test/tbb/test_concurrent_map.cpp @@ -217,3 +217,31 @@ TEST_CASE("concurrent_map/multimap with specific key/mapped types") { TEST_CASE("broken internal structure for multimap") { test_cycles_absense(); } + +//! \brief \ref error_guessing +TEST_CASE("concurrent_map::swap with not always equal allocator") { + using not_always_equal_alloc_map_type = tbb::concurrent_map, + NotAlwaysEqualAllocator>>; + test_swap_not_always_equal_allocator(); +} + +//! \brief \ref error_guessing +TEST_CASE("concurrent_multimap::swap with not always equal allocator") { + using not_always_equal_alloc_mmap_type = tbb::concurrent_multimap, + NotAlwaysEqualAllocator>>; + test_swap_not_always_equal_allocator(); +} + +#if TBB_USE_EXCEPTIONS +//! \brief \ref error_guessing +TEST_CASE("concurrent_map throwing copy constructor") { + using exception_map_type = tbb::concurrent_map; + test_exception_on_copy_ctor(); +} + +//! 
\brief \ref error_guessing +TEST_CASE("concurrent_multimap throwing copy constructor") { + using exception_mmap_type = tbb::concurrent_multimap; + test_exception_on_copy_ctor<exception_mmap_type>(); +} +#endif // TBB_USE_EXCEPTIONS diff --git a/test/tbb/test_concurrent_queue.cpp b/test/tbb/test_concurrent_queue.cpp index 718d04a3fb..18f1dc86d5 100644 --- a/test/tbb/test_concurrent_queue.cpp +++ b/test/tbb/test_concurrent_queue.cpp @@ -114,6 +114,43 @@ void TestQueueWorksWithSSE() { } #endif /* HAVE_m256 */ } +#if TBB_USE_EXCEPTIONS + int rnd_elem = -1; + int global_counter = -1; + +struct throw_element { + throw_element() = default; + throw_element(const throw_element&) { + if (global_counter++ == rnd_elem) { + throw std::exception{}; + } + } + + throw_element& operator= (const throw_element&) = default; +}; + +template <typename Queue> +void CopyWithThrowElement() { + utils::FastRandom<> rnd(42); + + Queue source; + + constexpr size_t queue_size = 100000; + for (std::size_t i = 0; i < queue_size; ++i) { + source.emplace(); + } + + for (std::size_t i = 0; i < 100; ++i) { + global_counter = 0; + rnd_elem = rnd.get() % queue_size; + + REQUIRE_THROWS_AS( [&] { + Queue copy(source); + utils::suppress_unused_warning(copy); + }(), std::exception); + } +} +#endif // TBB_USE_EXCEPTIONS //! Test work with different types //! \brief \ref error_guessing @@ -154,4 +191,11 @@ TEST_CASE("Test exception in allocation") { }(), const std::bad_alloc); } } + +//! \brief \ref regression \ref error_guessing +TEST_CASE("Test exception in allocation") { + CopyWithThrowElement<tbb::concurrent_queue<throw_element>>(); + CopyWithThrowElement<tbb::concurrent_bounded_queue<throw_element>>(); +} + #endif // TBB_USE_EXCEPTIONS diff --git a/test/tbb/test_concurrent_set.cpp b/test/tbb/test_concurrent_set.cpp index 8be30dda40..d38defa29e 100644 --- a/test/tbb/test_concurrent_set.cpp +++ b/test/tbb/test_concurrent_set.cpp @@ -204,6 +204,32 @@ TEST_CASE("concurrent_multiset with std::scoped_allocator_adaptor") { } //! \brief \ref regression -TEST_CASE("broken internal structure for multimap") { +TEST_CASE("broken internal structure for multiset") { test_cycles_absense(); } + +//! \brief \ref error_guessing +TEST_CASE("concurrent_set::swap with not always equal allocator") { + using not_always_equal_alloc_set_type = tbb::concurrent_set<int, std::less<int>, NotAlwaysEqualAllocator<int>>; + test_swap_not_always_equal_allocator<not_always_equal_alloc_set_type>(); +} + +//! \brief \ref error_guessing +TEST_CASE("concurrent_multiset::swap with not always equal allocator") { + using not_always_equal_alloc_mset_type = tbb::concurrent_multiset<int, std::less<int>, NotAlwaysEqualAllocator<int>>; + test_swap_not_always_equal_allocator<not_always_equal_alloc_mset_type>(); +} + +#if TBB_USE_EXCEPTIONS +//! \brief \ref error_guessing +TEST_CASE("concurrent_set throwing copy constructor") { + using exception_set_type = tbb::concurrent_set; + test_exception_on_copy_ctor<exception_set_type>(); +} + +//! \brief \ref error_guessing +TEST_CASE("concurrent_multiset throwing copy constructor") { + using exception_mset_type = tbb::concurrent_multiset; + test_exception_on_copy_ctor<exception_mset_type>(); +} +#endif // TBB_USE_EXCEPTIONS diff --git a/test/tbb/test_concurrent_unordered_map.cpp b/test/tbb/test_concurrent_unordered_map.cpp index daae51f186..0d68a61800 100644 --- a/test/tbb/test_concurrent_unordered_map.cpp +++ b/test/tbb/test_concurrent_unordered_map.cpp @@ -193,4 +193,50 @@ TEST_CASE("concurrent_unordered map/multimap with specific key/mapped types") { test_specific_types(); } +//!
\brief \ref error_guessing +TEST_CASE("concurrent_unordered_map::swap with not always equal allocator") { + using not_always_equal_alloc_map_type = tbb::concurrent_unordered_map, std::equal_to, + NotAlwaysEqualAllocator>>; + test_swap_not_always_equal_allocator(); +} + +//! \brief \ref error_guessing +TEST_CASE("concurrent_unordered_multimap::swap with not always equal allocator") { + using not_always_equal_alloc_mmap_type = tbb::concurrent_unordered_multimap, std::equal_to, + NotAlwaysEqualAllocator>>; + test_swap_not_always_equal_allocator(); +} + +#if TBB_USE_EXCEPTIONS +//! \brief \ref error_guessing +TEST_CASE("concurrent_unordered_map throwing copy constructor") { + using exception_map_type = tbb::concurrent_unordered_map; + test_exception_on_copy_ctor(); +} + +//! \brief \ref error_guessing +TEST_CASE("concurrent_unordered_multimap throwing copy constructor") { + using exception_mmap_type = tbb::concurrent_unordered_multimap; + test_exception_on_copy_ctor(); +} + +//! \brief \ref error_guessing +TEST_CASE("concurrent_unordered_map whitebox throwing copy constructor") { + using allocator_type = StaticSharedCountingAllocator>>; + using exception_mmap_type = tbb::concurrent_unordered_map, std::equal_to, allocator_type>; + + exception_mmap_type map; + for (std::size_t i = 0; i < 10; ++i) { + map.insert(std::pair(i, 42)); + } + + allocator_type::set_limits(1); + REQUIRE_THROWS_AS( [&] { + exception_mmap_type map1(map); + utils::suppress_unused_warning(map1); + }(), const std::bad_alloc); +} + +#endif // TBB_USE_EXCEPTIONS + // TODO: add test_scoped_allocator support with broken macro diff --git a/test/tbb/test_concurrent_unordered_set.cpp b/test/tbb/test_concurrent_unordered_set.cpp index 5ca9b0df67..36e128116c 100644 --- a/test/tbb/test_concurrent_unordered_set.cpp +++ b/test/tbb/test_concurrent_unordered_set.cpp @@ -183,3 +183,31 @@ TEST_CASE("concurrent_unordered_set with std::scoped_allocator_adaptor") { TEST_CASE("concurrent_unordered_multiset with std::scoped_allocator_adaptor") { test_scoped_allocator(); } + +//! \brief \ref error_guessing +TEST_CASE("concurrent_unordered_set::swap with not always equal allocator") { + using not_always_equal_alloc_set_type = tbb::concurrent_unordered_set, std::equal_to, + NotAlwaysEqualAllocator>; + test_swap_not_always_equal_allocator(); +} + +//! \brief \ref error_guessing +TEST_CASE("concurrent_unordered_multiset::swap with not always equal allocator") { + using not_always_equal_alloc_mset_type = tbb::concurrent_unordered_multiset, std::equal_to, + NotAlwaysEqualAllocator>; + test_swap_not_always_equal_allocator(); +} + +#if __TBB_USE_EXCEPTIONS +//! \brief \ref error_guessing +TEST_CASE("concurrent_unordered_set throwing copy constructor") { + using exception_set_type = tbb::concurrent_unordered_set; + test_exception_on_copy_ctor(); +} + +//! \brief \ref error_guessing +TEST_CASE("concurrent_unordered_multimap throwing copy constructor") { + using exception_mset_type = tbb::concurrent_unordered_multiset; + test_exception_on_copy_ctor(); +} +#endif // __TBB_USE_EXCEPTIONS diff --git a/test/tbb/test_concurrent_vector.cpp b/test/tbb/test_concurrent_vector.cpp index fb9f4f6a7e..9a0a328ab5 100644 --- a/test/tbb/test_concurrent_vector.cpp +++ b/test/tbb/test_concurrent_vector.cpp @@ -668,7 +668,7 @@ TEST_CASE("Reducing concurrent_vector") { //! 
\brief \ref error_guessing -TEST_CASE("swap with NotAlwaysEqualAllocator allocators"){ +TEST_CASE("swap with not always equal allocators"){ using allocator_type = NotAlwaysEqualAllocator; using vector_type = tbb::concurrent_vector; diff --git a/test/tbb/test_continue_node.cpp b/test/tbb/test_continue_node.cpp index 54abda4817..ca177cfd31 100644 --- a/test/tbb/test_continue_node.cpp +++ b/test/tbb/test_continue_node.cpp @@ -305,11 +305,11 @@ void test_follows_and_precedes_api() { follows_and_precedes_testing::test_follows > - (messages_for_follows, pass_through); + (messages_for_follows, pass_through, node_priority_t(0)); follows_and_precedes_testing::test_precedes > - (messages_for_precedes, pass_through); + (messages_for_precedes, /* number_of_predecessors = */0, pass_through, node_priority_t(1)); } #endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET diff --git a/test/tbb/test_dynamic_link.cpp b/test/tbb/test_dynamic_link.cpp index 95858b2e5e..2418e1fbe6 100644 --- a/test/tbb/test_dynamic_link.cpp +++ b/test/tbb/test_dynamic_link.cpp @@ -57,9 +57,7 @@ static const tbb::detail::r1::dynamic_link_descriptor LinkTable[] = { #include "src/tbb/dynamic_link.cpp" #include "common/utils_dynamic_libs.h" -//! Testing dynamic_link -//! \brief \ref error_guessing -TEST_CASE("Test dynamic_link") { +void test_dynamic_link(const char* lib_name) { #if __TBB_DYNAMIC_LOAD_ENABLED #if !_WIN32 // Check if the executable exports its symbols. @@ -71,7 +69,7 @@ TEST_CASE("Test dynamic_link") { // the dynamic_link call - let it be an empty string. // Generally speaking the test has sense only on Linux but on Windows it // checks the dynamic_link graceful behavior with incorrect library name. - if (tbb::detail::r1::dynamic_link("", LinkTable, sizeof(LinkTable) / sizeof(LinkTable[0]))) { + if (tbb::detail::r1::dynamic_link(lib_name, LinkTable, sizeof(LinkTable) / sizeof(LinkTable[0]))) { REQUIRE_MESSAGE((foo1_handler && foo2_handler), "The symbols are corrupted by dynamic_link"); REQUIRE_MESSAGE((foo1_handler() == FOO_IMPLEMENTATION && foo2_handler() == FOO_IMPLEMENTATION), "dynamic_link returned the successful code but symbol(s) are wrong"); @@ -80,3 +78,15 @@ TEST_CASE("Test dynamic_link") { } #endif } + +//! Testing dynamic_link with non-existing library +//! \brief \ref error_guessing +TEST_CASE("Test dynamic_link with non-existing library") { + test_dynamic_link("tbb_unrealNAME.so"); +} + +//! Testing dynamic_link +//! \brief \ref error_guessing +TEST_CASE("Test dynamic_link") { + test_dynamic_link(""); +} diff --git a/test/tbb/test_eh_algorithms.cpp b/test/tbb/test_eh_algorithms.cpp index e26c7267f5..7bd2782209 100644 --- a/test/tbb/test_eh_algorithms.cpp +++ b/test/tbb/test_eh_algorithms.cpp @@ -1094,6 +1094,7 @@ TEST_CASE("parallel_for_each exception handling test #5") { g_ExceptionInMaster = (j & 1) != 0; g_SolitaryException = (j & 2) != 0; + Test5_parallel_for_each >(); Test5_parallel_for_each >(); Test5_parallel_for_each >(); } diff --git a/test/tbb/test_flow_graph.cpp b/test/tbb/test_flow_graph.cpp index 56a03a14fd..9fad662cb0 100644 --- a/test/tbb/test_flow_graph.cpp +++ b/test/tbb/test_flow_graph.cpp @@ -339,3 +339,32 @@ TEST_CASE("Test parallel"){ TEST_CASE("Test graph_arena"){ test_graph_arena(); } + +//! Graph iterator +//! 
\brief \ref error_guessing +TEST_CASE("graph iterator") { + using namespace tbb::flow; + + graph g; + + auto past_end = g.end(); + ++past_end; + + continue_node n(g, [](const continue_msg &){return 1;}); + + size_t item_count = 0; + + for(auto it = g.cbegin(); it != g.cend(); it++) + ++item_count; + CHECK_MESSAGE((item_count == 1), "Should find 1 item"); + + item_count = 0; + auto jt(g.begin()); + for(; jt != g.end(); jt++) + ++item_count; + CHECK_MESSAGE((item_count == 1), "Should find 1 item"); + + graph g2; + continue_node n2(g, [](const continue_msg &){return 1;}); + CHECK_MESSAGE((g.begin() != g2.begin()), "Different graphs should have different iterators"); +} diff --git a/test/tbb/test_flow_graph_priorities.cpp b/test/tbb/test_flow_graph_priorities.cpp index 10e732b4c8..f3216d2461 100644 --- a/test/tbb/test_flow_graph_priorities.cpp +++ b/test/tbb/test_flow_graph_priorities.cpp @@ -782,6 +782,38 @@ void test(int num_threads) { } // namespace ManySuccessors +#if TBB_USE_EXCEPTIONS +namespace Exceptions { + void test() { + using namespace tbb::flow; + graph g; + std::srand(42); + continue_node c(g, [](continue_msg) { + return std::rand() % 10; + }, 2); + function_node f(g, unlimited, [](int v) { + if (v > 4) { + throw std::runtime_error("Exception::test"); + } + }, 1); + make_edge(c, f); + for (int i = 0; i < 10; ++i) { + try { + for (int j = 0; j < 50; ++j) { + c.try_put(continue_msg()); + } + g.wait_for_all(); + FAIL("Unreachable code. The exception is expected"); + } catch (std::runtime_error&) { + CHECK(g.is_cancelled()); + } catch (...) { + FAIL("Unexpected exception"); + } + } + } +} // namespace Exceptions +#endif + //! Test node prioritization //! \brief \ref requirement TEST_CASE("Priority nodes take precedence"){ @@ -825,3 +857,12 @@ TEST_CASE("Many successors") { ManySuccessors::test( p ); } } + +#if TBB_USE_EXCEPTIONS +//! Test for exceptions +//! 
\brief \ref error_guessing +TEST_CASE("Exceptions") { + Exceptions::test(); +} +#endif + diff --git a/test/tbb/test_flow_graph_whitebox.cpp b/test/tbb/test_flow_graph_whitebox.cpp index 8e5935c05d..667d0e1597 100644 --- a/test/tbb/test_flow_graph_whitebox.cpp +++ b/test/tbb/test_flow_graph_whitebox.cpp @@ -408,19 +408,52 @@ TestJoinNode() { INFO(" done\n"); } -void -TestLimiterNode() { +template +struct limiter_node_type { + using type = tbb::flow::limiter_node; + using dtype = DecrementerType; +}; + +template <> +struct limiter_node_type { + using type = tbb::flow::limiter_node; + using dtype = tbb::flow::continue_msg; +}; + +template +struct DecrementerHelper { + template + static void check(Decrementer&&) {} + static DType makeDType() { + return DType(1); + } +}; + +template <> +struct DecrementerHelper { + template + static void check(Decrementer&& d) { + CHECK_MESSAGE(d.my_predecessor_count == 0, "error in pred count"); + CHECK_MESSAGE(d.my_initial_predecessor_count == 0, "error in initial pred count"); + CHECK_MESSAGE(d.my_current_count == 0, "error in current count"); + } + static tbb::flow::continue_msg makeDType() { + return tbb::flow::continue_msg(); + } +}; + +template +void TestLimiterNode() { int out_int{}; tbb::flow::graph g; - tbb::flow::limiter_node ln(g,1); + using dtype = typename limiter_node_type::dtype; + typename limiter_node_type::type ln(g,1); INFO("Testing limiter_node: preds and succs"); - CHECK_MESSAGE( (ln.decrement.my_predecessor_count == 0), "error in pred count"); - CHECK_MESSAGE( (ln.decrement.my_initial_predecessor_count == 0), "error in initial pred count"); - CHECK_MESSAGE( (ln.decrement.my_current_count == 0), "error in current count"); + DecrementerHelper::check(ln.decrement); CHECK_MESSAGE( (ln.my_threshold == 1), "error in my_threshold"); tbb::flow::queue_node inq(g); tbb::flow::queue_node outq(g); - tbb::flow::broadcast_node bn(g); + tbb::flow::broadcast_node bn(g); tbb::flow::make_edge(inq,ln); tbb::flow::make_edge(ln,outq); @@ -437,7 +470,7 @@ TestLimiterNode() { g.wait_for_all(); CHECK_MESSAGE( (!outq.try_get(out_int)), "limiter_node incorrectly passed second input"); CHECK_MESSAGE( (!ln.my_predecessors.empty()), "input edge to limiter_node not reversed"); - bn.try_put(tbb::flow::continue_msg()); + bn.try_put(DecrementerHelper::makeDType()); g.wait_for_all(); CHECK_MESSAGE( (outq.try_get(out_int) && out_int == 2), "limiter_node didn't pass second value"); g.wait_for_all(); @@ -451,9 +484,7 @@ TestLimiterNode() { INFO(" rf_clear_edges"); // currently the limiter_node will not pass another message g.reset(tbb::flow::rf_clear_edges); - CHECK_MESSAGE( (ln.decrement.my_predecessor_count == 0), "error in pred count"); - CHECK_MESSAGE( (ln.decrement.my_initial_predecessor_count == 0), "error in initial pred count"); - CHECK_MESSAGE( (ln.decrement.my_current_count == 0), "error in current count"); + DecrementerHelper::check(ln.decrement); CHECK_MESSAGE( (ln.my_threshold == 1), "error in my_threshold"); CHECK_MESSAGE( (ln.my_predecessors.empty()), "preds not reset(rf_clear_edges)"); CHECK_MESSAGE( (ln.my_successors.empty()), "preds not reset(rf_clear_edges)"); @@ -467,7 +498,7 @@ TestLimiterNode() { g.wait_for_all(); CHECK_MESSAGE( (outq.try_get(out_int)),"missing output after reset(rf_clear_edges)"); CHECK_MESSAGE( (out_int == 4), "input incorrect (4)"); - bn.try_put(tbb::flow::continue_msg()); + bn.try_put(DecrementerHelper::makeDType()); g.wait_for_all(); CHECK_MESSAGE( (!outq.try_get(out_int)),"second output incorrectly passed (rf_clear_edges)"); 
INFO(" done\n"); @@ -747,7 +778,9 @@ TEST_CASE("Test join node"){ //! Test limiter_node //! \brief \ref error_guessing TEST_CASE("Test limiter node"){ - TestLimiterNode(); + TestLimiterNode(); + TestLimiterNode(); + TestLimiterNode(); } //! Test indexer_node @@ -769,3 +802,135 @@ TEST_CASE("Test scalar node"){ TestScalarNode >("overwrite_node"); TestScalarNode >("write_once_node"); } + +//! try_get in inactive graph +//! \brief \ref error_guessing +TEST_CASE("try_get in inactive graph"){ + tbb::flow::graph g; + + tbb::flow::input_node src(g, [&](tbb::flow_control& fc) -> bool { fc.stop(); return 0;}); + deactivate_graph(g); + + int tmp = -1; + CHECK_MESSAGE((src.try_get(tmp) == false), "try_get can not succeed"); + + src.activate(); + tmp = -1; + CHECK_MESSAGE((src.try_get(tmp) == false), "try_get can not succeed"); +} + +//! Test make_edge in inactive graph +//! \brief \ref error_guessing +TEST_CASE("Test make_edge in inactive graph"){ + tbb::flow::graph g; + + tbb::flow::continue_node c(g, [](const tbb::flow::continue_msg&){ return 1; }); + + tbb::flow::function_node f(g, tbb::flow::serial, serial_fn_body(serial_fn_state0)); + + c.try_put(tbb::flow::continue_msg()); + g.wait_for_all(); + + deactivate_graph(g); + + make_edge(c, f); +} + +//! Test make_edge from overwrite_node in inactive graph +//! \brief \ref error_guessing +TEST_CASE("Test make_edge from overwrite_node in inactive graph"){ + tbb::flow::graph g; + + tbb::flow::queue_node q(g); + + tbb::flow::overwrite_node on(g); + + on.try_put(1); + g.wait_for_all(); + + deactivate_graph(g); + + make_edge(on, q); + + int tmp = -1; + CHECK_MESSAGE((q.try_get(tmp) == false), "Message should not be passed on"); +} + +//! Test iterators directly +//! \brief \ref error_guessing +TEST_CASE("graph_iterator details"){ + tbb::flow::graph g; + const tbb::flow::graph cg; + + tbb::flow::graph::iterator b = g.begin(); + tbb::flow::graph::iterator b2 = g.begin(); + ++b2; + // Cast to a volatile pointer to workaround self assignment warnings from some compilers. + tbb::flow::graph::iterator* volatile b2_ptr = &b2; + b2 = *b2_ptr; + b = b2; + CHECK_MESSAGE((b == b2), "Assignment should make iterators equal"); +} + +//! const graph +//! \brief \ref error_guessing +TEST_CASE("const graph"){ + using namespace tbb::flow; + + const graph g; + CHECK_MESSAGE((g.cbegin() == g.cend()), "Starting graph is empty"); + CHECK_MESSAGE((g.begin() == g.end()), "Starting graph is empty"); + + graph g2; + CHECK_MESSAGE((g2.begin() == g2.end()), "Starting graph is empty"); +} + +//! Send message to continue_node while graph is inactive +//! \brief \ref error_guessing +TEST_CASE("Send message to continue_node while graph is inactive") { + using namespace tbb::flow; + + graph g; + + continue_node c(g, [](const continue_msg&){ return 1; }); + buffer_node b(g); + + make_edge(c, b); + + deactivate_graph(g); + + c.try_put(continue_msg()); + g.wait_for_all(); + + int tmp = -1; + CHECK_MESSAGE((b.try_get(tmp) == false), "Message should not arrive"); + CHECK_MESSAGE((tmp == -1), "Value should not be altered"); +} + + +//! Bypass of a successor's message in a node with lightweight policy +//! 
\brief \ref error_guessing +TEST_CASE("Bypass of a successor's message in a node with lightweight policy") { + using namespace tbb::flow; + + graph g; + + auto body = [](const int&v)->int { return v * 2; }; + function_node f1(g, unlimited, body); + + auto body2 = [](const int&v)->int {return v / 2;}; + function_node f2(g, unlimited, body2); + + buffer_node b(g); + + make_edge(f1, f2); + make_edge(f2, b); + + f1.try_put(1); + g.wait_for_all(); + + int tmp = -1; + CHECK_MESSAGE((b.try_get(tmp) == true), "Functional nodes can work in succession"); + CHECK_MESSAGE((tmp == 1), "Value should not be altered"); +} + diff --git a/test/tbb/test_function_node.cpp b/test/tbb/test_function_node.cpp index 5115c12430..40e638b9ee 100644 --- a/test/tbb/test_function_node.cpp +++ b/test/tbb/test_function_node.cpp @@ -464,7 +464,7 @@ void test_follows_and_precedes_api() { (messages_for_follows, tbb::flow::unlimited, pass_msg); follows_and_precedes_testing::test_precedes > - (messages_for_precedes, tbb::flow::unlimited, pass_msg); + (messages_for_precedes, tbb::flow::unlimited, pass_msg, tbb::flow::node_priority_t(1)); } #endif @@ -544,3 +544,14 @@ TEST_CASE("Deduction guides test"){ test_deduction_guides(); } #endif + +//! try_release and try_consume test +//! \brief \ref error_guessing +TEST_CASE("try_release try_consume"){ + tbb::flow::graph g; + + tbb::flow::function_node fn(g, tbb::flow::unlimited, [](const int&v){return v;}); + + CHECK_MESSAGE((fn.try_release()==false), "try_release should initially return false on a node"); + CHECK_MESSAGE((fn.try_consume()==false), "try_consume should initially return false on a node"); +} diff --git a/test/tbb/test_indexer_node.cpp b/test/tbb/test_indexer_node.cpp index 26d6a72989..9ae121bc64 100644 --- a/test/tbb/test_indexer_node.cpp +++ b/test/tbb/test_indexer_node.cpp @@ -14,6 +14,7 @@ limitations under the License. 
*/ +#define MAX_TUPLE_TEST_SIZE 10 #include "common/config.h" #include "tbb/flow_graph.h" @@ -253,16 +254,18 @@ class parallel_test { } for(int nInputs = 1; nInputs <= MaxNInputs; ++nInputs) { tbb::flow::graph g; - IType* my_indexer = new IType(g); //makeIndexer::create(); + IType* my_indexer_ptr = new IType(g); //makeIndexer::create(); + IType my_indexer = *my_indexer_ptr; tbb::flow::queue_node outq1(g); tbb::flow::queue_node outq2(g); - tbb::flow::make_edge(*my_indexer, outq1); - tbb::flow::make_edge(*my_indexer, outq2); + tbb::flow::make_edge(my_indexer, outq1); + tbb::flow::make_edge(my_indexer, outq2); - input_node_helper::add_input_nodes((*my_indexer), g, nInputs); + input_node_helper::add_input_nodes(my_indexer, g, nInputs); g.wait_for_all(); + makeIndexer::destroy(my_indexer_ptr); reset_outputCheck(SIZE, Count); for(int i=0; i < Count*SIZE; ++i) { @@ -282,10 +285,9 @@ class parallel_test { CHECK_MESSAGE(!outq1.try_get(v), ""); CHECK_MESSAGE(!outq2.try_get(v), ""); - input_node_helper::remove_input_nodes((*my_indexer), nInputs); - tbb::flow::remove_edge(*my_indexer, outq1); - tbb::flow::remove_edge(*my_indexer, outq2); - makeIndexer::destroy(my_indexer); + input_node_helper::remove_input_nodes(my_indexer, nInputs); + tbb::flow::remove_edge(my_indexer, outq1); + tbb::flow::remove_edge(my_indexer, outq2); } } }; @@ -551,7 +553,7 @@ class generate_test { #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template -void check_edge(tbb::flow::graph& g, +bool check_edge(tbb::flow::graph& g, tbb::flow::broadcast_node& start, tbb::flow::buffer_node& buf, input_t input_value) { @@ -563,27 +565,36 @@ void check_edge(tbb::flow::graph& g, CHECK_MESSAGE( ((is_get_succeeded)), "There is no item in the buffer"); CHECK_MESSAGE( ((tbb::flow::cast_to(msg) == input_value)), "Wrong item value"); + return true; } -void test_follows() { +template +void sink(T...) 
{} + +template +void check_edge(tbb::flow::graph& g, BN& bn, tbb::flow::buffer_node& buf, Type, tbb::detail::index_sequence) { + sink(check_edge(g, std::get(bn), buf, typename std::tuple_element::type(Seq))...); +} + +template +void test_follows_impl(std::tuple t, tbb::detail::index_sequence seq) { using namespace tbb::flow; - using indexer_output_t = indexer_node::output_type; + using indexer_output_t = typename indexer_node::output_type; graph g; - broadcast_node start(g); - - broadcast_node start1(g); - broadcast_node start2(g); - broadcast_node start3(g); + auto bn = std::make_tuple(broadcast_node(g)...); - indexer_node my_indexer(follows(start1, start2, start3)); + indexer_node my_indexer(follows(std::get(bn)...)); buffer_node buf(g); make_edge(my_indexer, buf); - check_edge(g, start1, buf, 1); - check_edge(g, start2, buf, 2.2f); - check_edge(g, start3, buf, 3.3); + check_edge(g, bn, buf, t, seq); +} + +template +void test_follows() { + test_follows_impl(std::tuple(), tbb::detail::make_index_sequence()); } void test_precedes() { @@ -613,7 +624,16 @@ void test_precedes() { } void test_follows_and_precedes_api() { - test_follows(); + test_follows(); + test_follows(); + test_follows(); + test_follows(); + test_follows(); + test_follows(); + test_follows(); + test_follows(); + test_follows(); + test_follows(); test_precedes(); } #endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET @@ -646,7 +666,7 @@ TEST_CASE("Serial and parallel test") { for (int p = 0; p < 2; ++p) { generate_test::do_test(); #if MAX_TUPLE_TEST_SIZE >= 4 - generate_test::do_test(); + generate_test::do_test(); #endif #if MAX_TUPLE_TEST_SIZE >= 6 generate_test::do_test(); diff --git a/test/tbb/test_input_node.cpp b/test/tbb/test_input_node.cpp index d1867feef5..9294398d08 100644 --- a/test/tbb/test_input_node.cpp +++ b/test/tbb/test_input_node.cpp @@ -63,8 +63,6 @@ class test_push_receiver : public tbb::flow::receiver, utils::NoAssign { tbb::flow::graph& graph_reference() const override { return my_graph; } - - void reset_receiver(tbb::flow::reset_flags /*f*/) override {} }; template< typename T > @@ -367,3 +365,12 @@ TEST_CASE("Deduction guides"){ } #endif +//! Test try_get before activation +//! 
\brief \ref error_guessing +TEST_CASE("try_get before activation"){ + tbb::flow::graph g; + tbb::flow::input_node in(g, [&](tbb::flow_control& fc) -> bool { fc.stop(); return 0;}); + + int tmp = -1; + CHECK_MESSAGE((in.try_get(tmp) == false), "try_get before activation should not succeed"); +} diff --git a/test/tbb/test_intrusive_list.cpp b/test/tbb/test_intrusive_list.cpp index 3412ad69fa..fddbc2060f 100644 --- a/test/tbb/test_intrusive_list.cpp +++ b/test/tbb/test_intrusive_list.cpp @@ -81,6 +81,10 @@ void check_list_nodes( List& il, int value_step ) { int i; Iterator it = il.begin(); + + Iterator it_default; + REQUIRE_MESSAGE(it_default != it, "Incorrect default constructed intrusive_list::iterator"); + for ( i = value_step - 1; it != il.end(); ++it, i += value_step ) { REQUIRE_MESSAGE(it->Data() == i, "Unexpected node value while iterating forward"); REQUIRE_MESSAGE(it->m_Canary == NoliMeTangere, "Memory corruption"); diff --git a/test/tbb/test_join_node.cpp b/test/tbb/test_join_node.cpp index 585cc78508..945c851960 100644 --- a/test/tbb/test_join_node.cpp +++ b/test/tbb/test_join_node.cpp @@ -134,7 +134,15 @@ void test_follows_and_precedes_api() { >(messages_for_follows); follows_and_precedes_testing::test_follows , tbb::flow::buffer_node>(messages_for_follows); - // TODO: add tests for key_matching and message based key matching + auto b = [](msg_t) { return msg_t(); }; + class hash_compare { + public: + std::size_t hash(msg_t) const { return 0; } + bool equal(msg_t, msg_t) const { return true; } + }; + follows_and_precedes_testing::test_follows + >, tbb::flow::buffer_node> + (messages_for_follows, b, b, b); follows_and_precedes_testing::test_precedes >(messages_for_precedes); @@ -142,6 +150,9 @@ void test_follows_and_precedes_api() { >(messages_for_precedes); follows_and_precedes_testing::test_precedes >(messages_for_precedes); + follows_and_precedes_testing::test_precedes + >> + (messages_for_precedes, b, b, b); } #endif @@ -175,6 +186,101 @@ void test_deduction_guides() { #endif +namespace multiple_predecessors { + +using namespace tbb::flow; + +using join_node_t = join_node, reserving>; +using queue_node_t = queue_node>; + +void twist_join_connections( + buffer_node& bn1, buffer_node& bn2, buffer_node& bn3, + join_node_t& jn) +{ + // order, in which edges are created/destroyed, is important + make_edge(bn1, input_port<0>(jn)); + make_edge(bn2, input_port<0>(jn)); + make_edge(bn3, input_port<0>(jn)); + + remove_edge(bn3, input_port<0>(jn)); + make_edge (bn3, input_port<2>(jn)); + + remove_edge(bn2, input_port<0>(jn)); + make_edge (bn2, input_port<1>(jn)); +} + +std::unique_ptr connect_join_via_make_edge( + graph& g, buffer_node& bn1, buffer_node& bn2, + buffer_node& bn3, queue_node_t& qn) +{ + std::unique_ptr jn( new join_node_t(g) ); + twist_join_connections( bn1, bn2, bn3, *jn ); + make_edge(*jn, qn); + return jn; +} + +#if TBB_PREVIEW_FLOW_GRAPH_FEATURES +std::unique_ptr connect_join_via_follows( + graph&, buffer_node& bn1, buffer_node& bn2, + buffer_node& bn3, queue_node_t& qn) +{ + auto bn_set = make_node_set(bn1, bn2, bn3); + std::unique_ptr jn( new join_node_t(follows(bn_set)) ); + make_edge(*jn, qn); + return jn; +} + +std::unique_ptr connect_join_via_precedes( + graph&, buffer_node& bn1, buffer_node& bn2, + buffer_node& bn3, queue_node_t& qn) +{ + auto qn_set = make_node_set(qn); + auto qn_copy_set = qn_set; + std::unique_ptr jn( new join_node_t(precedes(qn_copy_set)) ); + twist_join_connections( bn1, bn2, bn3, *jn ); + return jn; +} +#endif // 
TBB_PREVIEW_FLOW_GRAPH_FEATURES + +void run_and_check( + graph& g, buffer_node& bn1, buffer_node& bn2, + buffer_node& bn3, queue_node_t& qn, bool expected) +{ + std::tuple msg; + + bn1.try_put(continue_msg()); + bn2.try_put(continue_msg()); + bn3.try_put(continue_msg()); + g.wait_for_all(); + + CHECK_MESSAGE( + (qn.try_get(msg) == expected), + "Unexpected message absence/existence at the end of the graph." + ); +} + +template +void test(ConnectJoinNodeFunc&& connect_join_node) { + graph g; + buffer_node bn1(g); + buffer_node bn2(g); + buffer_node bn3(g); + queue_node_t qn(g); + + auto jn = connect_join_node(g, bn1, bn2, bn3, qn); + + run_and_check(g, bn1, bn2, bn3, qn, /*expected=*/true); + + remove_edge(bn3, input_port<2>(*jn)); + remove_edge(bn2, input_port<1>(*jn)); + remove_edge(bn1, input_port<0>(*jn)); + remove_edge(*jn, qn); + + run_and_check(g, bn1, bn2, bn3, qn, /*expected=*/false); +} +} // namespace multiple_predecessors + + #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET //! Test follows and precedes API //! \brief \ref error_guessing @@ -210,3 +316,15 @@ TEST_CASE("Main test"){ TEST_CASE("Recirculation test"){ generate_recirc_test >::do_test(); } + +//! Test maintaining correct count of ports without input +//! \brief \ref error_guessing +TEST_CASE("Test removal of the predecessor while having none") { + using namespace multiple_predecessors; + + test(connect_join_via_make_edge); +#if TBB_PREVIEW_FLOW_GRAPH_FEATURES + test(connect_join_via_follows); + test(connect_join_via_precedes); +#endif +} diff --git a/test/tbb/test_join_node.h b/test/tbb/test_join_node.h index 43bba1c3cf..35a2778ec0 100644 --- a/test/tbb/test_join_node.h +++ b/test/tbb/test_join_node.h @@ -449,7 +449,7 @@ class name_of > { }; // The additional policy to differ message based key matching from usual key matching. -// It only has sense for the test because join_node is created with the key_matching policy for the both cases. +// It only makes sense for the test because join_node is created with the key_matching policy for the both cases. template ::type > > struct message_based_key_matching {}; @@ -1308,7 +1308,7 @@ class serial_queue_helper<1, JType> { }; // -// Single reservable predecessor at each port, single accepting successor +// Single reservable predecessor at each port, single accepting and rejecting successor // * put to buffer before port0, then put to buffer before port1, ... // * fill buffer before port0 then fill buffer before port1, ... @@ -1320,7 +1320,7 @@ void test_one_serial(JType &my_join, tbb::flow::graph &g) { std::vector flags; serial_queue_helper::add_queue_nodes(g, my_join); typedef TType q3_input_type; - tbb::flow::queue_node< q3_input_type > q3(g); + tbb::flow::queue_node< q3_input_type > q3(g); tbb::flow::make_edge(my_join, q3); @@ -1354,18 +1354,23 @@ void test_one_serial(JType &my_join, tbb::flow::graph &g) { } } + tbb::flow::remove_edge(my_join, q3); + tbb::flow::limiter_node limiter(g, Count / 2); + tbb::flow::make_edge(my_join, limiter); + tbb::flow::make_edge(limiter, q3); + // fill each queue completely before filling the next. 
serial_queue_helper::fill_one_queue(Count); g.wait_for_all(); - for(int i = 0; i < Count; ++i) { + for(int i = 0; i < Count / 2; ++i) { q3_input_type v; g.wait_for_all(); CHECK_MESSAGE( (q3.try_get(v)), "Error in try_get()"); if(is_key_matching) { int j = int(std::get<0>(v))/2; serial_queue_helper::check_queue_value(j, v); - flags[i] = true; + flags[j] = true; } else { serial_queue_helper::check_queue_value(i, v); @@ -1373,13 +1378,10 @@ void test_one_serial(JType &my_join, tbb::flow::graph &g) { } if(is_key_matching) { - for(int i = 0; i < Count; ++i) { - CHECK_MESSAGE(flags[i], ""); - } + CHECK(std::count(flags.begin(), flags.end(), true) == Count / 2); } serial_queue_helper::remove_queue_nodes(my_join); - } template diff --git a/test/tbb/test_join_node_key_matching.cpp b/test/tbb/test_join_node_key_matching.cpp index b6eac84048..33ffaa1c95 100644 --- a/test/tbb/test_join_node_key_matching.cpp +++ b/test/tbb/test_join_node_key_matching.cpp @@ -14,6 +14,7 @@ limitations under the License. */ +#define MAX_TUPLE_TEST_SIZE 10 #include "common/config.h" #include "test_join_node.h" @@ -52,26 +53,32 @@ void test_deduction_guides() { } #endif +template +using make_tuple = decltype(std::tuple_cat(T1(), std::tuple())); +using T1 = std::tuple>; +using T2 = make_tuple>; +using T3 = make_tuple>; +using T4 = make_tuple>; +using T5 = make_tuple>; +using T6 = make_tuple>; +using T7 = make_tuple>; +using T8 = make_tuple>; +using T9 = make_tuple>; +using T10 = make_tuple>; + //! Test serial key matching on special input types //! \brief \ref error_guessing -TEST_CASE("Serial test on tuples"){ +TEST_CASE("Serial test on tuples") { INFO("key_matching\n"); generate_test, MyKeySecond >, tbb::flow::key_matching >::do_test(); generate_test, MyKeySecond >, tbb::flow::key_matching >::do_test(); -#if MAX_TUPLE_TEST_SIZE >= 3 generate_test, MyKeySecond, MyKeyWithBrokenMessageKey >, tbb::flow::key_matching >::do_test(); -#endif -#if MAX_TUPLE_TEST_SIZE >= 7 - generate_test, - MyKeyWithBrokenMessageKey, - MyKeyFirst, - MyKeySecond, - MyKeyWithBrokenMessageKey, - MyKeySecond, - MyKeySecond - >, tbb::flow::key_matching >::do_test(); -#endif +} + +//! Serial test with different tuple sizes +//! \brief \ref error_guessing +TEST_CASE_TEMPLATE("Serial N tests on tuples", T, T2, T3, T4, T5, T6, T7, T8, T9, T10) { + generate_test>::do_test(); } #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT @@ -88,19 +95,10 @@ TEST_CASE("Parallel test on tuples"){ generate_test, MyKeySecond >, tbb::flow::key_matching >::do_test(); generate_test, MyKeySecond >, tbb::flow::key_matching >::do_test(); generate_test, MyKeySecond >, tbb::flow::key_matching >::do_test(); +} -#if MAX_TUPLE_TEST_SIZE >= 10 - generate_test, - MyKeySecond, - MyKeyFirst, - MyKeyWithBrokenMessageKey, - MyKeyWithBrokenMessageKey, - MyKeySecond, - MyKeySecond, - MyKeyFirst, - MyKeySecond, - MyKeyWithBrokenMessageKey - >, tbb::flow::key_matching >::do_test(); -#endif +//! Parallel test with different tuple sizes +//! 
\brief \ref error_guessing +TEST_CASE_TEMPLATE("Parallel N tests on tuples", T, T2, T3, T4, T5, T6, T7, T8, T9, T10) { + generate_test>::do_test(); } diff --git a/test/tbb/test_join_node_msg_key_matching.cpp b/test/tbb/test_join_node_msg_key_matching.cpp index 533bb7fdf0..c50ba30cbe 100644 --- a/test/tbb/test_join_node_msg_key_matching.cpp +++ b/test/tbb/test_join_node_msg_key_matching.cpp @@ -16,6 +16,7 @@ // Message based key matching is a preview feature #define TBB_PREVIEW_FLOW_GRAPH_FEATURES 1 +#define MAX_TUPLE_TEST_SIZE 10 #include "common/config.h" @@ -24,6 +25,25 @@ //! \file test_join_node_msg_key_matching.cpp //! \brief Test for [preview] functionality +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +#include +#include +void test_follows_and_precedes_api() { + using msg_t = MyMessageKeyWithoutKey; + using JoinOutputType = std::tuple; + + std::array messages_for_follows = { {msg_t(), msg_t(), msg_t()} }; + std::vector messages_for_precedes = { msg_t(), msg_t(), msg_t() }; + + follows_and_precedes_testing::test_follows + >, tbb::flow::buffer_node> + (messages_for_follows); + follows_and_precedes_testing::test_precedes + >> + (messages_for_precedes); +} +#endif + #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT struct message_key { int my_key; @@ -44,54 +64,41 @@ void test_deduction_guides() { broadcast_node bm1(g), bm2(g); broadcast_node bm3(g); join_node > j0(g); - -#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - join_node j1(follows(bm1, bm2), key_matching()); - static_assert(std::is_same_v>>); - - join_node j2(precedes(bm3), key_matching()); - static_assert(std::is_same_v>>); -#endif - join_node j3(j0); static_assert(std::is_same_v>>); } #endif -//! Serial test with different tuple sizes +//! Serial test with matching policies //! \brief \ref error_guessing -TEST_CASE("Serial test"){ +TEST_CASE("Serial test") { generate_test, MyMessageKeyWithoutKey >, message_based_key_matching >::do_test(); generate_test, MyMessageKeyWithBrokenKey >, message_based_key_matching >::do_test(); -#if MAX_TUPLE_TEST_SIZE >= 3 - generate_test, MyMessageKeyWithoutKeyMethod, MyMessageKeyWithBrokenKey >, message_based_key_matching >::do_test(); -#endif -#if MAX_TUPLE_TEST_SIZE >= 7 - generate_test, - MyMessageKeyWithoutKeyMethod, - MyMessageKeyWithBrokenKey, - MyMessageKeyWithoutKey, - MyMessageKeyWithoutKeyMethod, - MyMessageKeyWithBrokenKey, - MyMessageKeyWithoutKey - >, message_based_key_matching >::do_test(); -#endif +} -#if MAX_TUPLE_TEST_SIZE >= 10 - generate_test, - MyMessageKeyWithBrokenKey, - MyMessageKeyWithoutKey, - MyMessageKeyWithoutKeyMethod, - MyMessageKeyWithBrokenKey, - MyMessageKeyWithoutKeyMethod, - MyMessageKeyWithoutKeyMethod, - MyMessageKeyWithBrokenKey, - MyMessageKeyWithoutKeyMethod, - MyMessageKeyWithBrokenKey - >, message_based_key_matching >::do_test(); -#endif +template +using make_tuple = decltype(std::tuple_cat(T1(), std::tuple())); +using T1 = std::tuple>; +using T2 = make_tuple>; +using T3 = make_tuple < T2, MyMessageKeyWithoutKey>; +using T4 = make_tuple < T3, MyMessageKeyWithoutKeyMethod>; +using T5 = make_tuple < T4, MyMessageKeyWithBrokenKey>; +using T6 = make_tuple < T5, MyMessageKeyWithoutKeyMethod>; +using T7 = make_tuple < T6, MyMessageKeyWithoutKeyMethod>; +using T8 = make_tuple < T7, MyMessageKeyWithBrokenKey>; +using T9 = make_tuple < T8, MyMessageKeyWithoutKeyMethod>; +using T10 = make_tuple < T9, MyMessageKeyWithBrokenKey>; + +//! Serial test with different tuple sizes +//! 
\brief \ref error_guessing +TEST_CASE_TEMPLATE("Serial N tests", T, T2, T3, T4, T5, T6, T7, T8, T9, T10) { + generate_test >::do_test(); +} + +//! Parallel test with different tuple sizes +//! \brief \ref error_guessing +TEST_CASE_TEMPLATE("Parallel N tests", T, T2, T3, T4, T5, T6, T7, T8, T9, T10) { + generate_test >::do_test(); } //! Parallel test with special key types diff --git a/test/tbb/test_limiter_node.cpp b/test/tbb/test_limiter_node.cpp index 6d82adc0c5..825dd23202 100644 --- a/test/tbb/test_limiter_node.cpp +++ b/test/tbb/test_limiter_node.cpp @@ -51,8 +51,6 @@ struct serial_receiver : public tbb::flow::receiver, utils::NoAssign { tbb::flow::graph& graph_reference() const override { return my_graph; } - - void reset_receiver(tbb::flow::reset_flags /*f*/) override {next_value = T(0);} }; template< typename T > @@ -71,8 +69,6 @@ struct parallel_receiver : public tbb::flow::receiver, utils::NoAssign { tbb::flow::graph& graph_reference() const override { return my_graph; } - - void reset_receiver(tbb::flow::reset_flags /*f*/) override {my_count = 0;} }; template< typename T > @@ -414,6 +410,36 @@ void test_decrementer() { g.wait_for_all(); } +void test_try_put_without_successors() { + tbb::flow::graph g; + std::size_t try_put_num{3}; + tbb::flow::buffer_node bn(g); + tbb::flow::limiter_node ln(g, try_put_num); + tbb::flow::make_edge(bn, ln); + std::size_t i = 1; + for (; i <= try_put_num; i++) + bn.try_put(i); + + std::atomic counter{0}; + tbb::flow::function_node fn(g, tbb::flow::unlimited, + [&](int input) { + counter += input; + return int{}; + } + ); + tbb::flow::make_edge(ln, fn); + g.wait_for_all(); + CHECK((counter == i * try_put_num / 2)); + + // Check the lost message + tbb::flow::remove_edge(bn, ln); + ln.decrement.try_put(tbb::flow::continue_msg()); + bn.try_put(try_put_num + 1); + g.wait_for_all(); + CHECK((counter == i * try_put_num / 2)); + +} + #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET #include #include @@ -492,6 +518,12 @@ TEST_CASE("Decrementer") { test_decrementer(); } +//! Test try_put() without successor +//! \brief \ref error_guessing +TEST_CASE("Test try_put() without successors") { + test_try_put_without_successors(); +} + #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET //! Test follows and precedes API //! \brief \ref error_guessing diff --git a/test/tbb/test_multifunction_node.cpp b/test/tbb/test_multifunction_node.cpp index 5cfbd1c837..d8c24e6b67 100644 --- a/test/tbb/test_multifunction_node.cpp +++ b/test/tbb/test_multifunction_node.cpp @@ -552,5 +552,34 @@ TEST_CASE("Lightweight testing"){ TEST_CASE("Test follows-precedes API"){ test_follows_and_precedes_api(); } +//! Test priority constructor with follows and precedes API +//! 
\brief \ref error_guessing +TEST_CASE("Test priority with follows and precedes"){ + using namespace tbb::flow; + + using multinode = multifunction_node<int, std::tuple<int, int>>; + + graph g; + + buffer_node<int> b1(g); + buffer_node<int> b2(g); + + multinode node(precedes(b1, b2), unlimited, [](const int& i, multinode::output_ports_type& op) -> void { + if (i % 2) + std::get<0>(op).try_put(i); + else + std::get<1>(op).try_put(i); + } + , node_priority_t(0)); + + node.try_put(0); + node.try_put(1); + g.wait_for_all(); + + int storage; + CHECK_MESSAGE((b1.try_get(storage) && !b1.try_get(storage) && b2.try_get(storage) && !b2.try_get(storage)), + "Not exact edge quantity was made"); +} + #endif diff --git a/test/tbb/test_openmp.cpp b/test/tbb/test_openmp.cpp new file mode 100644 index 0000000000..8a4798f29f --- /dev/null +++ b/test/tbb/test_openmp.cpp @@ -0,0 +1,161 @@ +/* + Copyright (c) 2005-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "common/test.h" +#include "common/utils.h" +#include "common/utils_env.h" +#include "tbb/global_control.h" +#include "tbb/blocked_range.h" +#include "tbb/parallel_for.h" +#include "tbb/parallel_reduce.h" + +// Test mixing OpenMP and TBB +#include <omp.h> + +//! \file test_openmp.cpp +//! \brief Test for [internal] functionality + +using data_type = short; + +void SerialConvolve( data_type c[], const data_type a[], int m, const data_type b[], int n ) { + for (int i = 0; i < m + n - 1; ++i) { + int start = i < n ? 0 : i - n + 1; + int finish = i < m ? i + 1 : m; + data_type sum = 0; + for (int j = start; j < finish; ++j) + sum += a[j] * b[i - j]; + c[i] = sum; + } +} + +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Suppress overzealous warning about short+=short + #pragma warning( push ) + #pragma warning( disable: 4244 ) +#endif + +class InnerBody: utils::NoAssign { + const data_type* my_a; + const data_type* my_b; + const int i; +public: + data_type sum; + InnerBody( data_type /*c*/[], const data_type a[], const data_type b[], int ii ) : + my_a(a), my_b(b), i(ii), sum(0) + {} + InnerBody( InnerBody& x, tbb::split ) : + my_a(x.my_a), my_b(x.my_b), i(x.i), sum(0) + { + } + void join( InnerBody& x ) { sum += x.sum; } + void operator()( const tbb::blocked_range<int>& range ) { + for (int j = range.begin(); j != range.end(); ++j) + sum += my_a[j] * my_b[i - j]; + } +}; + +#if _MSC_VER && !defined(__INTEL_COMPILER) + #pragma warning( pop ) +#endif + +//! Test OpenMP loop around TBB loop +void OpenMP_TBB_Convolve( data_type c[], const data_type a[], int m, const data_type b[], int n ) { +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < m + n - 1; ++i) { + int start = i < n ? 0 : i - n + 1; + int finish = i < m ?
+            InnerBody body(c, a, b, i);
+            tbb::parallel_reduce(tbb::blocked_range<int>(start, finish, 10), body);
+            c[i] = body.sum;
+        }
+    }
+}
+
+class OuterBody: utils::NoAssign {
+    const data_type* my_a;
+    const data_type* my_b;
+    data_type* my_c;
+    const int m;
+    const int n;
+public:
+    OuterBody( data_type c[], const data_type a[], int m_, const data_type b[], int n_ ) :
+        my_a(a), my_b(b), my_c(c), m(m_), n(n_)
+    {}
+    void operator()( const tbb::blocked_range<int>& range ) const {
+        for (int i = range.begin(); i != range.end(); ++i) {
+            int start = i < n ? 0 : i - n + 1;
+            int finish = i < m ? i + 1 : m;
+            data_type sum = 0;
+#pragma omp parallel for reduction(+:sum)
+            for (int j = start; j < finish; ++j)
+                sum += my_a[j] * my_b[i - j];
+            my_c[i] = sum;
+        }
+    }
+};
+
+//! Test TBB loop around OpenMP loop
+void TBB_OpenMP_Convolve( data_type c[], const data_type a[], int m, const data_type b[], int n ) {
+    tbb::parallel_for(tbb::blocked_range<int>(0, m + n - 1, 10), OuterBody(c, a, m, b, n));
+}
+
+#if __INTEL_COMPILER
+void TestNumThreads() {
+    utils::SetEnv("KMP_AFFINITY", "compact");
+    // Make an OpenMP call before initializing TBB
+    int omp_nthreads = omp_get_max_threads();
+    #pragma omp parallel
+    {}
+    int tbb_nthreads = tbb::this_task_arena::max_concurrency();
+    // For the purpose of testing, assume that OpenMP and TBB should utilize the same # of threads.
+    // If it's not true on some platforms, the test will need to be adjusted.
+    REQUIRE_MESSAGE(tbb_nthreads == omp_nthreads, "Initialization of TBB is possibly affected by OpenMP");
+}
+#endif // __INTEL_COMPILER
+
+const int M = 17 * 17;
+const int N = 13 * 13;
+data_type A[M], B[N];
+data_type expected[M+N], actual[M+N];
+
+template <typename Func>
+void RunTest( Func F, int m, int n, std::size_t p ) {
+    tbb::global_control limit(tbb::global_control::max_allowed_parallelism, p);
+    memset(actual, -1, (m + n) * sizeof(data_type));
+    F(actual, A, m, B, n);
+    CHECK(memcmp(actual, expected, (m + n - 1) * sizeof(data_type)) == 0);
+}
+
+//! \brief \ref error_guessing
+TEST_CASE("Testing oneTBB with OpenMP") {
+#if __INTEL_COMPILER
+    TestNumThreads(); // Testing initialization-related behavior; must be the first
+#endif // __INTEL_COMPILER
+
+    for (std::size_t p = utils::MinThread; p <= utils::MaxThread; ++p) {
+        for (std::size_t m = 1; m <= M; m *= 17) {
+            for (std::size_t n = 1; n <= N; n *= 13) {
+                for (std::size_t i = 0; i < m; ++i) A[i] = data_type(1 + i / 5);
+                for (std::size_t i = 0; i < n; ++i) B[i] = data_type(1 + i / 7);
+                SerialConvolve( expected, A, m, B, n );
+                RunTest( OpenMP_TBB_Convolve, m, n, p );
+                RunTest( TBB_OpenMP_Convolve, m, n, p );
+            }
+        }
+    }
+}
diff --git a/test/tbb/test_overwrite_node.cpp b/test/tbb/test_overwrite_node.cpp
index e1fa3eb565..3d1981a9b2 100644
--- a/test/tbb/test_overwrite_node.cpp
+++ b/test/tbb/test_overwrite_node.cpp
@@ -225,3 +225,12 @@ TEST_CASE("Deduction guides"){
 }
 #endif

+//! Test try_release
+//! \brief \ref error_guessing
+TEST_CASE("try_release"){
+    tbb::flow::graph g;
+
+    tbb::flow::overwrite_node<int> on(g);
+
+    CHECK_MESSAGE((on.try_release() == true), "try_release should return true");
+}
diff --git a/test/tbb/test_parallel_pipeline.cpp b/test/tbb/test_parallel_pipeline.cpp
index 25e01cd668..4ba658fb42 100644
--- a/test/tbb/test_parallel_pipeline.cpp
+++ b/test/tbb/test_parallel_pipeline.cpp
@@ -457,7 +457,7 @@ void run_filter_set(
     resetCounters();
     tbb::parallel_pipeline( n_tokens, copy123, context... );
     checkCounters(my_t);
-    tbb::filter move123( filter123 );
+    tbb::filter move123( std::move(filter123) );
     resetCounters();
     tbb::parallel_pipeline( n_tokens, move123, context... );
     checkCounters(my_t);
diff --git a/test/tbb/test_queue_node.cpp b/test/tbb/test_queue_node.cpp
index 68a86b6155..8b48729642 100644
--- a/test/tbb/test_queue_node.cpp
+++ b/test/tbb/test_queue_node.cpp
@@ -530,3 +530,28 @@ TEST_CASE("Deduction guides"){
 }
 #endif

+//! Test operations on a reserved queue_node
+//! \brief \ref error_guessing
+TEST_CASE("queue_node with reservation"){
+    tbb::flow::graph g;
+
+    tbb::flow::queue_node<int> q(g);
+
+    bool res = q.try_put(42);
+    CHECK_MESSAGE( res, "queue_node must accept input." );
+
+    int val = 1;
+    res = q.try_reserve(val);
+    CHECK_MESSAGE( res, "queue_node must reserve as it has an item." );
+    CHECK_MESSAGE( (val == 42), "queue_node must reserve the passed item." );
+
+    int out_arg = -1;
+    CHECK_MESSAGE((q.try_reserve(out_arg) == false), "Reserving a reserved node should fail.");
+    CHECK_MESSAGE((out_arg == -1), "Reserving a reserved node should not update its argument.");
+
+    out_arg = -1;
+    CHECK_MESSAGE((q.try_get(out_arg) == false), "Getting from a reserved node should fail.");
+    CHECK_MESSAGE((out_arg == -1), "Getting from a reserved node should not update its argument.");
+    g.wait_for_all();
+}
diff --git a/test/tbb/test_task.cpp b/test/tbb/test_task.cpp
index 57ee5e535f..6c4f2dc443 100644
--- a/test/tbb/test_task.cpp
+++ b/test/tbb/test_task.cpp
@@ -18,12 +18,14 @@
 #include "common/utils.h"
 #include "common/spin_barrier.h"
 #include "common/utils_concurrency_limit.h"
+#include "common/cpu_usertime.h"

 #include "tbb/task.h"
 #include "tbb/task_group.h"
 #include "tbb/parallel_for.h"
 #include "tbb/cache_aligned_allocator.h"
 #include "tbb/global_control.h"
+#include "tbb/concurrent_vector.h"

 #include 
 #include 
@@ -110,6 +112,7 @@ void test_cancellation_on_exception( bool reset_ctx ) {
         }
         wait.reserve(1);
     }
+    wait.release(1);
     REQUIRE_MESSAGE(task.execute_counter() == (reset_ctx ? iter_counter : 1), "Some task was not executed");
     REQUIRE_MESSAGE(task.cancel_counter() == iter_counter, "Some task was not canceled after the exception occurs");

@@ -129,6 +132,8 @@ TEST_CASE("Test that task was executed p times") {
         wait.reserve(1);
     }

+    wait.release(1);
+
     REQUIRE_MESSAGE(CountingTask<>::execute_counter() == iter_counter, "The task was not executed necessary times");
     REQUIRE_MESSAGE(CountingTask<>::cancel_counter() == 0, "Some instance of the task was canceled");
     CountingTask<>::reset();
@@ -165,6 +170,7 @@ TEST_CASE("Simple test parallelism usage") {
         tbb::detail::d1::wait(wait, test_context);
         wait.reserve(threads_num);
     }
+    wait.release(threads_num);

     REQUIRE_MESSAGE(task_type::execute_counter() == iter_counter * threads_num, "Some task was not executed");
     REQUIRE_MESSAGE(task_type::cancel_counter() == 0, "Some task was canceled");
@@ -221,6 +227,7 @@ TEST_CASE("Test parallelism usage with parallel_for") {
         wait.reserve(task_threads_num);
     }

+    wait.release(task_threads_num);
     REQUIRE_MESSAGE(task_type::execute_counter() == task_threads_num * iter_count, "Some task was not executed");
     REQUIRE_MESSAGE(task_type::cancel_counter() == 0, "Some task was canceled");
@@ -263,6 +270,7 @@ TEST_CASE("Test parallelism usage with spawn tasks in different threads") {
         tbb::detail::d1::execute_and_wait(vector_test_task[threads_num - 1], test_context, wait, test_context);
         wait.reserve(threads_num);
     }
+    wait.release(threads_num);

     REQUIRE_MESSAGE(task_type::execute_counter() == iter_count * threads_num, "Some task was not executed");
     REQUIRE_MESSAGE(task_type::cancel_counter() == 0, "Some task was canceled");
@@ -372,8 +380,8 @@ TEST_CASE("Isolation + resumable tasks") {
         std::vector> test_task;
         tbb::detail::d1::wait_context wait(1);
         ++suspend_count;
-        tbb::task::suspend([&wait, &test_context, &test_task] (tbb::task::suspend_point tag) {
-            tbb::this_task_arena::isolate([&wait, &test_context, &test_task, &tag] {
+        tbb::this_task_arena::isolate([&wait, &test_context, &test_task] {
+            tbb::task::suspend([&wait, &test_context, &test_task] (tbb::task::suspend_point tag) {
                 test_task.emplace_back(tag, wait);
                 tbb::detail::d1::spawn(test_task[0], test_context);
             });
@@ -576,6 +584,7 @@ TEST_CASE("Stress testing") {
         tbb::detail::d1::wait(wait, test_context);
         wait.reserve(task_number);
     }
+    wait.release(task_number);
 });

@@ -583,6 +592,52 @@
     REQUIRE_MESSAGE(task_type::cancel_counter() == 0, "Some task was canceled");
 }

+//! \brief \ref error_guessing
+TEST_CASE("All workers sleep") {
+    std::size_t thread_number = utils::get_platform_max_threads();
+    tbb::concurrent_vector<tbb::task::suspend_point> suspend_points;
+
+    tbb::task_group test_gr;
+
+    utils::SpinBarrier barrier(thread_number);
+    auto resumable_task = [&] {
+        barrier.wait();
+        tbb::task::suspend([&] (tbb::task::suspend_point sp) {
+            suspend_points.push_back(sp);
+            barrier.wait();
+        });
+    };
+
+    for (std::size_t i = 0; i < thread_number - 1; ++i) {
+        test_gr.run(resumable_task);
+    }
+
+    barrier.wait();
+    barrier.wait();
+    TestCPUUserTime(thread_number);
+
+    for (auto sp : suspend_points)
+        tbb::task::resume(sp);
+    test_gr.wait();
+}
+
+//! \brief \ref error_guessing
+TEST_CASE("External threads sleep") {
+    if (utils::get_platform_max_threads() < 2) return;
+    utils::SpinBarrier barrier(2);
+
+    tbb::task_group test_gr;
+
+    test_gr.run([&] {
+        barrier.wait();
+        TestCPUUserTime(2);
+    });
+
+    barrier.wait();
+
+    test_gr.wait();
+}
+
 #endif // __TBB_RESUMABLE_TASKS

 //! \brief \ref error_guessing
@@ -644,6 +699,7 @@ TEST_CASE("Enqueue with exception") {
             wait.reserve(task_number);
         });
     }
+    wait.release(task_number);

     REQUIRE_MESSAGE(task_type::execute_counter() == task_number * iter_count, "Some task was not executed");
diff --git a/test/tbb/test_task_arena.cpp b/test/tbb/test_task_arena.cpp
index 6fc9c28330..e514196b28 100644
--- a/test/tbb/test_task_arena.cpp
+++ b/test/tbb/test_task_arena.cpp
@@ -20,18 +20,23 @@
 #include "common/spin_barrier.h"
 #include "common/utils.h"
 #include "common/utils_report.h"
+#include "common/utils_concurrency_limit.h"

 #include "tbb/task_arena.h"
 #include "tbb/task_scheduler_observer.h"
 #include "tbb/enumerable_thread_specific.h"
 #include "tbb/parallel_for.h"
 #include "tbb/global_control.h"
+#include "tbb/concurrent_set.h"
+#include "tbb/spin_mutex.h"
+#include "tbb/spin_rw_mutex.h"

 #include 
 #include 
 #include 
 #include 
 #include 
+#include <thread>

 //#include "harness_fp.h"

@@ -303,7 +308,7 @@ struct TestArenaEntryBody : FPModeContext {
     : FPModeContext(idx+i)
     , my_stage(s)
     , is_caught(false)
-#if TBB_USE_EXCEPTION
+#if TBB_USE_EXCEPTIONS
     , is_expected( (idx&(1<<i)) != 0 )

+#if TBB_USE_EXCEPTIONS
+void ExceptionInExecute() {
+    std::size_t thread_number = utils::get_platform_max_threads();
+    tbb::task_arena test_arena;
+    std::atomic<std::size_t> canceled_task{};
+
+    auto parallel_func = [&test_arena, &canceled_task] (std::size_t) {
+        for (std::size_t i = 0; i < 1000; ++i) {
+            try {
+                test_arena.execute([] {
+                    volatile bool suppress_unreachable_code_warning = true;
+                    if (suppress_unreachable_code_warning) {
+                        throw -1;
+                    }
+                });
+                FAIL("An exception should have been thrown.");
+            } catch (int) {
+                ++canceled_task;
+            } catch (...) {
+                FAIL("Wrong type of exception.");
+            }
+        }
+    };
+
+    utils::NativeParallelFor(thread_number, parallel_func);
+    CHECK(canceled_task == thread_number * 1000);
+}
+
+#endif // TBB_USE_EXCEPTIONS
+
+class simple_observer : public tbb::task_scheduler_observer {
+    static std::atomic<int> idx_counter;
+    int my_idx;
+    int myMaxConcurrency;   // concurrency of the associated arena
+    int myNumReservedSlots; // reserved slots in the associated arena
+    void on_scheduler_entry( bool is_worker ) override {
+        int current_index = tbb::this_task_arena::current_thread_index();
+        CHECK(current_index < (myMaxConcurrency > 1 ? myMaxConcurrency : 2));
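+        // Reserved slots are kept for external threads, so a worker must not
+        // occupy a slot index below myNumReservedSlots (checked below).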
+        if (is_worker) {
+            CHECK(current_index >= myNumReservedSlots);
+        }
+    }
+    void on_scheduler_exit( bool /*is_worker*/ ) override
+    {}
+public:
+    simple_observer(tbb::task_arena &a, int maxConcurrency, int numReservedSlots)
+        : tbb::task_scheduler_observer(a), my_idx(idx_counter++)
+        , myMaxConcurrency(maxConcurrency)
+        , myNumReservedSlots(numReservedSlots) {
+        observe(true);
+    }
+
+    friend bool operator<(const simple_observer& lhs, const simple_observer& rhs) {
+        return lhs.my_idx < rhs.my_idx;
+    }
+};
+
+std::atomic<int> simple_observer::idx_counter{};
+
+struct arena_handler {
+    enum arena_status {
+        alive,
+        deleting,
+        deleted
+    };
+
+    tbb::task_arena* arena;
+
+    std::atomic<arena_status> status{alive};
+    tbb::spin_rw_mutex arena_in_use{};
+
+    tbb::concurrent_set<simple_observer> observers;
+
+    arena_handler(tbb::task_arena* ptr) : arena(ptr)
+    {}
+
+    friend bool operator<(const arena_handler& lhs, const arena_handler& rhs) {
+        return lhs.arena < rhs.arena;
+    }
+};
+
+// TODO: Add observer operations
+void StressTestMixFunctionality() {
+    enum operation_type {
+        create_arena,
+        delete_arena,
+        attach_observer,
+        detach_observer,
+        arena_execute,
+        enqueue_task,
+        last_operation_marker
+    };
+
+    std::size_t operations_number = last_operation_marker;
+    std::size_t thread_number = utils::get_platform_max_threads();
+    utils::FastRandom<> operation_rnd(42);
+    tbb::spin_mutex random_operation_guard;
+
+    auto get_random_operation = [&operation_rnd, &random_operation_guard, operations_number] () {
+        tbb::spin_mutex::scoped_lock lock(random_operation_guard);
+        return static_cast<operation_type>(operation_rnd.get() % operations_number);
+    };
+
+    utils::FastRandom<> arena_rnd(42);
+    tbb::spin_mutex random_arena_guard;
+    auto get_random_arena = [&arena_rnd, &random_arena_guard] () {
+        tbb::spin_mutex::scoped_lock lock(random_arena_guard);
+        return arena_rnd.get();
+    };
+
+    tbb::concurrent_set<arena_handler> arenas_pool;
+
+    std::vector<std::thread> thread_pool;
+
+    utils::SpinBarrier thread_barrier(thread_number);
+    std::size_t max_operations = 100000;
+    std::atomic<std::size_t> curr_operation{};
+    auto thread_func = [&] () {
+        arenas_pool.emplace(new tbb::task_arena());
+        thread_barrier.wait();
+        while (curr_operation++ < max_operations) {
+            switch (get_random_operation()) {
+            case create_arena :
+            {
+                arenas_pool.emplace(new tbb::task_arena());
+                break;
+            }
+            case delete_arena :
+            {
+                auto curr_arena = arenas_pool.begin();
+                for (; curr_arena != arenas_pool.end(); ++curr_arena) {
+                    arena_handler::arena_status curr_status = arena_handler::alive;
+                    if (curr_arena->status.compare_exchange_strong(curr_status, arena_handler::deleting)) {
+                        break;
+                    }
+                }
+
+                if (curr_arena == arenas_pool.end()) break;
+
+                tbb::spin_rw_mutex::scoped_lock lock(curr_arena->arena_in_use, /*writer*/ true);
+
+                delete curr_arena->arena;
+                curr_arena->status.store(arena_handler::deleted);
+
+                break;
+            }
+            case attach_observer :
+            {
+                tbb::spin_rw_mutex::scoped_lock lock{};
+                auto curr_arena = arenas_pool.begin();
+                for (; curr_arena != arenas_pool.end(); ++curr_arena) {
+                    if (lock.try_acquire(curr_arena->arena_in_use, /*writer*/ false)) {
+                        if (curr_arena->status == arena_handler::alive) {
+                            break;
+                        } else {
+                            lock.release();
+                        }
+                    }
+                }
+
+                if (curr_arena == arenas_pool.end()) break;
+
+                {
+                    curr_arena->observers.emplace(*curr_arena->arena, thread_number, 1);
+                }
+
+                break;
+            }
+            case detach_observer :
+            {
+                auto arena_number = get_random_arena() % arenas_pool.size();
+                auto curr_arena = arenas_pool.begin();
+                std::advance(curr_arena, arena_number);
+
+                for (auto it = curr_arena->observers.begin(); it != curr_arena->observers.end(); ++it) {
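+                    // Deactivate the first observer that is still observing.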
+                    if (it->is_observing()) {
+                        it->observe(false);
+                        break;
+                    }
+                }
+
+                break;
+            }
+            case arena_execute :
+            {
+                tbb::spin_rw_mutex::scoped_lock lock{};
+                auto curr_arena = arenas_pool.begin();
+                for (; curr_arena != arenas_pool.end(); ++curr_arena) {
+                    if (lock.try_acquire(curr_arena->arena_in_use, /*writer*/ false)) {
+                        if (curr_arena->status == arena_handler::alive) {
+                            break;
+                        } else {
+                            lock.release();
+                        }
+                    }
+                }
+
+                if (curr_arena == arenas_pool.end()) break;
+
+                curr_arena->arena->execute([] () {
+                    tbb::parallel_for(tbb::blocked_range<int>(0, 10000), [] (tbb::blocked_range<int>&) {
+                        std::atomic<int> sum{};
+                        // Make some work
+                        for (; sum < 100; ++sum) ;
+                    });
+                });
+
+                break;
+            }
+            case enqueue_task :
+            {
+                tbb::spin_rw_mutex::scoped_lock lock{};
+                auto curr_arena = arenas_pool.begin();
+                for (; curr_arena != arenas_pool.end(); ++curr_arena) {
+                    if (lock.try_acquire(curr_arena->arena_in_use, /*writer*/ false)) {
+                        if (curr_arena->status == arena_handler::alive) {
+                            break;
+                        } else {
+                            lock.release();
+                        }
+                    }
+                }
+
+                if (curr_arena == arenas_pool.end()) break;
+
+                curr_arena->arena->enqueue([] {
+                    std::atomic<int> sum{};
+                    // Make some work
+                    for (; sum < 100000; ++sum) ;
+                });
+
+                break;
+            }
+            case last_operation_marker :
+                break;
+            }
+        }
+    };
+
+    for (std::size_t i = 0; i < thread_number - 1; ++i) {
+        thread_pool.emplace_back(thread_func);
+    }
+
+    thread_func();
+
+    for (std::size_t i = 0; i < thread_number - 1; ++i) {
+        if (thread_pool[i].joinable()) thread_pool[i].join();
+    }
+
+    for (auto& handler : arenas_pool) {
+        if (handler.status != arena_handler::deleted) delete handler.arena;
+    }
+}
+
+struct enqueue_test_helper {
+    enqueue_test_helper(tbb::task_arena& arena, tbb::enumerable_thread_specific<bool>& ets, std::atomic<std::size_t>& task_counter)
+        : my_arena(arena), my_ets(ets), my_task_counter(task_counter)
+    {}
+
+    enqueue_test_helper(const enqueue_test_helper& ef)
+        : my_arena(ef.my_arena), my_ets(ef.my_ets), my_task_counter(ef.my_task_counter)
+    {}
+
+    void operator() () const {
+        CHECK(my_ets.local());
+        if (my_task_counter++ < 100000) my_arena.enqueue(enqueue_test_helper(my_arena, my_ets, my_task_counter));
+        std::this_thread::yield();
+    }
+
+    tbb::task_arena& my_arena;
+    tbb::enumerable_thread_specific<bool>& my_ets;
+    std::atomic<std::size_t>& my_task_counter;
+};
+
 //--------------------------------------------------//

 //! Test for task arena in concurrent cases
@@ -1487,3 +1767,52 @@ TEST_CASE("Multiple waits") {
 TEST_CASE("Small stack size") {
     TestSmallStackSize();
 }
+
+#if TBB_USE_EXCEPTIONS
+//! \brief \ref requirement \ref stress
+TEST_CASE("Test for exceptions during execute.") {
+    ExceptionInExecute();
+}
+#endif // TBB_USE_EXCEPTIONS
+
+//! \brief \ref stress
+TEST_CASE("Stress test with mixing functionality") {
+    StressTestMixFunctionality();
+}
+
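+// The arena below is oversubscribed to twice the platform concurrency. A
+// barrier across 2 * num_threads threads is run inside it before and after a
+// stream of self-reposting enqueued tasks, so every worker slot has to be
+// filled in both phases.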
+//! \brief \ref stress
+TEST_CASE("Workers oversubscription") {
+    std::size_t num_threads = utils::get_platform_max_threads();
+    tbb::enumerable_thread_specific<bool> ets;
+    tbb::global_control gl(tbb::global_control::max_allowed_parallelism, num_threads * 2);
+    tbb::task_arena arena(num_threads * 2);
+
+    utils::SpinBarrier barrier(num_threads * 2);
+
+    arena.execute([&] {
+        tbb::parallel_for(std::size_t(0), num_threads * 2,
+            [&] (const std::size_t&) {
+                ets.local() = true;
+                barrier.wait();
+            }
+        );
+    });
+
+    std::this_thread::yield();
+
+    std::atomic<std::size_t> task_counter{0};
+    for (std::size_t i = 0; i < num_threads / 4 + 1; ++i) {
+        arena.enqueue(enqueue_test_helper(arena, ets, task_counter));
+    }
+
+    while (task_counter < 100000) std::this_thread::yield();
+
+    arena.execute([&] {
+        tbb::parallel_for(std::size_t(0), num_threads * 2,
+            [&] (const std::size_t&) {
+                CHECK(ets.local());
+                barrier.wait();
+            }
+        );
+    });
+}
diff --git a/test/tbb/test_task_group.cpp b/test/tbb/test_task_group.cpp
index 8140efa1a6..d81b7721a1 100644
--- a/test/tbb/test_task_group.cpp
+++ b/test/tbb/test_task_group.cpp
@@ -477,15 +477,15 @@ void LaunchChildrenWithFunctor () {
     bool exceptionCaught = false;
     try {
         status = g.wait();
-    }
-    catch ( TestException& e ) {
+    } catch ( TestException& e ) {
         CHECK_MESSAGE( e.what(), "Empty what() string" );
         CHECK_MESSAGE( strcmp(e.what(), EXCEPTION_DESCR1) == 0, "Unknown exception" );
         exceptionCaught = true;
         ++g_ExceptionCount;
     } catch( ... ) {
         CHECK_MESSAGE( false, "Unknown exception" );
     }
-    if (g_Throw && !exceptionCaught && status != tbb::canceled)
-        CHECK_MESSAGE( false, "No exception in the child task group" );
+    if (g_Throw && !exceptionCaught && status != tbb::canceled) {
+        CHECK_MESSAGE(false, "No exception in the child task group");
+    }
     if ( g_Rethrow && g_ExceptionCount > SKIP_GROUPS ) {
         throw test_exception(EXCEPTION_DESCR2);
     }
@@ -515,7 +515,7 @@ void TestManualCancellationWithFunctor () {
     CHECK_MESSAGE( g_TaskCount <= g_ExecutedAtCancellation + utils::ConcurrencyTracker::PeakParallelism(), "Too many tasks survived cancellation" );
 }

-#if TBB_USE_EXCEPTION
+#if TBB_USE_EXCEPTIONS
 template<typename task_group_type>
 void TestExceptionHandling1 () {
     ResetGlobals( true, false );
@@ -555,6 +555,23 @@ void TestExceptionHandling2 () {
     CHECK_MESSAGE( g_ExceptionCount < NUM_GROUPS - SKIP_GROUPS, "None of the child groups was cancelled" );
 }

+template<typename task_group_type>
+void TestExceptionHandling3() {
+    task_group_type tg;
+    try {
+        tg.run_and_wait([]() {
+            volatile bool suppress_unreachable_code_warning = true;
+            if (suppress_unreachable_code_warning) {
+                throw 1;
+            }
+        });
+    } catch (int error) {
+        CHECK(error == 1);
+    } catch ( ... ) {
+        CHECK_MESSAGE( false, "Unexpected exception" );
+    }
+}
+
 template<typename task_group_type>
 class LaunchChildrenDriver {
 public:
@@ -584,8 +601,10 @@ void TestMissingWait () {
     try {
         task_group_type tg;
         driver.Launch( tg );
-        if ( Throw )
+        volatile bool suppress_unreachable_code_warning = Throw;
+        if (suppress_unreachable_code_warning) {
             throw int(); // Initiate stack unwinding
+        }
     }
     catch ( const tbb::missing_wait& e ) {
         CHECK_MESSAGE( e.what(), "Error message is absent" );
@@ -608,9 +627,10 @@ template<typename task_group_type>
 void RunCancellationAndExceptionHandlingTests() {
     TestManualCancellationWithFunctor<task_group_type>();
-#if TBB_USE_EXCEPTION
+#if TBB_USE_EXCEPTIONS
     TestExceptionHandling1<task_group_type>();
     TestExceptionHandling2<task_group_type>();
+    TestExceptionHandling3<task_group_type>();
     TestMissingWait<task_group_type, true>();
     TestMissingWait<task_group_type, false>();
 #endif
@@ -767,7 +787,7 @@ TEST_CASE("Fibonacci test for the task group") {

 //! Cancellation and exception test for the task group
 //! \brief \ref interface \ref requirement
 TEST_CASE("Cancellation and exception test for the task group") {
-    for (unsigned p=MinThread; p <= MaxThread; ++p) {
+    for (unsigned p = MinThread; p <= MaxThread; ++p) {
         tbb::global_control limit(tbb::global_control::max_allowed_parallelism, p);
         g_MaxConcurrency = p;
         RunCancellationAndExceptionHandlingTests<tbb::task_group>();
@@ -944,3 +964,33 @@ TEST_CASE("Test for stack overflow avoidance mechanism within arena") {
     });
     CHECK(tasks_executed == 10000 + second_thread_executed);
 }
+
+//! Checks that work can be submitted to a task_group concurrently with waiting on it.
+//! \brief \ref regression
+TEST_CASE("Async task group") {
+    int num_threads = tbb::this_task_arena::max_concurrency();
+    tbb::task_arena a(2 * num_threads, num_threads);
+    utils::SpinBarrier barrier(num_threads + 2);
+    tbb::task_group tg[2];
+    std::atomic<bool> finished[2]{};
+    finished[0] = false; finished[1] = false;
+    for (int i = 0; i < 2; ++i) {
+        a.enqueue([i, &tg, &finished, &barrier] {
+            barrier.wait();
+            for (int j = 0; j < 10000; ++j) {
+                tg[i].run([] {});
+                std::this_thread::yield();
+            }
+            finished[i] = true;
+        });
+    }
+    utils::NativeParallelFor(num_threads, [&](int idx) {
+        barrier.wait();
+        a.execute([idx, &tg, &finished] {
+            while (!finished[idx % 2]) {
+                tg[idx % 2].wait();
+            }
+            tg[idx % 2].wait();
+        });
+    });
+}
diff --git a/test/tbb/test_tbb_header.cpp b/test/tbb/test_tbb_header.cpp
index 76bf7cf09c..1317861c20 100644
--- a/test/tbb/test_tbb_header.cpp
+++ b/test/tbb/test_tbb_header.cpp
@@ -32,7 +32,6 @@
 #define TBB_PREVIEW_VARIADIC_PARALLEL_INVOKE 1
 #define TBB_PREVIEW_BLOCKED_RANGE_ND 1
 #define TBB_PREVIEW_ISOLATED_TASK_GROUP 1
-#define TBB_PREVIEW_NUMA_SUPPORT 1
 #endif

 #if __TBB_TEST_SECONDARY
diff --git a/test/tbbmalloc/test_malloc_overload.cpp b/test/tbbmalloc/test_malloc_overload.cpp
index cf8da63afa..d04f475afa 100644
--- a/test/tbbmalloc/test_malloc_overload.cpp
+++ b/test/tbbmalloc/test_malloc_overload.cpp
@@ -127,7 +127,7 @@ using namespace std;
 #endif

 #include "tbb/detail/_utils.h" // tbb::detail::is_aligned
-#include "../src/tbbmalloc/shared_utils.h" // alignDown, alignUp, estimatedCacheLineSize
+#include "src/tbbmalloc/shared_utils.h" // alignDown, alignUp, estimatedCacheLineSize

 /* start of code replicated from src/tbbmalloc */