include/oneapi/tbb/detail/_flow_graph_node_impl.h

/*
    Copyright (c) 2005-2025 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB__flow_graph_node_impl_H
#define __TBB__flow_graph_node_impl_H

#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif

#include "_flow_graph_item_buffer_impl.h"

//! Queue-like facade over item_buffer<T,A>.
//  Exposes only the handful of operations function_input_base needs
//  (empty/front/pop/push) and forwards each one to the underlying buffer.
template< typename T, typename A >
class function_input_queue : public item_buffer<T,A> {
    // Shorthand for the dependent base; used for explicitly qualified calls below.
    typedef item_buffer<T, A> buffer_base;

public:
    //! Reports whether the underlying buffer currently holds no items.
    bool empty() const { return this->buffer_empty(); }

    //! Read-only access to the item at the front of the underlying buffer.
    const T& front() const { return this->buffer_base::front(); }

#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
    //! Read-only access to the metainfo stored alongside the front item.
    const message_metainfo& front_metainfo() const { return this->buffer_base::front_metainfo(); }
#endif

    //! Discards the front item.
    void pop() { this->destroy_front(); }

    //! Appends t to the buffer; the result is whatever push_back reports.
    bool push( T& t ) { return this->push_back( t ); }

#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
    //! Appends t together with its metainfo; the result is whatever push_back reports.
    bool push( T& t, const message_metainfo& metainfo ) { return this->push_back(t, metainfo); }
#endif
};

//! Input and scheduling for a function node that takes a type Input as input
//  The only up-ref is apply_body_impl_bypass (reached through a static_cast to
//  ImplType in apply_body_bypass), which should implement the function call and
//  any handling of the result.
//
//  Requests that touch the scheduling state (my_concurrency, my_queue,
//  my_predecessors, forwarder_busy) are packaged as operation_type objects and
//  submitted to my_aggregator, whose handler is handle_operations().
//  reset_function_input_base() and the destructor touch that state directly.
template< typename Input, typename Policy, typename A, typename ImplType >
class function_input_base : public receiver<Input>, no_assign {
    // Operation codes stored in operation_type::type and dispatched by handle_operations().
    enum op_type {reg_pred, rem_pred, try_fwd, tryput_bypass, app_body_bypass, occupy_concurrency
    };
    typedef function_input_base<Input, Policy, A, ImplType> class_type;

public:

    //! The input type of this receiver
    typedef Input input_type;
    typedef typename receiver<input_type>::predecessor_type predecessor_type;
    typedef predecessor_cache<input_type, null_mutex > predecessor_cache_type;
    typedef function_input_queue<input_type, A> input_queue_type;
    typedef typename allocator_traits<A>::template rebind_alloc<input_queue_type> allocator_type;
    // A Policy may not request both queueing and rejecting at the same time.
    static_assert(!has_policy<queueing, Policy>::value || !has_policy<rejecting, Policy>::value, "");

    //! Constructor for function_input_base
    //  max_concurrency is the limit my_concurrency is compared against; the value 0 is
    //  special-cased in try_put_task_impl, which then never goes through the aggregator.
    //  is_no_throw gates the lightweight path in try_put_task_base.
    //  The input queue is allocated only when Policy does not include rejecting;
    //  otherwise my_queue stays null and inputs are pulled from my_predecessors.
    function_input_base( graph &g, size_t max_concurrency, node_priority_t a_priority, bool is_no_throw )
        : my_graph_ref(g), my_max_concurrency(max_concurrency)
        , my_concurrency(0), my_priority(a_priority), my_is_no_throw(is_no_throw)
        , my_queue(!has_policy<rejecting, Policy>::value ? new input_queue_type() : nullptr)
        , my_predecessors(this)
        , forwarder_busy(false)
    {
        my_aggregator.initialize_handler(handler_type(this));
    }

    //! Copy constructor
    //  Delegates to the main constructor, so only the graph reference, concurrency limit,
    //  priority and no-throw flag are taken from src. The copy starts with a fresh queue,
    //  a zero concurrency count and its own predecessor cache.
    function_input_base( const function_input_base& src )
        : function_input_base(src.my_graph_ref, src.my_max_concurrency, src.my_priority, src.my_is_no_throw) {}

    //! Destructor
    // Releases the queue allocated in the constructor (null when the policy is rejecting).
    // TODO: pass the graph_buffer_policy to the base so it can allocate the queue instead.
    // This would be an interface-breaking change.
    virtual ~function_input_base() {
        delete my_queue;
        my_queue = nullptr;
    }

    //! Offers t to this node; forwards to try_put_task_base.
    //  With the try_put_and_wait preview an empty message_metainfo is supplied
    //  (presumably __TBB_FLOW_GRAPH_METAINFO_ARG drops its argument otherwise - defined elsewhere).
    graph_task* try_put_task( const input_type& t) override {
        return try_put_task_base(t __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo{}));
    }

#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
    //! Offers t together with caller-provided metainfo; forwards to try_put_task_base.
    graph_task* try_put_task( const input_type& t, const message_metainfo& metainfo ) override {
        return try_put_task_base(t, metainfo);
    }
#endif // __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT

    //! Adds src to the list of cached predecessors.
    //  Submitted as a reg_pred operation; the handler may also spawn a forwarder task.
    //  Always returns true.
    bool register_predecessor( predecessor_type &src ) override {
        operation_type op_data(reg_pred);
        op_data.r = &src;
        my_aggregator.execute(&op_data);
        return true;
    }

    //! Removes src from the list of cached predecessors.
    //  Submitted as a rem_pred operation. Always returns true.
    bool remove_predecessor( predecessor_type &src ) override {
        operation_type op_data(rem_pred);
        op_data.r = &src;
        my_aggregator.execute(&op_data);
        return true;
    }

protected:

    //! Returns the scheduling state to its initial condition.
    //  Zeroes the concurrency count, resets the queue if there is one, resets or clears
    //  the predecessor cache according to f, and clears forwarder_busy.
    //  Writes the state directly rather than through the aggregator.
    void reset_function_input_base( reset_flags f) {
        my_concurrency = 0;
        if(my_queue) {
            my_queue->reset();
        }
        reset_receiver(f);
        forwarder_busy = false;
    }

    // Graph this node belongs to.
    graph& my_graph_ref;
    // Concurrency limit; 0 is treated by try_put_task_impl as "no limit bookkeeping".
    const size_t my_max_concurrency;
    // Number of body executions currently accounted for.
    size_t my_concurrency;
    // Priority handed to every task this node creates.
    node_priority_t my_priority;
    // Value supplied by the derived class; enables the lightweight path when true.
    const bool my_is_no_throw;
    // Buffer for inputs that arrive at the concurrency limit; null for the rejecting policy.
    input_queue_type *my_queue;
    // Predecessors to pull from when there is no queue (see perform_queued_requests).
    predecessor_cache<input_type, null_mutex > my_predecessors;

    //! Clears the predecessor cache when rf_clear_edges is set, otherwise resets it.
    void reset_receiver( reset_flags f) {
        if( f & rf_clear_edges) my_predecessors.clear();
        else
            my_predecessors.reset();
        __TBB_ASSERT(!(f & rf_clear_edges) || my_predecessors.empty(), "function_input_base reset failed");
    }

    graph& graph_reference() const override {
        return my_graph_ref;
    }

    //! Signals that one body execution finished and asks for follow-up work.
    //  The app_body_bypass handler decrements my_concurrency and, if the limit allows,
    //  returns a task for a queued or pulled input; returns nullptr when there is none.
    //  The value of i itself is not read by that handler.
    graph_task* try_get_postponed_task(const input_type& i) {
        operation_type op_data(i, app_body_bypass);  // tries to pop an item or get_item
        my_aggregator.execute(&op_data);
        return op_data.bypass_t;
    }

private:

    friend class apply_body_task_bypass< class_type, input_type >;
#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
    friend class apply_body_task_bypass< class_type, input_type, trackable_messages_graph_task >;
#endif
    friend class forward_task_bypass< class_type >;

    //! One request submitted to my_aggregator.
    //  Holds non-owning pointers into the submitter's arguments, so an operation must
    //  not outlive the call that created it.
    class operation_type : public d1::aggregated_operation< operation_type > {
    public:
        // An op_type value narrowed to char.
        char type;
        // Payload: the input for put-style operations, the predecessor for reg_pred/rem_pred.
        union {
            input_type *elem;
            predecessor_type *r;
        };
        // Result slot: a task to run, SUCCESSFULLY_ENQUEUED, or nullptr.
        graph_task* bypass_t;
#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
        // Metainfo accompanying elem; only dereferenced by internal_try_put_task.
        message_metainfo* metainfo;
#endif
        // The const_cast only allows storage in the non-const union member.
        operation_type(const input_type& e, op_type t) :
            type(char(t)), elem(const_cast<input_type*>(&e)), bypass_t(nullptr)
#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
            , metainfo(nullptr)
#endif
        {}
#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
        operation_type(const input_type& e, op_type t, const message_metainfo& info) :
            type(char(t)), elem(const_cast<input_type*>(&e)), bypass_t(nullptr),
            metainfo(const_cast<message_metainfo*>(&info)) {}
#endif
        // NOTE(review): with the preview macro enabled this constructor leaves metainfo
        // uninitialized. The operations built with it in this class (reg_pred, rem_pred,
        // try_fwd) have handlers that do not read metainfo.
        operation_type(op_type t) : type(char(t)), r(nullptr), bypass_t(nullptr) {}
    };

    // True while a forwarder task is considered outstanding: set when one is requested
    // in the reg_pred handler, cleared when internal_forward finds no more work.
    bool forwarder_busy;
    typedef d1::aggregating_functor<class_type, operation_type> handler_type;
    friend class d1::aggregating_functor<class_type, operation_type>;
    d1::aggregator< handler_type, operation_type > my_aggregator;

    //! Creates a body task for one pending input, if any.
    //  With a queue: takes the front item (and its metainfo under the preview) and pops it.
    //  Without a queue: tries to pull one item from my_predecessors.
    //  my_concurrency is incremented before create_body_task is called and is not
    //  rolled back here if that call returns nullptr (it does so when the graph is inactive).
    //  Callers check the concurrency limit before calling.
    graph_task* perform_queued_requests() {
        graph_task* new_task = nullptr;
        if(my_queue) {
            if(!my_queue->empty()) {
                ++my_concurrency;
                // TODO: consider removing metainfo from the queue using move semantics to avoid
                // ref counter increase
                new_task = create_body_task(my_queue->front()
                                            __TBB_FLOW_GRAPH_METAINFO_ARG(my_queue->front_metainfo()));

                // Popped only after the task has been built from the front item.
                my_queue->pop();
            }
        }
        else {
            input_type i;
#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
            message_metainfo metainfo;
#endif
            if(my_predecessors.get_item(i __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo))) {
                ++my_concurrency;
                new_task = create_body_task(i __TBB_FLOW_GRAPH_METAINFO_ARG(std::move(metainfo)));
            }
        }
        return new_task;
    }
    //! Aggregator handler: processes a linked list of pending operations in order.
    //  Each operation's status is stored with release ordering once its result fields
    //  are filled in.
    void handle_operations(operation_type *op_list) {
        operation_type* tmp;
        while (op_list) {
            tmp = op_list;
            // The link is read before the operation is processed and its status published.
            // presumably the submitter may reclaim the operation once status is set - TODO confirm
            op_list = op_list->next;
            switch (tmp->type) {
            case reg_pred:
                my_predecessors.add(*(tmp->r));
                tmp->status.store(SUCCEEDED, std::memory_order_release);
                // Request a forwarder unless one is already marked outstanding.
                if (!forwarder_busy) {
                    forwarder_busy = true;
                    spawn_forward_task();
                }
                break;
            case rem_pred:
                my_predecessors.remove(*(tmp->r));
                tmp->status.store(SUCCEEDED, std::memory_order_release);
                break;
            case app_body_bypass: {
                // A body finished: free its slot, then try to hand the caller more work.
                tmp->bypass_t = nullptr;
                __TBB_ASSERT(my_max_concurrency != 0, nullptr);
                --my_concurrency;
                if(my_concurrency<my_max_concurrency)
                    tmp->bypass_t = perform_queued_requests();
                tmp->status.store(SUCCEEDED, std::memory_order_release);
            }
                break;
            case tryput_bypass: internal_try_put_task(tmp);  break;
            case try_fwd: internal_forward(tmp);  break;
            case occupy_concurrency:
                // Reserve a concurrency slot without creating a task (lightweight path).
                if (my_concurrency < my_max_concurrency) {
                    ++my_concurrency;
                    tmp->status.store(SUCCEEDED, std::memory_order_release);
                } else {
                    tmp->status.store(FAILED, std::memory_order_release);
                }
                break;
            }
        }
    }

    //! Put to the node, but return the task instead of enqueueing it
    //  Outcomes written to op:
    //   - below the limit: bypass_t = result of create_body_task, status SUCCEEDED;
    //   - at the limit with a queue that accepts the item: bypass_t = SUCCESSFULLY_ENQUEUED,
    //     status SUCCEEDED;
    //   - otherwise: bypass_t = nullptr, status FAILED.
    void internal_try_put_task(operation_type *op) {
        __TBB_ASSERT(my_max_concurrency != 0, nullptr);
        if (my_concurrency < my_max_concurrency) {
            ++my_concurrency;
            graph_task* new_task = create_body_task(*(op->elem)
                                                    __TBB_FLOW_GRAPH_METAINFO_ARG(*(op->metainfo)));
            op->bypass_t = new_task;
            op->status.store(SUCCEEDED, std::memory_order_release);
        } else if ( my_queue && my_queue->push(*(op->elem)
                    __TBB_FLOW_GRAPH_METAINFO_ARG(*(op->metainfo))) )
        {
            op->bypass_t = SUCCESSFULLY_ENQUEUED;
            op->status.store(SUCCEEDED, std::memory_order_release);
        } else {
            op->bypass_t = nullptr;
            op->status.store(FAILED, std::memory_order_release);
        }
    }

    //! Creates tasks for postponed messages if available and if concurrency allows
    //  On failure (no task produced) forwarder_busy is cleared, which ends the loop in
    //  forward_task() and lets a later reg_pred request a new forwarder.
    void internal_forward(operation_type *op) {
        op->bypass_t = nullptr;
        if (my_concurrency < my_max_concurrency)
            op->bypass_t = perform_queued_requests();
        if(op->bypass_t)
            op->status.store(SUCCEEDED, std::memory_order_release);
        else {
            forwarder_busy = false;
            op->status.store(FAILED, std::memory_order_release);
        }
    }

    //! Submits a tryput_bypass operation for t.
    //  Returns the operation's bypass_t on success and nullptr when it failed.
    graph_task* internal_try_put_bypass( const input_type& t
                                         __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo))
    {
        operation_type op_data(t, tryput_bypass __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo));
        my_aggregator.execute(&op_data);
        if( op_data.status == SUCCEEDED ) {
            return op_data.bypass_t;
        }
        return nullptr;
    }

    //! Chooses between the lightweight and the task-based overload of try_put_task_impl.
    //  The lightweight overload is selected only when my_is_no_throw is true and Policy
    //  includes lightweight; in every other case the task-based overload is used.
    graph_task* try_put_task_base(const input_type& t
                                  __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo))
    {
        if ( my_is_no_throw )
            return try_put_task_impl(t, has_policy<lightweight, Policy>()
                                     __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo));
        else
            return try_put_task_impl(t, std::false_type()
                                     __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo));
    }

    //! Lightweight put: runs the body on the calling thread when a slot is available.
    //  With max_concurrency == 0 the body is applied immediately. Otherwise a slot is
    //  reserved through an occupy_concurrency operation; if that fails the input goes
    //  through the regular tryput_bypass path.
    graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::true_type
                                   __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo))
    {
        if( my_max_concurrency == 0 ) {
            return apply_body_bypass(t __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo));
        } else {
            operation_type check_op(t, occupy_concurrency);
            my_aggregator.execute(&check_op);
            if( check_op.status == SUCCEEDED ) {
                return apply_body_bypass(t __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo));
            }
            return internal_try_put_bypass(t __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo));
        }
    }

    //! Task-based put: with max_concurrency == 0 a body task is created directly,
    //  otherwise the input goes through the tryput_bypass operation.
    graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::false_type
                                   __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo))
    {
        if( my_max_concurrency == 0 ) {
            return create_body_task(t __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo));
        } else {
            return internal_try_put_bypass(t __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo));
        }
    }

    //! Applies the body to the provided input
    //  then decides if more work is available
    //  Both steps are implemented by ImplType::apply_body_impl_bypass.
    graph_task* apply_body_bypass( const input_type &i
                                   __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo))

    {
        return static_cast<ImplType *>(this)->apply_body_impl_bypass(i __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo));
    }

    //! allocates a task to apply a body
    //  Returns nullptr when the graph is not active. Under the preview, a non-empty
    //  metainfo selects the trackable task type and is forwarded into it.
#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
    template <typename Metainfo>
    graph_task* create_body_task( const input_type &input, Metainfo&& metainfo )
#else
    graph_task* create_body_task( const input_type &input )
#endif
    {
        if (!is_graph_active(my_graph_ref)) {
            return nullptr;
        }
        // TODO revamp: extract helper for common graph task allocation part
        d1::small_object_allocator allocator{};
        graph_task* t = nullptr;
#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
        if (!metainfo.empty()) {
            using task_type = apply_body_task_bypass<class_type, input_type, trackable_messages_graph_task>;
            t = allocator.new_object<task_type>(my_graph_ref, allocator, *this, input, my_priority, std::forward<Metainfo>(metainfo));
        } else
#endif
        {
            using task_type = apply_body_task_bypass<class_type, input_type>;
            t = allocator.new_object<task_type>(my_graph_ref, allocator, *this, input, my_priority);
        }
        return t;
    }

    //! This is executed by an enqueued task, the "forwarder"
    //  Repeats try_fwd operations until one fails, folding every produced task into
    //  rval with combine_tasks. The same operation object is reused, so its status is
    //  reset to WAIT before each submission.
    graph_task* forward_task() {
        operation_type op_data(try_fwd);
        graph_task* rval = nullptr;
        do {
            op_data.status = WAIT;
            my_aggregator.execute(&op_data);
            if(op_data.status == SUCCEEDED) {
                graph_task* ttask = op_data.bypass_t;
                __TBB_ASSERT( ttask && ttask != SUCCESSFULLY_ENQUEUED, nullptr);
                rval = combine_tasks(my_graph_ref, rval, ttask);
            }
        } while (op_data.status == SUCCEEDED);
        return rval;
    }

    //! Allocates a forwarder task for this node; nullptr when the graph is not active.
    inline graph_task* create_forward_task() {
        if (!is_graph_active(my_graph_ref)) {
            return nullptr;
        }
        d1::small_object_allocator allocator{};
        typedef forward_task_bypass<class_type> task_type;
        graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, my_priority );
        return t;
    }

    //! Spawns a task that calls forward()
    //  Nothing is spawned when create_forward_task returns nullptr.
    //  NOTE(review): the reg_pred handler sets forwarder_busy before calling this, and
    //  nothing here clears it when no task is created - confirm this is intended.
    inline void spawn_forward_task() {
        graph_task* tp = create_forward_task();
        if(tp) {
            spawn_in_graph_arena(graph_reference(), *tp);
        }
    }

    node_priority_t priority() const override { return my_priority; }
};  // function_input_base

//! Implements methods for a function node that takes a type Input as input and sends
//  a type Output to its successors.
//  Owns two copies of the user body: my_body, which is executed, and my_init_body,
//  which is never executed here and serves as the source for copies and resets.
template< typename Input, typename Output, typename Policy, typename A>
class function_input : public function_input_base<Input, Policy, A, function_input<Input,Output,Policy,A> > {
public:
    typedef Input input_type;
    typedef Output output_type;
    typedef function_body<input_type, output_type> function_body_type;
    typedef function_input<Input, Output, Policy,A> my_class;
    typedef function_input_base<Input, Policy, A, my_class> base_type;
    typedef function_input_queue<input_type, A> input_queue_type;

    // constructor
    // The is_no_throw flag passed to the base is computed with noexcept() on an
    // invocation of body with a default-constructed input; the operand is unevaluated.
    // body is copied twice, once into each function_body_leaf.
    template<typename Body>
    function_input(
        graph &g, size_t max_concurrency, Body& body, node_priority_t a_priority )
      : base_type(g, max_concurrency, a_priority, noexcept(tbb::detail::invoke(body, input_type())))
      , my_body( new function_body_leaf< input_type, output_type, Body>(body) )
      , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) {
    }

    //! Copy constructor
    //  Both bodies are cloned from src.my_init_body, not from src.my_body.
    function_input( const function_input& src ) :
        base_type(src),
        my_body( src.my_init_body->clone() ),
        my_init_body(src.my_init_body->clone() ) {
    }
#if __INTEL_COMPILER <= 2021
    // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited
    // class while the parent class has the virtual keyword for the destructor.
    // NOTE(review): an undefined __INTEL_COMPILER evaluates to 0 in #if, so this branch is
    // also taken on non-Intel compilers. The base destructor is already virtual, so the
    // extra keyword does not change behavior.
    virtual
#endif
    ~function_input() {
        delete my_body;
        delete my_init_body;
    }

    //! Returns a copy of the user body currently held in my_body.
    //  Body must be the exact type the node was constructed with; the reference
    //  dynamic_cast fails (by throwing) for any other type.
    template< typename Body >
    Body copy_function_object() {
        function_body_type &body_ref = *this->my_body;
        return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body();
    }

    //! Runs the body on i and returns its result; the body call is bracketed by
    //  fgt_begin_body / fgt_end_body.
    output_type apply_body_impl( const input_type& i) {
        // An extra copy is needed to capture the
        // body execution without the try_put
        fgt_begin_body( my_body );
        output_type v = tbb::detail::invoke(*my_body, i);
        fgt_end_body( my_body );
        return v;
    }

    //TODO: consider moving into the base class
    //! Runs the body, releases this node's concurrency slot, and forwards the result.
    //  Returns whatever successors().try_put_task yields; for a lightweight policy a
    //  null result is replaced by SUCCESSFULLY_ENQUEUED.
    graph_task* apply_body_impl_bypass( const input_type &i
                                        __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo))
    {
        output_type v = apply_body_impl(i);
        graph_task* postponed_task = nullptr;
        // With max_concurrency == 0 no slot was taken, so there is nothing to release.
        if( base_type::my_max_concurrency != 0 ) {
            postponed_task = base_type::try_get_postponed_task(i);
            __TBB_ASSERT( !postponed_task || postponed_task != SUCCESSFULLY_ENQUEUED, nullptr);
        }
        if( postponed_task ) {
            // make the task available for other workers since we do not know successors'
            // execution policy
            spawn_in_graph_arena(base_type::graph_reference(), *postponed_task);
        }
        // The postponed task is spawned before the result is offered to the successors.
        graph_task* successor_task = successors().try_put_task(v __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo));
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (push)
#pragma warning (disable: 4127)  /* suppress conditional expression is constant */
#endif
        if(has_policy<lightweight, Policy>::value) {
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (pop)
#endif
            if(!successor_task) {
                // Return confirmative status since current
                // node's body has been executed anyway
                successor_task = SUCCESSFULLY_ENQUEUED;
            }
        }
        return successor_task;
    }

protected:

    //! Resets the base state; with rf_reset_bodies also replaces my_body by a fresh
    //  clone of my_init_body. The clone is created before the old body is deleted.
    void reset_function_input(reset_flags f) {
        base_type::reset_function_input_base(f);
        if(f & rf_reset_bodies) {
            function_body_type *tmp = my_init_body->clone();
            delete my_body;
            my_body = tmp;
        }
    }

    // Body that is executed; owned.
    function_body_type *my_body;
    // Copy of the body as originally supplied; owned.
    function_body_type *my_init_body;
    // Provided by the derived node: the cache of successors that receive the output.
    virtual broadcast_cache<output_type > &successors() = 0;

};  // function_input


// helper templates to clear the successor edges of the output ports of a multifunction_node
//! Recursive step: handles tuple element N-1, then delegates elements N-2..0
//  to clear_element<N-1>.
template<int N> struct clear_element {
    //! Clears the successors of every port in ports, highest index first.
    template<typename PortTuple> static void clear_this(PortTuple &ports) {
        auto &current_port = std::get<N-1>(ports);
        (void)current_port.successors().clear();
        clear_element<N-1>::clear_this(ports);
    }
#if TBB_USE_ASSERT
    //! True only if the successors of ports N-1..0 all report empty.
    //  Lower-indexed ports are not inspected once a non-empty one is found.
    template<typename PortTuple> static bool this_empty(PortTuple &ports) {
        return std::get<N-1>(ports).successors().empty()
            && clear_element<N-1>::this_empty(ports);
    }
#endif
};

//! Recursion terminator: only tuple element 0 remains.
template<> struct clear_element<1> {
    //! Clears the successors of the first port.
    template<typename PortTuple> static void clear_this(PortTuple &ports) {
        auto &first_port = std::get<0>(ports);
        (void)first_port.successors().clear();
    }
#if TBB_USE_ASSERT
    //! True if the successors of the first port report empty.
    template<typename PortTuple> static bool this_empty(PortTuple &ports) {
        auto &first_port = std::get<0>(ports);
        return first_port.successors().empty();
    }
#endif
};

//! Builds an OutputTuple whose elements are each constructed from the graph.
template <typename OutputTuple>
struct init_output_ports {
    //! The tuple argument is never read; it only lets the compiler deduce the
    //  element types so that each one can be constructed as PortType(g).
    template <typename... PortTypes>
    static OutputTuple call(graph& g, const std::tuple<PortTypes...>& /*types_only*/) {
        return OutputTuple(PortTypes(g)...);
    }
}; // struct init_output_ports

#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT

//! Move-only holder of a message_metainfo that keeps the referenced waiters reserved.
//  Constructing from a metainfo or merging another tag calls reserve() on each added
//  waiter; reset() (also run by the destructor and by move assignment) calls release()
//  on every waiter currently held and then empties the stored metainfo.
class metainfo_tag_type {
public:
    //! Creates a tag that holds a default-constructed metainfo.
    metainfo_tag_type() = default;

    metainfo_tag_type(const metainfo_tag_type&) = delete;

    //! Takes over other's metainfo without additional reserve() calls.
    //  The mutex is not moved; the new object gets its own.
    //  NOTE(review): other's destructor still runs reset() on whatever the move left
    //  behind, so this relies on a moved-from message_metainfo reporting no waiters -
    //  message_metainfo is defined elsewhere, confirm.
    metainfo_tag_type(metainfo_tag_type&& other)
        : my_metainfo(std::move(other.my_metainfo)) {}

    //! Copies metainfo and reserves every waiter in the copy.
    //  Not explicit, so a message_metainfo converts implicitly to a tag.
    metainfo_tag_type(const message_metainfo& metainfo) : my_metainfo(metainfo) {
        for (auto waiter : my_metainfo.waiters()) {
            waiter->reserve();
        }
    }

    metainfo_tag_type& operator=(const metainfo_tag_type&) = delete;
    //! Releases the waiters currently held, then takes over other's metainfo.
    //  The lock is held only inside reset(); the assignment itself runs unlocked
    //  and other's mutex is never taken.
    metainfo_tag_type& operator=(metainfo_tag_type&& other) {
        // TODO: should this method be thread-safe?
        if (this != &other) {
            reset();
            my_metainfo = std::move(other.my_metainfo);
        }
        return *this;
    }

    //! Releases every waiter still held.
    ~metainfo_tag_type() {
        reset();
    }

    //! Adds other_tag's waiters to this tag, reserving each of them first.
    //  other_tag keeps its own waiters and reservations. Only this object's mutex
    //  is locked; other_tag is read without locking.
    void merge(const metainfo_tag_type& other_tag) {
        tbb::spin_mutex::scoped_lock lock(my_mutex);

        // TODO: add comment
        // The waiters are reserved before they are merged into my_metainfo.
        for (auto waiter : other_tag.my_metainfo.waiters()) {
            waiter->reserve();
        }
        my_metainfo.merge(other_tag.my_metainfo);
    }

    //! Releases every held waiter and replaces the metainfo with an empty one,
    //  all under my_mutex.
    void reset() {
        tbb::spin_mutex::scoped_lock lock(my_mutex);

        for (auto waiter : my_metainfo.waiters()) {
            waiter->release();
        }
        my_metainfo = message_metainfo{};
    }
private:
    // Grants read access to my_metainfo.
    friend struct metainfo_tag_accessor;

    message_metainfo my_metainfo;
    // Taken by merge() and reset().
    tbb::spin_mutex  my_mutex;
};

//! Friend of metainfo_tag_type that exposes its stored metainfo read-only.
struct metainfo_tag_accessor {
    //! Returns a reference to the metainfo held inside tag; no copy is made and
    //  tag's mutex is not taken.
    static const message_metainfo& get_metainfo(const metainfo_tag_type& tag) {
        const message_metainfo& stored_metainfo = tag.my_metainfo;
        return stored_metainfo;
    }
};

#endif

//! Implements methods for a function node that takes a type Input as input
//  and has a tuple of output ports specified.
//  The body receives the input and the tuple of output ports and puts to the ports
//  itself; this class does not forward any result. As in function_input, my_body is
//  the executed copy and my_init_body the source for copies and resets.
template< typename Input, typename OutputPortSet, typename Policy, typename A>
class multifunction_input : public function_input_base<Input, Policy, A, multifunction_input<Input,OutputPortSet,Policy,A> > {
public:
    // Number of output ports.
    static const int N = std::tuple_size<OutputPortSet>::value;
    typedef Input input_type;
    typedef OutputPortSet output_ports_type;
#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
    typedef metainfo_tag_type tag_type;
#endif
    typedef multifunction_body<input_type, output_ports_type> multifunction_body_type;
    typedef multifunction_input<Input, OutputPortSet, Policy, A> my_class;
    typedef function_input_base<Input, Policy, A, my_class> base_type;
    typedef function_input_queue<input_type, A> input_queue_type;

    // constructor
    // The is_no_throw flag for the base is computed with noexcept() on a call to
    // invoke_body_with_tag; the operand is unevaluated, so naming my_output_ports there
    // does not access the not-yet-constructed member.
    // init_output_ports::call receives my_output_ports only to deduce the port types
    // (its tuple parameter is unnamed and unused) and constructs each port from g.
    template<typename Body>
    multifunction_input(graph &g, size_t max_concurrency,Body& body, node_priority_t a_priority )
      : base_type(g, max_concurrency, a_priority,
                  noexcept(invoke_body_with_tag(body __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo_tag_type{}),
                                                input_type(), my_output_ports)))
      , my_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) )
      , my_init_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) )
      , my_output_ports(init_output_ports<output_ports_type>::call(g, my_output_ports)){
    }

    //! Copy constructor
    //  Both bodies are cloned from src.my_init_body. The output ports are newly
    //  constructed from src's graph; src's ports are not copied.
    multifunction_input( const multifunction_input& src ) :
        base_type(src),
        my_body( src.my_init_body->clone() ),
        my_init_body(src.my_init_body->clone() ),
        my_output_ports( init_output_ports<output_ports_type>::call(src.my_graph_ref, my_output_ports) ) {
    }

    ~multifunction_input() {
        delete my_body;
        delete my_init_body;
    }

    //! Returns a copy of the user body currently held in my_body.
    //  Body must be the exact type the node was constructed with; the reference
    //  dynamic_cast fails (by throwing) for any other type. The leaf hands the body
    //  back through get_body_ptr(), whose result is cast to Body*.
    template< typename Body >
    Body copy_function_object() {
        multifunction_body_type &body_ref = *this->my_body;
        return *static_cast<Body*>(dynamic_cast< multifunction_body_leaf<input_type, output_ports_type, Body> & >(body_ref).get_body_ptr());
    }

    // for multifunction nodes we do not have a single successor as such.  So we just tell
    // the task we were successful.
    //TODO: consider moving common parts with implementation in function_input into separate function
    //! Runs the body on i with the output ports, then releases this node's concurrency slot.
    //  Returns the postponed task obtained from the base when there is one, and
    //  SUCCESSFULLY_ENQUEUED otherwise. Unlike function_input, the postponed task is
    //  returned to the caller rather than spawned here.
    graph_task* apply_body_impl_bypass( const input_type &i
                                        __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo) )
    {
#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
        // Reserves the waiters in metainfo; the tag is then moved into the body call.
        metainfo_tag_type tag(metainfo);
#endif
        fgt_begin_body( my_body );
        (*my_body)(i, my_output_ports __TBB_FLOW_GRAPH_METAINFO_ARG(std::move(tag)));
        fgt_end_body( my_body );
        graph_task* ttask = nullptr;
        // With max_concurrency == 0 no slot was taken, so there is nothing to release.
        if(base_type::my_max_concurrency != 0) {
            ttask = base_type::try_get_postponed_task(i);
        }
        return ttask ? ttask : SUCCESSFULLY_ENQUEUED;
    }

    //! Mutable access to the tuple of output ports.
    output_ports_type &output_ports(){ return my_output_ports; }

protected:

    //! Resets the base state; with rf_clear_edges also clears the successors of every
    //  output port, and with rf_reset_bodies replaces my_body by a fresh clone of
    //  my_init_body (created before the old body is deleted).
    void reset(reset_flags f) {
        base_type::reset_function_input_base(f);
        if(f & rf_clear_edges)clear_element<N>::clear_this(my_output_ports);
        if(f & rf_reset_bodies) {
            multifunction_body_type* tmp = my_init_body->clone();
            delete my_body;
            my_body = tmp;
        }
        __TBB_ASSERT(!(f & rf_clear_edges) || clear_element<N>::this_empty(my_output_ports), "multifunction_node reset failed");
    }

    // Body that is executed; owned.
    multifunction_body_type *my_body;
    // Copy of the body as originally supplied; owned.
    multifunction_body_type *my_init_body;
    // Declared after the bodies, so it is initialized last in the constructors.
    output_ports_type my_output_ports;

};  // multifunction_input

// template to refer to an output port of a multifunction_node
//! Returns a reference to the N-th element of the tuple exposed by op.output_ports().
//  MOP must provide an output_ports_type typedef and an output_ports() accessor.
template<size_t N, typename MOP>
typename std::tuple_element<N, typename MOP::output_ports_type>::type &output_port(MOP &op) {
    auto &all_ports = op.output_ports();
    return std::get<N>(all_ports);
}

//! Spawns t in the arena of g when t is an actual task.
//  A null pointer and the SUCCESSFULLY_ENQUEUED marker are both ignored.
inline void check_task_and_spawn(graph& g, graph_task* t) {
    if (!t) {
        return;
    }
    if (t == SUCCESSFULLY_ENQUEUED) {
        return;
    }
    spawn_in_graph_arena(g, *t);
}

// helper structs for split_node
//! Recursive step: puts tuple element N-1 to port N-1, spawns any task that the put
//  returns, then delegates elements N-2..0 to emit_element<N-1>.
template<int N>
struct emit_element {
    template<typename T, typename P>
    static graph_task* emit_this(graph& g, const T &t, P &p) {
        // TODO: consider to collect all the tasks in task_list and spawn them all at once
        auto &target_port = std::get<N-1>(p);
        check_task_and_spawn(g, target_port.try_put_task(std::get<N-1>(t)));
        return emit_element<N-1>::emit_this(g,t,p);
    }

#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
    //! Same as above, passing metainfo along with every put.
    template <typename TupleType, typename PortsType>
    static graph_task* emit_this(graph& g, const TupleType& t, PortsType& p,
                                 const message_metainfo& metainfo)
    {
        // TODO: consider to collect all the tasks in task_list and spawn them all at once
        auto &target_port = std::get<N-1>(p);
        check_task_and_spawn(g, target_port.try_put_task(std::get<N-1>(t), metainfo));
        return emit_element<N-1>::emit_this(g, t, p, metainfo);
    }
#endif
};

//! Recursion terminator: puts tuple element 0 to port 0, spawns any task that the put
//  returns, and always reports SUCCESSFULLY_ENQUEUED.
template<>
struct emit_element<1> {
    template<typename T, typename P>
    static graph_task* emit_this(graph& g, const T &t, P &p) {
        auto &first_port = std::get<0>(p);
        check_task_and_spawn(g, first_port.try_put_task(std::get<0>(t)));
        return SUCCESSFULLY_ENQUEUED;
    }

#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
    //! Same as above, passing metainfo along with the put.
    template <typename TupleType, typename PortsType>
    static graph_task* emit_this(graph& g, const TupleType& t, PortsType& ports,
                                 const message_metainfo& metainfo)
    {
        auto &first_port = std::get<0>(ports);
        check_task_and_spawn(g, first_port.try_put_task(std::get<0>(t), metainfo));
        return SUCCESSFULLY_ENQUEUED;
    }
#endif
};

//! Implements methods for an executable node that takes continue_msg as input
template< typename Output, typename Policy>
class continue_input : public continue_receiver {
public:

    //! The input type of this receiver
    typedef continue_msg input_type;

    //! The output type of this receiver
    typedef Output output_type;
    typedef function_body<input_type, output_type> function_body_type;
    typedef continue_input<output_type, Policy> class_type;

    template< typename Body >
    continue_input( graph &g, Body& body, node_priority_t a_priority )
        : continue_receiver(/*number_of_predecessors=*/0, a_priority)
        , my_graph_ref(g)
        , my_body( new function_body_leaf< input_type, output_type, Body>(body) )
        , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) )
    { }

    template< typename Body >
    continue_input( graph &g, int number_of_predecessors,
                    Body& body, node_priority_t a_priority )
      : continue_receiver( number_of_predecessors, a_priority )
      , my_graph_ref(g)
      , my_body( new function_body_leaf< input_type, output_type, Body>(body) )
      , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) )
    { }

    continue_input( const continue_input& src ) : continue_receiver(src),
                                                  my_graph_ref(src.my_graph_ref),
                                                  my_body( src.my_init_body->clone() ),
                                                  my_init_body( src.my_init_body->clone() ) {}

    ~continue_input() {
        delete my_body;
        delete my_init_body;
    }

    template< typename Body >
    Body copy_function_object() {
        function_body_type &body_ref = *my_body;
        return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body();
    }

    void reset_receiver( reset_flags f) override {
        continue_receiver::reset_receiver(f);
        if(f & rf_reset_bodies) {
            function_body_type *tmp = my_init_body->clone();
            delete my_body;
            my_body = tmp;
        }
    }

protected:

    graph& my_graph_ref;
    function_body_type *my_body;
    function_body_type *my_init_body;

    virtual broadcast_cache<output_type > &successors() = 0;

    friend class apply_body_task_bypass< class_type, continue_msg >;
#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
    friend class apply_body_task_bypass< class_type, continue_msg, trackable_messages_graph_task >;
#endif

    //! Applies the body to the provided input
    graph_task* apply_body_bypass( input_type __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo) ) {
        // There is an extra copied needed to capture the
        // body execution without the try_put
        fgt_begin_body( my_body );
        output_type v = (*my_body)( continue_msg() );
        fgt_end_body( my_body );
        return successors().try_put_task( v __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo) );
    }

#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
    graph_task* execute(const message_metainfo& metainfo) override {
#else
    graph_task* execute() override {
#endif
        // Either runs the body in place (lightweight policy) or creates a task that
        // will run it, and returns the resulting task pointer.
        // Returns nullptr without doing anything when the graph is not active.
        if(!is_graph_active(my_graph_ref)) {
            return nullptr;
        }
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (push)
#pragma warning (disable: 4127)  /* suppress conditional expression is constant */
#endif
        // The condition is a compile-time constant derived from Policy.
        if(has_policy<lightweight, Policy>::value) {
#if _MSC_VER && !__INTEL_COMPILER
// Popped right after the condition so the suppression covers only that expression.
#pragma warning (pop)
#endif
            // Lightweight: call the body directly here instead of allocating a task.
            return apply_body_bypass( continue_msg() __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo) );
        }
        else {
            // Non-lightweight: allocate an apply_body_task_bypass and hand it back to the caller.
            d1::small_object_allocator allocator{};
            graph_task* t = nullptr;
#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
            // A non-empty metainfo selects the trackable task variant, which also receives the metainfo.
            if (!metainfo.empty()) {
                using task_type = apply_body_task_bypass<class_type, continue_msg, trackable_messages_graph_task>;
                t = allocator.new_object<task_type>( graph_reference(), allocator, *this, continue_msg(), my_priority, metainfo );
            } else
#endif
            {
                using task_type = apply_body_task_bypass<class_type, continue_msg>;
                t = allocator.new_object<task_type>( graph_reference(), allocator, *this, continue_msg(), my_priority );
            }
            return t;
        }
    }

    //! Returns the graph reference stored in my_graph_ref.
    graph& graph_reference() const override {
        return my_graph_ref;
    }
};  // continue_input

//! Sender half shared by executable and function nodes: holds the broadcast cache
//! of successors that Output messages are put to, plus a reference to the graph.
template< typename Output >
class function_output : public sender<Output> {
public:
    using output_type = Output;
    using successor_type = typename sender<output_type>::successor_type;
    using broadcast_cache_type = broadcast_cache<output_type>;

    template<int N> friend struct clear_element;

    //! Binds the successor cache to this sender and remembers the graph g.
    function_output(graph& g) : my_successors(this), my_graph_ref(g) {}
    //! Not copy-constructible.
    function_output(const function_output&) = delete;

    //! Adds r to the successor cache; always reports success.
    bool register_successor( successor_type &r ) override {
        my_successors.register_successor( r );
        return true;
    }

    //! Removes r from the successor cache; always reports success.
    bool remove_successor( successor_type &r ) override {
        my_successors.remove_successor( r );
        return true;
    }

    //! Gives access to the successor cache.
    broadcast_cache_type &successors() {
        return my_successors;
    }

    //! Returns the graph passed to the constructor.
    graph& graph_reference() const {
        return my_graph_ref;
    }

protected:
    broadcast_cache_type my_successors;
    graph& my_graph_ref;
};  // function_output

//! A function_output that additionally exposes try_put(), so a value can be pushed
//! to its successors directly through this object.
template< typename Output >
class multifunction_output : public function_output<Output> {
public:
    using output_type = Output;
    using base_type = function_output<output_type>;
    using base_type::my_successors;
    using base_type::graph_reference;

    multifunction_output(graph& g) : base_type(g) {}
    //! Copying only carries over the graph reference; the base is constructed anew
    //! from it, so the successor cache of other is not copied.
    multifunction_output(const multifunction_output& other) : base_type(other.my_graph_ref) {}

    //! Puts i to the successors. Returns false if no successor produced a task or
    //! accepted the item, true otherwise.
    bool try_put(const output_type &i) {
        return try_put_impl(i __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo{}));
    }

#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
    //! Same as try_put(i), but forwards the metainfo obtained from tag.
    bool try_put(const output_type& i, const metainfo_tag_type& tag) {
        return try_put_impl(i, metainfo_tag_accessor::get_metainfo(tag));
    }

    //! Rvalue overload: the tag is first moved into a local that lives until the
    //! call returns, then handled like the lvalue overload.
    bool try_put(const output_type& i, metainfo_tag_type&& tag) {
        metainfo_tag_type owned_tag = std::move(tag);
        return try_put_impl(i, metainfo_tag_accessor::get_metainfo(owned_tag));
    }
#endif

protected:
    //! Common implementation of the public try_put overloads.
    bool try_put_impl(const output_type& i __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) {
        graph_task* const pending = try_put_task(i __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo));
        if (pending == nullptr) {
            return false;
        }
        if (pending != SUCCESSFULLY_ENQUEUED) {
            // No task_arena::execute() wrapper is needed here because this method
            // is called from inside task::execute().
            spawn_in_graph_arena(graph_reference(), *pending);
        }
        return true;
    }

    //! Forwards i to the successor cache and returns the task pointer it yields.
    graph_task* try_put_task(const output_type &i) {
        return my_successors.try_put_task(i);
    }

#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT
    //! Metainfo-carrying variant of try_put_task.
    graph_task* try_put_task(const output_type& i, const message_metainfo& metainfo) {
        return my_successors.try_put_task(i, metainfo);
    }
#endif

    template <int N> friend struct emit_element;

};  // multifunction_output

// composite_node helpers
//! Overload of add_nodes_impl taking no nodes: does nothing.
template <typename CompositeType>
void add_nodes_impl(CompositeType* /*c_node*/, bool /*visible*/) {
    // Intentionally empty.
}

//! Passes the address of n1 to fgt_alias_port() together with c_node and visible,
//! then recurses on the remaining nodes n... .
template <typename CompositeType, typename NodeType1, typename... NodeTypes>
void add_nodes_impl(CompositeType* c_node, bool visible, const NodeType1& n1, const NodeTypes&... n) {
    // Constness is cast away so the address can be handed over as a plain void*.
    void* node_addr = const_cast<NodeType1*>(&n1);
    fgt_alias_port(c_node, node_addr, visible);

    add_nodes_impl(c_node, visible, n...);
}

#endif // __TBB__flow_graph_node_impl_H