diff --git a/cpp/daal/include/algorithms/engines/mrg32k3a/mrg32k3a.h b/cpp/daal/include/algorithms/engines/mrg32k3a/mrg32k3a.h
new file mode 100644
index 00000000000..a70c1853e1a
--- /dev/null
+++ b/cpp/daal/include/algorithms/engines/mrg32k3a/mrg32k3a.h
@@ -0,0 +1,183 @@
+/* file: mrg32k3a.h */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+//++
+// Implementation of the MRG32k3a engine: a 32-bit combined multiple recursive generator
+// with two components of order 3, optimized for batch processing.
+//--
+*/
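+
+/*
+// For reference, MRG32k3a (L'Ecuyer, 1999) combines two order-3 recurrences
+// (constants as commonly published; the arithmetic itself is provided by the
+// underlying oneMKL/openRNG basic generator, not by this header):
+//   x_n = (1403580 * x_{n-2} -  810728 * x_{n-3}) mod (2^32 - 209)
+//   y_n = ( 527612 * y_{n-1} - 1370589 * y_{n-3}) mod (2^32 - 22853)
+//   z_n = (x_n - y_n) mod (2^32 - 209)
+*/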
+
+#ifndef __MRG32K3A_H__
+#define __MRG32K3A_H__
+
+#include "algorithms/engines/mrg32k3a/mrg32k3a_types.h"
+#include "algorithms/engines/engine.h"
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+namespace mrg32k3a
+{
+/**
+ * @defgroup engines_mrg32k3a_batch Batch
+ * @ingroup engines_mrg32k3a
+ * @{
+ */
+namespace interface1
+{
+/**
+ *
+ * \brief Provides methods to run implementations of the mrg32k3a engine.
+ * This class is associated with the \ref mrg32k3a::interface1::Batch "mrg32k3a::Batch" class
+ * and supports the method of mrg32k3a engine computation in the batch processing mode
+ *
+ * \tparam algorithmFPType Data type to use in intermediate computations of mrg32k3a engine, double or float
+ * \tparam method Computation method of the engine, mrg32k3a::Method
+ * \tparam cpu Version of the cpu-specific implementation of the engine, daal::CpuType
+ */
+template <typename algorithmFPType, Method method, CpuType cpu>
+class BatchContainer : public daal::algorithms::AnalysisContainerIface<batch>
+{
+public:
+ /**
+ * Constructs a container for the mrg32k3a engine with a specified environment
+ * in the batch processing mode
+ * \param[in] daalEnv Environment object
+ */
+ BatchContainer(daal::services::Environment::env * daalEnv);
+ ~BatchContainer();
+ /**
+ * Computes the result of the mrg32k3a engine in the batch processing mode
+ *
+ * \return Status of computations
+ */
+ services::Status compute() DAAL_C11_OVERRIDE;
+};
+
+/**
+ *
+ * \brief Provides methods for mrg32k3a engine computations in the batch processing mode
+ *
+ * \tparam algorithmFPType Data type to use in intermediate computations of mrg32k3a engine, double or float
+ * \tparam method Computation method of the engine, mrg32k3a::Method
+ *
+ * \par Enumerations
+ * - mrg32k3a::Method Computation methods for the mrg32k3a engine
+ *
+ * \par References
+ * - \ref engines::interface1::Input "engines::Input" class
+ * - \ref engines::interface1::Result "engines::Result" class
+ */
+template <typename algorithmFPType = DAAL_ALGORITHM_FP_TYPE, Method method = defaultDense>
+class DAAL_EXPORT Batch : public engines::BatchBase
+{
+public:
+ typedef engines::BatchBase super;
+
+ typedef typename super::InputType InputType;
+ typedef typename super::ResultType ResultType;
+
+ /**
+ * Creates mrg32k3a engine
+ * \param[in] seed Initial condition for mrg32k3a engine
+ *
+ * \return Pointer to mrg32k3a engine
+ */
+    static services::SharedPtr<Batch<algorithmFPType, method> > create(size_t seed = 777);
+
+ /**
+ * Returns method of the engine
+ * \return Method of the engine
+ */
+ virtual int getMethod() const DAAL_C11_OVERRIDE { return (int)method; }
+
+ /**
+ * Returns the structure that contains results of mrg32k3a engine
+ * \return Structure that contains results of mrg32k3a engine
+ */
+ ResultPtr getResult() { return _result; }
+
+ /**
+ * Registers user-allocated memory to store results of mrg32k3a engine
+ * \param[in] result Structure to store results of mrg32k3a engine
+ *
+ * \return Status of computations
+ */
+ services::Status setResult(const ResultPtr & result)
+ {
+ DAAL_CHECK(result, services::ErrorNullResult)
+ _result = result;
+ _res = _result.get();
+ return services::Status();
+ }
+
+ /**
+ * Returns a pointer to the newly allocated mrg32k3a engine
+ * with a copy of input objects and parameters of this mrg32k3a engine
+ * \return Pointer to the newly allocated engine
+ */
+    services::SharedPtr<Batch<algorithmFPType, method> > clone() const { return services::SharedPtr<Batch<algorithmFPType, method> >(cloneImpl()); }
+
+ /**
+ * Allocates memory to store the result of the mrg32k3a engine
+ *
+ * \return Status of computations
+ */
+ virtual services::Status allocateResult() DAAL_C11_OVERRIDE
+ {
+        services::Status s = this->_result->template allocate<algorithmFPType>(&(this->input), NULL, (int)method);
+ this->_res = this->_result.get();
+ return s;
+ }
+
+protected:
+ Batch(size_t seed = 777) { initialize(); }
+
+ Batch(const Batch & other) : super(other) { initialize(); }
+
+ virtual Batch * cloneImpl() const DAAL_C11_OVERRIDE { return new Batch(*this); }
+
+ void initialize()
+ {
+ Analysis::_ac = new __DAAL_ALGORITHM_CONTAINER(batch, BatchContainer, algorithmFPType, method)(&_env);
+ _in = &input;
+ _result.reset(new ResultType());
+ }
+
+private:
+ ResultPtr _result;
+
+ Batch & operator=(const Batch &);
+};
+typedef services::SharedPtr<Batch<> > mrg32k3aPtr;
+typedef services::SharedPtr<const Batch<> > mrg32k3aConstPtr;
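+
+/*
+// Illustrative usage sketch (names outside this header, e.g. distributions::uniform,
+// are referenced for illustration only): the engine is typically created through
+// Batch<>::create() and attached to a distribution or another algorithm:
+//
+//     engines::mrg32k3a::mrg32k3aPtr engine = engines::mrg32k3a::Batch<>::create(777);
+//     distributions::uniform::Batch<> uniform(0.0, 1.0);
+//     uniform.parameter.engine = engine;
+//     uniform.input.set(distributions::tableToFill, table); // table: user-provided NumericTablePtr
+//     uniform.compute();
+*/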
+
+} // namespace interface1
+using interface1::BatchContainer;
+using interface1::Batch;
+using interface1::mrg32k3aPtr;
+using interface1::mrg32k3aConstPtr;
+/** @} */
+} // namespace mrg32k3a
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
+#endif
diff --git a/cpp/daal/include/algorithms/engines/mrg32k3a/mrg32k3a_types.h b/cpp/daal/include/algorithms/engines/mrg32k3a/mrg32k3a_types.h
new file mode 100644
index 00000000000..8fdc58b98c8
--- /dev/null
+++ b/cpp/daal/include/algorithms/engines/mrg32k3a/mrg32k3a_types.h
@@ -0,0 +1,65 @@
+/* file: mrg32k3a_types.h */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+//++
+// Implementation of the MRG32k3a engine: a 32-bit combined multiple recursive generator
+// with two components of order 3, optimized for batch processing.
+//--
+*/
+
+#ifndef __MRG32K3A_TYPES_H__
+#define __MRG32K3A_TYPES_H__
+
+#include "algorithms/algorithm.h"
+#include "services/daal_defines.h"
+#include "data_management/data/numeric_table.h"
+#include "data_management/data/homogen_numeric_table.h"
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+/**
+ * @defgroup engines_mrg32k3a mrg32k3a Engine
+ * \copydoc daal::algorithms::engines::mrg32k3a
+ * @ingroup engines
+ * @{
+ */
+/**
+ * \brief Contains classes for mrg32k3a engine
+ */
+namespace mrg32k3a
+{
+/**
+ *
+ * Available methods to compute mrg32k3a engine
+ */
+enum Method
+{
+ defaultDense = 0 /*!< Default: performance-oriented method. */
+};
+
+} // namespace mrg32k3a
+/** @} */
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
+
+#endif
diff --git a/cpp/daal/include/algorithms/engines/philox4x32x10/philox4x32x10.h b/cpp/daal/include/algorithms/engines/philox4x32x10/philox4x32x10.h
new file mode 100644
index 00000000000..3a5d0e33180
--- /dev/null
+++ b/cpp/daal/include/algorithms/engines/philox4x32x10/philox4x32x10.h
@@ -0,0 +1,183 @@
+/* file: philox4x32x10.h */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+//++
+// Implementation of the Philox4x32-10 engine: a counter-based pseudorandom number generator (PRNG)
+// that mixes a 4x32-bit counter with a 2x32-bit key over 10 rounds to produce high-quality randomness.
+//--
+*/
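+
+/*
+// For reference, Philox4x32-10 (Salmon et al., SC'11) advances a 4x32-bit counter
+// and derives each output block through 10 rounds of multiply-and-xor mixing with
+// a 2x32-bit key (multipliers 0xD2511F53 / 0xCD9E8D57, Weyl increments 0x9E3779B9 /
+// 0xBB67AE85, as commonly published); the arithmetic itself is provided by the
+// underlying oneMKL/openRNG basic generator.
+*/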
+
+#ifndef __PHILOX4X32X10_H__
+#define __PHILOX4X32X10_H__
+
+#include "algorithms/engines/philox4x32x10/philox4x32x10_types.h"
+#include "algorithms/engines/engine.h"
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+namespace philox4x32x10
+{
+/**
+ * @defgroup engines_philox4x32x10_batch Batch
+ * @ingroup engines_philox4x32x10
+ * @{
+ */
+namespace interface1
+{
+/**
+ *
+ * \brief Provides methods to run implementations of the philox4x32x10 engine.
+ * This class is associated with the \ref philox4x32x10::interface1::Batch "philox4x32x10::Batch" class
+ * and supports the method of philox4x32x10 engine computation in the batch processing mode
+ *
+ * \tparam algorithmFPType Data type to use in intermediate computations of philox4x32x10 engine, double or float
+ * \tparam method Computation method of the engine, philox4x32x10::Method
+ * \tparam cpu Version of the cpu-specific implementation of the engine, daal::CpuType
+ */
+template <typename algorithmFPType, Method method, CpuType cpu>
+class BatchContainer : public daal::algorithms::AnalysisContainerIface<batch>
+{
+public:
+ /**
+ * Constructs a container for the philox4x32x10 engine with a specified environment
+ * in the batch processing mode
+ * \param[in] daalEnv Environment object
+ */
+ BatchContainer(daal::services::Environment::env * daalEnv);
+ ~BatchContainer();
+ /**
+ * Computes the result of the philox4x32x10 engine in the batch processing mode
+ *
+ * \return Status of computations
+ */
+ services::Status compute() DAAL_C11_OVERRIDE;
+};
+
+/**
+ *
+ * \brief Provides methods for philox4x32x10 engine computations in the batch processing mode
+ *
+ * \tparam algorithmFPType Data type to use in intermediate computations of philox4x32x10 engine, double or float
+ * \tparam method Computation method of the engine, philox4x32x10::Method
+ *
+ * \par Enumerations
+ * - philox4x32x10::Method Computation methods for the philox4x32x10 engine
+ *
+ * \par References
+ * - \ref engines::interface1::Input "engines::Input" class
+ * - \ref engines::interface1::Result "engines::Result" class
+ */
+template <typename algorithmFPType = DAAL_ALGORITHM_FP_TYPE, Method method = defaultDense>
+class DAAL_EXPORT Batch : public engines::BatchBase
+{
+public:
+ typedef engines::BatchBase super;
+
+ typedef typename super::InputType InputType;
+ typedef typename super::ResultType ResultType;
+
+ /**
+ * Creates philox4x32x10 engine
+ * \param[in] seed Initial condition for philox4x32x10 engine
+ *
+ * \return Pointer to philox4x32x10 engine
+ */
+    static services::SharedPtr<Batch<algorithmFPType, method> > create(size_t seed = 777);
+
+ /**
+ * Returns method of the engine
+ * \return Method of the engine
+ */
+ virtual int getMethod() const DAAL_C11_OVERRIDE { return (int)method; }
+
+ /**
+ * Returns the structure that contains results of philox4x32x10 engine
+ * \return Structure that contains results of philox4x32x10 engine
+ */
+ ResultPtr getResult() { return _result; }
+
+ /**
+ * Registers user-allocated memory to store results of philox4x32x10 engine
+ * \param[in] result Structure to store results of philox4x32x10 engine
+ *
+ * \return Status of computations
+ */
+ services::Status setResult(const ResultPtr & result)
+ {
+ DAAL_CHECK(result, services::ErrorNullResult)
+ _result = result;
+ _res = _result.get();
+ return services::Status();
+ }
+
+ /**
+ * Returns a pointer to the newly allocated philox4x32x10 engine
+ * with a copy of input objects and parameters of this philox4x32x10 engine
+ * \return Pointer to the newly allocated engine
+ */
+    services::SharedPtr<Batch<algorithmFPType, method> > clone() const { return services::SharedPtr<Batch<algorithmFPType, method> >(cloneImpl()); }
+
+ /**
+ * Allocates memory to store the result of the philox4x32x10 engine
+ *
+ * \return Status of computations
+ */
+ virtual services::Status allocateResult() DAAL_C11_OVERRIDE
+ {
+        services::Status s = this->_result->template allocate<algorithmFPType>(&(this->input), NULL, (int)method);
+ this->_res = this->_result.get();
+ return s;
+ }
+
+protected:
+ Batch(size_t seed = 777) { initialize(); }
+
+ Batch(const Batch & other) : super(other) { initialize(); }
+
+ virtual Batch * cloneImpl() const DAAL_C11_OVERRIDE { return new Batch(*this); }
+
+ void initialize()
+ {
+ Analysis::_ac = new __DAAL_ALGORITHM_CONTAINER(batch, BatchContainer, algorithmFPType, method)(&_env);
+ _in = &input;
+ _result.reset(new ResultType());
+ }
+
+private:
+ ResultPtr _result;
+
+ Batch & operator=(const Batch &);
+};
+typedef services::SharedPtr<Batch<> > philox4x32x10Ptr;
+typedef services::SharedPtr<const Batch<> > philox4x32x10ConstPtr;
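+
+/*
+// Illustrative usage sketch (someAlgorithm is a placeholder, not a oneDAL name):
+// like the other engines, a Philox4x32-10 instance is created once and handed to
+// any algorithm that accepts an engines::EnginePtr parameter:
+//
+//     engines::philox4x32x10::philox4x32x10Ptr engine = engines::philox4x32x10::Batch<>::create(777);
+//     someAlgorithm.parameter.engine = engine;
+*/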
+
+} // namespace interface1
+using interface1::BatchContainer;
+using interface1::Batch;
+using interface1::philox4x32x10Ptr;
+using interface1::philox4x32x10ConstPtr;
+/** @} */
+} // namespace philox4x32x10
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
+#endif
diff --git a/cpp/daal/include/algorithms/engines/philox4x32x10/philox4x32x10_types.h b/cpp/daal/include/algorithms/engines/philox4x32x10/philox4x32x10_types.h
new file mode 100644
index 00000000000..0c0a92c9b3a
--- /dev/null
+++ b/cpp/daal/include/algorithms/engines/philox4x32x10/philox4x32x10_types.h
@@ -0,0 +1,65 @@
+/* file: philox4x32x10_types.h */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+//++
+// Implementation of the Philox4x32-10 engine: a counter-based pseudorandom number generator (PRNG)
+// that mixes a 4x32-bit counter with a 2x32-bit key over 10 rounds to produce high-quality randomness.
+//--
+*/
+
+#ifndef __PHILOX4X32X10_TYPES_H__
+#define __PHILOX4X32X10_TYPES_H__
+
+#include "algorithms/algorithm.h"
+#include "services/daal_defines.h"
+#include "data_management/data/numeric_table.h"
+#include "data_management/data/homogen_numeric_table.h"
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+/**
+ * @defgroup engines_philox4x32x10 philox4x32x10 Engine
+ * \copydoc daal::algorithms::engines::philox4x32x10
+ * @ingroup engines
+ * @{
+ */
+/**
+ * \brief Contains classes for philox4x32x10 engine
+ */
+namespace philox4x32x10
+{
+/**
+ *
+ * Available methods to compute philox4x32x10 engine
+ */
+enum Method
+{
+ defaultDense = 0 /*!< Default: performance-oriented method. */
+};
+
+} // namespace philox4x32x10
+/** @} */
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
+
+#endif
diff --git a/cpp/daal/include/daal.h b/cpp/daal/include/daal.h
index 881a6c39fbe..f49625f8939 100755
--- a/cpp/daal/include/daal.h
+++ b/cpp/daal/include/daal.h
@@ -297,13 +297,17 @@
#include "algorithms/distributions/bernoulli/bernoulli.h"
#include "algorithms/distributions/bernoulli/bernoulli_types.h"
#include "algorithms/engines/engine.h"
+#include "algorithms/engines/engine_family.h"
+#include "algorithms/engines/mt2203/mt2203.h"
+#include "algorithms/engines/mt2203/mt2203_types.h"
#include "algorithms/engines/mt19937/mt19937.h"
#include "algorithms/engines/mt19937/mt19937_types.h"
#include "algorithms/engines/mcg59/mcg59.h"
#include "algorithms/engines/mcg59/mcg59_types.h"
-#include "algorithms/engines/engine_family.h"
-#include "algorithms/engines/mt2203/mt2203.h"
-#include "algorithms/engines/mt2203/mt2203_types.h"
+#include "algorithms/engines/mrg32k3a/mrg32k3a.h"
+#include "algorithms/engines/mrg32k3a/mrg32k3a_types.h"
+#include "algorithms/engines/philox4x32x10/philox4x32x10.h"
+#include "algorithms/engines/philox4x32x10/philox4x32x10_types.h"
#include "algorithms/dbscan/dbscan_types.h"
#include "algorithms/dbscan/dbscan_batch.h"
#include "algorithms/dbscan/dbscan_distributed.h"
diff --git a/cpp/daal/include/daal_win.h b/cpp/daal/include/daal_win.h
index e17eff16796..a15ed7db26e 100755
--- a/cpp/daal/include/daal_win.h
+++ b/cpp/daal/include/daal_win.h
@@ -309,13 +309,17 @@
#include "algorithms/distributions/bernoulli/bernoulli.h"
#include "algorithms/distributions/bernoulli/bernoulli_types.h"
#include "algorithms/engines/engine.h"
+#include "algorithms/engines/engine_family.h"
+#include "algorithms/engines/mt2203/mt2203.h"
+#include "algorithms/engines/mt2203/mt2203_types.h"
#include "algorithms/engines/mt19937/mt19937.h"
#include "algorithms/engines/mt19937/mt19937_types.h"
#include "algorithms/engines/mcg59/mcg59.h"
#include "algorithms/engines/mcg59/mcg59_types.h"
-#include "algorithms/engines/engine_family.h"
-#include "algorithms/engines/mt2203/mt2203.h"
-#include "algorithms/engines/mt2203/mt2203_types.h"
+#include "algorithms/engines/mrg32k3a/mrg32k3a.h"
+#include "algorithms/engines/mrg32k3a/mrg32k3a_types.h"
+#include "algorithms/engines/philox4x32x10/philox4x32x10.h"
+#include "algorithms/engines/philox4x32x10/philox4x32x10_types.h"
#include "algorithms/dbscan/dbscan_types.h"
#include "algorithms/dbscan/dbscan_batch.h"
#include "algorithms/dbscan/dbscan_distributed.h"
diff --git a/cpp/daal/src/algorithms/engines/mcg59/mcg59_batch_impl.h b/cpp/daal/src/algorithms/engines/mcg59/mcg59_batch_impl.h
index 6c3040da615..62f337ba9a0 100644
--- a/cpp/daal/src/algorithms/engines/mcg59/mcg59_batch_impl.h
+++ b/cpp/daal/src/algorithms/engines/mcg59/mcg59_batch_impl.h
@@ -26,9 +26,6 @@
#include "src/externals/service_rng.h"
#include "src/data_management/service_numeric_table.h"
-static const int leapfrogMethodErrcode = -1002;
-static const int skipAheadMethodErrcode = -1003;
-
namespace daal
{
namespace algorithms
@@ -67,7 +64,7 @@ class BatchImpl : public algorithms::engines::mcg59::interface1::Batch<algorithmFPType, method>
         int errcode = baseRng.leapfrog(threadNum, nThreads);
         services::Status s;
-        if (errcode == leapfrogMethodErrcode)
+        if (errcode == __DAAL_RNG_ERROR_LEAPFROG_UNSUPPORTED)
             s.add(ErrorLeapfrogUnsupported);
         else if (errcode)
             s.add(ErrorIncorrectErrorcodeFromGenerator);
diff --git a/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a.cpp b/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a.cpp
new file mode 100644
--- /dev/null
+++ b/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a.cpp
@@ -0,0 +1,59 @@
+/* file: mrg32k3a.cpp */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+//++
+// Implementation of the MRG32k3a engine: a 32-bit combined multiple recursive generator
+// with two components of order 3, optimized for batch processing.
+//--
+
+#include "algorithms/engines/mrg32k3a/mrg32k3a.h"
+#include "src/externals/service_dispatch.h"
+#include "src/algorithms/engines/mrg32k3a/mrg32k3a_batch_impl.h"
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+namespace mrg32k3a
+{
+namespace interface1
+{
+using namespace daal::services;
+using namespace mrg32k3a::internal;
+
+template <typename algorithmFPType, Method method>
+SharedPtr<Batch<algorithmFPType, method> > Batch<algorithmFPType, method>::create(size_t seed)
+{
+    SharedPtr<Batch<algorithmFPType, method> > engPtr;
+#define DAAL_CREATE_ENGINE_CPU(cpuId, ...) engPtr.reset(new BatchImpl<algorithmFPType, method, cpuId>(__VA_ARGS__));
+
+ DAAL_DISPATCH_FUNCTION_BY_CPU(DAAL_CREATE_ENGINE_CPU, seed);
+
+#undef DAAL_CREATE_ENGINE_CPU
+ return engPtr;
+}
+
+template class Batch<float, defaultDense>;
+template class Batch<double, defaultDense>;
+
+} // namespace interface1
+} // namespace mrg32k3a
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
diff --git a/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_batch_container.h b/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_batch_container.h
new file mode 100644
index 00000000000..ce83f554026
--- /dev/null
+++ b/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_batch_container.h
@@ -0,0 +1,68 @@
+/* file: mrg32k3a_batch_container.h */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+//++
+// Implementation of mrg32k3a calculation algorithm container.
+//--
+*/
+
+#ifndef __MRG32K3A_BATCH_CONTAINER_H__
+#define __MRG32K3A_BATCH_CONTAINER_H__
+
+#include "algorithms/engines/mrg32k3a/mrg32k3a.h"
+#include "src/algorithms/engines/mrg32k3a/mrg32k3a_kernel.h"
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+namespace mrg32k3a
+{
+namespace interface1
+{
+template <typename algorithmFPType, Method method, CpuType cpu>
+BatchContainer<algorithmFPType, method, cpu>::BatchContainer(daal::services::Environment::env * daalEnv) : AnalysisContainerIface<batch>(daalEnv)
+{
+ __DAAL_INITIALIZE_KERNELS(internal::mrg32k3aKernel, algorithmFPType, method);
+}
+
+template <typename algorithmFPType, Method method, CpuType cpu>
+BatchContainer<algorithmFPType, method, cpu>::~BatchContainer()
+{
+ __DAAL_DEINITIALIZE_KERNELS();
+}
+
+template <typename algorithmFPType, Method method, CpuType cpu>
+services::Status BatchContainer<algorithmFPType, method, cpu>::compute()
+{
+ daal::services::Environment::env & env = *_env;
+    engines::Result * result = static_cast<engines::Result *>(_res);
+ NumericTable * resultTable = result->get(engines::randomNumbers).get();
+
+ __DAAL_CALL_KERNEL(env, internal::mrg32k3aKernel, __DAAL_KERNEL_ARGUMENTS(algorithmFPType, method), compute, resultTable);
+}
+
+} // namespace interface1
+} // namespace mrg32k3a
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
+
+#endif
diff --git a/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_batch_impl.h b/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_batch_impl.h
new file mode 100644
index 00000000000..9c226e54af3
--- /dev/null
+++ b/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_batch_impl.h
@@ -0,0 +1,114 @@
+/* file: mrg32k3a_batch_impl.h */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+//++
+// Implementation of the class defining the mrg32k3a engine.
+//--
+*/
+
+#include "algorithms/engines/mrg32k3a/mrg32k3a.h"
+#include "src/algorithms/engines/engine_batch_impl.h"
+#include "src/externals/service_rng.h"
+#include "src/data_management/service_numeric_table.h"
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+namespace mrg32k3a
+{
+namespace internal
+{
+template <typename algorithmFPType, Method method, CpuType cpu>
+class BatchImpl : public algorithms::engines::mrg32k3a::interface1::Batch<algorithmFPType, method>,
+                  public algorithms::engines::internal::BatchBaseImpl
+{
+public:
+    typedef algorithms::engines::mrg32k3a::interface1::Batch<algorithmFPType, method> super1;
+    typedef algorithms::engines::internal::BatchBaseImpl super2;
+ BatchImpl(size_t seed = 777) : baseRng(seed, __DAAL_BRNG_MRG32K3A), super2(seed) {}
+
+ void * getState() DAAL_C11_OVERRIDE { return baseRng.getState(); }
+
+ int getStateSize() const DAAL_C11_OVERRIDE { return baseRng.getStateSize(); }
+
+ services::Status saveStateImpl(byte * dest) const DAAL_C11_OVERRIDE
+ {
+ DAAL_CHECK(!baseRng.saveState((void *)dest), ErrorIncorrectErrorcodeFromGenerator);
+ return services::Status();
+ }
+
+ services::Status loadStateImpl(const byte * src) DAAL_C11_OVERRIDE
+ {
+ DAAL_CHECK(!baseRng.loadState((const void *)src), ErrorIncorrectErrorcodeFromGenerator);
+ return services::Status();
+ }
+
+ services::Status leapfrogImpl(size_t threadNum, size_t nThreads) DAAL_C11_OVERRIDE
+ {
+ int errcode = baseRng.leapfrog(threadNum, nThreads);
+ services::Status s;
+ if (errcode == __DAAL_RNG_ERROR_LEAPFROG_UNSUPPORTED)
+ s.add(ErrorLeapfrogUnsupported);
+ else if (errcode)
+ s.add(ErrorIncorrectErrorcodeFromGenerator);
+ return s;
+ }
+
+ services::Status skipAheadImpl(size_t nSkip) DAAL_C11_OVERRIDE
+ {
+ int errcode = baseRng.skipAhead(nSkip);
+ services::Status s;
+ if (errcode == __DAAL_RNG_ERROR_SKIPAHEAD_UNSUPPORTED)
+ s.add(ErrorSkipAheadUnsupported);
+ else if (errcode)
+ s.add(ErrorIncorrectErrorcodeFromGenerator);
+ return s;
+ }
+
+ virtual BatchImpl * cloneImpl() const DAAL_C11_OVERRIDE
+ {
+ return new BatchImpl(*this);
+ }
+
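+    /* Parallelization support reported to the engine dispatcher: MRG32k3a is a
+       single-stream generator, so the family technique (independent generator
+       families, as in mt2203) does not apply, while the underlying basic
+       generator supports both skip-ahead and leapfrog. */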
+ bool hasSupport(engines::internal::ParallelizationTechnique technique) const DAAL_C11_OVERRIDE
+ {
+ switch (technique)
+ {
+ case engines::internal::family: return false;
+ case engines::internal::skipahead: return true;
+ case engines::internal::leapfrog: return true;
+ }
+ return false;
+ }
+
+ ~BatchImpl() {}
+
+protected:
+ BatchImpl(const BatchImpl & other) : super1(other), super2(other), baseRng(other.baseRng) {}
+
+    daal::internal::BaseRNGsInst<cpu> baseRng;
+};
+
+} // namespace internal
+} // namespace mrg32k3a
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
diff --git a/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_dense_default_batch_fpt_cpu.cpp b/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_dense_default_batch_fpt_cpu.cpp
new file mode 100644
index 00000000000..529c4af2635
--- /dev/null
+++ b/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_dense_default_batch_fpt_cpu.cpp
@@ -0,0 +1,47 @@
+/* file: mrg32k3a_dense_default_batch_fpt_cpu.cpp */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+//++
+// Implementation of mrg32k3a calculation functions.
+//--
+
+#include "src/algorithms/engines/mrg32k3a/mrg32k3a_batch_container.h"
+#include "src/algorithms/engines/mrg32k3a/mrg32k3a_kernel.h"
+#include "src/algorithms/engines/mrg32k3a/mrg32k3a_impl.i"
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+namespace mrg32k3a
+{
+namespace interface1
+{
+template class BatchContainer<DAAL_FPTYPE, defaultDense, DAAL_CPU>;
+} // namespace interface1
+
+namespace internal
+{
+template class mrg32k3aKernel<DAAL_FPTYPE, defaultDense, DAAL_CPU>;
+} // namespace internal
+
+} // namespace mrg32k3a
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
diff --git a/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_dense_default_batch_fpt_dispatcher.cpp b/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_dense_default_batch_fpt_dispatcher.cpp
new file mode 100644
index 00000000000..fd78108df73
--- /dev/null
+++ b/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_dense_default_batch_fpt_dispatcher.cpp
@@ -0,0 +1,30 @@
+/* file: mrg32k3a_dense_default_batch_fpt_dispatcher.cpp */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+//++
+// Implementation of mrg32k3a calculation algorithm dispatcher.
+//--
+
+#include "src/algorithms/engines/mrg32k3a/mrg32k3a_batch_container.h"
+
+namespace daal
+{
+namespace algorithms
+{
+__DAAL_INSTANTIATE_DISPATCH_CONTAINER(engines::mrg32k3a::BatchContainer, batch, DAAL_FPTYPE, engines::mrg32k3a::defaultDense)
+} // namespace algorithms
+} // namespace daal
diff --git a/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_impl.i b/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_impl.i
new file mode 100644
index 00000000000..f8f12b2deea
--- /dev/null
+++ b/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_impl.i
@@ -0,0 +1,49 @@
+/* file: mrg32k3a_impl.i */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+//++
+// Implementation of mrg32k3a algorithm.
+//--
+*/
+
+#ifndef __MRG32K3A_IMPL_I__
+#define __MRG32K3A_IMPL_I__
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+namespace mrg32k3a
+{
+namespace internal
+{
+template <typename algorithmFPType, Method method, CpuType cpu>
+Status mrg32k3aKernel<algorithmFPType, method, cpu>::compute(NumericTable * resultTensor)
+{
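+    /* As with the other oneDAL engine kernels (e.g. mcg59), compute() is a stub:
+       random numbers are produced by distributions and algorithms that consume the
+       engine state, not by filling resultTensor here. */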
+ return Status();
+}
+
+} // namespace internal
+} // namespace mrg32k3a
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
+
+#endif
diff --git a/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_kernel.h b/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_kernel.h
new file mode 100644
index 00000000000..80c9fbe44d9
--- /dev/null
+++ b/cpp/daal/src/algorithms/engines/mrg32k3a/mrg32k3a_kernel.h
@@ -0,0 +1,58 @@
+/* file: mrg32k3a_kernel.h */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+//++
+// Declaration of a template function for calculating values using the MRG32k3a generator.
+//--
+
+#ifndef __MRG32K3A_KERNEL_H__
+#define __MRG32K3A_KERNEL_H__
+
+#include "algorithms/engines/mrg32k3a/mrg32k3a.h"
+#include "src/algorithms/kernel.h"
+#include "data_management/data/numeric_table.h"
+
+using namespace daal::services;
+using namespace daal::data_management;
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+namespace mrg32k3a
+{
+namespace internal
+{
+/**
+ * \brief Kernel for mrg32k3a calculation
+ */
+template <typename algorithmFPType, Method method, CpuType cpu>
+class mrg32k3aKernel : public Kernel
+{
+public:
+ Status compute(NumericTable * resultTable);
+};
+
+} // namespace internal
+} // namespace mrg32k3a
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
+
+#endif
diff --git a/cpp/daal/src/algorithms/engines/mt19937/mt19937_batch_impl.h b/cpp/daal/src/algorithms/engines/mt19937/mt19937_batch_impl.h
index e92d0e46612..805ded3153c 100644
--- a/cpp/daal/src/algorithms/engines/mt19937/mt19937_batch_impl.h
+++ b/cpp/daal/src/algorithms/engines/mt19937/mt19937_batch_impl.h
@@ -26,9 +26,6 @@
#include "src/externals/service_rng.h"
#include "src/data_management/service_numeric_table.h"
-static const int leapfrogMethodErrcode = -1002;
-static const int skipAheadMethodErrcode = -1003;
-
namespace daal
{
namespace algorithms
@@ -67,7 +64,7 @@ class BatchImpl : public algorithms::engines::mt19937::interface1::Batch<algorithmFPType, method>
         int errcode = baseRng.leapfrog(threadNum, nThreads);
services::Status s;
- if (errcode == leapfrogMethodErrcode)
+ if (errcode == __DAAL_RNG_ERROR_LEAPFROG_UNSUPPORTED)
s.add(ErrorLeapfrogUnsupported);
else if (errcode)
s.add(ErrorIncorrectErrorcodeFromGenerator);
diff --git a/cpp/daal/src/algorithms/engines/mt2203/mt2203_batch_impl.h b/cpp/daal/src/algorithms/engines/mt2203/mt2203_batch_impl.h
--- a/cpp/daal/src/algorithms/engines/mt2203/mt2203_batch_impl.h
+++ b/cpp/daal/src/algorithms/engines/mt2203/mt2203_batch_impl.h
@@ -199,7 +196,7 @@ class BatchImpl : public algorithms::engines::mt2203::interface1::Batch<algorithmFPType, method>
         int errcode = baseRng.skipAhead(nSkip);
services::Status s;
- if (errcode == skipAheadMethodErrcode)
+ if (errcode == __DAAL_RNG_ERROR_SKIPAHEAD_UNSUPPORTED)
s.add(ErrorSkipAheadUnsupported);
else if (errcode)
s.add(ErrorIncorrectErrorcodeFromGenerator);
diff --git a/cpp/daal/src/algorithms/engines/mt2203/mt2203_kernel.h b/cpp/daal/src/algorithms/engines/mt2203/mt2203_kernel.h
index b7de119367f..e588a02c8fb 100644
--- a/cpp/daal/src/algorithms/engines/mt2203/mt2203_kernel.h
+++ b/cpp/daal/src/algorithms/engines/mt2203/mt2203_kernel.h
@@ -19,8 +19,8 @@
// Declaration of template function that calculate mt2203s.
//--
-#ifndef __MCG59_KERNEL_H__
-#define __MCG59_KERNEL_H__
+#ifndef __MT2203_KERNEL_H__
+#define __MT2203_KERNEL_H__
#include "algorithms/engines/mt2203/mt2203.h"
#include "src/algorithms/kernel.h"
diff --git a/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10.cpp b/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10.cpp
new file mode 100644
index 00000000000..47fb7dae70f
--- /dev/null
+++ b/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10.cpp
@@ -0,0 +1,59 @@
+/* file: philox4x32x10.cpp */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+//++
+// Implementation of the Philox4x32-10 engine: a counter-based pseudorandom number generator (PRNG)
+// that mixes a 4x32-bit counter with a 2x32-bit key over 10 rounds to produce high-quality randomness.
+//--
+
+#include "algorithms/engines/philox4x32x10/philox4x32x10.h"
+#include "src/externals/service_dispatch.h"
+#include "src/algorithms/engines/philox4x32x10/philox4x32x10_batch_impl.h"
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+namespace philox4x32x10
+{
+namespace interface1
+{
+using namespace daal::services;
+using namespace philox4x32x10::internal;
+
+template <typename algorithmFPType, Method method>
+SharedPtr<Batch<algorithmFPType, method> > Batch<algorithmFPType, method>::create(size_t seed)
+{
+    SharedPtr<Batch<algorithmFPType, method> > engPtr;
+#define DAAL_CREATE_ENGINE_CPU(cpuId, ...) engPtr.reset(new BatchImpl<algorithmFPType, method, cpuId>(__VA_ARGS__));
+
+ DAAL_DISPATCH_FUNCTION_BY_CPU(DAAL_CREATE_ENGINE_CPU, seed);
+
+#undef DAAL_CREATE_ENGINE_CPU
+ return engPtr;
+}
+
+template class Batch<float, defaultDense>;
+template class Batch<double, defaultDense>;
+
+} // namespace interface1
+} // namespace philox4x32x10
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
diff --git a/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_batch_container.h b/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_batch_container.h
new file mode 100644
index 00000000000..9cb747e95a8
--- /dev/null
+++ b/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_batch_container.h
@@ -0,0 +1,68 @@
+/* file: philox4x32x10_batch_container.h */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+//++
+// Implementation of philox4x32x10 calculation algorithm container.
+//--
+*/
+
+#ifndef __PHILOX4X32X10_BATCH_CONTAINER_H__
+#define __PHILOX4X32X10_BATCH_CONTAINER_H__
+
+#include "algorithms/engines/philox4x32x10/philox4x32x10.h"
+#include "src/algorithms/engines/philox4x32x10/philox4x32x10_kernel.h"
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+namespace philox4x32x10
+{
+namespace interface1
+{
+template <typename algorithmFPType, Method method, CpuType cpu>
+BatchContainer<algorithmFPType, method, cpu>::BatchContainer(daal::services::Environment::env * daalEnv) : AnalysisContainerIface<batch>(daalEnv)
+{
+ __DAAL_INITIALIZE_KERNELS(internal::philox4x32x10Kernel, algorithmFPType, method);
+}
+
+template <typename algorithmFPType, Method method, CpuType cpu>
+BatchContainer<algorithmFPType, method, cpu>::~BatchContainer()
+{
+ __DAAL_DEINITIALIZE_KERNELS();
+}
+
+template <typename algorithmFPType, Method method, CpuType cpu>
+services::Status BatchContainer<algorithmFPType, method, cpu>::compute()
+{
+ daal::services::Environment::env & env = *_env;
+    engines::Result * result = static_cast<engines::Result *>(_res);
+ NumericTable * resultTable = result->get(engines::randomNumbers).get();
+
+ __DAAL_CALL_KERNEL(env, internal::philox4x32x10Kernel, __DAAL_KERNEL_ARGUMENTS(algorithmFPType, method), compute, resultTable);
+}
+
+} // namespace interface1
+} // namespace philox4x32x10
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
+
+#endif
diff --git a/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_batch_impl.h b/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_batch_impl.h
new file mode 100644
index 00000000000..1f7b40526ac
--- /dev/null
+++ b/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_batch_impl.h
@@ -0,0 +1,114 @@
+/* file: philox4x32x10_batch_impl.h */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+//++
+// Implementation of the class defining the philox4x32x10 engine
+//--
+*/
+
+#include "algorithms/engines/philox4x32x10/philox4x32x10.h"
+#include "src/algorithms/engines/engine_batch_impl.h"
+#include "src/externals/service_rng.h"
+#include "src/data_management/service_numeric_table.h"
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+namespace philox4x32x10
+{
+namespace internal
+{
+template <typename algorithmFPType, Method method, CpuType cpu>
+class BatchImpl : public algorithms::engines::philox4x32x10::interface1::Batch<algorithmFPType, method>,
+                  public algorithms::engines::internal::BatchBaseImpl
+{
+public:
+    typedef algorithms::engines::philox4x32x10::interface1::Batch<algorithmFPType, method> super1;
+    typedef algorithms::engines::internal::BatchBaseImpl super2;
+ BatchImpl(size_t seed = 777) : baseRng(seed, __DAAL_BRNG_PHILOX4X32X10), super2(seed) {}
+
+ void * getState() DAAL_C11_OVERRIDE { return baseRng.getState(); }
+
+ int getStateSize() const DAAL_C11_OVERRIDE { return baseRng.getStateSize(); }
+
+ services::Status saveStateImpl(byte * dest) const DAAL_C11_OVERRIDE
+ {
+ DAAL_CHECK(!baseRng.saveState((void *)dest), ErrorIncorrectErrorcodeFromGenerator);
+ return services::Status();
+ }
+
+ services::Status loadStateImpl(const byte * src) DAAL_C11_OVERRIDE
+ {
+ DAAL_CHECK(!baseRng.loadState((const void *)src), ErrorIncorrectErrorcodeFromGenerator);
+ return services::Status();
+ }
+
+ services::Status leapfrogImpl(size_t threadNum, size_t nThreads) DAAL_C11_OVERRIDE
+ {
+ int errcode = baseRng.leapfrog(threadNum, nThreads);
+ services::Status s;
+ if (errcode == __DAAL_RNG_ERROR_LEAPFROG_UNSUPPORTED)
+ s.add(ErrorLeapfrogUnsupported);
+ else if (errcode)
+ s.add(ErrorIncorrectErrorcodeFromGenerator);
+ return s;
+ }
+
+ services::Status skipAheadImpl(size_t nSkip) DAAL_C11_OVERRIDE
+ {
+ int errcode = baseRng.skipAhead(nSkip);
+ services::Status s;
+ if (errcode == __DAAL_RNG_ERROR_SKIPAHEAD_UNSUPPORTED)
+ s.add(ErrorSkipAheadUnsupported);
+ else if (errcode)
+ s.add(ErrorIncorrectErrorcodeFromGenerator);
+ return s;
+ }
+
+ virtual BatchImpl * cloneImpl() const DAAL_C11_OVERRIDE
+ {
+ return new BatchImpl(*this);
+ }
+
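+    /* Parallelization support reported to the engine dispatcher: the counter-based
+       Philox4x32-10 stream is split with skip-ahead only; leapfrog and generator
+       families are not provided by the underlying basic generator. */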
+ bool hasSupport(engines::internal::ParallelizationTechnique technique) const DAAL_C11_OVERRIDE
+ {
+ switch (technique)
+ {
+ case engines::internal::family: return false;
+ case engines::internal::skipahead: return true;
+ case engines::internal::leapfrog: return false;
+ }
+ return false;
+ }
+
+ ~BatchImpl() {}
+
+protected:
+ BatchImpl(const BatchImpl & other) : super1(other), super2(other), baseRng(other.baseRng) {}
+
+    daal::internal::BaseRNGsInst<cpu> baseRng;
+};
+
+} // namespace internal
+} // namespace philox4x32x10
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
diff --git a/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_dense_default_batch_fpt_cpu.cpp b/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_dense_default_batch_fpt_cpu.cpp
new file mode 100644
index 00000000000..946517c1d9c
--- /dev/null
+++ b/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_dense_default_batch_fpt_cpu.cpp
@@ -0,0 +1,47 @@
+/* file: philox4x32x10_dense_default_batch_fpt_cpu.cpp */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+//++
+// Implementation of philox4x32x10 calculation functions.
+//--
+
+#include "src/algorithms/engines/philox4x32x10/philox4x32x10_batch_container.h"
+#include "src/algorithms/engines/philox4x32x10/philox4x32x10_kernel.h"
+#include "src/algorithms/engines/philox4x32x10/philox4x32x10_impl.i"
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+namespace philox4x32x10
+{
+namespace interface1
+{
+template class BatchContainer<DAAL_FPTYPE, defaultDense, DAAL_CPU>;
+} // namespace interface1
+
+namespace internal
+{
+template class philox4x32x10Kernel<DAAL_FPTYPE, defaultDense, DAAL_CPU>;
+} // namespace internal
+
+} // namespace philox4x32x10
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
diff --git a/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_dense_default_batch_fpt_dispatcher.cpp b/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_dense_default_batch_fpt_dispatcher.cpp
new file mode 100644
index 00000000000..1640fc4ec12
--- /dev/null
+++ b/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_dense_default_batch_fpt_dispatcher.cpp
@@ -0,0 +1,30 @@
+/* file: philox4x32x10_dense_default_batch_fpt_dispatcher.cpp */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+//++
+// Implementation of philox4x32x10 calculation algorithm dispatcher.
+//--
+
+#include "src/algorithms/engines/philox4x32x10/philox4x32x10_batch_container.h"
+
+namespace daal
+{
+namespace algorithms
+{
+__DAAL_INSTANTIATE_DISPATCH_CONTAINER(engines::philox4x32x10::BatchContainer, batch, DAAL_FPTYPE, engines::philox4x32x10::defaultDense)
+} // namespace algorithms
+} // namespace daal
diff --git a/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_impl.i b/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_impl.i
new file mode 100644
index 00000000000..5aa5addc22b
--- /dev/null
+++ b/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_impl.i
@@ -0,0 +1,49 @@
+/* file: philox4x32x10_impl.i */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+//++
+// Implementation of philox4x32x10 algorithm.
+//--
+*/
+
+#ifndef __PHILOX4X32X10_IMPL_I__
+#define __PHILOX4X32X10_IMPL_I__
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+namespace philox4x32x10
+{
+namespace internal
+{
+template <typename algorithmFPType, Method method, CpuType cpu>
+Status philox4x32x10Kernel<algorithmFPType, method, cpu>::compute(NumericTable * resultTensor)
+{
+ return Status();
+}
+
+} // namespace internal
+} // namespace philox4x32x10
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
+
+#endif
diff --git a/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_kernel.h b/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_kernel.h
new file mode 100644
index 00000000000..5870d781abd
--- /dev/null
+++ b/cpp/daal/src/algorithms/engines/philox4x32x10/philox4x32x10_kernel.h
@@ -0,0 +1,58 @@
+/* file: philox4x32x10_kernel.h */
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+//++
+// Declaration of a template function for generating values using the Philox4x32-10 engine.
+//--
+
+#ifndef __PHILOX4X32X10_KERNEL_H__
+#define __PHILOX4X32X10_KERNEL_H__
+
+#include "algorithms/engines/philox4x32x10/philox4x32x10.h"
+#include "src/algorithms/kernel.h"
+#include "data_management/data/numeric_table.h"
+
+using namespace daal::services;
+using namespace daal::data_management;
+
+namespace daal
+{
+namespace algorithms
+{
+namespace engines
+{
+namespace philox4x32x10
+{
+namespace internal
+{
+/**
+ * \brief Kernel for philox4x32x10 calculation
+ */
+template <typename algorithmFPType, Method method, CpuType cpu>
+class philox4x32x10Kernel : public Kernel
+{
+public:
+ Status compute(NumericTable * resultTable);
+};
+
+} // namespace internal
+} // namespace philox4x32x10
+} // namespace engines
+} // namespace algorithms
+} // namespace daal
+
+#endif
diff --git a/cpp/daal/src/externals/service_rng_mkl.h b/cpp/daal/src/externals/service_rng_mkl.h
index b2dcd81b78b..425695c7f66 100644
--- a/cpp/daal/src/externals/service_rng_mkl.h
+++ b/cpp/daal/src/externals/service_rng_mkl.h
@@ -32,6 +32,8 @@
#define __DAAL_BRNG_MT2203 VSL_BRNG_MT2203
#define __DAAL_BRNG_MT19937 VSL_BRNG_MT19937
#define __DAAL_BRNG_MCG59 VSL_BRNG_MCG59
+#define __DAAL_BRNG_MRG32K3A VSL_BRNG_MRG32K3A
+#define __DAAL_BRNG_PHILOX4X32X10 VSL_BRNG_PHILOX4X32X10
#define __DAAL_RNG_METHOD_UNIFORM_STD VSL_RNG_METHOD_UNIFORM_STD
#define __DAAL_RNG_METHOD_UNIFORMBITS32_STD 0
#define __DAAL_RNG_METHOD_BERNOULLI_ICDF VSL_RNG_METHOD_BERNOULLI_ICDF
@@ -39,6 +41,10 @@
#define __DAAL_RNG_METHOD_GAUSSIAN_BOXMULLER2 VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2
#define __DAAL_RNG_METHOD_GAUSSIAN_ICDF VSL_RNG_METHOD_GAUSSIAN_ICDF
+// Errors
+#define __DAAL_RNG_ERROR_LEAPFROG_UNSUPPORTED -1002
+#define __DAAL_RNG_ERROR_SKIPAHEAD_UNSUPPORTED -1003
+
namespace daal
{
namespace internal
diff --git a/cpp/daal/src/externals/service_rng_openrng.h b/cpp/daal/src/externals/service_rng_openrng.h
index dd70c644606..0e49c62c83b 100644
--- a/cpp/daal/src/externals/service_rng_openrng.h
+++ b/cpp/daal/src/externals/service_rng_openrng.h
@@ -25,6 +25,8 @@
#define __DAAL_BRNG_MT2203 VSL_BRNG_MT2203
#define __DAAL_BRNG_MT19937 VSL_BRNG_MT19937
#define __DAAL_BRNG_MCG59 VSL_BRNG_MCG59
+#define __DAAL_BRNG_MRG32K3A VSL_BRNG_MRG32K3A
+#define __DAAL_BRNG_PHILOX4X32X10 VSL_BRNG_PHILOX4X32X10
#define __DAAL_RNG_METHOD_UNIFORM_STD VSL_RNG_METHOD_UNIFORM_STD
#define __DAAL_RNG_METHOD_UNIFORMBITS32_STD 0
#define __DAAL_RNG_METHOD_BERNOULLI_ICDF VSL_RNG_METHOD_BERNOULLI_ICDF
@@ -32,6 +34,10 @@
#define __DAAL_RNG_METHOD_GAUSSIAN_BOXMULLER2 VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2
#define __DAAL_RNG_METHOD_GAUSSIAN_ICDF VSL_RNG_METHOD_GAUSSIAN_ICDF
+// Errors
+#define __DAAL_RNG_ERROR_LEAPFROG_UNSUPPORTED -1002
+#define __DAAL_RNG_ERROR_SKIPAHEAD_UNSUPPORTED -1003
+
namespace daal
{
namespace internal
diff --git a/cpp/daal/src/externals/service_rng_ref.h b/cpp/daal/src/externals/service_rng_ref.h
index fc56fcf6205..f2df997c87d 100644
--- a/cpp/daal/src/externals/service_rng_ref.h
+++ b/cpp/daal/src/externals/service_rng_ref.h
@@ -36,16 +36,22 @@
#include
// RNGs
- #define __DAAL_BRNG_MT2203 (1 << 20) * 9 //VSL_BRNG_MT2203
- #define __DAAL_BRNG_MT19937 (1 << 20) * 8 //VSL_BRNG_MT19937
- #define __DAAL_BRNG_MCG59 (1 << 20) * 4 //VSL_BRNG_MCG59
-
- #define __DAAL_RNG_METHOD_UNIFORM_STD 0 //VSL_RNG_METHOD_UNIFORM_STD
- #define __DAAL_RNG_METHOD_UNIFORMBITS32_STD 4
- #define __DAAL_RNG_METHOD_BERNOULLI_ICDF 0 //VSL_RNG_METHOD_BERNOULLI_ICDF
- #define __DAAL_RNG_METHOD_GAUSSIAN_BOXMULLER 0 //VSL_RNG_METHOD_GAUSSIAN_BOXMULLER
- #define __DAAL_RNG_METHOD_GAUSSIAN_BOXMULLER2 1 //VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2
- #define __DAAL_RNG_METHOD_GAUSSIAN_ICDF 2 //VSL_RNG_METHOD_GAUSSIAN_ICDF
+ #define __DAAL_BRNG_MT2203 (1 << 20) * 9 //VSL_BRNG_MT2203
+ #define __DAAL_BRNG_MT19937 (1 << 20) * 8 //VSL_BRNG_MT19937
+ #define __DAAL_BRNG_MCG59 (1 << 20) * 4 //VSL_BRNG_MCG59
+ #define __DAAL_BRNG_MRG32K3A (1 << 20) * 3 //VSL_BRNG_MRG32K3A
+ #define __DAAL_BRNG_PHILOX4X32X10 (1 << 20) * 16 //VSL_BRNG_PHILOX4X32X10
+
+ #define __DAAL_RNG_METHOD_UNIFORM_STD 0 //VSL_RNG_METHOD_UNIFORM_STD
+ #define __DAAL_RNG_METHOD_UNIFORMBITS32_STD 4
+ #define __DAAL_RNG_METHOD_BERNOULLI_ICDF 0 //VSL_RNG_METHOD_BERNOULLI_ICDF
+ #define __DAAL_RNG_METHOD_GAUSSIAN_BOXMULLER 0 //VSL_RNG_METHOD_GAUSSIAN_BOXMULLER
+ #define __DAAL_RNG_METHOD_GAUSSIAN_BOXMULLER2 1 //VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2
+ #define __DAAL_RNG_METHOD_GAUSSIAN_ICDF 2 //VSL_RNG_METHOD_GAUSSIAN_ICDF
+
+ // Errors
+ #define __DAAL_RNG_ERROR_LEAPFROG_UNSUPPORTED -1002
+ #define __DAAL_RNG_ERROR_SKIPAHEAD_UNSUPPORTED -1003
namespace daal
{
diff --git a/cpp/oneapi/dal/algo/connected_components/backend/cpu/vertex_partitioning_default_kernel.hpp b/cpp/oneapi/dal/algo/connected_components/backend/cpu/vertex_partitioning_default_kernel.hpp
index 4da1866e277..bdcc3f1487a 100644
--- a/cpp/oneapi/dal/algo/connected_components/backend/cpu/vertex_partitioning_default_kernel.hpp
+++ b/cpp/oneapi/dal/algo/connected_components/backend/cpu/vertex_partitioning_default_kernel.hpp
@@ -24,7 +24,7 @@
#include "oneapi/dal/backend/memory.hpp"
#include "oneapi/dal/backend/interop/common.hpp"
#include "oneapi/dal/table/homogen.hpp"
-#include "oneapi/dal/backend/primitives/rng/rng_engine.hpp"
+#include "oneapi/dal/backend/primitives/rng/rng.hpp"
#include "oneapi/dal/detail/threading.hpp"
namespace oneapi::dal::preview::connected_components::backend {
@@ -90,9 +90,12 @@ std::int32_t most_frequent_element(const std::atomic<std::int32_t> *components,
const std::int64_t &samples_count = 1024) {
std::int32_t *rnd_vertex_ids = allocate(vertex_allocator, samples_count);
- dal::backend::primitives::engine eng;
-    dal::backend::primitives::rng<std::int32_t> rn_gen;
- rn_gen.uniform(samples_count, rnd_vertex_ids, eng.get_state(), 0, vertex_count);
+ dal::backend::primitives::host_engine eng;
+ dal::backend::primitives::uniform(samples_count,
+ rnd_vertex_ids,
+ eng,
+ 0,
+ vertex_count);
std::int32_t *root_sample_counts = allocate(vertex_allocator, vertex_count);
diff --git a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/infer_kernel_impl_dpc.cpp b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/infer_kernel_impl_dpc.cpp
index 19da49ffb74..23bcca39fde 100644
--- a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/infer_kernel_impl_dpc.cpp
+++ b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/infer_kernel_impl_dpc.cpp
@@ -126,6 +126,7 @@ infer_kernel_impl::predict_by_tree_group_weighted(
const Float* cls_prb_list_ptr = class_proba_list.get_data();
Index obs_tree_group_response_count = ctx.class_count * ctx.tree_in_group_count;
+
de::check_mul_overflow(ctx.row_count, obs_tree_group_response_count);
auto [obs_response_list, zero_obs_response_event] =
pr::ndarray::zeros(queue_,
diff --git a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/infer_model_manager.hpp b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/infer_model_manager.hpp
index c0dd4cca565..770f73b5269 100644
--- a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/infer_model_manager.hpp
+++ b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/infer_model_manager.hpp
@@ -87,7 +87,6 @@ class infer_model_manager {
}
max_tree_size_ = dal::detail::integral_cast(tree_size_max);
-
const Index tree_block_size = dal::detail::check_mul_overflow(max_tree_size_, tree_count);
auto fi_list_host = dal::backend::primitives::ndarray::empty({ tree_block_size });
diff --git a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_feature_type_dpc.cpp b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_feature_type_dpc.cpp
index 7306533ed50..ee17b4ffabd 100644
--- a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_feature_type_dpc.cpp
+++ b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_feature_type_dpc.cpp
@@ -20,6 +20,7 @@
#include "oneapi/dal/table/row_accessor.hpp"
#include "oneapi/dal/backend/memory.hpp"
#include "oneapi/dal/detail/profiler.hpp"
+#include
#ifdef ONEDAL_DATA_PARALLEL
@@ -29,6 +30,12 @@ namespace de = dal::detail;
namespace bk = dal::backend;
namespace pr = dal::backend::primitives;
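+
+// Heuristic block size for column extraction: 0x10000 elements scaled by
+// 8 / sizeof(Float), i.e. roughly 64K rows per submitted kernel for double and
+// 128K for float. The queue and row-count arguments are currently unused.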
+template <typename Float>
+std::int64_t propose_block_size(const sycl::queue& q, const std::int64_t r) {
+ constexpr std::int64_t fsize = sizeof(Float);
+ return 0x10000l * (8 / fsize);
+}
+
template
inline sycl::event sort_inplace(sycl::queue& queue_,
pr::ndarray& src,
@@ -56,18 +63,29 @@ sycl::event indexed_features::extract_column(
Float* values = values_nd.get_mutable_data();
Index* indices = indices_nd.get_mutable_data();
auto column_count = column_count_;
-
- const sycl::range<1> range = de::integral_cast(row_count_);
-
- auto event = queue_.submit([&](sycl::handler& h) {
- h.depends_on(deps);
- h.parallel_for(range, [=](sycl::id<1> idx) {
- values[idx] = data[idx * column_count + feature_id];
- indices[idx] = idx;
+ const auto block_size = propose_block_size<Float>(queue_, row_count_);
+ const bk::uniform_blocking blocking(row_count_, block_size);
+
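+ // Extract the column block by block: each block gets its own kernel submission,
+ // and the per-block events are collected and combined through bk::wait_or_pass below.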
+ std::vector<sycl::event> events;
+ events.reserve(blocking.get_block_count());
+ for (std::int64_t block_index = 0; block_index < blocking.get_block_count(); ++block_index) {
+ const auto first_row = blocking.get_block_start_index(block_index);
+ const auto last_row = blocking.get_block_end_index(block_index);
+ const auto curr_block = last_row - first_row;
+ ONEDAL_ASSERT(curr_block > 0);
+
+ auto event = queue_.submit([&](sycl::handler& cgh) {
+ cgh.depends_on(deps);
+ cgh.parallel_for(sycl::range<1>(de::integral_cast<std::size_t>(curr_block)), [=](sycl::id<1> idx) {
+ const std::int64_t row = idx + first_row;
+
+ values[row] = data[row * column_count + feature_id];
+ indices[row] = row;
+ });
});
- });
- return event;
+ events.push_back(event);
+ }
+ return bk::wait_or_pass(events);
}
template
diff --git a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_kernel_hist_impl.hpp b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_kernel_hist_impl.hpp
index 9dfe252e849..d15f1704065 100644
--- a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_kernel_hist_impl.hpp
+++ b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_kernel_hist_impl.hpp
@@ -1,5 +1,5 @@
/*******************************************************************************
-* Copyright 2021 Intel Corporation
+* Copyright 2021-2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -20,7 +20,8 @@
#include "oneapi/dal/backend/primitives/ndarray.hpp"
#include "oneapi/dal/backend/primitives/utils.hpp"
#include "oneapi/dal/algo/decision_forest/train_types.hpp"
-
+#include "oneapi/dal/backend/primitives/rng/rng.hpp"
+#include "oneapi/dal/backend/primitives/rng/rng.hpp"
#include "oneapi/dal/backend/primitives/rng/rng_engine_collection.hpp"
#include "oneapi/dal/algo/decision_forest/backend/gpu/train_misc_structs.hpp"
@@ -50,7 +51,7 @@ class train_kernel_hist_impl {
using model_manager_t = train_model_manager;
using train_context_t = train_context;
using imp_data_t = impurity_data;
- using rng_engine_t = pr::engine;
+ using rng_engine_t = pr::dpc_engine;
using rng_engine_list_t = std::vector;
using msg = dal::detail::error_messages;
using comm_t = bk::communicator;
@@ -62,7 +63,7 @@ class train_kernel_hist_impl {
train_kernel_hist_impl(const bk::context_gpu& ctx)
: queue_(ctx.get_queue()),
comm_(ctx.get_communicator()),
- train_service_kernels_(queue_) {}
+ train_service_kernels_(ctx.get_queue()) {}
~train_kernel_hist_impl() = default;
result_t operator()(const descriptor_t& desc,
@@ -83,13 +84,11 @@ class train_kernel_hist_impl {
pr::ndarray& node_list,
pr::ndarray& tree_order_level,
Index engine_offset,
- Index node_count);
+ Index node_count,
+ const bk::event_vector& deps = {});
void validate_input(const descriptor_t& desc, const table& data, const table& labels) const;
- Index get_row_total_count(bool distr_mode, Index row_count);
- Index get_global_row_offset(bool distr_mode, Index row_count);
-
 /// Initializes the `ctx` training context structure based on the data and
 /// descriptor. Fills and computes all parameters in the context, for example
 /// the tree count, the required memory size, and the indexed features.
@@ -149,6 +148,24 @@ class train_kernel_hist_impl {
Index node_count,
const bk::event_vector& deps = {});
+ sycl::event compute_initial_imp_for_node_list_regression(
+ const train_context_t& ctx,
+ const pr::ndarray& node_list,
+ const pr::ndarray& local_sum_hist,
+ const pr::ndarray& local_sum2cent_hist,
+ imp_data_t& imp_data_list,
+ Index node_count,
+ const bk::event_vector& deps = {});
+
+ sycl::event compute_local_sum_histogram(const train_context_t& ctx,
+ const pr::ndarray& response,
+ const pr::ndarray& tree_order,
+ const pr::ndarray& node_list,
+ pr::ndarray& local_sum_hist,
+ pr::ndarray& local_sum2cent_hist,
+ Index node_count,
+ const bk::event_vector& deps = {});
+
/// Computes initial histograms for each node to compute impurity.
///
/// @param[in] ctx a training context structure for a GPU backend
@@ -575,7 +592,7 @@ class train_kernel_hist_impl {
pr::ndarray& oob_per_obs_list,
pr::ndarray& var_imp,
pr::ndarray& var_imp_variance,
- const rng_engine_list_t& rng_engine_arr,
+ rng_engine_list_t& rng_engine_arr,
Index tree_idx,
Index tree_in_block,
Index built_tree_count,
diff --git a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_kernel_hist_impl_dpc.cpp b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_kernel_hist_impl_dpc.cpp
index fc875683784..69593a81321 100644
--- a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_kernel_hist_impl_dpc.cpp
+++ b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_kernel_hist_impl_dpc.cpp
@@ -1,5 +1,5 @@
/*******************************************************************************
-* Copyright 2021 Intel Corporation
+* Copyright 2021-2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@
#include "oneapi/dal/table/row_accessor.hpp"
#include "oneapi/dal/detail/profiler.hpp"
#include "oneapi/dal/algo/decision_forest/backend/gpu/train_helpers.hpp"
-
+#include
#ifdef ONEDAL_DATA_PARALLEL
#include "oneapi/dal/algo/decision_forest/backend/gpu/train_kernel_hist_impl.hpp"
@@ -91,47 +91,16 @@ void train_kernel_hist_impl::validate_input(const descr
}
}
-template
-Index train_kernel_hist_impl::get_row_total_count(bool distr_mode,
- Index row_count) {
- Index row_total_count = row_count;
-
- if (distr_mode) {
- ONEDAL_PROFILER_TASK(allreduce_row_total_count);
- comm_.allreduce(row_total_count).wait();
- }
-
- return row_total_count;
-}
-
-template
-Index train_kernel_hist_impl::get_global_row_offset(bool distr_mode,
- Index row_count) {
- Index global_row_offset = 0;
-
- if (distr_mode) {
- auto row_count_list_host = pr::ndarray::empty({ comm_.get_rank_count() });
- Index* row_count_list_host_ptr = row_count_list_host.get_mutable_data();
- {
- ONEDAL_PROFILER_TASK(allgather_row_count);
- comm_.allgather(row_count, row_count_list_host.flatten()).wait();
- }
-
- for (std::int64_t i = 0; i < comm_.get_rank(); ++i) {
- global_row_offset += row_count_list_host_ptr[i];
- }
- }
-
- return global_row_offset;
-}
-
template
void train_kernel_hist_impl::init_params(train_context_t& ctx,
const descriptor_t& desc,
const table& data,
const table& responses,
const table& weights) {
- ctx.distr_mode_ = (comm_.get_rank_count() > 1);
+ ONEDAL_PROFILER_TASK(init_params, queue_);
+ std::int64_t rank_count = comm_.get_rank_count();
+ ctx.distr_mode_ = (rank_count > 1);
+ auto current_rank = comm_.get_rank();
ctx.use_private_mem_buf_ = true;
@@ -143,7 +112,11 @@ void train_kernel_hist_impl::init_params(train_context_
}
ctx.row_count_ = de::integral_cast(data.get_row_count());
- ctx.row_total_count_ = get_row_total_count(ctx.distr_mode_, ctx.row_count_);
+ ctx.row_total_count_ = ctx.row_count_;
+ {
+ ONEDAL_PROFILER_TASK(allreduce_total_row_count_exactly_it, queue_);
+ comm_.allreduce(ctx.row_total_count_, spmd::reduce_op::sum).wait();
+ }
ctx.column_count_ = de::integral_cast(data.get_column_count());
@@ -151,10 +124,22 @@ void train_kernel_hist_impl::init_params(train_context_
ctx.selected_row_count_ = ctx.distr_mode_
? impl_const_t::bad_val_
: desc.get_observations_per_tree_fraction() * ctx.row_count_;
+
ctx.selected_row_total_count_ =
desc.get_observations_per_tree_fraction() * ctx.row_total_count_;
- ctx.global_row_offset_ = get_global_row_offset(ctx.distr_mode_, ctx.row_count_);
+ auto global_rank_offsets = array::zeros(rank_count);
+ global_rank_offsets.get_mutable_data()[current_rank] = ctx.row_count_;
+ {
+ ONEDAL_PROFILER_TASK(allreduce_recv_counts, queue_);
+ comm_.allreduce(global_rank_offsets, spmd::reduce_op::sum).wait();
+ }
+
+ ctx.global_row_offset_ = 0;
+ for (std::int64_t i = 0; i < current_rank; i++) {
+ ONEDAL_ASSERT(global_rank_offsets.get_data()[i] >= 0);
+ ctx.global_row_offset_ += global_rank_offsets.get_data()[i];
+ }
ctx.tree_count_ = de::integral_cast(desc.get_tree_count());
@@ -211,7 +196,7 @@ void train_kernel_hist_impl::init_params(train_context_
bin_borders_host_[clmn_idx] = ind_ftrs.get_bin_borders(clmn_idx).to_host(queue_);
}
- data_host_ = pr::table2ndarray_1d(queue_, data, alloc::device).to_host(queue_);
+ data_host_ = pr::table2ndarray_1d(queue_, data, alloc::host);
response_nd_ = pr::table2ndarray_1d(queue_, responses, alloc::device);
@@ -332,7 +317,9 @@ void train_kernel_hist_impl::init_params(train_context_
template
void train_kernel_hist_impl::allocate_buffers(const train_context_t& ctx) {
- de::check_mul_overflow(ctx.selected_row_total_count_, ctx.tree_in_block_);
+ ONEDAL_PROFILER_TASK(allocate_buffers, queue_);
+ de::check_mul_overflow(ctx.selected_row_total_count_,
+ static_cast(ctx.tree_in_block_));
 // main tree order and auxiliary one are used for partitioning
tree_order_lev_ =
@@ -372,7 +359,8 @@ sycl::event train_kernel_hist_impl::gen_initial_tree_or
pr::ndarray& node_list_host,
pr::ndarray& tree_order_level,
Index engine_offset,
- Index node_count) {
+ Index node_count,
+ const bk::event_vector& deps) {
ONEDAL_PROFILER_TASK(gen_initial_tree_order, queue_);
ONEDAL_ASSERT(node_list_host.get_count() == node_count * impl_const_t::node_prop_count_);
@@ -382,50 +370,74 @@ sycl::event train_kernel_hist_impl::gen_initial_tree_or
sycl::event last_event;
if (ctx.bootstrap_) {
- auto selected_row_global_host =
- pr::ndarray::empty({ ctx.selected_row_total_count_ * ctx.tree_in_block_ });
- pr::ndarray selected_row_host;
+ auto selected_row_global =
+ pr::ndarray::empty(queue_,
+ { ctx.selected_row_total_count_ * ctx.tree_in_block_ },
+ alloc::device);
+ pr::ndarray selected_row;
if (ctx.distr_mode_) {
- selected_row_host = pr::ndarray::empty(
- { ctx.selected_row_total_count_ * ctx.tree_in_block_ });
+ selected_row =
+ pr::ndarray::empty(queue_,
+ { ctx.selected_row_total_count_ * ctx.tree_in_block_ },
+ alloc::device);
}
- Index* const selected_row_global_ptr = selected_row_global_host.get_mutable_data();
- Index* const selected_row_ptr =
- ctx.distr_mode_ ? selected_row_host.get_mutable_data() : nullptr;
+ Index* const selected_row_global_ptr = selected_row_global.get_mutable_data();
+ Index* const selected_row_ptr = ctx.distr_mode_ ? selected_row.get_mutable_data() : nullptr;
Index* const node_list_ptr = node_list_host.get_mutable_data();
for (Index node_idx = 0; node_idx < node_count; ++node_idx) {
- pr::rng rn_gen;
Index* gen_row_idx_global_ptr =
selected_row_global_ptr + ctx.selected_row_total_count_ * node_idx;
- rn_gen.uniform(ctx.selected_row_total_count_,
- gen_row_idx_global_ptr,
- rng_engine_list[engine_offset + node_idx].get_state(),
- 0,
- ctx.row_total_count_);
+ uniform(queue_,
+ ctx.selected_row_total_count_,
+ gen_row_idx_global_ptr,
+ rng_engine_list[engine_offset + node_idx],
+ 0,
+ ctx.row_total_count_,
+ { deps });
if (ctx.distr_mode_) {
Index* node_ptr = node_list_ptr + node_idx * impl_const_t::node_prop_count_;
- Index* src = gen_row_idx_global_ptr;
Index* const dst = selected_row_ptr + ctx.selected_row_total_count_ * node_idx;
- Index row_idx = 0;
- for (Index i = 0; i < ctx.selected_row_total_count_; ++i) {
- dst[i] = 0;
- if (src[i] >= ctx.global_row_offset_ &&
- src[i] < (ctx.global_row_offset_ + ctx.row_count_)) {
- dst[row_idx++] = src[i] - ctx.global_row_offset_;
- }
- }
- node_ptr[impl_const_t::ind_lrc] = row_idx;
+ auto [row_index, row_index_event] =
+ pr::ndarray::full(queue_, 1, 0, alloc::device);
+ row_index_event.wait_and_throw();
+ Index* row_idx_ptr = row_index.get_mutable_data();
+ const sycl::nd_range<1> nd_range =
+ bk::make_multiple_nd_range_1d(ctx.selected_row_total_count_, 1);
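+ // Filter the globally sampled row indices down to this rank's local range
+ // [global_row_offset_, global_row_offset_ + row_count_) and compact them to the
+ // front of dst via an atomic counter; the counter's final value becomes the
+ // node's local row count.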
+ auto event_ = queue_.submit([&](sycl::handler& cgh) {
+ cgh.depends_on({ last_event });
+ cgh.parallel_for(nd_range, [=](sycl::nd_item<1> id) {
+ auto idx = id.get_global_id(0);
+ dst[idx] = 0;
+ if (gen_row_idx_global_ptr[idx] >= ctx.global_row_offset_ &&
+ gen_row_idx_global_ptr[idx] <
+ (ctx.global_row_offset_ + ctx.row_count_)) {
+ sycl::atomic_ref<
+ Index,
+ sycl::memory_order::relaxed,
+ sycl::memory_scope::device,
+ sycl::access::address_space::ext_intel_global_device_space>
+ counter_atomic(row_idx_ptr[0]);
+ auto cur_idx = counter_atomic.fetch_add(1);
+ dst[cur_idx] = gen_row_idx_global_ptr[idx] - ctx.global_row_offset_;
+ }
+ });
+ });
+ auto set_event = queue_.submit([&](sycl::handler& cgh) {
+ cgh.depends_on(event_);
+ cgh.parallel_for(sycl::range<1>{ std::size_t(1) }, [=](sycl::id<1> idx) {
+ node_ptr[impl_const_t::ind_lrc] = row_idx_ptr[0];
+ });
+ });
+ set_event.wait_and_throw();
}
}
- last_event = ctx.distr_mode_
- ? tree_order_level.assign_from_host(queue_, selected_row_host)
- : tree_order_level.assign_from_host(queue_, selected_row_global_host);
+ tree_order_level = ctx.distr_mode_ ? selected_row : selected_row_global;
}
else {
Index row_count = ctx.selected_row_count_;
@@ -433,29 +445,30 @@ sycl::event train_kernel_hist_impl::gen_initial_tree_or
if (ctx.distr_mode_) {
row_count = 0;
if (ctx.global_row_offset_ < ctx.selected_row_total_count_) {
- row_count = std::min(ctx.selected_row_total_count_ - ctx.global_row_offset_,
+ row_count = std::min(static_cast<Index>(ctx.selected_row_total_count_ -
+ ctx.global_row_offset_),
ctx.row_count_);
}
// in case of no bootstrap
 // it is a valid case if this worker's row set wasn't taken for tree build
 // i.e. row_count can be 0
-
Index* node_list_ptr = node_list_host.get_mutable_data();
-
- for (Index node_idx = 0; node_idx < node_count; ++node_idx) {
- Index* node_ptr = node_list_ptr + node_idx * impl_const_t::node_prop_count_;
- node_ptr[impl_const_t::ind_lrc] = row_count;
+ auto set_event = queue_.submit([&](sycl::handler& cgh) {
+ cgh.parallel_for(sycl::range<1>{ std::size_t(node_count) }, [=](sycl::id<1> idx) {
+ Index* node_ptr = node_list_ptr + idx * impl_const_t::node_prop_count_;
+ node_ptr[impl_const_t::ind_lrc] = row_count;
+ });
+ });
+ set_event.wait_and_throw();
+
+ if (row_count > 0) {
+ last_event = train_service_kernels_.initialize_tree_order(tree_order_level,
+ node_count,
+ row_count,
+ stride);
}
}
-
- if (row_count > 0) {
- last_event = train_service_kernels_.initialize_tree_order(tree_order_level,
- node_count,
- row_count,
- stride);
- }
}
-
return last_event;
}
@@ -469,7 +482,6 @@ train_kernel_hist_impl::gen_feature_list(
ONEDAL_PROFILER_TASK(gen_feature_list, queue_);
ONEDAL_ASSERT(node_vs_tree_map_list.get_count() == node_count);
-
de::check_mul_overflow((node_count + 1), ctx.selected_ftr_count_);
// first part is used for features indices, +1 block - part for generator
auto selected_features_host =
@@ -479,36 +491,43 @@ train_kernel_hist_impl::gen_feature_list(
{ node_count * ctx.selected_ftr_count_ },
alloc::device);
- auto selected_features_host_ptr = selected_features_host.get_mutable_data();
-
auto node_vs_tree_map_list_host = node_vs_tree_map_list.to_host(queue_);
- pr::rng rn_gen;
- auto tree_map_ptr = node_vs_tree_map_list_host.get_mutable_data();
if (ctx.selected_ftr_count_ != ctx.column_count_) {
+ auto tree_map_ptr = node_vs_tree_map_list_host.get_mutable_data();
+ auto selected_features_host_ptr = selected_features_host.get_mutable_data();
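+ // For every node, sample selected_ftr_count_ distinct feature indices from
+ // [0, column_count_) using the engine that belongs to the node's tree.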
for (Index node = 0; node < node_count; ++node) {
- rn_gen.uniform_without_replacement(
+ pr::uniform_without_replacement(
+ queue_,
ctx.selected_ftr_count_,
selected_features_host_ptr + node * ctx.selected_ftr_count_,
selected_features_host_ptr + (node + 1) * ctx.selected_ftr_count_,
- rng_engine_list[tree_map_ptr[node]].get_state(),
+ rng_engine_list[tree_map_ptr[node]],
0,
ctx.column_count_);
}
+ auto event = selected_features_com.assign_from_host(queue_,
+ selected_features_host_ptr,
+ selected_features_com.get_count());
+
+ return std::tuple{ selected_features_com, event };
}
else {
+ sycl::event fill_event;
for (Index node = 0; node < node_count; ++node) {
- for (Index i = 0; i < ctx.selected_ftr_count_; ++i) {
- selected_features_host_ptr[node * ctx.selected_ftr_count_ + i] = i;
- }
+ auto selected_features_host_ptr = selected_features_com.get_mutable_data();
+
+ fill_event = queue_.submit([&](sycl::handler& cgh) {
+ cgh.parallel_for(
+ sycl::range<1>{ std::size_t(ctx.selected_ftr_count_) },
+ [=](sycl::id<1> idx) {
+ selected_features_host_ptr[node * ctx.selected_ftr_count_ + idx] = idx;
+ });
+ });
}
- }
- auto event = selected_features_com.assign_from_host(queue_,
- selected_features_host_ptr,
- selected_features_com.get_count());
-
- return std::tuple{ selected_features_com, event };
+ return std::tuple{ selected_features_com, fill_event };
+ }
}
template
@@ -524,7 +543,6 @@ train_kernel_hist_impl::gen_random_thresholds(
auto node_vs_tree_map_list_host = node_vs_tree_map.to_host(queue_);
- pr::rng rn_gen;
auto tree_map_ptr = node_vs_tree_map_list_host.get_mutable_data();
// Create arrays for random generated bins
@@ -537,11 +555,12 @@ train_kernel_hist_impl::gen_random_thresholds(
// Generate random bins for selected features
for (Index node = 0; node < node_count; ++node) {
- rn_gen.uniform(ctx.selected_ftr_count_,
- random_bins_host_ptr + node * ctx.selected_ftr_count_,
- rng_engine_list[tree_map_ptr[node]].get_state(),
- 0.0f,
- 1.0f);
+ pr::uniform(queue_,
+ ctx.selected_ftr_count_,
+ random_bins_host_ptr + node * ctx.selected_ftr_count_,
+ rng_engine_list[tree_map_ptr[node]],
+ 0.0f,
+ 1.0f);
}
auto event_rnd_generate =
random_bins_com.assign_from_host(queue_, random_bins_host_ptr, random_bins_com.get_count());
@@ -758,54 +777,50 @@ sycl::event train_kernel_hist_impl::compute_initial_imp
pr::ndarray& node_list,
Index node_count,
const bk::event_vector& deps) {
+ ONEDAL_PROFILER_TASK(compute_initial_imp_for_node_list, queue_);
ONEDAL_ASSERT(imp_data_list.imp_list_.get_count() ==
node_count * impl_const_t::node_imp_prop_count_);
- if constexpr (std::is_same_v) {
- ONEDAL_ASSERT(imp_data_list.class_hist_list_.get_count() == node_count * ctx.class_count_);
- }
ONEDAL_ASSERT(node_list.get_count() == node_count * impl_const_t::node_prop_count_);
+ sycl::event event_;
if constexpr (std::is_same_v) {
- auto class_hist_list_host = imp_data_list.class_hist_list_.to_host(queue_, deps);
- auto imp_list_host = imp_data_list.imp_list_.to_host(queue_);
- auto node_list_host = node_list.to_host(queue_);
+ const Index* class_hist_list_ptr = imp_data_list.class_hist_list_.get_data();
+ Float* imp_list_ptr = imp_data_list.imp_list_.get_mutable_data();
+ Index* node_list_ptr = node_list.get_mutable_data();
+
+ // Launch kernel to compute impurity and winning class for each node
+ event_ = queue_.submit([&](sycl::handler& cgh) {
+ cgh.depends_on(deps);
+ cgh.parallel_for(sycl::range<1>(node_count), [=](sycl::id<1> idx) {
+ Index node_idx = idx;
+ const Index* node_histogram_ptr = class_hist_list_ptr + node_idx * ctx.class_count_;
+ Float* node_imp_ptr = imp_list_ptr + node_idx * impl_const_t::node_imp_prop_count_;
+ Index* node_ptr = node_list_ptr + node_idx * impl_const_t::node_prop_count_;
- const Index* class_hist_list_host_ptr = class_hist_list_host.get_data();
- Float* imp_list_host_ptr = imp_list_host.get_mutable_data();
- Index* node_list_host_ptr = node_list_host.get_mutable_data();
+ Index row_count = node_ptr[impl_const_t::ind_grc];
- for (Index node_idx = 0; node_idx < node_count; ++node_idx) {
- const Index* node_histogram_ptr =
- class_hist_list_host_ptr + node_idx * ctx.class_count_;
- Float* node_imp_ptr = imp_list_host_ptr + node_idx * impl_const_t::node_imp_prop_count_;
- Index* node_ptr = node_list_host_ptr + node_idx * impl_const_t::node_prop_count_;
-
- Index row_count = node_ptr[impl_const_t::ind_grc];
-
- Float imp = Float(1);
- Float div = Float(1) / (Float(row_count) * row_count);
- Index max_cls_count = 0;
- Index win_cls = 0;
- Index cls_count = 0;
-
- for (Index cls_idx = 0; cls_idx < ctx.class_count_; ++cls_idx) {
- cls_count = node_histogram_ptr[cls_idx];
- imp -= Float(cls_count) * (cls_count)*div;
-
- if (cls_count > max_cls_count) {
- max_cls_count = cls_count;
- win_cls = cls_idx;
- }
- }
+ Float imp = Float(1);
+ Float div = Float(1) / (Float(row_count) * row_count);
+ Index max_cls_count = 0;
+ Index win_cls = 0;
+ Index cls_count = 0;
- node_ptr[impl_const_t::ind_win] = win_cls;
- node_imp_ptr[0] = sycl::max(imp, Float(0));
- }
- imp_data_list.imp_list_.assign_from_host(queue_, imp_list_host).wait_and_throw();
- node_list.assign_from_host(queue_, node_list_host).wait_and_throw();
+ for (Index cls_idx = 0; cls_idx < ctx.class_count_; ++cls_idx) {
+ cls_count = node_histogram_ptr[cls_idx];
+ imp -= cls_count * cls_count * div;
+
+ if (cls_count > max_cls_count) {
+ max_cls_count = cls_count;
+ win_cls = cls_idx;
+ }
+ }
+ node_ptr[impl_const_t::ind_win] = win_cls;
+ node_imp_ptr[0] = sycl::max(imp, Float(0));
+ });
+ });
}
- return sycl::event{};
+ return event_;
}
template
@@ -994,6 +1009,136 @@ Float* local_buf_ptr = local_buf.get_pointer().get();
return event;
}
+template
+sycl::event train_kernel_hist_impl::compute_local_sum_histogram(
+ const train_context_t& ctx,
+ const pr::ndarray& response,
+ const pr::ndarray& tree_order,
+ const pr::ndarray& node_list,
+ pr::ndarray& local_sum_hist,
+ pr::ndarray& local_sum2cent_hist,
+ Index node_count,
+ const bk::event_vector& deps) {
+ ONEDAL_ASSERT(response.get_count() == ctx.row_count_);
+ ONEDAL_ASSERT(tree_order.get_count() == ctx.tree_in_block_ * ctx.selected_row_total_count_);
+ ONEDAL_ASSERT(node_list.get_count() == node_count * impl_const_t::node_prop_count_);
+ ONEDAL_ASSERT(local_sum_hist.get_count() == node_count);
+ ONEDAL_ASSERT(local_sum2cent_hist.get_count() == node_count);
+
+ auto fill_event1 = local_sum_hist.fill(queue_, 0, deps);
+ auto fill_event2 = local_sum2cent_hist.fill(queue_, 0, deps);
+
+ fill_event1.wait_and_throw();
+ fill_event2.wait_and_throw();
+
+ const Float* response_ptr = response.get_data();
+ const Index* tree_order_ptr = tree_order.get_data();
+ const Index* node_list_ptr = node_list.get_data();
+ Float* local_sum_hist_ptr = local_sum_hist.get_mutable_data();
+ Float* local_sum2cent_hist_ptr = local_sum2cent_hist.get_mutable_data();
+
+ const Index node_prop_count = impl_const_t::node_prop_count_;
+
+ auto local_size = ctx.preferable_group_size_;
+ const sycl::nd_range<2> nd_range =
+ bk::make_multiple_nd_range_2d({ local_size, node_count }, { local_size, 1 });
+
+ auto event = queue_.submit([&](sycl::handler& cgh) {
+ cgh.depends_on(deps);
+ local_accessor_rw_t<Float> local_sum_buf(local_size, cgh);
+ local_accessor_rw_t<Float> local_sum2cent_buf(local_size, cgh);
+ cgh.parallel_for(nd_range, [=](sycl::nd_item<2> item) {
+ const Index node_id = item.get_global_id()[1];
+ const Index local_id = item.get_local_id()[0];
+ const Index local_size = item.get_local_range()[0];
+
+ const Index* node_ptr = node_list_ptr + node_id * node_prop_count;
+
+ const Index row_offset = node_ptr[impl_const_t::ind_ofs];
+ const Index row_count = node_ptr[impl_const_t::ind_lrc];
+
+ const Index* node_tree_order_ptr = &tree_order_ptr[row_offset];
+#if __SYCL_COMPILER_VERSION >= 20230828
+ Float* local_sum_buf_ptr =
+ local_sum_buf.template get_multi_ptr().get_raw();
+ Float* local_sum2cent_buf_ptr =
+ local_sum2cent_buf.template get_multi_ptr().get_raw();
+#else
+ Float* local_sum_buf_ptr = local_sum_buf.get_pointer().get();
+ Float* local_sum2cent_buf_ptr = local_sum2cent_buf.get_pointer().get();
+#endif
+ Float local_sum = Float(0);
+ Float local_sum2cent = Float(0);
+ for (Index i = local_id; i < row_count; i += local_size) {
+ Float value = response_ptr[node_tree_order_ptr[i]];
+ local_sum += value;
+ local_sum2cent += value * value;
+ }
+
+ local_sum_buf_ptr[local_id] = local_sum;
+ local_sum2cent_buf_ptr[local_id] = local_sum2cent;
+
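+ // Tree reduction in local memory: halve the active range each iteration until
+ // work-item 0 holds the node-wide sum and sum of squares.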
+ for (Index offset = local_size / 2; offset > 0; offset >>= 1) {
+ item.barrier(sycl::access::fence_space::local_space);
+ if (local_id < offset) {
+ local_sum_buf_ptr[local_id] += local_sum_buf_ptr[local_id + offset];
+ local_sum2cent_buf_ptr[local_id] += local_sum2cent_buf_ptr[local_id + offset];
+ }
+ }
+
+ if (local_id == 0) {
+ local_sum_hist_ptr[node_id] = local_sum_buf_ptr[local_id];
+ local_sum2cent_hist_ptr[node_id] = local_sum2cent_buf_ptr[local_id];
+ }
+ });
+ });
+
+ event.wait_and_throw();
+ return event;
+}
+
+template
+sycl::event
+train_kernel_hist_impl::compute_initial_imp_for_node_list_regression(
+ const train_context_t& ctx,
+ const pr::ndarray& node_list,
+ const pr::ndarray& local_sum_hist,
+ const pr::ndarray& local_sum2cent_hist,
+ imp_data_t& imp_data_list,
+ Index node_count,
+ const bk::event_vector& deps) {
+ ONEDAL_ASSERT(node_list.get_count() == node_count * impl_const_t::node_prop_count_);
+ ONEDAL_ASSERT(local_sum_hist.get_count() == node_count);
+ ONEDAL_ASSERT(local_sum2cent_hist.get_count() == node_count);
+ ONEDAL_ASSERT(imp_data_list.imp_list_.get_count() ==
+ node_count * impl_const_t::node_imp_prop_count_);
+
+ const Index* node_list_ptr = node_list.get_data();
+ const Float* local_sum_hist_ptr = local_sum_hist.get_data();
+ const Float* local_sum2cent_hist_ptr = local_sum2cent_hist.get_data();
+ Float* imp_list_ptr = imp_data_list.imp_list_.get_mutable_data();
+
+ const sycl::range<1> range{ de::integral_cast<std::size_t>(node_count) };
+
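+ // For each node: imp[0] = sum / n (mean) and imp[1] = sum(x^2) - sum(x)^2 / n
+ // (sum of centered squares), where n is the node's global row count (ind_grc).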
+ auto last_event = queue_.submit([&](sycl::handler& cgh) {
+ cgh.depends_on(deps);
+ cgh.parallel_for(range, [=](sycl::id<1> node_idx) {
+ // set mean
+ imp_list_ptr[node_idx * impl_const_t::node_imp_prop_count_ + 0] =
+ local_sum_hist_ptr[node_idx] /
+ node_list_ptr[node_idx * impl_const_t::node_prop_count_ + impl_const_t::ind_grc];
+ // set sum2cent
+ imp_list_ptr[node_idx * impl_const_t::node_imp_prop_count_ + 1] =
+ local_sum2cent_hist_ptr[node_idx] -
+ (local_sum_hist_ptr[node_idx] * local_sum_hist_ptr[node_idx]) /
+ node_list_ptr[node_idx * impl_const_t::node_prop_count_ +
+ impl_const_t::ind_grc];
+ });
+ });
+
+ return last_event;
+}
+
template
sycl::event train_kernel_hist_impl::compute_initial_sum2cent_local(
const train_context_t& ctx,
@@ -1135,8 +1280,8 @@ sycl::event train_kernel_hist_impl::compute_initial_his
sycl::event last_event;
- if (ctx.distr_mode_) {
- if constexpr (std::is_same_v) {
+ if constexpr (std::is_same_v) {
+ if (ctx.distr_mode_) {
last_event = compute_initial_histogram_local(ctx,
response,
tree_order,
@@ -1156,49 +1301,44 @@ sycl::event train_kernel_hist_impl::compute_initial_his
{ last_event });
}
else {
- auto sum_list = pr::ndarray::empty(queue_, { node_count });
- auto sum2cent_list = pr::ndarray::empty(queue_, { node_count });
- last_event = compute_initial_sum_local(ctx,
- response,
- tree_order,
- node_list,
- sum_list,
- node_count,
- deps);
- {
- ONEDAL_PROFILER_TASK(sum_list, queue_);
- comm_.allreduce(sum_list.flatten(queue_, { last_event })).wait();
- }
- last_event = compute_initial_sum2cent_local(ctx,
- response,
- tree_order,
- node_list,
- sum_list,
- sum2cent_list,
- node_count,
- { last_event });
- {
- ONEDAL_PROFILER_TASK(allreduce_sum2cent_list, queue_);
- comm_.allreduce(sum2cent_list.flatten(queue_, { last_event })).wait();
- }
- last_event = fin_initial_imp(ctx,
- node_list,
- sum_list,
- sum2cent_list,
- imp_data_list,
- node_count,
- { last_event });
+ last_event = compute_initial_histogram_local(ctx,
+ response,
+ tree_order,
+ node_list,
+ imp_data_list,
+ node_count,
+ deps);
last_event.wait_and_throw();
}
}
else {
- last_event = compute_initial_histogram_local(ctx,
- response,
- tree_order,
- node_list,
- imp_data_list,
- node_count,
- deps);
+ auto local_sum_hist = pr::ndarray::empty(queue_, { node_count });
+ auto local_sum2cent_hist = pr::ndarray::empty(queue_, { node_count });
+
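+ // Regression path: compute per-node local sums and squared sums, allreduce them
+ // across ranks, then derive the mean and centered second moment for each node.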
+ last_event = compute_local_sum_histogram(ctx,
+ response,
+ tree_order,
+ node_list,
+ local_sum_hist,
+ local_sum2cent_hist,
+ node_count,
+ deps);
+ {
+ ONEDAL_PROFILER_TASK(allreduce_sum_hist, queue_);
+ comm_.allreduce(local_sum_hist.flatten(queue_, { last_event })).wait();
+ }
+ {
+ ONEDAL_PROFILER_TASK(allreduce_sum2cent_hist, queue_);
+ comm_.allreduce(local_sum2cent_hist.flatten(queue_, { last_event })).wait();
+ }
+
+ last_event = compute_initial_imp_for_node_list_regression(ctx,
+ node_list,
+ local_sum_hist,
+ local_sum2cent_hist,
+ imp_data_list,
+ node_count,
+ { last_event });
last_event.wait_and_throw();
}
@@ -1409,8 +1549,6 @@ sycl::event train_kernel_hist_impl::do_node_split(
const Index* node_list_ptr = node_list.get_data();
const Index* node_vs_tree_map_list_ptr = node_vs_tree_map_list.get_data();
- const bool distr_mode = ctx.distr_mode_;
-
Index* node_list_new_ptr = node_list_new.get_mutable_data();
Index* node_vs_tree_map_list_new_ptr = node_vs_tree_map_list_new.get_mutable_data();
@@ -1449,7 +1587,7 @@ sycl::event train_kernel_hist_impl::do_node_split(
Index* node_rch = node_list_new_ptr + (new_left_node_pos + 1) * node_prop_count;
node_lch[impl_const_t::ind_ofs] = node_prn[impl_const_t::ind_ofs];
- node_lch[impl_const_t::ind_lrc] = distr_mode
+ node_lch[impl_const_t::ind_lrc] = ctx.distr_mode_
? node_prn[impl_const_t::ind_lch_lrc]
: node_prn[impl_const_t::ind_lch_grc];
node_lch[impl_const_t::ind_grc] = node_prn[impl_const_t::ind_lch_grc];
@@ -1613,7 +1751,7 @@ sycl::event train_kernel_hist_impl::compute_results(
pr::ndarray& oob_per_obs_list,
pr::ndarray& var_imp,
pr::ndarray& var_imp_variance,
- const rng_engine_list_t& engine_arr,
+ rng_engine_list_t& engine_arr,
Index tree_idx_in_block,
Index tree_in_block_count,
Index built_tree_count,
@@ -1660,12 +1798,12 @@ sycl::event train_kernel_hist_impl::compute_results(
const Float div1 = Float(1) / Float(built_tree_count + tree_idx_in_block + 1);
- pr::rng rn_gen;
-
for (Index column_idx = 0; column_idx < ctx.column_count_; ++column_idx) {
- rn_gen.shuffle(oob_row_count,
- permutation_ptr,
- engine_arr[built_tree_count + tree_idx_in_block].get_state());
+ pr::shuffle(queue_,
+ oob_row_count,
+ permutation_ptr,
+ engine_arr[built_tree_count + tree_idx_in_block],
+ {});
const Float oob_err_perm = compute_oob_error_perm(ctx,
model_manager,
data_host,
@@ -1858,10 +1996,12 @@ train_result train_kernel_hist_impl::operator()(
de::check_mul_overflow((ctx.tree_count_ - 1), skip_num);
- pr::engine_collection collection(ctx.tree_count_, desc.get_seed());
- rng_engine_list_t engine_arr = collection([&](std::size_t i, std::size_t& skip) {
- skip = i * skip_num;
- });
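+ // Create one dpc_engine per tree on the training queue, seeded from the descriptor.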
+ pr::engine_collection_oneapi collection(
+ queue_,
+ ctx.tree_count_,
+ desc.get_seed());
+
+ rng_engine_list_t engine_arr = collection.get_engines();
pr::ndarray node_imp_decrease_list;
@@ -1882,49 +2022,55 @@ train_result train_kernel_hist_impl::operator()(
imp_data_mng_t imp_data_holder(queue_, ctx);
 // initializing imp_list and class_hist_list (for classification)
imp_data_holder.init_new_level(node_count);
-
de::check_mul_overflow(node_count, impl_const_t::node_prop_count_);
de::check_mul_overflow(node_count, impl_const_t::node_imp_prop_count_);
- auto node_vs_tree_map_list_host = pr::ndarray::empty({ node_count });
- auto level_node_list_init_host =
- pr::ndarray::empty({ node_count * impl_const_t::node_prop_count_ });
-
- auto tree_map = node_vs_tree_map_list_host.get_mutable_data();
- auto node_list_ptr = level_node_list_init_host.get_mutable_data();
-
- for (Index node = 0; node < node_count; ++node) {
- Index* node_ptr = node_list_ptr + node * impl_const_t::node_prop_count_;
- tree_map[node] = iter + node;
- node_ptr[impl_const_t::ind_ofs] =
- ctx.selected_row_total_count_ * node; // local row offset
- node_ptr[impl_const_t::ind_lrc] =
- ctx.distr_mode_
- ? 0
- : ctx.selected_row_count_; // for distr_mode it will be updated during gen_initial_tree_order
- node_ptr[impl_const_t::ind_grc] =
- ctx.selected_row_total_count_; // global selected rows - it is already filtered for current block
- node_ptr[impl_const_t::ind_lch_lrc] =
- 0; // for distr_mode it will be updated during tree_order_gen
- node_ptr[impl_const_t::ind_fid] = impl_const_t::bad_val_;
- }
+ auto node_vs_tree_map_list =
+ pr::ndarray::empty(queue_, { node_count }, alloc::device);
+ auto level_node_list_init =
+ pr::ndarray::empty(queue_,
+ { node_count * impl_const_t::node_prop_count_ },
+ alloc::device);
+
+ auto tree_map = node_vs_tree_map_list.get_mutable_data();
+ auto node_list_ptr = level_node_list_init.get_mutable_data();
+
+ auto fill_event = queue_.submit([&](sycl::handler& cgh) {
+ cgh.depends_on({ last_event });
+ cgh.parallel_for(sycl::range<1>{ std::size_t(node_count) }, [=](sycl::id<1> node) {
+ Index* node_ptr = node_list_ptr + node * impl_const_t::node_prop_count_;
+ tree_map[node] = iter + node;
+ node_ptr[impl_const_t::ind_ofs] =
+ ctx.selected_row_total_count_ * node; // local row offset
+ node_ptr[impl_const_t::ind_lrc] =
+ ctx.distr_mode_
+ ? 0
+ : ctx.selected_row_count_; // for distr_mode it will be updated during gen_initial_tree_order
+ node_ptr[impl_const_t::ind_grc] =
+ ctx.selected_row_total_count_; // global selected rows - it is already filtered for current block
+ node_ptr[impl_const_t::ind_lch_lrc] =
+ 0; // for distr_mode it will be updated during tree_order_gen
+ node_ptr[impl_const_t::ind_fid] = impl_const_t::bad_val_;
+ });
+ });
- last_event = gen_initial_tree_order(ctx,
- engine_arr,
- level_node_list_init_host,
- tree_order_lev_,
- iter,
- node_count);
-
- auto node_vs_tree_map_list = node_vs_tree_map_list_host.to_device(queue_);
- level_node_lists.push_back(level_node_list_init_host.to_device(queue_));
-
- last_event = compute_initial_histogram(ctx,
- response_nd_,
- tree_order_lev_,
- level_node_lists[0],
- imp_data_holder.get_mutable_data(0),
- node_count,
- { last_event });
+ auto gen_initial_tree_order_event = gen_initial_tree_order(ctx,
+ engine_arr,
+ level_node_list_init,
+ tree_order_lev_,
+ iter,
+ node_count,
+ { fill_event });
+
+ level_node_lists.push_back(level_node_list_init);
+
+ auto compute_initial_histogram_event =
+ compute_initial_histogram(ctx,
+ response_nd_,
+ tree_order_lev_,
+ level_node_lists[0],
+ imp_data_holder.get_mutable_data(0),
+ node_count,
+ { gen_initial_tree_order_event });
- last_event.wait_and_throw();
+ compute_initial_histogram_event.wait_and_throw();
if (ctx.oob_required_) {
@@ -2000,7 +2146,6 @@ train_result train_kernel_hist_impl::operator()(
if (node_count_new) {
//there are split nodes -> next level is required
node_count_new *= 2;
-
de::check_mul_overflow(node_count_new, impl_const_t::node_prop_count_);
auto node_list_new = pr::ndarray::empty(
queue_,
diff --git a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_misc_structs.hpp b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_misc_structs.hpp
index 96e4552ea1b..360c1ba1065 100644
--- a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_misc_structs.hpp
+++ b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_misc_structs.hpp
@@ -119,7 +119,7 @@ struct train_context {
Index selected_ftr_count_ = 0;
Index selected_row_count_ = 0;
- Index selected_row_total_count_ = 0;
+ std::int64_t selected_row_total_count_ = 0;
Index min_observations_in_leaf_node_ = 0;
Index max_tree_depth_ = 0;
diff --git a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_service_kernels_dpc.cpp b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_service_kernels_dpc.cpp
index 11549f3d62d..0de8dd1d6f6 100644
--- a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_service_kernels_dpc.cpp
+++ b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_service_kernels_dpc.cpp
@@ -19,6 +19,7 @@
#include "oneapi/dal/table/row_accessor.hpp"
#include "oneapi/dal/detail/profiler.hpp"
#include "oneapi/dal/algo/decision_forest/backend/gpu/train_helpers.hpp"
+#include
#ifdef ONEDAL_DATA_PARALLEL
diff --git a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_splitter_impl_dpc.cpp b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_splitter_impl_dpc.cpp
index eeaafe2a179..d7ad6137288 100644
--- a/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_splitter_impl_dpc.cpp
+++ b/cpp/oneapi/dal/algo/decision_forest/backend/gpu/train_splitter_impl_dpc.cpp
@@ -19,7 +19,7 @@
#include "oneapi/dal/table/row_accessor.hpp"
#include "oneapi/dal/detail/profiler.hpp"
#include "oneapi/dal/algo/decision_forest/backend/gpu/train_helpers.hpp"
-
+#include
#ifdef ONEDAL_DATA_PARALLEL
#include "oneapi/dal/algo/decision_forest/backend/gpu/train_splitter_impl.hpp"
@@ -134,7 +134,6 @@ sycl::event train_splitter_impl::random_split(
const auto nd_range =
bk::make_multiple_nd_range_2d({ local_size, node_in_block_count }, { local_size, 1 });
-
sycl::event last_event = queue.submit([&](sycl::handler& cgh) {
cgh.depends_on(deps);
local_accessor_rw_t local_hist_buf(hist_size, cgh);
@@ -537,7 +536,19 @@ sycl::event train_splitter_impl::best_split(
const Index bin_block =
compute_bin_block_size(queue, hist_prop_count, bin_count);
- const Index local_size = bk::device_max_wg_size(queue);
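+ // Halve the work-group size until node_count * ftr_count * local_size fits into a
+ // 32-bit signed integer, so the flattened nd-range size stays within the index limit.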
+ const Index local_size_initial = bk::device_max_wg_size(queue);
+ Index local_size = local_size_initial;
+ constexpr std::int64_t max_int_limit = std::numeric_limits<std::int32_t>::max();
+
+ const std::int64_t node_ftr_count = std::int64_t(node_count) * std::int64_t(ftr_count);
+ if (node_ftr_count > 0 && node_ftr_count <= max_int_limit) {
+ while (node_ftr_count * local_size > max_int_limit && local_size > 1) {
+ local_size /= 2;
+ }
+ }
+ else {
+ std::cerr << "Error: node_count * ftr_count exceeds int limit" << std::endl;
+ }
+
const auto nd_range =
bk::make_multiple_nd_range_3d({ node_count, ftr_count, local_size }, { 1, 1, local_size });
@@ -686,7 +697,6 @@ sycl::event train_splitter_impl::best_split(
}
});
});
-
// Merging kernel: selects best split among all features.
const auto merge_range =
bk::make_multiple_nd_range_2d({ node_count, local_size }, { 1, local_size });
diff --git a/cpp/oneapi/dal/algo/decision_forest/test/spmd.cpp b/cpp/oneapi/dal/algo/decision_forest/test/spmd.cpp
index 534acddb04a..fe99f8f8d4b 100644
--- a/cpp/oneapi/dal/algo/decision_forest/test/spmd.cpp
+++ b/cpp/oneapi/dal/algo/decision_forest/test/spmd.cpp
@@ -209,7 +209,7 @@ DF_SPMD_CLS_TEST_NIGHTLY_EXT("df cls default flow") {
desc.set_max_tree_depth(max_tree_depth_val);
desc.set_class_count(wl.ds_info.class_count);
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -233,7 +233,7 @@ DF_SPMD_CLS_TEST_EXT("df cls corner flow") {
desc.set_min_observations_in_leaf_node(8);
desc.set_class_count(wl.ds_info.class_count);
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -257,7 +257,7 @@ DF_SPMD_CLS_TEST_EXT("df cls small flow") {
desc.set_tree_count(tree_count);
desc.set_class_count(wl.ds_info.class_count);
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -288,7 +288,7 @@ DF_SPMD_CLS_TEST_NIGHTLY_EXT("df cls impurity flow") {
desc.set_impurity_threshold(impurity_threshold_val);
desc.set_class_count(wl.ds_info.class_count);
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -317,7 +317,7 @@ DF_SPMD_CLS_TEST_NIGHTLY_EXT("df cls all features flow") {
desc.set_features_per_node(data.get_column_count() - 1); // skip responses column
desc.set_class_count(wl.ds_info.class_count);
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -342,7 +342,7 @@ DF_SPMD_CLS_TEST_NIGHTLY_EXT("df cls bootstrap flow") {
desc.set_max_tree_depth(50);
desc.set_class_count(wl.ds_info.class_count);
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -372,7 +372,7 @@ DF_SPMD_CLS_TEST_NIGHTLY_EXT("df cls oob per observation flow") {
desc.set_observations_per_tree_fraction(observations_per_tree_fraction_val);
desc.set_class_count(wl.ds_info.class_count);
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -393,7 +393,7 @@ DF_SPMD_CLS_TEST("df cls base check with default params") {
desc.set_class_count(class_count);
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -411,7 +411,7 @@ DF_SPMD_CLS_TEST("df cls base check with default params and train weights") {
desc.set_class_count(class_count);
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_weighted_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -444,7 +444,7 @@ DF_SPMD_CLS_TEST("df cls base check with non default params") {
desc.set_voting_mode(df::voting_mode::unweighted);
desc.set_class_count(class_count);
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -462,7 +462,7 @@ DF_SPMD_REG_TEST("df reg base check with default params") {
auto desc = this->get_default_descriptor();
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -478,7 +478,7 @@ DF_SPMD_REG_TEST("df reg base check with default params and train weights") {
auto desc = this->get_default_descriptor();
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_weighted_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -505,7 +505,7 @@ DF_SPMD_REG_TEST("df reg base check with non default params") {
desc.set_variable_importance_mode(variable_importance_mode_val);
desc.set_error_metric_mode(error_metric_mode_val);
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -524,7 +524,7 @@ DF_SPMD_REG_TEST_NIGHTLY_EXT("df reg default flow") {
auto desc = this->get_default_descriptor();
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -547,7 +547,7 @@ DF_SPMD_REG_TEST_EXT("df reg small flow") {
desc.set_tree_count(tree_count);
desc.set_min_observations_in_leaf_node(1);
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -572,7 +572,7 @@ DF_SPMD_REG_TEST_NIGHTLY_EXT("df reg impurity flow") {
desc.set_min_observations_in_leaf_node(min_observations_in_leaf_node);
desc.set_impurity_threshold(impurity_threshold_val);
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
@@ -599,7 +599,7 @@ DF_SPMD_REG_TEST_NIGHTLY_EXT("df reg bootstrap flow") {
desc.set_max_tree_depth(max_tree_depth_val);
desc.set_bootstrap(bootstrap_val);
- this->set_rank_count(2);
+ this->set_rank_count(4);
const auto train_result =
this->train_spmd_base_checks(desc, data, this->get_homogen_table_id());
const auto model = train_result.get_model();
diff --git a/cpp/oneapi/dal/algo/louvain/backend/cpu/louvain_data.hpp b/cpp/oneapi/dal/algo/louvain/backend/cpu/louvain_data.hpp
index d21de8c9627..ecd49784378 100644
--- a/cpp/oneapi/dal/algo/louvain/backend/cpu/louvain_data.hpp
+++ b/cpp/oneapi/dal/algo/louvain/backend/cpu/louvain_data.hpp
@@ -17,7 +17,7 @@
#pragma once
#include "oneapi/dal/backend/memory.hpp"
-#include "oneapi/dal/backend/primitives/rng/rng_engine.hpp"
+#include "oneapi/dal/backend/primitives/rng/rng.hpp"
namespace oneapi::dal::preview::louvain::backend {
using namespace oneapi::dal::preview::detail;
@@ -123,8 +123,7 @@ struct louvain_data {
// Total link weight in the network
value_type m;
- engine eng;
- rng rn_gen;
+ host_engine eng;
const std::int64_t vertex_count;
const std::int64_t edge_count;
diff --git a/cpp/oneapi/dal/algo/louvain/backend/cpu/vertex_partitioning_default_kernel.hpp b/cpp/oneapi/dal/algo/louvain/backend/cpu/vertex_partitioning_default_kernel.hpp
index 79e294e9f47..e287c3f2f66 100644
--- a/cpp/oneapi/dal/algo/louvain/backend/cpu/vertex_partitioning_default_kernel.hpp
+++ b/cpp/oneapi/dal/algo/louvain/backend/cpu/vertex_partitioning_default_kernel.hpp
@@ -206,7 +206,7 @@ inline Float move_nodes(const dal::preview::detail::topology& t,
ld.random_order[index] = index;
}
// random shuffle
- ld.rn_gen.uniform(t._vertex_count, ld.index, ld.eng.get_state(), 0, t._vertex_count);
+ uniform(t._vertex_count, ld.index, ld.eng, 0, t._vertex_count);
for (std::int64_t index = 0; index < t._vertex_count; ++index) {
std::swap(ld.random_order[index], ld.random_order[ld.index[index]]);
}
diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/test/fixture.hpp b/cpp/oneapi/dal/backend/primitives/objective_function/test/fixture.hpp
index d22a2dde0a1..6cf2b73ccd6 100644
--- a/cpp/oneapi/dal/backend/primitives/objective_function/test/fixture.hpp
+++ b/cpp/oneapi/dal/backend/primitives/objective_function/test/fixture.hpp
@@ -25,7 +25,7 @@
#include "oneapi/dal/table/csr_accessor.hpp"
#include "oneapi/dal/detail/debug.hpp"
-#include "oneapi/dal/backend/primitives/rng/rng_engine.hpp"
+#include "oneapi/dal/backend/primitives/rng/rng.hpp"
namespace oneapi::dal::backend::primitives::test {
@@ -572,13 +572,12 @@ class logloss_test : public te::float_algo_fixture {
- primitives::rng rn_gen;
auto vec_host =
ndarray::empty(this->get_queue(), { dim }, sycl::usm::alloc::host);
for (std::int32_t ij = 0; ij < num_checks; ++ij) {
- primitives::engine eng(2007 + dim * num_checks + ij);
- rn_gen.uniform(dim, vec_host.get_mutable_data(), eng.get_state(), -1.0, 1.0);
+ primitives::host_engine eng(2007 + dim * num_checks + ij);
+ pr::uniform(dim, vec_host.get_mutable_data(), eng, -1.0, 1.0);
auto vec_gpu = vec_host.to_device(this->get_queue());
auto out_vector =
ndarray::empty(this->get_queue(), { dim }, sycl::usm::alloc::device);
diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/test/spmd_fixture.hpp b/cpp/oneapi/dal/backend/primitives/objective_function/test/spmd_fixture.hpp
index e902dd452e1..63ab0a07c13 100644
--- a/cpp/oneapi/dal/backend/primitives/objective_function/test/spmd_fixture.hpp
+++ b/cpp/oneapi/dal/backend/primitives/objective_function/test/spmd_fixture.hpp
@@ -100,12 +100,12 @@ class logloss_spmd_test : public logloss_test {
std::int64_t num_checks = 5;
std::vector> vecs_host(num_checks), vecs_gpu(num_checks);
- rng rn_gen;
+
for (std::int64_t ij = 0; ij < num_checks; ++ij) {
- engine eng(2007 + dim * num_checks + ij);
+ host_engine eng(2007 + dim * num_checks + ij);
vecs_host[ij] =
(ndarray::empty(this->get_queue(), { dim }, sycl::usm::alloc::host));
- rn_gen.uniform(dim, vecs_host[ij].get_mutable_data(), eng.get_state(), -1.0, 1.0);
+ uniform(dim, vecs_host[ij].get_mutable_data(), eng, -1.0, 1.0);
vecs_gpu[ij] = vecs_host[ij].to_device(this->get_queue());
}
diff --git a/cpp/oneapi/dal/backend/primitives/optimizers/test/cg_solver_dpc.cpp b/cpp/oneapi/dal/backend/primitives/optimizers/test/cg_solver_dpc.cpp
index ea320f690a2..b529836f70e 100644
--- a/cpp/oneapi/dal/backend/primitives/optimizers/test/cg_solver_dpc.cpp
+++ b/cpp/oneapi/dal/backend/primitives/optimizers/test/cg_solver_dpc.cpp
@@ -20,7 +20,7 @@
#include "oneapi/dal/test/engine/common.hpp"
#include "oneapi/dal/test/engine/fixtures.hpp"
#include "oneapi/dal/table/row_accessor.hpp"
-#include "oneapi/dal/backend/primitives/rng/rng_engine.hpp"
+#include "oneapi/dal/backend/primitives/rng/rng.hpp"
#include
namespace oneapi::dal::backend::primitives::test {
@@ -43,9 +43,8 @@ class cg_solver_test : public te::float_algo_fixture {
x_host_ = ndarray::empty(this->get_queue(), { n_ }, sycl::usm::alloc::host);
b_host_ = ndarray::empty(this->get_queue(), { n_ }, sycl::usm::alloc::host);
- primitives::rng rn_gen;
- primitives::engine eng(4014 + n_);
- rn_gen.uniform(n_, x_host_.get_mutable_data(), eng.get_state(), -1.0, 1.0);
+ primitives::host_engine eng(4014 + n_);
+ primitives::uniform(n_, x_host_.get_mutable_data(), eng, -1.0, 1.0);
create_stable_matrix(this->get_queue(), A_host_);
diff --git a/cpp/oneapi/dal/backend/primitives/optimizers/test/fixture.hpp b/cpp/oneapi/dal/backend/primitives/optimizers/test/fixture.hpp
index a6b87b2dcc1..c188c50983c 100644
--- a/cpp/oneapi/dal/backend/primitives/optimizers/test/fixture.hpp
+++ b/cpp/oneapi/dal/backend/primitives/optimizers/test/fixture.hpp
@@ -21,7 +21,7 @@
#include "oneapi/dal/backend/primitives/ndarray.hpp"
#include "oneapi/dal/test/engine/common.hpp"
#include "oneapi/dal/test/engine/fixtures.hpp"
-#include "oneapi/dal/backend/primitives/rng/rng_engine.hpp"
+#include "oneapi/dal/backend/primitives/rng/rng.hpp"
#include "oneapi/dal/backend/primitives/blas/gemv.hpp"
#include "oneapi/dal/backend/primitives/element_wise.hpp"
@@ -133,11 +133,10 @@ void create_stable_matrix(sycl::queue& queue,
ONEDAL_ASSERT(A.get_dimension(1) == n);
auto J = ndarray::empty(queue, { n, n }, sycl::usm::alloc::host);
auto eigen_values = ndarray::empty(queue, { n }, sycl::usm::alloc::host);
- primitives::rng rn_gen;
- primitives::engine eng(2007 + n);
+ primitives::host_engine eng(2007 + n);
- rn_gen.uniform(n * n, J.get_mutable_data(), eng.get_state(), -1.0, 1.0);
- rn_gen.uniform(n, eigen_values.get_mutable_data(), eng.get_state(), bottom_eig, top_eig);
+ primitives::uniform(n * n, J.get_mutable_data(), eng, -1.0, 1.0);
+ primitives::uniform(n, eigen_values.get_mutable_data(), eng, bottom_eig, top_eig);
// orthogonalize matrix J
gram_schmidt(J);
diff --git a/cpp/oneapi/dal/backend/primitives/optimizers/test/newton_cg_dpc.cpp b/cpp/oneapi/dal/backend/primitives/optimizers/test/newton_cg_dpc.cpp
index f473dddf1f7..b2ebe9f5bdb 100644
--- a/cpp/oneapi/dal/backend/primitives/optimizers/test/newton_cg_dpc.cpp
+++ b/cpp/oneapi/dal/backend/primitives/optimizers/test/newton_cg_dpc.cpp
@@ -22,7 +22,7 @@
#include "oneapi/dal/test/engine/common.hpp"
#include "oneapi/dal/test/engine/fixtures.hpp"
#include "oneapi/dal/table/row_accessor.hpp"
-#include "oneapi/dal/backend/primitives/rng/rng_engine.hpp"
+#include "oneapi/dal/backend/primitives/rng/rng.hpp"
#include
#include "oneapi/dal/backend/primitives/objective_function.hpp"
@@ -56,10 +56,10 @@ class newton_cg_test : public te::float_algo_fixture {
ndarray::empty(this->get_queue(), { n_ + 1 }, sycl::usm::alloc::host);
auto params_host =
ndarray::empty(this->get_queue(), { p_ + 1 }, sycl::usm::alloc::host);
- primitives::rng rn_gen;
- primitives::engine eng(2007 + n);
- rn_gen.uniform(n_ * p_, X_host.get_mutable_data(), eng.get_state(), -10.0, 10.0);
- rn_gen.uniform(p_ + 1, params_host.get_mutable_data(), eng.get_state(), -5.0, 5.0);
+
+ primitives::host_engine eng(2007 + n);
+ primitives::uniform