Skip to content

Commit 4b2e7b8

Browse files
Copilot and WenyinWei committed
Move test files to test/integration and add simplified parallel interface
Co-authored-by: WenyinWei <[email protected]>
1 parent 4a204cc commit 4b2e7b8

File tree

8 files changed

+381
-12
lines changed

8 files changed

+381
-12
lines changed

include/diffeq.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@
3737
#include <interfaces/integration_interface.hpp> // Unified interface for all domains
3838

3939
// Enhanced parallelism capabilities with modern C++ features
40-
#include <execution/parallelism_facade_clean.hpp> // Unified parallelism interface (Facade pattern)
40+
#include <execution/parallel.hpp> // Simplified parallel execution interface
41+
#include <execution/parallelism_facade_clean.hpp> // Advanced parallelism interface (Facade pattern)
4142
#include <execution/parallel_builder.hpp> // Fluent interface for configuration (Builder pattern)
4243
#include <execution/modern_executor.hpp> // Modern C++ executor support with coroutines
4344
#include <execution/hardware_support.hpp> // Hardware-specific execution (CUDA, OpenCL, FPGA, MPI)

include/examples/parallelism_usage.hpp

Lines changed: 87 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,48 @@
88

99
namespace diffeq::examples::parallelism {
1010

11+
/**
12+
* @brief Quick Start Example - Simplified Parallel Interface
13+
*
14+
* This shows the easiest way to add parallelism to your diffeq computations.
15+
* No complex configuration needed!
16+
*/
17+
void quick_start_example() {
18+
std::cout << "\n=== Quick Start: Simplified Parallel Interface ===\n";
19+
20+
// Example: Parallel ODE integration for multiple initial conditions
21+
std::vector<std::vector<double>> initial_conditions;
22+
for (int i = 0; i < 100; ++i) {
23+
initial_conditions.push_back({static_cast<double>(i), 0.0});
24+
}
25+
26+
// Simple exponential decay: dy/dt = -0.1 * y
27+
auto system = [](double t, const std::vector<double>& y, std::vector<double>& dydt) {
28+
dydt[0] = -0.1 * y[0];
29+
dydt[1] = -0.2 * y[1];
30+
};
31+
32+
std::cout << "Integrating " << initial_conditions.size() << " initial conditions in parallel...\n";
33+
34+
// THIS IS ALL YOU NEED FOR PARALLEL EXECUTION!
35+
diffeq::execution::parallel_for_each(initial_conditions, [&](std::vector<double>& state) {
36+
diffeq::RK4Integrator<std::vector<double>> integrator(system);
37+
integrator.step(state, 0.01); // Single integration step
38+
});
39+
40+
std::cout << "✓ Parallel integration completed!\n";
41+
std::cout << "Result for initial condition 10: [" << initial_conditions[10][0]
42+
<< ", " << initial_conditions[10][1] << "]\n";
43+
44+
// Want to use GPU if available? Just one line:
45+
diffeq::execution::enable_gpu_acceleration();
46+
47+
// Want more workers? Just one line:
48+
diffeq::execution::set_parallel_workers(8);
49+
50+
std::cout << "Current worker count: " << diffeq::execution::parallel().worker_count() << "\n";
51+
}
52+
1153
/**
1254
* @brief Robotics Control Systems Example
1355
*
@@ -42,7 +84,38 @@ struct RobotArmSystem {
4284
void demonstrate_realtime_control() {
4385
std::cout << "\n=== Robotics Control System with Real-time Parallelism ===\n";
4486

45-
// Configure for real-time robotics control
87+
// SIMPLE APPROACH: Use the simplified parallel interface for basic needs
88+
std::cout << "\n--- Simple Parallel Approach ---\n";
89+
90+
// Setup multiple control systems (e.g., different robot joints)
91+
std::vector<std::vector<double>> joint_states;
92+
for (int i = 0; i < 6; ++i) { // 6-DOF robot arm
93+
joint_states.push_back({0.1 * i, 0.0}); // [angle, angular_velocity]
94+
}
95+
96+
// Create simple parallel executor
97+
auto parallel = diffeq::execution::Parallel(4); // 4 worker threads
98+
99+
auto simple_start_time = std::chrono::high_resolution_clock::now();
100+
101+
// Parallel control loop - very simple!
102+
parallel.for_each(joint_states, [](std::vector<double>& state) {
103+
RobotArmSystem system;
104+
diffeq::RK4Integrator<std::vector<double>> integrator(system);
105+
integrator.step(state, 0.001); // 1ms control timestep
106+
});
107+
108+
auto simple_end_time = std::chrono::high_resolution_clock::now();
109+
auto simple_duration = std::chrono::duration_cast<std::chrono::microseconds>(simple_end_time - simple_start_time);
110+
111+
std::cout << "Simple parallel control completed in " << simple_duration.count() << " μs\n";
112+
std::cout << "Average per joint: " << simple_duration.count() / joint_states.size() << " μs\n";
113+
114+
// ADVANCED APPROACH: Use full facade for complex real-time requirements
115+
std::cout << "\n--- Advanced Facade Approach (for complex scenarios) ---\n";
116+
117+
// For applications requiring precise real-time control, load balancing,
118+
// hardware-specific optimizations, etc., use the full facade:
46119
auto parallel_config = diffeq::execution::presets::robotics_control()
47120
.realtime_priority()
48121
.workers(4) // Dedicated cores for control
@@ -52,7 +125,7 @@ void demonstrate_realtime_control() {
52125

53126
// Create integrator for robot dynamics
54127
auto robot_system = RobotArmSystem{};
55-
auto integrator = diffeq::ode::factory::make_rk4_integrator<std::vector<double>, double>(robot_system);
128+
auto integrator = diffeq::RK4Integrator<std::vector<double>>(robot_system);
56129

57130
// Initial state: [angle=0.1 rad, angular_velocity=0]
58131
std::vector<double> state = {0.1, 0.0};
@@ -62,7 +135,7 @@ void demonstrate_realtime_control() {
62135
std::cout << "Running real-time robot control simulation...\n";
63136
std::cout << "Target frequency: 1kHz (1ms timestep)\n";
64137

65-
auto start_time = std::chrono::high_resolution_clock::now();
138+
auto advanced_start_time = std::chrono::high_resolution_clock::now();
66139

67140
// Simulate real-time control loop
68141
for (double t = 0.0; t < simulation_time; t += dt) {
@@ -89,11 +162,11 @@ void demonstrate_realtime_control() {
89162
}
90163
}
91164

92-
auto end_time = std::chrono::high_resolution_clock::now();
93-
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
165+
auto advanced_end_time = std::chrono::high_resolution_clock::now();
166+
auto advanced_duration = std::chrono::duration_cast<std::chrono::milliseconds>(advanced_end_time - advanced_start_time);
94167

95-
std::cout << "Simulation completed in " << duration.count() << "ms\n";
96-
std::cout << "Average loop time: " << duration.count() / (simulation_time / dt) << "ms\n";
168+
std::cout << "Simulation completed in " << advanced_duration.count() << "ms\n";
169+
std::cout << "Average loop time: " << advanced_duration.count() / (simulation_time / dt) << "ms\n";
97170
std::cout << "Final robot state: angle=" << state[0] << " rad, velocity=" << state[1] << " rad/s\n";
98171
}
99172

@@ -156,7 +229,7 @@ void demonstrate_monte_carlo_simulation() {
156229
for (size_t i = 0; i < num_simulations; ++i) {
157230
simulation_futures.push_back(parallel_config->async([=]() {
158231
// Create integrator for this simulation
159-
auto integrator = diffeq::ode::factory::make_rk4_integrator<std::vector<double>, double>(gbm_system);
232+
auto integrator = diffeq::RK4Integrator<std::vector<double>>(gbm_system);
160233

161234
// Random number generator for this thread
162235
std::mt19937 rng(std::random_device{}() + i);
@@ -286,7 +359,7 @@ void benchmark_hardware_targets() {
286359

287360
// Parallel integration using the unified interface
288361
parallel_facade->parallel_for_each(states.begin(), states.end(), [&](auto& state) {
289-
auto integrator = diffeq::ode::factory::make_rk4_integrator<std::vector<double>, double>(system);
362+
auto integrator = diffeq::RK4Integrator<std::vector<double>>(system);
290363

291364
double t = 0.0;
292365
while (t < end_time) {
@@ -324,9 +397,12 @@ void benchmark_hardware_targets() {
324397
*/
325398
void demonstrate_all_parallelism_features() {
326399
std::cout << "=== Enhanced Parallelism Capabilities Demo ===\n";
327-
std::cout << "Demonstrating modern C++ parallelism with unified hardware interface\n";
400+
std::cout << "Demonstrating both simple and advanced parallelism interfaces\n";
401+
402+
// Start with the simple interface for new users
403+
quick_start_example();
328404

329-
// Run robotics control example
405+
// Run robotics control example (shows both simple and advanced)
330406
robotics_control::demonstrate_realtime_control();
331407

332408
// Run stochastic research example

include/execution/parallel.hpp

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
#pragma once
2+
3+
#include <execution/parallelism_facade_clean.hpp>
4+
#include <vector>
5+
#include <functional>
6+
#include <future>
7+
#include <type_traits>
8+
9+
namespace diffeq::execution {
10+
11+
/**
12+
* @brief Simplified parallel execution interface for the diffeq library
13+
*
14+
* This provides a much simpler interface for common parallel operations
15+
* without the complexity of the full parallelism facade system.
16+
* Users can access advanced features through the full ParallelismFacade if needed.
17+
*/
18+
class Parallel {
19+
private:
20+
ParallelismFacade facade_;
21+
22+
public:
23+
/**
24+
* @brief Create a parallel executor with automatic hardware selection
25+
*/
26+
Parallel() = default;
27+
28+
/**
29+
* @brief Create a parallel executor with specified number of workers
30+
*/
31+
explicit Parallel(size_t num_workers) {
32+
ParallelConfig config;
33+
config.max_workers = num_workers;
34+
config.target = HardwareTarget::Auto;
35+
facade_.configure(config);
36+
}
37+
38+
/**
39+
* @brief Execute a function on each element in parallel
40+
*/
41+
template<typename Iterator, typename F>
42+
void for_each(Iterator first, Iterator last, F&& func) {
43+
facade_.parallel_for_each(first, last, std::forward<F>(func));
44+
}
45+
46+
/**
47+
* @brief Execute a function on each element in a container in parallel
48+
*/
49+
template<typename Container, typename F>
50+
void for_each(Container& container, F&& func) {
51+
facade_.parallel_for_each(container.begin(), container.end(), std::forward<F>(func));
52+
}
53+
54+
/**
55+
* @brief Execute a function asynchronously and return a future
56+
*/
57+
template<typename F, typename... Args>
58+
auto async(F&& func, Args&&... args)
59+
-> std::future<std::invoke_result_t<F, Args...>> {
60+
return facade_.async(std::forward<F>(func), std::forward<Args>(args)...);
61+
}
62+
63+
/**
64+
* @brief Get the number of parallel workers available
65+
*/
66+
size_t worker_count() const {
67+
return facade_.get_max_concurrency();
68+
}
69+
70+
/**
71+
* @brief Check if GPU acceleration is available
72+
*/
73+
bool gpu_available() const {
74+
return facade_.is_target_available(HardwareTarget::GPU_CUDA) ||
75+
facade_.is_target_available(HardwareTarget::GPU_OpenCL);
76+
}
77+
78+
/**
79+
* @brief Enable GPU acceleration if available
80+
*/
81+
void use_gpu() {
82+
if (gpu_available()) {
83+
facade_.set_hardware_target(HardwareTarget::GPU_CUDA);
84+
}
85+
}
86+
87+
/**
88+
* @brief Force CPU-only execution
89+
*/
90+
void use_cpu() {
91+
facade_.set_hardware_target(HardwareTarget::CPU_ThreadPool);
92+
}
93+
94+
/**
95+
* @brief Set the number of worker threads
96+
*/
97+
void set_workers(size_t count) {
98+
facade_.set_max_workers(count);
99+
}
100+
};
101+
102+
/**
103+
* @brief Get the global parallel execution instance
104+
*/
105+
inline Parallel& parallel() {
106+
static Parallel instance;
107+
return instance;
108+
}
109+
110+
/**
111+
* @brief Convenience function for parallel for_each operation
112+
*/
113+
template<typename Iterator, typename F>
114+
void parallel_for_each(Iterator first, Iterator last, F&& func) {
115+
parallel().for_each(first, last, std::forward<F>(func));
116+
}
117+
118+
/**
119+
* @brief Convenience function for parallel for_each on containers
120+
*/
121+
template<typename Container, typename F>
122+
void parallel_for_each(Container& container, F&& func) {
123+
parallel().for_each(container, std::forward<F>(func));
124+
}
125+
126+
/**
127+
* @brief Convenience function for async execution
128+
*/
129+
template<typename F, typename... Args>
130+
auto parallel_async(F&& func, Args&&... args)
131+
-> std::future<std::invoke_result_t<F, Args...>> {
132+
return parallel().async(std::forward<F>(func), std::forward<Args>(args)...);
133+
}
134+
135+
/**
136+
* @brief Configure global parallel execution with number of workers
137+
*/
138+
inline void set_parallel_workers(size_t count) {
139+
parallel().set_workers(count);
140+
}
141+
142+
/**
143+
* @brief Enable GPU acceleration globally if available
144+
*/
145+
inline void enable_gpu_acceleration() {
146+
parallel().use_gpu();
147+
}
148+
149+
/**
150+
* @brief Force CPU-only execution globally
151+
*/
152+
inline void enable_cpu_only() {
153+
parallel().use_cpu();
154+
}
155+
156+
} // namespace diffeq::execution
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)