1+ #pragma once
2+
3+ #include < diffeq.hpp>
4+ #include < vector>
5+ #include < iostream>
6+ #include < chrono>
7+ #include < random>
8+
9+ namespace diffeq ::examples::parallelism {
10+
11+ /* *
12+ * @brief Robotics Control Systems Example
13+ *
14+ * Demonstrates real-time integration with feedback signals from sensors
15+ * with low latency and deterministic performance requirements.
16+ */
17+ namespace robotics_control {
18+
19+ // Robot arm dynamics (simplified)
20+ struct RobotArmSystem {
21+ double mass = 1.0 ;
22+ double length = 0.5 ;
23+ double damping = 0.1 ;
24+ double gravity = 9.81 ;
25+
26+ void operator ()(double t, const std::vector<double >& state, std::vector<double >& derivative) {
27+ // state = [angle, angular_velocity]
28+ // Simple pendulum with control input
29+ double angle = state[0 ];
30+ double angular_vel = state[1 ];
31+
32+ // Control input (placeholder - would come from feedback controller)
33+ double control_torque = -2.0 * angle - 0.5 * angular_vel; // PD controller
34+
35+ derivative[0 ] = angular_vel;
36+ derivative[1 ] = -(gravity / length) * std::sin (angle) -
37+ (damping / (mass * length * length)) * angular_vel +
38+ control_torque / (mass * length * length);
39+ }
40+ };
41+
42+ void demonstrate_realtime_control () {
43+ std::cout << " \n === Robotics Control System with Real-time Parallelism ===\n " ;
44+
45+ // Configure for real-time robotics control
46+ auto parallel_config = diffeq::execution::presets::robotics_control ()
47+ .realtime_priority ()
48+ .workers (4 ) // Dedicated cores for control
49+ .batch_size (1 ) // Single step processing
50+ .disable_load_balancing () // Deterministic scheduling
51+ .build ();
52+
53+ // Create integrator for robot dynamics
54+ auto robot_system = RobotArmSystem{};
55+ auto integrator = diffeq::ode::factory::make_rk4_integrator<std::vector<double >, double >(robot_system);
56+
57+ // Initial state: [angle=0.1 rad, angular_velocity=0]
58+ std::vector<double > state = {0.1 , 0.0 };
59+ double dt = 0.001 ; // 1ms timestep for 1kHz control
60+ double simulation_time = 1.0 ;
61+
62+ std::cout << " Running real-time robot control simulation...\n " ;
63+ std::cout << " Target frequency: 1kHz (1ms timestep)\n " ;
64+
65+ auto start_time = std::chrono::high_resolution_clock::now ();
66+
67+ // Simulate real-time control loop
68+ for (double t = 0.0 ; t < simulation_time; t += dt) {
69+ // Execute control step with real-time priority
70+ auto control_future = parallel_config->async ([&]() {
71+ integrator->step (state, dt);
72+ });
73+
74+ // Simulate sensor reading (parallel)
75+ auto sensor_future = parallel_config->async ([&]() {
76+ // Placeholder for sensor data processing
77+ std::this_thread::sleep_for (std::chrono::microseconds (50 ));
78+ return state[0 ]; // Return angle measurement
79+ });
80+
81+ // Wait for both to complete
82+ control_future.wait ();
83+ double measured_angle = sensor_future.get ();
84+
85+ // Output every 100ms
86+ if (static_cast <int >(t * 1000 ) % 100 == 0 ) {
87+ std::cout << " t=" << t << " s, angle=" << state[0 ]
88+ << " rad, measured=" << measured_angle << " rad\n " ;
89+ }
90+ }
91+
92+ auto end_time = std::chrono::high_resolution_clock::now ();
93+ auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
94+
95+ std::cout << " Simulation completed in " << duration.count () << " ms\n " ;
96+ std::cout << " Average loop time: " << duration.count () / (simulation_time / dt) << " ms\n " ;
97+ std::cout << " Final robot state: angle=" << state[0 ] << " rad, velocity=" << state[1 ] << " rad/s\n " ;
98+ }
99+
100+ } // namespace robotics_control
101+
102+ /* *
103+ * @brief Stochastic Process Research Example
104+ *
105+ * Demonstrates Monte Carlo simulations requiring thousands/millions of integrations
106+ * with focus on throughput rather than latency.
107+ */
108+ namespace stochastic_research {
109+
110+ // Geometric Brownian Motion for financial modeling
111+ struct GeometricBrownianMotion {
112+ double mu = 0.05 ; // drift
113+ double sigma = 0.2 ; // volatility
114+
115+ void operator ()(double t, const std::vector<double >& state, std::vector<double >& derivative) {
116+ derivative[0 ] = mu * state[0 ]; // deterministic part
117+ }
118+
119+ void diffusion (double t, const std::vector<double >& state, std::vector<double >& noise_coeff) {
120+ noise_coeff[0 ] = sigma * state[0 ]; // stochastic part
121+ }
122+ };
123+
124+ void demonstrate_monte_carlo_simulation () {
125+ std::cout << " \n === Stochastic Process Research with GPU-Accelerated Monte Carlo ===\n " ;
126+
127+ // Configure for high-throughput Monte Carlo
128+ auto parallel_config = diffeq::execution::presets::monte_carlo ()
129+ .auto_target () // Let system choose best hardware
130+ .workers (std::thread::hardware_concurrency () * 4 ) // Oversubscribe for throughput
131+ .batch_size (10000 ) // Large batches for efficiency
132+ .enable_load_balancing () // Dynamic load balancing
133+ .build ();
134+
135+ // Print available hardware
136+ diffeq::execution::hardware::HardwareCapabilities::print_capabilities ();
137+
138+ const size_t num_simulations = 100000 ;
139+ const double T = 1.0 ; // 1 year
140+ const double dt = 1.0 /252 ; // daily steps
141+ const double S0 = 100.0 ; // initial stock price
142+
143+ std::cout << " \n Running " << num_simulations << " Monte Carlo simulations...\n " ;
144+ std::cout << " Each simulation: " << static_cast <int >(T/dt) << " steps over " << T << " years\n " ;
145+
146+ auto start_time = std::chrono::high_resolution_clock::now ();
147+
148+ // Prepare simulation parameters
149+ std::vector<double > final_prices (num_simulations);
150+ std::vector<std::future<double >> simulation_futures;
151+
152+ // Create the SDE system
153+ auto gbm_system = GeometricBrownianMotion{};
154+
155+ // Launch parallel simulations
156+ for (size_t i = 0 ; i < num_simulations; ++i) {
157+ simulation_futures.push_back (parallel_config->async ([=]() {
158+ // Create integrator for this simulation
159+ auto integrator = diffeq::ode::factory::make_rk4_integrator<std::vector<double >, double >(gbm_system);
160+
161+ // Random number generator for this thread
162+ std::mt19937 rng (std::random_device{}() + i);
163+ std::normal_distribution<double > normal (0.0 , 1.0 );
164+
165+ std::vector<double > state = {S0};
166+ double t = 0.0 ;
167+
168+ while (t < T) {
169+ // Deterministic step
170+ integrator->step (state, dt);
171+
172+ // Add stochastic component (simplified Euler-Maruyama)
173+ double dW = normal (rng) * std::sqrt (dt);
174+ state[0 ] += gbm_system.sigma * state[0 ] * dW;
175+
176+ t += dt;
177+ }
178+
179+ return state[0 ]; // Return final price
180+ }));
181+ }
182+
183+ // Collect results
184+ for (size_t i = 0 ; i < num_simulations; ++i) {
185+ final_prices[i] = simulation_futures[i].get ();
186+ }
187+
188+ auto end_time = std::chrono::high_resolution_clock::now ();
189+ auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
190+
191+ // Calculate statistics
192+ double mean_price = 0.0 ;
193+ double min_price = final_prices[0 ];
194+ double max_price = final_prices[0 ];
195+
196+ for (double price : final_prices) {
197+ mean_price += price;
198+ min_price = std::min (min_price, price);
199+ max_price = std::max (max_price, price);
200+ }
201+ mean_price /= num_simulations;
202+
203+ // Calculate variance
204+ double variance = 0.0 ;
205+ for (double price : final_prices) {
206+ variance += (price - mean_price) * (price - mean_price);
207+ }
208+ variance /= (num_simulations - 1 );
209+
210+ std::cout << " \n Monte Carlo Results:\n " ;
211+ std::cout << " Simulation time: " << duration.count () << " ms\n " ;
212+ std::cout << " Throughput: " << (num_simulations * 1000.0 ) / duration.count () << " simulations/second\n " ;
213+ std::cout << " Mean final price: $" << mean_price << " \n " ;
214+ std::cout << " Price range: $" << min_price << " - $" << max_price << " \n " ;
215+ std::cout << " Standard deviation: $" << std::sqrt (variance) << " \n " ;
216+ std::cout << " Expected price (theoretical): $" << S0 * std::exp (gbm_system.mu * T) << " \n " ;
217+ }
218+
219+ } // namespace stochastic_research
220+
221+ /* *
222+ * @brief Multi-Hardware Benchmark
223+ *
224+ * Demonstrates the same computation running on different hardware targets
225+ * to showcase the unified interface abstraction.
226+ */
227+ namespace multi_hardware_demo {
228+
229+ // Simple ODE for benchmarking: dx/dt = -k*x (exponential decay)
230+ struct ExponentialDecay {
231+ double k = 1.0 ;
232+
233+ void operator ()(double t, const std::vector<double >& state, std::vector<double >& derivative) {
234+ derivative[0 ] = -k * state[0 ];
235+ }
236+ };
237+
238+ void benchmark_hardware_targets () {
239+ std::cout << " \n === Multi-Hardware Parallelism Benchmark ===\n " ;
240+
241+ const size_t num_integrations = 10000 ;
242+ const double dt = 0.01 ;
243+ const double end_time = 1.0 ;
244+ const std::vector<double > initial_state = {1.0 };
245+
246+ std::vector<diffeq::execution::HardwareTarget> targets = {
247+ diffeq::execution::HardwareTarget::CPU_Sequential,
248+ diffeq::execution::HardwareTarget::CPU_ThreadPool,
249+ diffeq::execution::HardwareTarget::GPU_CUDA,
250+ diffeq::execution::HardwareTarget::GPU_OpenCL,
251+ diffeq::execution::HardwareTarget::FPGA_HLS
252+ };
253+
254+ auto system = ExponentialDecay{};
255+
256+ for (auto target : targets) {
257+ std::string target_name;
258+ switch (target) {
259+ case diffeq::execution::HardwareTarget::CPU_Sequential: target_name = " CPU Sequential" ; break ;
260+ case diffeq::execution::HardwareTarget::CPU_ThreadPool: target_name = " CPU Thread Pool" ; break ;
261+ case diffeq::execution::HardwareTarget::GPU_CUDA: target_name = " GPU CUDA" ; break ;
262+ case diffeq::execution::HardwareTarget::GPU_OpenCL: target_name = " GPU OpenCL" ; break ;
263+ case diffeq::execution::HardwareTarget::FPGA_HLS: target_name = " FPGA HLS" ; break ;
264+ default : target_name = " Unknown" ; break ;
265+ }
266+
267+ std::cout << " \n Benchmarking " << target_name << " ...\n " ;
268+
269+ // Configure parallelism for this hardware target
270+ diffeq::execution::ParallelConfig config;
271+ config.target = target;
272+ config.max_workers = std::thread::hardware_concurrency ();
273+ config.batch_size = num_integrations / 10 ;
274+
275+ auto parallel_facade = std::make_unique<diffeq::execution::ParallelismFacade>(config);
276+
277+ if (!parallel_facade->is_target_available (target)) {
278+ std::cout << " Hardware not available, skipping...\n " ;
279+ continue ;
280+ }
281+
282+ auto start_time = std::chrono::high_resolution_clock::now ();
283+
284+ // Create vector of initial conditions
285+ std::vector<std::vector<double >> states (num_integrations, initial_state);
286+
287+ // Parallel integration using the unified interface
288+ parallel_facade->parallel_for_each (states.begin (), states.end (), [&](auto & state) {
289+ auto integrator = diffeq::ode::factory::make_rk4_integrator<std::vector<double >, double >(system);
290+
291+ double t = 0.0 ;
292+ while (t < end_time) {
293+ double step_size = std::min (dt, end_time - t);
294+ integrator->step (state, step_size);
295+ t += step_size;
296+ }
297+ });
298+
299+ auto end_time_chrono = std::chrono::high_resolution_clock::now ();
300+ auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time_chrono - start_time);
301+
302+ // Verify results (should be approximately e^(-k*t) = e^(-1) ≈ 0.368)
303+ double expected_result = std::exp (-system.k * end_time);
304+ double mean_result = 0.0 ;
305+ for (const auto & state : states) {
306+ mean_result += state[0 ];
307+ }
308+ mean_result /= num_integrations;
309+
310+ double error = std::abs (mean_result - expected_result);
311+
312+ std::cout << " Execution time: " << duration.count () << " ms\n " ;
313+ std::cout << " Throughput: " << (num_integrations * 1000.0 ) / duration.count () << " integrations/second\n " ;
314+ std::cout << " Average result: " << mean_result << " (expected: " << expected_result << " )\n " ;
315+ std::cout << " Error: " << error << " \n " ;
316+ std::cout << " Max concurrency: " << parallel_facade->get_max_concurrency () << " \n " ;
317+ }
318+ }
319+
320+ } // namespace multi_hardware_demo
321+
322+ /* *
323+ * @brief Demonstration function that runs all examples
324+ */
325+ void demonstrate_all_parallelism_features () {
326+ std::cout << " === Enhanced Parallelism Capabilities Demo ===\n " ;
327+ std::cout << " Demonstrating modern C++ parallelism with unified hardware interface\n " ;
328+
329+ // Run robotics control example
330+ robotics_control::demonstrate_realtime_control ();
331+
332+ // Run stochastic research example
333+ stochastic_research::demonstrate_monte_carlo_simulation ();
334+
335+ // Run multi-hardware benchmark
336+ multi_hardware_demo::benchmark_hardware_targets ();
337+
338+ std::cout << " \n === All Parallelism Demonstrations Complete ===\n " ;
339+ }
340+
341+ } // namespace diffeq::examples::parallelism
0 commit comments