Skip to content

Commit 415fe4d

Browse files
feat: final
1 parent 57fb6c5 commit 415fe4d

File tree

4 files changed

+52
-7
lines changed

4 files changed

+52
-7
lines changed

config.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ Re = 500.0 # Reynolds number
33
length = 1.0 # Length of Grid
44
grid_points = 100 # Number Of GridPoints
55
time_step = 0.01 # Time step
6-
max_iterations = 10000 # Maximum Number of iterations
7-
tolerance = 1e-6 # tolerance adjust
6+
max_iterations = 100000 # Maximum Number of iterations
7+
tolerance = 1e-8 # Convergence tolerance
88
python_plot = true
9-
lid_velocity = 1.0
9+
lid_velocity = 1.0
10+
number_of_threads = 17

src/LidDrivenCavity.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ auto LidDrivenCavity::compute_velocities() -> void {
4141
void LidDrivenCavity::solve_stream_function() {
4242
constexpr double relaxation_factor = 0.2;
4343
#pragma omp parallel for
44-
for (int k = 0; k < 10; ++k) {
44+
for (int k = 0; k < 4; ++k) {
4545
for (int i = 1; i < N - 1; ++i) {
4646
for (int j = 1; j < N - 1; ++j) {
4747
// Update stream function using SOR
@@ -177,6 +177,8 @@ auto LidDrivenCavity::solve() -> void {
177177

178178
Renderer renderer(800, 800, u, v, x, y);
179179
renderer.initialize();
180+
auto start_loop = std::chrono::high_resolution_clock::now();
181+
auto end_loop = std::chrono::high_resolution_clock::now();
180182

181183
for (int n = 0; n < max_iter; ++n) {
182184
// Store old vorticity for convergence check
@@ -188,16 +190,21 @@ auto LidDrivenCavity::solve() -> void {
188190
}
189191

190192
// Perform one iteration
193+
if (!n % 100)
194+
start_loop = std::chrono::high_resolution_clock::now();
191195
apply_boundary_conditions();
192196
solve_vorticity();
193197
solve_stream_function();
194198
compute_velocities();
195-
199+
if (!n % 100)
200+
end_loop = std::chrono::high_resolution_clock::now();
196201
renderer.updateData(u, v);
197202
renderer.render();
198203

199204
if (n % 100 == 0) {
200-
std::cout << "Iteration: " << n << std::endl;
205+
const auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end_loop - start_loop);
206+
std::cout << "Iteration: " << n << "\n";
207+
std::cout << "Time elapsed: " << duration.count() << " ns\n";
201208
}
202209
// Check for convergence
203210
double max_diff = 0.0;

src/main.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
#include "subprojects/tomlplusplus/toml.hpp"
55

66
int main(const int argc, char **argv) {
7-
omp_set_num_threads(6);
87
std::string config_file = "config.toml";
98

109
if (argc > 1) {
@@ -21,7 +20,9 @@ int main(const int argc, char **argv) {
2120
const double tolerance = config["LidDrivenCavity"]["tolerance"].value_or(1e-6);
2221
const bool python_plot = config["LidDrivenCavity"]["python_plot"].value_or(true);
2322
const double lid_velocity = config["LidDrivenCavity"]["lid_velocity"].value_or(1.0);
23+
const int number_of_threads = config["LidDrivenCavity"]["number_of_threads"].value_or(10);
2424

25+
omp_set_num_threads(number_of_threads);
2526
LidDrivenCavity cavity(Re, Length, grid_points, time_step, max_iterations, tolerance, lid_velocity);
2627
cavity.solve();
2728

src/perf_profile.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
4+
5+
def load_performance_data():
    """Return the hard-coded thread-scaling measurements.

    Returns:
        tuple[np.ndarray, np.ndarray]: ``(threads, elapsed_time)`` where
        ``threads[i] == i`` is the thread-count setting of sample ``i`` and
        ``elapsed_time[i]`` is the time recorded for that setting (plotted
        by this script as nanoseconds per step).
    """
    # One sample per thread-count setting, listed in order from 0 to 20.
    elapsed_time = np.array([
        4547588, 4580443, 3142521, 2290600, 2028613, 2349594, 2276177,
        2192905, 1946464, 1484296, 1914334, 1735396, 2040608, 1672974,
        1699116, 1654355, 1756129, 1337214, 1685929, 1849847, 1995665,
    ])
    # Derive the thread axis from the number of samples so the two arrays
    # cannot drift out of sync when a measurement is added or removed.
    threads = np.arange(len(elapsed_time))
    return threads, elapsed_time
10+
11+
12+
def plot_performance(threads, elapsed_time):
    """Draw elapsed time against thread count and display the figure.

    Args:
        threads: Sequence of thread counts used for the x axis; it is also
            indexed with the position of the best speedup.
        elapsed_time: NumPy array of timings used for the y axis. The first
            element is taken as the baseline for the speedup figures.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(threads, elapsed_time, 'bo-')
    plt.title('Performance Analysis: Thread Count vs Elapsed Time')
    plt.xlabel('Number of Threads')
    plt.ylabel('Elapsed Time per Step (ns)')
    plt.grid(True)

    # Speedup of every sample relative to the first one.
    # NOTE(review): the baseline is whatever sits at index 0, not necessarily
    # the single-thread run - confirm this is the intended reference.
    gain = elapsed_time[0] / elapsed_time
    best = gain.argmax()

    # Summary box pinned to the top-left corner of the axes.
    summary = '\n'.join([
        f'Max Speedup: {gain.max():.2f}x',
        f'Optimal Thread Count: {threads[best]}',
    ])
    box_style = {'boxstyle': 'round', 'facecolor': 'white', 'alpha': 0.8}
    plt.text(0.02, 0.98, summary, transform=plt.gca().transAxes,
             verticalalignment='top', bbox=box_style)

    plt.tight_layout()
    plt.show()
32+
33+
34+
if __name__ == "__main__":
    # Script entry point: load the recorded measurements and plot them.
    plot_performance(*load_performance_data())

0 commit comments

Comments
 (0)