feat: configurable OpenMP thread count, solver-step timing, and thread-scaling plot

varun-r-mallya · varun-r-mallya · commit 415fe4de589a · 2025-04-24T19:13:30.000+05:30
diff --git a/config.toml b/config.toml
@@ -3,7 +3,8 @@ Re = 500.0              # Reynolds number
 length = 1.0            # Length of Grid
 grid_points = 100       # Number Of GridPoints
 time_step = 0.01        # Time step
-max_iterations = 10000     # Maximum Number of iterations
-tolerance = 1e-6        # tolerance adjust
+max_iterations = 100000     # Maximum Number of iterations
+tolerance = 1e-8        # Convergence tolerance
 python_plot = true
-lid_velocity = 1.0
+lid_velocity = 1.0
+number_of_threads = 17
diff --git a/src/LidDrivenCavity.cpp b/src/LidDrivenCavity.cpp
@@ -41,7 +41,7 @@ auto LidDrivenCavity::compute_velocities() -> void {
 void LidDrivenCavity::solve_stream_function() {
     constexpr double relaxation_factor = 0.2;
 #pragma omp parallel for
-    for (int k = 0; k < 10; ++k) {
+    for (int k = 0; k < 4; ++k) {  // NOTE(review): the `omp parallel for` above applies to this k loop, so only its 4 iterations are split across threads (at most 4 threads receive work) and the sweeps may run concurrently - confirm the update below tolerates that
         for (int i = 1; i < N - 1; ++i) {
             for (int j = 1; j < N - 1; ++j) {
                 // Update stream function using SOR
@@ -177,6 +177,8 @@ auto LidDrivenCavity::solve() -> void {
 
     Renderer renderer(800, 800, u, v, x, y);
     renderer.initialize();
+    auto start_loop = std::chrono::high_resolution_clock::now();  // start of the timed solver step; reassigned inside the iteration loop
+    auto end_loop = std::chrono::high_resolution_clock::now();  // end of the timed solver step; reassigned inside the iteration loop
 
     for (int n = 0; n < max_iter; ++n) {
         // Store old vorticity for convergence check
@@ -188,16 +190,21 @@ auto LidDrivenCavity::solve() -> void {
         }
 
         // Perform one iteration
+        if (n % 100 == 0)  // time only the iterations whose duration is printed below (every 100th)
+            start_loop = std::chrono::high_resolution_clock::now();
         apply_boundary_conditions();
         solve_vorticity();
         solve_stream_function();
         compute_velocities();
-
+        if (n % 100 == 0)  // must match the start-timestamp condition so both ends belong to the same step
+            end_loop = std::chrono::high_resolution_clock::now();
         renderer.updateData(u, v);
         renderer.render();
 
         if (n % 100 == 0) {
-            std::cout << "Iteration: " << n << std::endl;
+            const auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end_loop - start_loop);
+            std::cout << "Iteration: " << n << "\n";
+            std::cout << "Time elapsed: " << duration.count() << " ns\n";
         }
         // Check for convergence
         double max_diff = 0.0;
diff --git a/src/main.cpp b/src/main.cpp
@@ -4,7 +4,6 @@
 #include "subprojects/tomlplusplus/toml.hpp"
 
 int main(const int argc, char **argv) {
-    omp_set_num_threads(6);
     std::string config_file = "config.toml";
 
     if (argc > 1) {
@@ -21,7 +20,9 @@ int main(const int argc, char **argv) {
         const double tolerance = config["LidDrivenCavity"]["tolerance"].value_or(1e-6);
         const bool python_plot = config["LidDrivenCavity"]["python_plot"].value_or(true);
         const double lid_velocity = config["LidDrivenCavity"]["lid_velocity"].value_or(1.0);
+        const int number_of_threads = config["LidDrivenCavity"]["number_of_threads"].value_or(10);
 
+        omp_set_num_threads(number_of_threads > 0 ? number_of_threads : 1);  // OpenMP requires a positive thread count; fall back to one thread for 0 or negative config values
         LidDrivenCavity cavity(Re, Length, grid_points, time_step, max_iterations, tolerance, lid_velocity);
         cavity.solve();
 
diff --git a/src/perf_profile.py b/src/perf_profile.py
@@ -0,0 +1,36 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def load_performance_data():
+    threads = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
+    elapsed_time = [4547588, 4580443, 3142521, 2290600, 2028613, 2349594, 2276177, 2192905, 1946464, 1484296, 1914334,
+                    1735396, 2040608, 1672974, 1699116, 1654355, 1756129, 1337214, 1685929, 1849847, 1995665]
+    return np.array(threads), np.array(elapsed_time)
+
+
+def plot_performance(threads, elapsed_time):
+    plt.figure(figsize=(10, 6))
+    plt.plot(threads, elapsed_time, 'bo-')
+    plt.xlabel('Number of Threads')
+    plt.ylabel('Elapsed Time per Step (ns)')
+    plt.title('Performance Analysis: Thread Count vs Elapsed Time')
+    plt.grid(True)
+
+    # Calculate and annotate speedup
+    baseline_time = elapsed_time[0]
+    speedup = baseline_time / elapsed_time
+
+    # Add text box with statistics
+    stats_text = f'Max Speedup: {speedup.max():.2f}x\n'
+    stats_text += f'Optimal Thread Count: {threads[speedup.argmax()]}'
+    plt.text(0.02, 0.98, stats_text, transform=plt.gca().transAxes,
+             verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
+
+    plt.tight_layout()
+    plt.show()
+
+
+if __name__ == "__main__":
+    threads, times = load_performance_data()
+    plot_performance(threads, times)