From 3d265b9cbb54360851a837de6501e2e0b6d7f200 Mon Sep 17 00:00:00 2001 From: Brendan Long Date: Mon, 2 Mar 2026 14:42:18 -0600 Subject: [PATCH 1/5] Add default thread count CLI arg of 1 --- main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.cpp b/main.cpp index 5c4dfe5..a1dded7 100644 --- a/main.cpp +++ b/main.cpp @@ -5,11 +5,11 @@ using namespace std; int main(int argc, char* argv[]){ - const int NUM_THREADS = std::stoi(argv[1]); + const int NUM_THREADS = argc == 2 ? std::stoi(argv[1]) : 1; std::cout << "Number of Threads = " << NUM_THREADS << std::endl; Mandelbrot::Mandelbrot myplot(1920, 1080, NUM_THREADS); - myplot.draw("/home/ec2-user/Mandelbrot-final/Mandelbrot-Example/images/Green-Parallel-512.bmp", Mandelbrot::Mandelbrot::GREEN); + myplot.draw("Green-Parallel-512.bmp", Mandelbrot::Mandelbrot::GREEN); return 0; } \ No newline at end of file From f1238f799529b454786f902745f7b05614868a9a Mon Sep 17 00:00:00 2001 From: Brendan Long Date: Mon, 2 Mar 2026 14:42:49 -0600 Subject: [PATCH 2/5] contain #pragma pack settings to their own file --- BitmapFileHeader.h | 6 ++++-- BitmapInfoHeader.h | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/BitmapFileHeader.h b/BitmapFileHeader.h index f389ba1..6a7a62c 100644 --- a/BitmapFileHeader.h +++ b/BitmapFileHeader.h @@ -1,7 +1,7 @@ #pragma once #include -#pragma pack(2) +#pragma pack(push, 2) namespace Mandelbrot{ @@ -12,4 +12,6 @@ struct BitmapFileHeader { int32_t dataOffset; }; -} \ No newline at end of file +} + +#pragma pack(pop) \ No newline at end of file diff --git a/BitmapInfoHeader.h b/BitmapInfoHeader.h index 6036085..b750abf 100644 --- a/BitmapInfoHeader.h +++ b/BitmapInfoHeader.h @@ -4,7 +4,7 @@ using namespace std; -#pragma pack(2) +#pragma pack(push, 2) namespace Mandelbrot { @@ -23,4 +23,6 @@ struct BitmapInfoHeader{ }; -} \ No newline at end of file +} + +#pragma pack(pop) \ No newline at end of file From b6aaa9fb4a3baa97e50109ced9e7073d7e504630 Mon Sep 17 00:00:00 2001 From: Brendan Long Date: Mon, 2 Mar 2026 14:47:40 -0600 Subject: [PATCH 3/5] Use Arm Neon operations to handle 4 floating point operations at a time. --- Mandelbrot.cpp | 69 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/Mandelbrot.cpp b/Mandelbrot.cpp index edd7feb..dd29da8 100644 --- a/Mandelbrot.cpp +++ b/Mandelbrot.cpp @@ -3,12 +3,51 @@ #include #include #include +#include +#if defined(__ARM_NEON) || defined(__ARM_NEON__) +#include +#endif #include "Mandelbrot.h" using namespace std; namespace Mandelbrot{ +#if defined(__ARM_NEON) || defined(__ARM_NEON__) +static inline void getIterationsNeon4(float32x4_t c_re, float32x4_t c_im, int out_iters[4]){ + const float32x4_t threshold2 = vdupq_n_f32(4.0f); + float32x4_t z_re = vdupq_n_f32(0.0f); + float32x4_t z_im = vdupq_n_f32(0.0f); + int32x4_t iters = vdupq_n_s32(0); + uint32x4_t active = vdupq_n_u32(0xFFFFFFFFu); + + for (int i = 0; i < Mandelbrot::MAX_ITERATIONS; i++){ + float32x4_t z_re2 = vmulq_f32(z_re, z_re); + float32x4_t z_im2 = vmulq_f32(z_im, z_im); + float32x4_t z_re_im = vmulq_f32(z_re, z_im); + + float32x4_t z_re_new = vaddq_f32(vsubq_f32(z_re2, z_im2), c_re); + float32x4_t z_im_new = vaddq_f32(vaddq_f32(z_re_im, z_re_im), c_im); + + float32x4_t mag2 = vaddq_f32(vmulq_f32(z_re_new, z_re_new), vmulq_f32(z_im_new, z_im_new)); + uint32x4_t still_in = vcleq_f32(mag2, threshold2); + + uint32x4_t inc_mask = vandq_u32(active, still_in); + iters = vaddq_s32(iters, vreinterpretq_s32_u32(vandq_u32(inc_mask, vdupq_n_u32(1)))); + + z_re = z_re_new; + z_im = z_im_new; + active = inc_mask; + + if (vmaxvq_u32(active) == 0){ + break; + } + } + + vst1q_s32(out_iters, iters); +} +#endif + Mandelbrot::Mandelbrot(int width, int height, const int N_THREADS): _bitmap(width, height), _width(width), @@ -50,7 +89,33 @@ void Mandelbrot::draw(string fileName, drawColor colourSelection ){ auto work = [&](int thread_id){ for (int y = thread_id; y < _height; y+= NUM_THREADS){ - for (int x = 0; x < _width; x++){ + int x = 0; +#if defined(__ARM_NEON) || defined(__ARM_NEON__) + const float32x4_t x_step = vdupq_n_f32(2.0f/_width); + const float32x4_t x_base = vdupq_n_f32((-_width/2.0f - 150.0f) * 2.0f/_width); + const float32x4_t y_fractal = vdupq_n_f32((y - _height/2.0f) * 2.0f/_width); + for (; x + 3 < _width; x += 4){ + int32_t x_vals[4] = {x, x + 1, x + 2, x + 3}; + float32x4_t x_offsets = vcvtq_f32_s32(vld1q_s32(x_vals)); + float32x4_t c_re = vaddq_f32(x_base, vmulq_f32(x_offsets, x_step)); + float32x4_t c_im = y_fractal; + + int iters[4]; + getIterationsNeon4(c_re, c_im, iters); + + unique_lock l(histMutex); + for (int lane = 0; lane < 4; lane++){ + int idx = y*_width + (x + lane); + int num_iters = iters[lane]; + pfractalData[idx] = num_iters; + if (num_iters != MAX_ITERATIONS){ + p[num_iters]++; + } + } + l.unlock(); + } +#else + for (; x < _width; x++){ double xFractal = (x - _width/2 - 150) * 2.0/_width; double yFractal = (y - _height/2) * 2.0/_width; @@ -66,6 +131,7 @@ void Mandelbrot::draw(string fileName, drawColor colourSelection ){ } l.unlock(); } +#endif } }; @@ -153,4 +219,3 @@ bool Mandelbrot::_validHistogram(){ return false; } } - From 3f1557e7d018c8cf30c06e7066d111bb2a19a9ed Mon Sep 17 00:00:00 2001 From: Brendan Long Date: Mon, 2 Mar 2026 16:20:33 -0600 Subject: [PATCH 4/5] Fix loop vectorization #endif logic --- Mandelbrot.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Mandelbrot.cpp b/Mandelbrot.cpp index dd29da8..fa325b1 100644 --- a/Mandelbrot.cpp +++ b/Mandelbrot.cpp @@ -114,7 +114,7 @@ void Mandelbrot::draw(string fileName, drawColor colourSelection ){ } l.unlock(); } -#else +#endif for (; x < _width; x++){ double xFractal = (x - _width/2 - 150) * 2.0/_width; double yFractal = (y - _height/2) * 2.0/_width; @@ -131,7 +131,6 @@ void Mandelbrot::draw(string fileName, drawColor colourSelection ){ } l.unlock(); } -#endif } }; From 304bb39b5c83bb69002e19361787a15fdf7ff49e Mon Sep 17 00:00:00 2001 From: Brendan Long Date: Tue, 3 Mar 2026 14:01:11 -0600 Subject: [PATCH 5/5] add build.sh with optimizing compiler flags --- build.sh | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 80b6536..7492bf2 100755 --- a/build.sh +++ b/build.sh @@ -1,3 +1,36 @@ #!/bin/bash -g++ --std=c++11 -g -O0 main.cpp Mandelbrot.cpp bitmap.cpp -o ./builds/mandelbrot-parallel \ No newline at end of file +set -euo pipefail + +MODE="${1:-release}" +CXX="${CXX:-g++}" +SRC=(main.cpp Mandelbrot.cpp bitmap.cpp) +OUT_DIR="./builds" + +mkdir -p "$OUT_DIR" + +case "$MODE" in + debug) + OUT="$OUT_DIR/mandelbrot-parallel-debug" + CXXFLAGS=(--std=c++11 -g -O0) + ;; + release) + OUT="$OUT_DIR/mandelbrot-parallel" + CXXFLAGS=( + --std=c++11 + -O3 + -mcpu=neoverse-n1+crc+crypto + -ffast-math + -funroll-loops + -flto + -DNDEBUG + ) + ;; + *) + echo "Usage: $0 [debug|release]" + exit 1 + ;; +esac + +echo "Building $MODE -> $OUT" +"$CXX" "${CXXFLAGS[@]}" "${SRC[@]}" -o "$OUT" \ No newline at end of file