Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit a25ff6d

Browse files
authored Feb 25, 2025
Tune sieve array size (#159)
1 parent e042c8f commit a25ff6d

File tree

3 files changed

+43
-13
lines changed

3 files changed

+43
-13
lines changed
 

‎ChangeLog

+2-1
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
1-
Changes in version 12.7, 17/02/2025
1+
Changes in version 12.7, 24/02/2025
22
===================================
33

44
* EratBig.cpp: Simplify bucket handling.
5+
* Erat.cpp: Tune sieve size using FACTOR_SIEVESIZE.
56
* README.md: Add Sponsors section.
67

78
Changes in version 12.6, 11/11/2024

‎include/primesieve/config.hpp

+16-1
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
/// @file config.hpp
33
/// @brief primesieve compile time constants.
44
///
5-
/// Copyright (C) 2024 Kim Walisch, <kim.walisch@gmail.com>
5+
/// Copyright (C) 2025 Kim Walisch, <kim.walisch@gmail.com>
66
///
77
/// This file is distributed under the BSD License. See the COPYING
88
/// file in the top level directory.
@@ -60,6 +60,21 @@ constexpr uint64_t MAX_CACHE_ITERATOR = 1 << 30;
6060
///
6161
constexpr uint64_t MIN_THREAD_DISTANCE = (uint64_t) 1e7;
6262

63+
/// sieveSize = sqrt(stop) * FACTOR_SIEVESIZE.
64+
///
65+
/// Using a larger FACTOR_SIEVESIZE increases the segment size in the
66+
/// sieve of Eratosthenes and hence reduces the number of operations
67+
/// used by the algorithm. However, as a drawback a larger segment
68+
/// size is less cache efficient and hence performance may deteriorate
69+
/// on CPUs with limited L2 cache bandwidth (especially when using
70+
/// multi-threading).
71+
///
72+
/// Using FACTOR_SIEVESIZE = 2.0 performs well for counting the
73+
/// primes < 10^11 using multi-threading on both the Apple M3 CPU and
74+
/// the Intel Arrow Lake 245K CPU (from 2024).
75+
///
76+
/// Applied in Erat::initAlgorithms(): the product
/// sqrt(stop) * FACTOR_SIEVESIZE is truncated to uint64_t and
/// then clamped to the allowed [min, max] sieve size range.
constexpr double FACTOR_SIEVESIZE = 2.0;
77+
6378
/// Sieving primes <= (L1D_CACHE_BYTES * FACTOR_ERATSMALL) are
6479
/// processed in EratSmall. When FACTOR_ERATSMALL is small fewer
6580
/// sieving primes are processed in EratSmall.cpp and more sieving

‎src/Erat.cpp

+25-11
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
/// @brief The Erat class manages prime sieving using the
44
/// EratSmall, EratMedium, EratBig classes.
55
///
6-
/// Copyright (C) 2024 Kim Walisch, <kim.walisch@gmail.com>
6+
/// Copyright (C) 2025 Kim Walisch, <kim.walisch@gmail.com>
77
///
88
/// This file is distributed under the BSD License. See the COPYING
99
/// file in the top level directory.
@@ -110,20 +110,34 @@ void Erat::initAlgorithms(uint64_t maxSieveSize,
110110
uint64_t minSieveSize = std::min(l1CacheSize, maxSieveSize);
111111

112112
// ================================================================
113-
// 2. sieveSize = inBetween(minSieveSize, sqrtStop, maxSieveSize)
113+
// 2. sieveSize = sqrt(stop) * FACTOR_SIEVESIZE
114+
// ================================================================
115+
116+
// Using a larger FACTOR_SIEVESIZE increases the segment size
117+
// in the sieve of Eratosthenes and hence reduces the number
118+
// of operations used by the algorithm. However, as a drawback
119+
// a larger segment size is less cache efficient and hence
120+
// performance may deteriorate on CPUs with limited L2 cache
121+
// bandwidth (especially when using multi-threading).
122+
uint64_t sieveSize = uint64_t(sqrtStop * config::FACTOR_SIEVESIZE);
123+
124+
// ================================================================
125+
// 3. L1CacheSize <= sieveSize <= L2CacheSize
114126
// ================================================================
115127

116128
// For small stop numbers a small sieve array size that
117129
// matches the CPU's L1 data cache size performs best.
118-
// For larger stop numbers a sieve array size that is
119-
// within [L1CacheSize, L2CacheSize] usually performs best.
120-
uint64_t sieveSize = inBetween(minSieveSize, sqrtStop, maxSieveSize);
130+
// For larger stop numbers a sieve array size that is ~
131+
// L2CacheSize usually performs best. Hence our sieve size
132+
// increases dynamically based on the stop number but it
133+
// can never exceed the L2CacheSize (or maxSieveSize).
134+
sieveSize = inBetween(minSieveSize, sieveSize, maxSieveSize);
121135
sieveSize = inBetween(16 << 10, sieveSize, 8192 << 10);
122136
sieveSize = ceilDiv(sieveSize, sizeof(uint64_t)) * sizeof(uint64_t);
123137
minSieveSize = std::min(l1CacheSize, sieveSize);
124138

125139
// ================================================================
126-
// 3. Initialize upper bounds for EratSmall & EratMedium
140+
// 4. Initialize upper bounds for EratSmall & EratMedium
127141
// ================================================================
128142

129143
// Small sieving primes are processed using the EratSmall
@@ -134,7 +148,7 @@ void Erat::initAlgorithms(uint64_t maxSieveSize,
134148
maxEratMedium_ = (uint64_t) (sieveSize * config::FACTOR_ERATMEDIUM);
135149

136150
// ================================================================
137-
// 4. EratBig requires a power of 2 sieve size
151+
// 5. EratBig requires a power of 2 sieve size
138152
// ================================================================
139153

140154
if (sqrtStop > maxEratMedium_)
@@ -146,14 +160,14 @@ void Erat::initAlgorithms(uint64_t maxSieveSize,
146160
}
147161

148162
// ================================================================
149-
// 5. Ensure we allocate the smallest possible amount of memory
163+
// 6. Ensure we allocate the smallest possible amount of memory
150164
// ================================================================
151165

152166
maxEratSmall_ = std::min(maxEratSmall_, sqrtStop);
153167
maxEratMedium_ = std::min(maxEratMedium_, sqrtStop);
154168

155169
// ================================================================
156-
// 6. Initialize segment bounds
170+
// 7. Initialize segment bounds
157171
// ================================================================
158172

159173
// The 8 bits of each byte of the sieve array correspond to
@@ -168,7 +182,7 @@ void Erat::initAlgorithms(uint64_t maxSieveSize,
168182
segmentHigh_ = std::min(segmentHigh_, stop_);
169183

170184
// ================================================================
171-
// 7. Use tiny sieveSize if possible
185+
// 8. Use tiny sieveSize if possible
172186
// ================================================================
173187

174188
// If we are sieving just a single segment
@@ -184,7 +198,7 @@ void Erat::initAlgorithms(uint64_t maxSieveSize,
184198
}
185199

186200
// ================================================================
187-
// 8. Finally, initialize EratSmall, EratMedium & EratBig
201+
// 9. Finally, initialize EratSmall, EratMedium & EratBig
188202
// ================================================================
189203

190204
ASSERT(sieveSize % sizeof(uint64_t) == 0);

0 commit comments

Comments
 (0)
Please sign in to comment.