improve comments

TilmanNeumann · TilmanNeumann · commit 4565ac212173 · 2025-02-03T18:48:47.000+01:00
diff --git a/src/main/java/de/tilman_neumann/jml/factor/siqs/tdiv/TDiv_QS_2LP.java b/src/main/java/de/tilman_neumann/jml/factor/siqs/tdiv/TDiv_QS_2LP.java
@@ -223,17 +223,17 @@ private AQPair test(BigInteger A, BigInteger QRest0, int x) {
 		BigInteger QRest = QRest0; // keep initial QRest0 for logging below
 
 		// Pass 1: Test solution arrays.
-		// If |x| < p, then no modulus computation is required.
-		// Otherwise we compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
+		// The performance bottleneck here is the modulus computation.
+		// The current approach is already quite fast for large N, because then we have pMax > 3*sieveArraySize,
+		// which means that for ~75% of x-values we can completely omit the mod-computation or replace it by a simple addition.
+		// For (big |x|, small p) we compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
 		// We can use the long-variant here because x*m will never overflow positive long values.
-		// For some reasons I do not understand, it is faster to divide Q by p in pass 2 only, not here.
-		// IMPORTANT: Java gives x % p = x for |x| < p, and we have many p bigger than any sieve array entry.
-		// IMPORTANT: Not computing the modulus in these cases improves performance by almost factor 2!
+		// For reasons I do not yet understand, it is faster to divide Q by p in pass 2 only, rather than here in pass 1.
 		int pass2Count = 0;
 		int pIndex = baseSize-1;
 		if (x < 0) {
 			for ( ; pIndex >= p1Index; pIndex--) {
-				// for pIndex >= p1Index, we know that |x| < sieveArraySize < p
+				// for pIndex >= p1Index, we know that |x| < p
 				int xModP = x+pArray[pIndex];
 				if (xModP==x1Array[pIndex] || xModP==x2Array[pIndex]) {
 					pass2Primes[pass2Count] = primes[pIndex];
@@ -272,7 +272,7 @@ private AQPair test(BigInteger A, BigInteger QRest0, int x) {
 		} else {
 			// x >= 0
 			for ( ; pIndex >= p1Index; pIndex--) {
-				// for pIndex > p1Index, we know that |x| < sieveArraySize < p
+				// for pIndex >= p1Index, we know that |x| < p
 				if (x==x1Array[pIndex] || x==x2Array[pIndex]) {
 					pass2Primes[pass2Count] = primes[pIndex];
 					pass2Exponents[pass2Count] = exponents[pIndex];
diff --git a/src/main/java/de/tilman_neumann/jml/factor/siqs/tdiv/TDiv_QS_2LP_Full.java b/src/main/java/de/tilman_neumann/jml/factor/siqs/tdiv/TDiv_QS_2LP_Full.java
@@ -216,17 +216,19 @@ private AQPair test(BigInteger A, BigInteger Q, int x) {
 		}
 		
 		// Pass 1: Test solution arrays.
-		// IMPORTANT: Java gives x % p = x for |x| < p, and we have many p bigger than any sieve array entry.
-		// IMPORTANT: Not computing the modulus in these cases improves performance by almost factor 2!
+		// The performance bottleneck here is the modulus computation.
+		// The current approach is already quite fast for large N, because then we have pMax > 3*sieveArraySize,
+		// which means that for ~75% of x-values we can completely omit the mod-computation or replace it by a simple addition.
+		// For (big |x|, small p) we compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
+		// We can use the long-variant here because x*m will never overflow positive long values.
+		// For reasons I do not yet understand, it is faster to divide Q by p in pass 2 only, rather than here in pass 1.
 		final int xAbs = x<0 ? -x : x;
-		for (int pIndex = baseSize-1; pIndex > 0; pIndex--) { // p[0]=2 was already tested
+		for (int pIndex = baseSize-1; pIndex > 0; pIndex--) { // p[0]=2 has already been tested
 			int p = pArray[pIndex];
 			int xModP;
 			if (xAbs<p) {
 				xModP = x<0 ? x+p : x;
 			} else {
-				// Compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
-				// We can use the long-variant here because x*m will never overflow positive long values.
 				final long m = pinvArrayL[pIndex];
 				final long q = ( ( ((long)x) * m) >>> 32); // first argument long optimizes register usage
 				xModP = (int) ( ((long)x) - q * p);
@@ -246,7 +248,6 @@ private AQPair test(BigInteger A, BigInteger Q, int x) {
 				pass2Primes[pass2Count] = primes[pIndex];
 				pass2Exponents[pass2Count] = exponents[pIndex];
 				pass2Powers[pass2Count++] = p;
-				// for some reasons I do not understand it is faster to divide Q by p in pass 2 only, not here
 			}
 		}
 		if (ANALYZE) pass1Duration += timer.capture();
diff --git a/src/main/java/de/tilman_neumann/jml/factor/siqs/tdiv/TDiv_QS_3LP.java b/src/main/java/de/tilman_neumann/jml/factor/siqs/tdiv/TDiv_QS_3LP.java
@@ -226,17 +226,17 @@ private AQPair test(BigInteger A, BigInteger QRest0, int x) {
 		BigInteger QRest = QRest0; // keep initial QRest0 for logging below
 
 		// Pass 1: Test solution arrays.
-		// If |x| < p, then no modulus computation is required.
-		// Otherwise we compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
+		// The performance bottleneck here is the modulus computation.
+		// The current approach is already quite fast for large N, because then we have pMax > 3*sieveArraySize,
+		// which means that for ~75% of x-values we can completely omit the mod-computation or replace it by a simple addition.
+		// For (big |x|, small p) we compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
 		// We can use the long-variant here because x*m will never overflow positive long values.
-		// For some reasons I do not understand, it is faster to divide Q by p in pass 2 only, not here.
-		// IMPORTANT: Java gives x % p = x for |x| < p, and we have many p bigger than any sieve array entry.
-		// IMPORTANT: Not computing the modulus in these cases improves performance by almost factor 2!
+		// For reasons I do not yet understand, it is faster to divide Q by p in pass 2 only, rather than here in pass 1.
 		int pass2Count = 0;
 		int pIndex = baseSize-1;
 		if (x < 0) {
 			for ( ; pIndex >= p1Index; pIndex--) {
-				// for pIndex >= p1Index, we know that |x| < sieveArraySize < p
+				// for pIndex >= p1Index, we know that |x| < p
 				int xModP = x+pArray[pIndex];
 				if (xModP==x1Array[pIndex] || xModP==x2Array[pIndex]) {
 					pass2Primes[pass2Count] = primes[pIndex];
@@ -275,7 +275,7 @@ private AQPair test(BigInteger A, BigInteger QRest0, int x) {
 		} else {
 			// x >= 0
 			for ( ; pIndex >= p1Index; pIndex--) {
-				// for pIndex > p1Index, we know that |x| < sieveArraySize < p
+				// for pIndex >= p1Index, we know that |x| < p
 				if (x==x1Array[pIndex] || x==x2Array[pIndex]) {
 					pass2Primes[pass2Count] = primes[pIndex];
 					pass2Exponents[pass2Count] = exponents[pIndex];
diff --git a/src/main/java/de/tilman_neumann/jml/factor/siqs/tdiv/TDiv_QS_Small.java b/src/main/java/de/tilman_neumann/jml/factor/siqs/tdiv/TDiv_QS_Small.java
@@ -192,17 +192,19 @@ private AQPair test(BigInteger A, BigInteger Q, int x) {
 		}
 		
 		// Pass 1: Test solution arrays.
-		// IMPORTANT: Java gives x % p = x for |x| < p, and we have many p bigger than any sieve array entry.
-		// IMPORTANT: Not computing the modulus in these cases improves performance by almost factor 2!
+		// The performance bottleneck here is the modulus computation.
+		// The current approach is already quite fast for large N, because then we have pMax > 3*sieveArraySize,
+		// which means that for ~75% of x-values we can completely omit the mod-computation or replace it by a simple addition.
+		// For (big |x|, small p) we compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
+		// We can use the long-variant here because x*m will never overflow positive long values.
+		// For reasons I do not yet understand, it is faster to divide Q by p in pass 2 only, rather than here in pass 1.
 		final int xAbs = x<0 ? -x : x;
-		for (int pIndex = baseSize-1; pIndex > 0; pIndex--) { // p[0]=2 was already tested
+		for (int pIndex = baseSize-1; pIndex > 0; pIndex--) { // p[0]=2 has already been tested
 			int p = pArray[pIndex];
 			int xModP;
 			if (xAbs<p) {
 				xModP = x<0 ? x+p : x;
 			} else {
-				// Compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
-				// We can use the long-variant here because x*m will never overflow positive long values.
 				final long m = pinvArrayL[pIndex];
 				final long q = ( ( ((long)x) * m) >>> 32); // first argument long optimizes register usage
 				xModP = (int) ( ((long)x) - q * p);
@@ -222,7 +224,6 @@ private AQPair test(BigInteger A, BigInteger Q, int x) {
 				pass2Primes[pass2Count] = primes[pIndex];
 				pass2Exponents[pass2Count] = exponents[pIndex];
 				pass2Powers[pass2Count++] = p;
-				// for some reasons I do not understand it is faster to divide Q by p in pass 2 only, not here
 			}
 		}
 		if (ANALYZE) pass1Duration += timer.capture();
diff --git a/src/main/java/de/tilman_neumann/jml/factor/siqs/tdiv/TDiv_QS_nLP.java b/src/main/java/de/tilman_neumann/jml/factor/siqs/tdiv/TDiv_QS_nLP.java
@@ -225,17 +225,17 @@ private AQPair test(BigInteger A, BigInteger QRest0, int x) {
 		BigInteger QRest = QRest0; // keep initial QRest0 for logging below
 
 		// Pass 1: Test solution arrays.
-		// If |x| < p, then no modulus computation is required.
-		// Otherwise we compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
+		// The performance bottleneck here is the modulus computation.
+		// The current approach is already quite fast for large N, because then we have pMax > 3*sieveArraySize,
+		// which means that for ~75% of x-values we can completely omit the mod-computation or replace it by a simple addition.
+		// For (big |x|, small p) we compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
 		// We can use the long-variant here because x*m will never overflow positive long values.
-		// For some reasons I do not understand, it is faster to divide Q by p in pass 2 only, not here.
-		// IMPORTANT: Java gives x % p = x for |x| < p, and we have many p bigger than any sieve array entry.
-		// IMPORTANT: Not computing the modulus in these cases improves performance by almost factor 2!
+		// For reasons I do not yet understand, it is faster to divide Q by p in pass 2 only, rather than here in pass 1.
 		int pass2Count = 0;
 		int pIndex = baseSize-1;
 		if (x < 0) {
 			for ( ; pIndex >= p1Index; pIndex--) {
-				// for pIndex >= p1Index, we know that |x| < sieveArraySize < p
+				// for pIndex >= p1Index, we know that |x| < p
 				int xModP = x+pArray[pIndex];
 				if (xModP==x1Array[pIndex] || xModP==x2Array[pIndex]) {
 					pass2Primes[pass2Count] = primes[pIndex];
@@ -274,7 +274,7 @@ private AQPair test(BigInteger A, BigInteger QRest0, int x) {
 		} else {
 			// x >= 0
 			for ( ; pIndex >= p1Index; pIndex--) {
-				// for pIndex > p1Index, we know that |x| < sieveArraySize < p
+				// for pIndex >= p1Index, we know that |x| < p
 				if (x==x1Array[pIndex] || x==x2Array[pIndex]) {
 					pass2Primes[pass2Count] = primes[pIndex];
 					pass2Exponents[pass2Count] = exponents[pIndex];
diff --git a/src/main/java/de/tilman_neumann/jml/factor/siqs/tdiv/TDiv_QS_nLP_Full.java b/src/main/java/de/tilman_neumann/jml/factor/siqs/tdiv/TDiv_QS_nLP_Full.java
@@ -218,17 +218,19 @@ private AQPair test(BigInteger A, BigInteger Q, int x) {
 		}
 		
 		// Pass 1: Test solution arrays.
-		// IMPORTANT: Java gives x % p = x for |x| < p, and we have many p bigger than any sieve array entry.
-		// IMPORTANT: Not computing the modulus in these cases improves performance by almost factor 2!
+		// The performance bottleneck here is the modulus computation.
+		// The current approach is already quite fast for large N, because then we have pMax > 3*sieveArraySize,
+		// which means that for ~75% of x-values we can completely omit the mod-computation or replace it by a simple addition.
+		// For (big |x|, small p) we compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
+		// We can use the long-variant here because x*m will never overflow positive long values.
+		// For reasons I do not yet understand, it is faster to divide Q by p in pass 2 only, rather than here in pass 1.
 		final int xAbs = x<0 ? -x : x;
-		for (int pIndex = baseSize-1; pIndex > 0; pIndex--) { // p[0]=2 was already tested
+		for (int pIndex = baseSize-1; pIndex > 0; pIndex--) { // p[0]=2 has already been tested
 			int p = pArray[pIndex];
 			int xModP;
 			if (xAbs<p) {
 				xModP = x<0 ? x+p : x;
 			} else {
-				// Compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
-				// We can use the long-variant here because x*m will never overflow positive long values.
 				final long m = pinvArrayL[pIndex];
 				final long q = ( ( ((long)x) * m) >>> 32); // first argument long optimizes register usage
 				xModP = (int) ( ((long)x) - q * p);
@@ -248,7 +250,6 @@ private AQPair test(BigInteger A, BigInteger Q, int x) {
 				pass2Primes[pass2Count] = primes[pIndex];
 				pass2Exponents[pass2Count] = exponents[pIndex];
 				pass2Powers[pass2Count++] = p;
-				// for some reasons I do not understand it is faster to divide Q by p in pass 2 only, not here
 			}
 		}
 		if (ANALYZE) pass1Duration += timer.capture();