Skip to content

Commit 78fea34

Browse files
committed
Omit corners from solid; add experimental AVX512 get_full_lines
1 parent 31297bf commit 78fea34

4 files changed

Lines changed: 35 additions & 14 deletions

File tree

src/board.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* some board properties. Most of the functions are optimized to be as fast as
1212
* possible, while remaining readable.
1313
*
14-
* @date 1998 - 2025
14+
* @date 1998 - 2026
1515
* @author Richard Delorme
1616
* @author Toshihiko Okuhara
1717
* @version 4.5
@@ -824,9 +824,9 @@ void edge_stability_init(void)
824824
unsigned long long get_stable_edge(const unsigned long long P, const unsigned long long O)
825825
{ // compute the exact stable edges (from precomputed tables)
826826
return edge_stability[((unsigned int) P & 0xff) * 256 + ((unsigned int) O & 0xff)]
827-
| (unsigned long long) edge_stability[(unsigned int) (P >> 56) * 256 + (unsigned int) (O >> 56)] << 56
828-
| unpackA2A7(edge_stability[packA1A8(P) * 256 + packA1A8(O)])
829-
| unpackH2H7(edge_stability[packH1H8(P) * 256 + packH1H8(O)]);
827+
| (unsigned long long) edge_stability[(unsigned int) (P >> 56) * 256 + (unsigned int) (O >> 56)] << 56
828+
| unpackA2A7(edge_stability[packA1A8(P) * 256 + packA1A8(O)])
829+
| unpackH2H7(edge_stability[packH1H8(P) * 256 + packH1H8(O)]);
830830
}
831831

832832
/**

src/board_sse.c

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
*
44
* SSE/AVX translation of some board.c functions
55
*
6-
* @date 2014 - 2025
6+
* @date 2014 - 2026
77
* @author Toshihiko Okuhara
88
* @version 4.5
99
*/
@@ -752,38 +752,58 @@ int get_opp_edge_stability(const Board *board)
752752
*/
753753
#ifdef __AVX2__
754754

755-
// returns v4_full in __m256i, reducted in caller
755+
// returns v4_full in __m256i, reduced in caller
756756
static __m256i vectorcall get_full_lines_avx(__m128i disc)
757757
{
758758
__m128i l1, l79, l8;
759759
__m256i v4_disc, lr79;
760760

761-
#if 0 && !defined(USE_SOLID) // PCMPEQQ, not suitable for USE_SOLID since diag < 3 is omitted
761+
#if 0 // PCMPEQQ
762762
static const V4DI m791 = {{ 0x0402010000804020, 0x2040800000010204, 0x0804020180402010, 0x1020408001020408 }}; // V8SI
763763
static const V4DI m792 = {{ 0x0000008040201008, 0x0000000102040810, 0x1008040201000000, 0x0810204080000000 }};
764764
static const V4DI m793 = {{ 0x0000804020100804, 0x0000010204081020, 0x2010080402010000, 0x0408102040800000 }};
765765
static const V4DI m794 = {{ 0x0080402010080402, 0x0001020408102040, 0x4020100804020100, 0x0204081020408000 }};
766766
static const V2DI m795 = {{ 0x8040201008040201, 0x0102040810204080 }};
767767

768+
#ifdef __AVX512VL__
769+
v4_disc = _mm256_broadcastq_epi64(disc); __m256i v4_empty = _mm256_ternarylogic_epi64(v4_disc, v4_disc, v4_disc, 0x55); // bit not
770+
l8 = _mm256_castsi256_si128(v4_disc); lr79 = _mm256_maskz_mov_epi32(_mm256_testn_epi32_mask(v4_empty, m791.v4), m791.v4);
771+
l8 = _mm_and_si128(l8, _mm_srli_si128(l8, 1)); lr79 = _mm256_mask_or_epi64(lr79, _mm256_testn_epi64_mask(v4_empty, m792.v4), lr79, m792.v4);
772+
l8 = _mm_and_si128(l8, _mm_shufflelo_epi16(l8, 0x39)); lr79 = _mm256_mask_or_epi64(lr79, _mm256_testn_epi64_mask(v4_empty, m793.v4), lr79, m793.v4);
773+
l8 = _mm_and_si128(l8, _mm_shufflelo_epi16(l8, 0x4e)); lr79 = _mm256_mask_or_epi64(lr79, _mm256_testn_epi64_mask(v4_empty, m794.v4), lr79, m794.v4);
774+
l79 = _mm_or_si128(_mm256_extracti128_si256(lr79, 1), _mm256_castsi256_si128(lr79));
775+
l1 = _mm_cmpeq_epi8(_mm256_castsi256_si128(v4_empty), _mm_setzero_si128());
776+
l79 = _mm_mask_or_epi64(l79, _mm_testn_epi64_mask(_mm256_castsi256_si128(v4_empty), m795.v2), l79, m795.v2);
777+
778+
#else
768779
v4_disc = _mm256_broadcastq_epi64(disc); lr79 = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(v4_disc, m791.v4), m791.v4), m791.v4);
769780
l8 = _mm256_castsi256_si128(v4_disc); lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m792.v4), m792.v4), m792.v4));
770781
l1 = _mm_cmpeq_epi8(l8, _mm_set1_epi8(-1)); lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m793.v4), m793.v4), m793.v4));
771782
l8 = _mm_and_si128(l8, _mm_srli_si128(l8, 1)); lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m794.v4), m794.v4), m794.v4));
772783
l8 = _mm_and_si128(l8, _mm_shufflelo_epi16(l8, 0x39)); l79 = _mm_and_si128(_mm_cmpeq_epi64(_mm_and_si128(_mm256_castsi256_si128(v4_disc), m795.v2), m795.v2), m795.v2);
773784
l8 = _mm_and_si128(l8, _mm_shufflelo_epi16(l8, 0x4e)); l79 = _mm_or_si128(l79, _mm_or_si128(_mm256_extracti128_si256(lr79, 1), _mm256_castsi256_si128(lr79)));
785+
#endif
774786

775-
#elif 0 && !defined(USE_SOLID) // PCMPEQD
787+
#elif 0 // PCMPEQD
776788
__m256i lm79;
777789
static const V4DI m790 = {{ 0x80c0e0f0783c1e0f, 0x0103070f1e3c78f0, 0x70381c0e07030100, 0x0e1c3870e0c08000 }};
790+
// ++++**** ****++++ ....+++. .+++....
791+
// +++****. .****+++ ...+++.* *.+++...
792+
// ++****.. ..****++ ..+++.** **.+++..
793+
// +****... ...****+ .+++.*** ***.+++.
794+
// ****.... ....**** ....***. .***....
795+
// ***..... .....*** ...***.. ..***...
796+
// **...... ......** ..***... ...***..
797+
// *....... .......* .***.... ....***.
778798
static const V4DI m791 = {{ 0x0402010000804020, 0x2040800000010204, 0x0804020180402010, 0x1020408001020408 }}; // V8SI
779799
static const V4DI m792 = {{ 0x2010884440201088, 0x0408112202040811, 0x2211080411080402, 0x4488102088102040 }}; // V8SI
780800
static const V4DI m793 = {{ 0x8844221110884422, 0x1122448808112244, 0x0000000044221108, 0x0000000022448810 }}; // V8SI
781801

782802
v4_disc = _mm256_broadcastq_epi64(disc); lm79 = _mm256_and_si256(v4_disc, m790.v4);
783-
l8 = _mm256_castsi256_si128(v4_disc); lm79 = _mm256_or_si256(lm79, _mm256_shuffle_epi32(lm79, 0xb1));
803+
l8 = _mm256_castsi256_si128(v4_disc); lm79 = _mm256_or_si256(lm79, _mm256_shuffle_epi32(lm79, 0xb1)); // pack diag lines into V8SI
784804
l1 = _mm_cmpeq_epi8(l8, _mm_set1_epi8(-1)); lr79 = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(lm79, m792.v4), m792.v4), m792.v4);
785805
l8 = _mm_and_si128(l8, _mm_srli_si128(l8, 1)); lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(lm79, m793.v4), m793.v4), m793.v4));
786-
l8 = _mm_and_si128(l8, _mm_shufflelo_epi16(l8, 0x39)); lr79 = _mm256_and_si256(_mm256_or_si256(lr79, _mm256_shuffle_epi32(lr79, 0xb1)), m790.v4);
806+
l8 = _mm_and_si128(l8, _mm_shufflelo_epi16(l8, 0x39)); lr79 = _mm256_and_si256(_mm256_or_si256(lr79, _mm256_shuffle_epi32(lr79, 0xb1)), m790.v4); // unpack
787807
l8 = _mm_and_si128(l8, _mm_shufflelo_epi16(l8, 0x4e)); lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(v4_disc, m791.v4), m791.v4), m791.v4));
788808
l79 = _mm_or_si128(_mm256_extracti128_si256(lr79, 1), _mm256_castsi256_si128(lr79));
789809

src/endgame.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
*
44
* Search near the end of the game.
55
*
6-
* @date 1998 - 2025
6+
* @date 1998 - 2026
77
* @author Richard Delorme
88
* @author Toshihiko Okuhara
99
* @version 4.5
@@ -473,7 +473,8 @@ static int NWS_endgame_local(Search *search, const int alpha)
473473
hashboard.v2 = _mm_xor_si128(hashboard.v2, _mm_unpacklo_epi64(solid, solid));
474474
ofssolid = bit_count_si64(solid) * 2;
475475
#else
476-
unsigned long long solid = full[4] & hashboard.bb.player; // full[4] = all full
476+
// exclude corners from solid to absorb get_full_lines anomalies
477+
unsigned long long solid = full[4] & 0x3c7effffffff7e3c & hashboard.bb.player; // full[4] = all full
477478
if (solid) { // (72%)
478479
hashboard.bb.player ^= solid; // normalize solid to opponent
479480
hashboard.bb.opponent ^= solid;

src/midgame.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
*
44
* Search near the end of the game.
55
*
6-
* @date 1998 - 2025
6+
* @date 1998 - 2026
77
* @author Richard Delorme
88
* @author Toshihiko Okuhara
99
* @version 4.5
@@ -845,7 +845,7 @@ int PVS_midgame(Search *search, const int alpha, const int beta, int depth, Node
845845
if (search->eval.n_empties <= depth && depth <= DEPTH_TO_USE_LOCAL_HASH && depth > DEPTH_TO_SHALLOW_SEARCH) {
846846
vBoard hashboard;
847847
#ifdef USE_SOLID
848-
unsigned long long solid = get_all_full_lines(search->board.player | search->board.opponent) & search->board.player;
848+
unsigned long long solid = get_all_full_lines(search->board.player | search->board.opponent) & 0x3c7effffffff7e3c & search->board.player;
849849
if (solid) {
850850
int ofssolid = bit_count(solid) * 2; // hash score is ofssolid smaller than real
851851
hashboard.bb.player = search->board.player ^ solid; // normalize solid to opponent

0 commit comments

Comments
 (0)