Skip to content

Commit 02584ff

Browse files
scan plus fts automation
Change-Id: I2eb5608b8334601ad3fd5b9cccbabd6680192c93 Reviewed-on: https://review.couchbase.org/c/testrunner/+/243922 Reviewed-by: Balakumaran G <balakumaran.gopal@couchbase.com> Tested-by: Couchbase QE
1 parent db1a9fa commit 02584ff

5 files changed

Lines changed: 1707 additions & 8 deletions

File tree

conf/fts/py-fts-scan-plus.conf

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
fts.stable_topology_fts.StableTopFTS:
2+
3+
# --- Functional tests ---
4+
# Exact consistency: CRUD is paused before each query, and the result must
5+
# match the HashMap exactly (count + val).
6+
7+
# P0 smoke: 2-node, default params
8+
test_scan_plus_functional,cluster=F,F,initial_docs=5,batch_duration=60,num_batches=3,num_crud_threads=4,GROUP=P0
9+
10+
# More seed docs, more batches, higher concurrency
11+
test_scan_plus_functional,cluster=F,F,initial_docs=100,batch_duration=60,num_batches=5,num_crud_threads=8,GROUP=P0
12+
13+
# 4-partition index: KV-verified check for scan_plus partition fan-out bug
14+
test_scan_plus_functional,cluster=F,F,initial_docs=100,batch_duration=60,num_batches=5,num_crud_threads=8,index_partitions=4,GROUP=P0
15+
16+
# --- Manager options variants ---
17+
# use_bucket_seqnos=true: whole-bucket seqno path (avoids per-collection contention, different code path)
18+
test_scan_plus_functional,cluster=F,F,initial_docs=100,batch_duration=60,num_batches=5,num_crud_threads=8,use_bucket_seqnos=true,GROUP=P0
19+
20+
# num_workers=1 (extreme low): serialised seqno retrieval; stresses multi-partition fan-out ordering
21+
test_scan_plus_functional,cluster=F,F,initial_docs=100,batch_duration=60,num_batches=5,num_crud_threads=8,index_partitions=4,num_workers=1,GROUP=P0
22+
23+
# num_workers=100 (extreme high): max parallelism; may surface race conditions in seqno ack path
24+
test_scan_plus_functional,cluster=F,F,initial_docs=100,batch_duration=60,num_batches=5,num_crud_threads=8,num_workers=100,GROUP=P0
25+
26+
# getseqnos_retries=1 (extreme low): one attempt only; surfaces any timing/fail-fast issues
27+
test_scan_plus_functional,cluster=F,F,initial_docs=100,batch_duration=60,num_batches=5,num_crud_threads=8,getseqnos_retries=1,GROUP=P0
28+
29+
# getseqnos_retries=100 (extreme high): very patient seqno retrieval; baseline sanity with high retries
30+
test_scan_plus_functional,cluster=F,F,initial_docs=100,batch_duration=60,num_batches=5,num_crud_threads=8,getseqnos_retries=100,GROUP=P0
31+
32+
# Combined: bucket seqnos + high workers (both non-defaults together)
33+
test_scan_plus_functional,cluster=F,F,initial_docs=100,batch_duration=60,num_batches=5,num_crud_threads=8,use_bucket_seqnos=true,num_workers=100,GROUP=P0
34+
35+
# --- Stress / performance tests ---
36+
# Continuous CRUD + random queries; validates result.val >= snapshot.val
37+
# and flags deleted docs that still appear in results.
38+
39+
# 2-node, 10-minute run
40+
test_scan_plus_stress,cluster=F,F,initial_docs=50,duration=600,num_crud_threads=4,GROUP=P1
41+
42+
# 2-node, 10-minute run, higher CRUD concurrency (cluster=F,F; use F,F,F for a 3-node run)
43+
test_scan_plus_stress,cluster=F,F,initial_docs=50,duration=600,num_crud_threads=8,GROUP=P1
44+
45+
# 2-partition index: scan_plus must gather seqno from both partitions
46+
test_scan_plus_stress,cluster=F,F,initial_docs=50,duration=600,num_crud_threads=8,index_partitions=2,GROUP=P1
47+
48+
# 4-partition index: higher fan-out, more seqno coordination points
49+
test_scan_plus_stress,cluster=F,F,initial_docs=50,duration=600,num_crud_threads=8,index_partitions=4,GROUP=P1
50+
51+
# 1 index replica: scan_plus querying a replica shard must also be consistent
52+
test_scan_plus_stress,cluster=F,F,F,initial_docs=50,duration=600,num_crud_threads=8,num_replicas=1,GROUP=P1
53+
54+
# Extended 30-minute soak, larger seed set
55+
test_scan_plus_stress,cluster=F,F,initial_docs=200,duration=1800,num_crud_threads=8,GROUP=P1
56+
57+
# --- Stress manager options variants ---
58+
# num_workers=1: serialised seqno retrieval under continuous load
59+
test_scan_plus_stress,cluster=F,F,initial_docs=50,duration=600,num_crud_threads=8,num_workers=1,GROUP=P1
60+
61+
# num_workers=100: max parallelism under continuous load
62+
test_scan_plus_stress,cluster=F,F,initial_docs=50,duration=600,num_crud_threads=8,num_workers=100,GROUP=P1
63+
64+
# use_bucket_seqnos=true: bucket-level seqno path under continuous load
65+
test_scan_plus_stress,cluster=F,F,initial_docs=50,duration=600,num_crud_threads=8,use_bucket_seqnos=true,GROUP=P1
66+
67+
# getseqnos_retries=1: aggressive fail-fast under continuous CRUD pressure
68+
test_scan_plus_stress,cluster=F,F,initial_docs=50,duration=600,num_crud_threads=8,getseqnos_retries=1,GROUP=P1
69+
70+
# --- Multi-collection tests ---
71+
# One FTS index spans 3 collections in a custom scope.
72+
# CRUD runs concurrently against all 3 collections; scan_plus queries
73+
# must return a globally consistent view across collection boundaries.
74+
75+
# Baseline: 3 collections, default partition
76+
test_scan_plus_multi_collection,cluster=F,F,initial_docs=30,duration=600,num_crud_threads=6,GROUP=P1
77+
78+
# 3 collections + 4 partitions: exercises per-partition seqno tracking across collections
79+
test_scan_plus_multi_collection,cluster=F,F,initial_docs=50,duration=600,num_crud_threads=9,index_partitions=4,GROUP=P1
80+
81+
# 3 collections + 1 replica: consistency when querying replica shards in a multi-collection index
82+
test_scan_plus_multi_collection,cluster=F,F,F,initial_docs=30,duration=600,num_crud_threads=6,num_replicas=1,GROUP=P1
83+
84+
# --- Vector tests ---
85+
# KNN scan_plus: index has val + vec (1536-dim); CRUD only changes val.
86+
# K=1000 always covers the entire live corpus, so validation is identical
87+
# to the functional test — exact bidirectional val-consistency check.
88+
test_scan_plus_vector,cluster=F,F,initial_docs=10,batch_duration=60,num_batches=3,num_crud_threads=4,vec_dims=1536,GROUP=P1
89+
90+
fts.moving_topology_fts.MovingTopFTS:
91+
92+
# --- Rebalance / failover tests ---
93+
# Background CRUD + scan_plus queries fire continuously during each topology change.
94+
# Violations (stale/missing hits) during the disruption are logged as warnings.
95+
# The test fails only if the final exact-consistency check fails after recovery.
96+
# All tests require D,D,F,F initial topology (2 KV + 2 FTS nodes);
97+
# rebalance-in cases additionally need spare nodes available in the .ini file.
98+
99+
# RF-1: scan_plus during KV node rebalance out + in
100+
test_scan_plus_kv_rebalance,cluster=D,D,F,F,GROUP=moving
101+
102+
# RF-2: scan_plus during FTS node rebalance out + in
103+
test_scan_plus_fts_rebalance,cluster=D,D,F,F,GROUP=moving
104+
105+
# RF-3: scan_plus during graceful KV failover + full recovery
106+
test_scan_plus_kv_graceful_failover,cluster=D,D,F,F,GROUP=moving
107+
108+
# RF-4: scan_plus during hard KV failover (memcached kill) + full recovery
109+
test_scan_plus_kv_hard_failover,cluster=D,D,F,F,GROUP=moving
110+
111+
# RF-5: scan_plus during FTS node hard failover + full recovery
112+
test_scan_plus_fts_failover,cluster=D,D,F,F,GROUP=moving
113+
114+
# RF-6: scan_plus across sequential KV failover then FTS failover
115+
test_scan_plus_sequential_kv_fts_failover,cluster=D,D,F,F,GROUP=moving
116+
117+
# RF-7: scan_plus during 2-node swap rebalance (needs 2 spare FTS nodes in .ini)
118+
test_scan_plus_swap_rebalance,cluster=D,D,F,F,GROUP=moving
119+
120+
# RF-8: scan_plus after KV failover with delta recovery
121+
test_scan_plus_delta_recovery,cluster=D,D,F,F,GROUP=moving
122+
123+
# --- Manager options variants (run against a representative subset) ---
124+
# num_workers=1: serialised seqno path during KV rebalance
125+
test_scan_plus_kv_rebalance,cluster=D,D,F,F,num_workers=1,GROUP=moving
126+
127+
# num_workers=100: max parallelism during KV rebalance
128+
test_scan_plus_kv_rebalance,cluster=D,D,F,F,num_workers=100,GROUP=moving
129+
130+
# use_bucket_seqnos=true during FTS failover
131+
test_scan_plus_fts_failover,cluster=D,D,F,F,use_bucket_seqnos=true,GROUP=moving
132+
133+
# num_workers=1 during FTS failover: worst-case seqno serialisation under disruption
134+
test_scan_plus_fts_failover,cluster=D,D,F,F,num_workers=1,GROUP=moving
135+
136+
fts.upgrade_fts.UpgradeFTS:
137+
138+
# --- Online upgrade test ---
139+
# Validates scan_plus behavior at each upgrade stage:
140+
# Stage 0 (all pre-8.1) : query must fail
141+
# Stage 2 (mixed) : V-2 upgraded coordinator succeeds,
142+
# V-3 old coordinator fails with version error
143+
# Stage 4 (all 8.1) : exact consistency validation (V-1)
144+
# Requires >= 2 FTS nodes; upgrade_to must be supplied at runtime.
145+
146+
test_scan_plus_online_upgrade,cluster=F,F,F,nodes_init=3,GROUP=upgrade

0 commit comments

Comments
 (0)