Open
Description
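Jenkins job https://jenkins.scylladb.com/job/scylla-6.2/job/next/21/ failed in test_replace (test/topology_experimental_raft/test_tablets_removenode.py): a background write worker hit a cassandra.ReadTimeout on a CL=QUORUM read of test3.test2 while a node was being replaced. Full error output: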
=================================== FAILURES ===================================
________________________________ test_replace.2 ________________________________
manager = <test.pylib.manager_client.ManagerClient object at 0x7ff870c88080>
@pytest.mark.asyncio
async def test_replace(manager: ManagerClient):
logger.info("Bootstrapping cluster")
cmdline = [
'--logger-log-level', 'storage_service=trace',
'--logger-log-level', 'raft_topology=trace',
]
servers = await manager.servers_add(3, cmdline=cmdline)
cql = manager.get_cql()
await create_keyspace(cql, "test", 32, rf=1)
await cql.run_async("CREATE TABLE test.test (pk int PRIMARY KEY, c int);")
# We want RF=2 table to validate that quorum reads work after replacing node finishes
# bootstrap which indicates that bootstrap waits for rebuilt.
# Otherwise, some reads would fail to find a quorum.
await create_keyspace(cql, "test2", 32, rf=2)
await cql.run_async("CREATE TABLE test2.test (pk int PRIMARY KEY, c int);")
await create_keyspace(cql, "test3", 32, rf=3)
await cql.run_async("CREATE TABLE test3.test (pk int PRIMARY KEY, c int);")
await cql.run_async("CREATE TABLE test3.test2 (pk int PRIMARY KEY, c int);")
logger.info("Populating table")
keys = range(256)
await asyncio.gather(*[run_async_cl_all(cql, f"INSERT INTO test.test (pk, c) VALUES ({k}, {k});") for k in keys])
await asyncio.gather(*[run_async_cl_all(cql, f"INSERT INTO test2.test (pk, c) VALUES ({k}, {k});") for k in keys])
await asyncio.gather(*[run_async_cl_all(cql, f"INSERT INTO test3.test (pk, c) VALUES ({k}, {k});") for k in keys])
async def check_ks(ks):
logger.info(f"Checking {ks}")
query = SimpleStatement(f"SELECT * FROM {ks}.test;", consistency_level=ConsistencyLevel.QUORUM)
rows = await cql.run_async(query)
assert len(rows) == len(keys)
for r in rows:
assert r.c == r.pk
async def check():
# RF=1 keyspace will experience data loss so don't check it.
# We include it in the test only to check that the system doesn't crash.
await check_ks("test2")
await check_ks("test3")
await check()
# Disable migrations concurrent with replace since we don't handle nodes going down during migration yet.
# See https://github.com/scylladb/scylladb/issues/16527
await manager.api.disable_tablet_balancing(servers[0].ip_addr)
finish_writes = await start_writes(cql, "test3", "test2")
logger.info('Replacing a node')
await manager.server_stop(servers[0].server_id)
replace_cfg = ReplaceConfig(replaced_id = servers[0].server_id, reuse_ip_addr = False, use_host_id = True)
servers.append(await manager.server_add(replace_cfg))
servers = servers[1:]
> key_count = await finish_writes()
test/topology_experimental_raft/test_tablets_removenode.py:91:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
test/pylib/util.py:227: in finish
await asyncio.gather(*tasks)
test/pylib/util.py:213: in do_writes
raise e
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
worker_id = 1
async def do_writes(worker_id: int):
write_count = 0
while not stop_event.is_set():
pk = key_gen.next_pk()
# Once next_pk() is produced, key_gen.last_key() is assumed to be in the database
# hence we can't give up on it.
while True:
try:
await cql.run_async(stmt, [pk, pk])
# Check read-your-writes
> rows = await cql.run_async(rd_stmt, [pk])
E cassandra.ReadTimeout: Error from server: code=1200 [Coordinator node timed out waiting for replica nodes' responses] message="Operation timed out for test3.test2 - received only 1 responses from 2 CL=QUORUM." info={'consistency': 'QUORUM', 'required_responses': 2, 'received_responses': 1}
test/pylib/util.py:204: ReadTimeout
------------------------------ Captured log setup ------------------------------