Skip to content

Commit 12534dd

Browse files
authored
ci: Add state sync node initialization to interactive installer [DEV-5270] (#928)
* ci: Add state sync node initialization to interactive installer * Update installer test prompts * Fix installer tests EOF errors * Add debugging steps to see why connections to RPCs are failing * Fix failing tests * Fix * Handle endpoints edge-case scenario
1 parent 2089162 commit 12534dd

File tree

6 files changed

+183
-4
lines changed

6 files changed

+183
-4
lines changed

installer/installer.py

Lines changed: 176 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,11 @@
3737
# Set branch dynamically in CI workflow for testing if Python dev mode is enabled and DEFAULT_DEBUG_BRANCH is set
3838
# Otherwise, use the main branch
3939
# Resolved once at import time; falls back to "main" when DEFAULT_DEBUG_BRANCH is unset
DEFAULT_DEBUG_BRANCH = os.getenv("DEFAULT_DEBUG_BRANCH", "main")

# RPC endpoints (EU and AP regions) for mainnet and testnet
MAINNET_RPC_ENDPOINT_EU = "https://eu-rpc.cheqd.net:443"
MAINNET_RPC_ENDPOINT_AP = "https://ap-rpc.cheqd.net:443"
TESTNET_RPC_ENDPOINT_EU = "https://eu-rpc.cheqd.network:443"
TESTNET_RPC_ENDPOINT_AP = "https://ap-rpc.cheqd.network:443"
4145

4246
###############################################################
4347
### Cosmovisor configuration ###
@@ -490,6 +494,15 @@ def install(self) -> bool:
490494
logging.error("Failed to configure cheqd-noded settings")
491495
return False
492496

497+
# Configure state sync only for fresh installs
498+
if self.interviewer.is_from_scratch and getattr(self.interviewer, 'use_statesync', False):
499+
logging.info("Configuring state sync (default)")
500+
if not self.configure_statesync():
501+
logging.error("Failed to configure state sync")
502+
return False
503+
# Ensure snapshot is not attempted
504+
self.interviewer.init_from_snapshot = False
505+
493506
# Configure systemd service for cheqd-noded
494507
# Sets up either a standalone service or a Cosmovisor service
495508
# ONLY enables it without activating it
@@ -1197,6 +1210,137 @@ def configure_node_settings(self) -> bool:
11971210
logging.exception(f"Failed to configure cheqd-noded settings. Reason: {e}")
11981211
return False
11991212

1213+
def _select_working_rpc_endpoint(self, chain: str) -> str:
    """Return the first RPC endpoint for ``chain`` whose ``/status`` route answers HTTP 200.

    ``chain == "testnet"`` probes the testnet endpoints; any other value
    probes the mainnet endpoints. Endpoints are tried in order (EU, then AP)
    with a 10 second timeout each.

    Returns:
        The base URL of the first responsive endpoint, or an empty string
        when none responds. Probe failures are logged, never raised.
    """
    if chain == "testnet":
        endpoints = [TESTNET_RPC_ENDPOINT_EU, TESTNET_RPC_ENDPOINT_AP]
    else:
        endpoints = [MAINNET_RPC_ENDPOINT_EU, MAINNET_RPC_ENDPOINT_AP]

    for endpoint in endpoints:
        try:
            req = request.Request(f"{endpoint}/status")
            with request.urlopen(req, timeout=10) as resp:
                status_code = resp.getcode()
                if status_code == 200:
                    return endpoint
                logging.warning(f"RPC endpoint {endpoint} returned HTTP status {status_code}")
        except Exception as e:
            # Best-effort probe: record why this endpoint was rejected, then try the next one
            logging.warning(f"RPC endpoint {endpoint} is not reachable. Reason: {e}")

    logging.error(f"Could not select a working RPC endpoint for chain '{chain}'")
    return ""
1232+
1233+
def _get_latest_block_height(self, rpc_endpoint: str) -> int:
1234+
try:
1235+
req = request.Request(f"{rpc_endpoint}/status")
1236+
with request.urlopen(req, timeout=10) as resp:
1237+
status = json.loads(resp.read().decode("utf-8").strip())
1238+
# Tendermint /status -> result.sync_info.latest_block_height
1239+
latest_height = int(status["result"]["sync_info"]["latest_block_height"])
1240+
return latest_height
1241+
except Exception as e:
1242+
logging.exception(f"Failed to fetch latest block height from {rpc_endpoint}. Reason: {e}")
1243+
raise
1244+
1245+
def _get_block_hash_at_height(self, rpc_endpoint: str, height: int) -> str:
1246+
try:
1247+
req = request.Request(f"{rpc_endpoint}/block?height={height}")
1248+
with request.urlopen(req, timeout=10) as resp:
1249+
block = json.loads(resp.read().decode("utf-8").strip())
1250+
# Tendermint /block -> result.block_id.hash
1251+
return block["result"]["block_id"]["hash"]
1252+
except Exception as e:
1253+
logging.exception(f"Failed to fetch block hash at height {height} from {rpc_endpoint}. Reason: {e}")
1254+
raise
1255+
1256+
def _is_endpoint_healthy(self, endpoint: str) -> bool:
1257+
try:
1258+
req = request.Request(f"{endpoint}/status")
1259+
with request.urlopen(req, timeout=10) as resp:
1260+
return resp.getcode() == 200
1261+
except Exception:
1262+
return False
1263+
1264+
def configure_statesync(self) -> bool:
    """Enable state sync in config.toml using the selected network's RPC endpoints.

    Steps:
      1. Probe the EU/AP RPC endpoints of the chosen network and keep the
         healthy ones.
      2. Take the first healthy endpoint, derive a trust height 2000 blocks
         behind its latest height (minimum 1) and fetch that block's hash.
      3. Set ``enable``, ``rpc_servers``, ``trust_height`` and ``trust_hash``
         inside the ``[statesync]`` table only, in a single write.

    Returns:
        True on success. False when no endpoint is healthy, the
        ``[statesync]`` table is missing, or any step raises (the exception
        is logged).
    """
    try:
        config_toml_path = os.path.join(self.cheqd_config_dir, "config.toml")

        # Determine RPC servers for the chosen network
        if self.interviewer.chain == "testnet":
            candidates = [TESTNET_RPC_ENDPOINT_EU, TESTNET_RPC_ENDPOINT_AP]
        else:
            candidates = [MAINNET_RPC_ENDPOINT_EU, MAINNET_RPC_ENDPOINT_AP]

        healthy = [ep for ep in candidates if self._is_endpoint_healthy(ep)]
        if not healthy:
            logging.error("No working RPC endpoint found for statesync configuration")
            return False

        # rpc_servers always gets two entries; a lone healthy endpoint is listed twice
        working_rpc = healthy[0]
        secondary_rpc = healthy[1] if len(healthy) > 1 else healthy[0]
        rpc_servers = f"{working_rpc},{secondary_rpc}"

        latest_height = self._get_latest_block_height(working_rpc)
        trust_height = max(latest_height - 2000, 1)
        trust_hash = self._get_block_hash_at_height(working_rpc, trust_height)

        with open(config_toml_path, "r") as f:
            lines = f.readlines()

        # Locate the [statesync] table header
        start = next((i for i, line in enumerate(lines) if line.strip() == "[statesync]"), -1)
        if start == -1:
            logging.error("[statesync] section not found in config.toml")
            return False

        # The table ends at the next table header, or at end of file
        end = next(
            (j for j in range(start + 1, len(lines)) if lines[j].lstrip().startswith("[")),
            len(lines),
        )

        block = lines[start:end]

        def upsert(key: str, rendered_value: str) -> None:
            # Replace the first `key = ...` line of the table, or add one right after the header.
            # Matching on the key alone (not on a default placeholder value) keeps re-runs working.
            new_line = f"{key} = {rendered_value}\n"
            for idx, existing in enumerate(block):
                name, sep, _ = existing.partition("=")
                if sep and name.strip() == key:
                    block[idx] = new_line
                    return
            block.insert(1, new_line)

        upsert("enable", "true")
        upsert("rpc_servers", f'"{rpc_servers}"')
        upsert("trust_height", str(trust_height))
        upsert("trust_hash", f'"{trust_hash}"')

        # Write back once, leaving everything outside [statesync] untouched
        with open(config_toml_path, "w") as f:
            f.writelines(lines[:start] + block + lines[end:])

        logging.info("Configured state sync settings in config.toml")
        return True
    except Exception as e:
        logging.exception(f"Failed to configure state sync. Reason: {e}")
        return False
1343+
12001344
def setup_node_systemd(self) -> bool:
12011345
# Setup cheqd-noded related systemd services
12021346
# If user selected Cosmovisor install, then cheqd-cosmovisor.service will be setup
@@ -1702,6 +1846,7 @@ def __init__(self, home_dir=DEFAULT_CHEQD_HOME_DIR):
17021846
self._is_cosmovisor_installed = False
17031847
self._systemd_service_file = ""
17041848
self._init_from_snapshot = False
1849+
self._use_statesync = True
17051850
self._release = None
17061851
self._chain = ""
17071852
self._is_configuration_needed = False
@@ -1780,6 +1925,10 @@ def is_cosmovisor_installed(self) -> bool:
17801925
def init_from_snapshot(self) -> bool:
17811926
return self._init_from_snapshot
17821927

1928+
@property
def use_statesync(self) -> bool:
    """Return the stored state sync preference (``_use_statesync``)."""
    return self._use_statesync
1931+
17831932
@property
17841933
def chain(self) -> str:
17851934
return self._chain
@@ -1881,6 +2030,10 @@ def init_from_snapshot(self, ifs):
18812030
def chain(self, chain):
18822031
self._chain = chain
18832032

2033+
@use_statesync.setter
def use_statesync(self, value: bool):
    """Store the state sync preference in ``_use_statesync``."""
    self._use_statesync = value
2036+
18842037
@is_configuration_needed.setter
18852038
def is_configuration_needed(self, is_configuration_needed):
18862039
self._is_configuration_needed = is_configuration_needed
@@ -2119,6 +2272,23 @@ def ask_for_cosmovisor(self):
21192272
except Exception as e:
21202273
logging.exception(f"Failed to set whether installation should be done with Cosmovisor. Reason: {e}")
21212274

2275+
# Ask whether to initialize via state sync (default yes). If declined, snapshot remains available.
2276+
def ask_for_statesync(self):
    """Ask whether to initialise the chain via state sync (default: yes).

    A "yes" answer sets ``use_statesync`` to True and clears
    ``init_from_snapshot``. A "no" answer sets ``use_statesync`` to False and
    leaves ``init_from_snapshot`` untouched. Any other answer is rejected and
    the question is asked again.

    Exceptions raised while asking are logged and not propagated.
    """
    try:
        logging.info("State sync rapidly bootstraps a node without downloading state DB snapshot and uses less storage. You can still choose snapshot (slower, much larger storage, but contains more historic data and blocks) if you decline state sync.\n")
        # Re-prompt in a loop rather than recursing, so repeated invalid input cannot grow the stack
        while True:
            answer = self.ask(
                "Initialize chain via State Sync? (yes/no)", default="yes")
            # Tolerate surrounding whitespace and any letter case
            choice = answer.strip().lower()
            if choice.startswith("y"):
                self.use_statesync = True
                self.init_from_snapshot = False
                return
            if choice.startswith("n"):
                self.use_statesync = False
                return
            logging.error("Invalid input provided. Please choose either 'yes' or 'no'.\n")
    except Exception as e:
        logging.exception(f"Failed to set state sync preference. Reason: {e}")
2291+
21222292
# Ask user whether to bump Cosmovisor to latest version
21232293
def ask_for_cosmovisor_bump(self):
21242294
try:
@@ -2451,7 +2621,11 @@ def install_steps():
24512621
interviewer.ask_for_log_level()
24522622
interviewer.ask_for_log_format()
24532623

2454-
interviewer.ask_for_init_from_snapshot()
2624+
# Prefer state sync by default; if declined, offer snapshot option
2625+
interviewer.ask_for_statesync()
2626+
if interviewer.use_statesync is False:
2627+
logging.info("You chose not to use state sync. Snapshot restore is slower and requires substantially more disk space.")
2628+
interviewer.ask_for_init_from_snapshot()
24552629

24562630
except Exception as e:
24572631
logging.exception(f"Unable to complete user interview process for installation. Reason for exiting: {e}")

installer/tests/install-from-scratch-cosmovisor.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@ nodeid1@1.1.1.1:26656,nodeid2@8.8.8.8:26656
1515
debug
1616
plain
1717
no
18+
no

installer/tests/install-from-scratch-standalone.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ nodeid1@1.1.1.1:26656,nodeid2@8.8.8.8:26656
1010
100ncheq
1111
debug
1212
plain
13+
no
1314
no

installer/tests/upgrade-existing-cosmovisor.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ no
77
yes
88
yes
99
yes
10+
yes

installer/tests/upgrade-existing-standalone.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,5 @@
44
no
55
yes
66
yes
7+
yes
8+
2

installer/tests/upgrade-fresh-install.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,5 @@ nodeid1@1.1.1.1:26656,nodeid2@8.8.8.8:26656
1717
debug
1818
plain
1919
no
20-
yes
21-
2
20+
no
21+
no

0 commit comments

Comments
 (0)