Skip to content

Commit 67e1172

Browse files
author
Sidhant Thole
committed
server issue trial run
1 parent 29348a6 commit 67e1172

File tree

1 file changed

+73
-36
lines changed

1 file changed

+73
-36
lines changed

utils/utils.py

Lines changed: 73 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
import smtplib
1414
import logging
1515
import subprocess
16+
import shutil
17+
from pathlib import Path
1618

1719
# Third-party imports
1820
import requests
@@ -125,49 +127,84 @@ def load_model(model_name,
125127
if _embed_mode == 'infinity_emb':
126128
infinity_api_url = "http://0.0.0.0:7997"
127129
# Check if the Infinity API server is running
130+
def _start_infinity_with_poll(model_name, infinity_api_url, max_wait=120):
131+
"""
132+
Try to start the infinity_emb binary and poll the health endpoint until ready.
133+
Writes stdout/stderr to a logfile and returns the logfile path on success.
134+
Raises RuntimeError on failure with path to logfile for debugging.
135+
"""
136+
# Prepare logfile
137+
artifacts_dir = Path(os.path.join(os.path.dirname(__file__), "..", "artifacts")).resolve()
138+
artifacts_dir.mkdir(parents=True, exist_ok=True)
139+
logfile = artifacts_dir / "infinity_emb.log"
140+
141+
# Locate binary
142+
candidate = os.path.join(os.path.dirname(__file__), "..", "infinity_env", "bin", "infinity_emb")
143+
if not os.path.exists(candidate):
144+
# fallback to PATH
145+
candidate = shutil.which("infinity_emb")
146+
if not candidate:
147+
raise RuntimeError("infinity_emb binary not found (checked project path and PATH). Please install or provide the binary.")
148+
149+
# Start attempts with simple fallback sequence
150+
cmds = [
151+
[candidate, "v2", "--model-id", model_name],
152+
[candidate, "v2" , "--model-id", model_name],
153+
]
154+
155+
# Open logfile in append mode so repeated runs keep history
156+
with open(logfile, "ab") as lf:
157+
for attempt, cmd in enumerate(cmds, start=1):
158+
logger.info(f"Starting infinity_emb (attempt {attempt}) with command: {cmd}")
159+
try:
160+
proc = subprocess.Popen(
161+
cmd,
162+
stdout=lf,
163+
stderr=lf,
164+
)
165+
except Exception as e:
166+
logger.warning(f"Failed to spawn infinity_emb process: {e}")
167+
continue
168+
169+
# Poll health endpoint until ready or timeout
170+
start_t = time.time()
171+
while True:
172+
try:
173+
response = requests.get(f"{infinity_api_url}/health", timeout=3)
174+
if response.status_code == 200:
175+
logger.info("Infinity API is healthy and reachable.")
176+
return str(logfile)
177+
except Exception:
178+
# ignore connection errors while starting up
179+
pass
180+
181+
# If process died, break and try next attempt
182+
if proc.poll() is not None:
183+
logger.warning(f"infinity_emb process exited prematurely (returncode={proc.returncode}). See logfile: {logfile}")
184+
break
185+
186+
if time.time() - start_t > max_wait:
187+
logger.warning(f"Timeout waiting for Infinity API after {max_wait}s. Checking next attempt or failing. See logfile: {logfile}")
188+
break
189+
190+
time.sleep(2)
191+
192+
# All attempts failed
193+
raise RuntimeError(f"Infinity API failed to start or did not become healthy within allotted time. See logfile: {logfile}")
194+
128195
try:
129-
response = requests.get(f"{infinity_api_url}/health", timeout=2)
196+
# initial quick check
197+
response = requests.get(f"{infinity_api_url}/health", timeout=5)
130198
if response.status_code != 200:
131199
raise Exception("Infinity API health check failed")
132200
except Exception:
133-
logger.info("Infinity API not running. Attempting to start it...")
201+
logger.info("Infinity API not running or not healthy. Attempting to start it (this can take up to 2 minutes)...")
134202
try:
135-
try:
136-
proc_main = subprocess.Popen(
137-
[os.path.join(os.path.dirname(__file__), "..", "infinity_env", "bin", "infinity_emb"), "v2", "--model-id", model_name],
138-
stdout=subprocess.DEVNULL,
139-
stderr=subprocess.DEVNULL,
140-
)
141-
time.sleep(30)
142-
# Check if server started
143-
response = requests.get(f"{infinity_api_url}/health", timeout=10)
144-
if response.status_code != 200:
145-
raise Exception("Infinity API health check failed after main start attempt")
146-
except Exception as e:
147-
logger.warning(f"Initial infinity_emb start failed: {e}. Trying fallback...")
148-
try:
149-
proc = subprocess.Popen(
150-
[os.path.join(os.path.dirname(__file__), "..", "infinity_env", "bin", "infinity_emb")],
151-
stdout=subprocess.DEVNULL,
152-
stderr=subprocess.DEVNULL,
153-
)
154-
time.sleep(10)
155-
proc_fallback = subprocess.Popen(
156-
[os.path.join(os.path.dirname(__file__), "..", "infinity_env", "bin", "infinity_emb"), "v2", "--model-id", model_name],
157-
stdout=subprocess.DEVNULL,
158-
stderr=subprocess.DEVNULL,
159-
)
160-
time.sleep(30)
161-
# Check again if the Infinity API server is running after fallback
162-
response = requests.get(f"{infinity_api_url}/health", timeout=10)
163-
if response.status_code != 200:
164-
raise Exception("Infinity API health check failed after fallback start attempt")
165-
except Exception as e2:
166-
logger.error("Infinity API still not running after fallback start attempt.")
167-
raise RuntimeError("Infinity API failed to start or is not reachable at http://0.0.0.0:7997")
203+
logfile_path = _start_infinity_with_poll(model_name, infinity_api_url, max_wait=120)
204+
logger.info(f"Started Infinity API; logs: {logfile_path}")
168205
except Exception as e:
169206
logger.error(f"Failed to start Infinity API: {e}")
170-
raise RuntimeError(f"Failed to start Infinity API: {e}")
207+
raise RuntimeError(f"Failed to start Infinity API: {e}. Check the log at {str(Path(os.path.join(os.path.dirname(__file__), '..', 'artifacts', 'infinity_emb.log')).resolve())}")
171208
try:
172209
hf_embeddings = InfinityEmbeddings(
173210
model=model_name, infinity_api_url=infinity_api_url

0 commit comments

Comments
 (0)