Skip to content

Commit 9ed4bdc

Browse files
committed
Report engine crash reason in fishnet abort payload
1 parent 4af880e commit 9ed4bdc

File tree

1 file changed

+18
-5
lines changed

1 file changed

+18
-5
lines changed

fairyfishnet.py

Lines changed: 18 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -139,6 +139,7 @@
139139
LVL_SKILL = [-4, 0, 3, 6, 10, 14, 16, 18, 20]
140140
LVL_MOVETIMES = [50, 50, 100, 150, 200, 300, 400, 500, 1000]
141141
LVL_DEPTHS = [1, 1, 1, 2, 3, 5, 8, 13, 22]
142+
ABORT_REASON_ENGINE_CRASH = "engine_crash"
142143

143144
NNUE_NET = {}
144145

@@ -764,14 +765,23 @@ def run_inner(self):
764765

765766
# Do the next work unit
766767
path, request = self.work()
767-
except DEAD_ENGINE_ERRORS:
768+
except DEAD_ENGINE_ERRORS as err:
768769
alive = self.is_alive()
770+
error = {
771+
"reason": ABORT_REASON_ENGINE_CRASH,
772+
"kind": err.__class__.__name__,
773+
}
774+
if self.stockfish:
775+
returncode = self.stockfish.poll()
776+
if returncode is not None:
777+
error["engine_returncode"] = returncode
769778
if alive:
770779
t = next(self.backoff)
771780
logging.exception("Engine process has died. Backing off %0.1fs", t)
772781

773-
# Abort current job
774-
self.abort_job()
782+
# Tell server this abort is from an engine crash so it can cap retries
783+
# and avoid rescheduling the same crashing position forever.
784+
self.abort_job(error=error)
775785

776786
if alive:
777787
self.sleep.wait(t)
@@ -825,15 +835,18 @@ def run_inner(self):
825835
logging.error("Unexpected HTTP status for acquire: %d", response.status_code)
826836
self.sleep.wait(t)
827837

828-
def abort_job(self):
838+
def abort_job(self, error=None):
829839
if self.job is None:
830840
return
831841

832842
logging.debug("Aborting job %s", self.job["work"]["id"])
843+
request = self.make_request()
844+
if error is not None:
845+
request["error"] = error
833846

834847
try:
835848
response = requests.post(get_endpoint(self.conf, "abort/%s" % self.job["work"]["id"]),
836-
data=json.dumps(self.make_request()),
849+
data=json.dumps(request),
837850
timeout=HTTP_TIMEOUT)
838851
if response.status_code == 204:
839852
logging.info("Aborted job %s", self.job["work"]["id"])

0 commit comments

Comments
 (0)