Skip to content

Commit a3d8948

Browse files
Merge pull request #261 from frappe/mergify/bp/master/pr-260
feat(registry): Registry retry (backport #260)
2 parents c5f7951 + 0bb0bb5 commit a3d8948

3 files changed

Lines changed: 37 additions & 22 deletions

File tree

agent/builder.py

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99
from typing import TYPE_CHECKING
1010

1111
import docker
12-
import requests
1312

1413
from agent.base import Base
1514
from agent.exceptions import RegistryDownException
1615
from agent.job import Job, Step, job, step
16+
from agent.utils import is_registry_healthy
1717

1818
if TYPE_CHECKING:
1919
from typing import Literal
@@ -138,21 +138,6 @@ def _publish_docker_build_output(self, result):
138138
self._publish_throttled_output(False)
139139
self._publish_throttled_output(True)
140140

141-
def is_registry_healthy(self) -> bool:
142-
"""Check if production registry (only) is healthy in the push cycle"""
143-
headers = {"Accept": "application/vnd.docker.distribution.manifest.v2+json"}
144-
145-
if self.registry["url"] != "registry.frappe.cloud":
146-
return True
147-
148-
response = requests.get(
149-
f"https://{self.registry['url']}/v2",
150-
auth=(self.registry["username"], self.registry["password"]),
151-
headers=headers,
152-
)
153-
154-
return response.ok
155-
156141
def _wait_for_registry_recovery(self):
157142
"""Wait for registry to recover after restart"""
158143
time.sleep(60)
@@ -165,12 +150,16 @@ def _push_docker_image(self):
165150

166151
for attempt in range(max_retries):
167152
try:
168-
if not self.is_registry_healthy():
153+
if not is_registry_healthy(
154+
self.registry["url"], self.registry["username"], self.registry["password"]
155+
):
169156
raise RegistryDownException("Registry is currently down")
170157

171158
self._push_image(client)
172159

173-
if not self.is_registry_healthy():
160+
if not is_registry_healthy(
161+
self.registry["url"], self.registry["username"], self.registry["password"]
162+
):
174163
raise RegistryDownException("Registry became unhealthy after push")
175164

176165
return self.output["push"]

agent/server.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@
2222
)
2323
from agent.base import AgentException, Base
2424
from agent.bench import Bench
25-
from agent.exceptions import BenchNotExistsException
25+
from agent.exceptions import BenchNotExistsException, RegistryDownException
2626
from agent.job import Job, Step, job, step
2727
from agent.patch_handler import run_patches
2828
from agent.site import Site
29-
from agent.utils import get_supervisor_processes_status
29+
from agent.utils import get_supervisor_processes_status, is_registry_healthy
3030

3131

3232
class Server(Base):
@@ -55,8 +55,22 @@ def docker_login(self, registry):
5555
password = registry["password"]
5656
return self.execute(f"docker login -u {username} -p {password} {url}")
5757

58+
def establish_connection_with_registry(self, max_retries: int, registry: dict[str, str]):
    """Poll the registry until it reports healthy, giving up after `max_retries` checks.

    Each check calls `is_registry_healthy` with the `url`, `username` and
    `password` entries of `registry`. After an unhealthy check that is not the
    last one, the method sleeps for 60 seconds before checking again.

    Raises:
        Exception: "Failed to pull image", chained from a
            `RegistryDownException`, when the final check is also unhealthy.
    """
    final_attempt = max_retries - 1

    for attempt in range(max_retries):
        reachable = is_registry_healthy(registry["url"], registry["username"], registry["password"])
        if reachable:
            # Registry answered; nothing more to wait for.
            return

        if attempt == final_attempt:
            # Out of attempts: surface the failure with the registry error as its cause.
            unavailable = RegistryDownException("Registry is not available")
            raise Exception("Failed to pull image") from unavailable

        # Give the registry a minute to come back before the next check.
        time.sleep(60)
70+
5871
@step("Initialize Bench")
59-
def bench_init(self, name, config):
72+
def bench_init(self, name, config, registry: dict[str, str]):
73+
self.establish_connection_with_registry(max_retries=3, registry=registry)
6074
bench_directory = os.path.join(self.benches_directory, name)
6175
os.mkdir(bench_directory)
6276
directories = ["logs", "sites", "config"]
@@ -98,7 +112,7 @@ def dump(self):
98112
@job("New Bench", priority="low")
99113
def new_bench(self, name, bench_config, common_site_config, registry, mounts=None):
100114
self.docker_login(registry)
101-
self.bench_init(name, bench_config)
115+
self.bench_init(name, bench_config, registry)
102116
bench = Bench(name, self, mounts=mounts)
103117
bench.update_config(common_site_config, bench_config)
104118
if bench.bench_config.get("single_container"):

agent/utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,18 @@ def get_size(folder, ignore_dirs=None):
7272
return total_size
7373

7474

75+
def is_registry_healthy(url: str, username: str, password: str, timeout: float = 10) -> bool:
    """Report whether the production registry answers on its `/v2` endpoint.

    Only `registry.frappe.cloud` is actually probed; for any other registry URL
    no request is made and the registry is reported as healthy.

    Args:
        url: Registry host name, without a scheme.
        username: User name sent as HTTP basic auth.
        password: Password sent as HTTP basic auth.
        timeout: Seconds to wait for the registry before treating it as
            unhealthy.

    Returns:
        True when the registry is not the production one, or when the probe
        returns a non-error HTTP status. False when the status is an error or
        the request could not be completed (connection failure, timeout, ...).
    """
    if url != "registry.frappe.cloud":
        return True

    headers = {"Accept": "application/vnd.docker.distribution.manifest.v2+json"}

    try:
        response = requests.get(
            f"https://{url}/v2",
            auth=(username, password),
            headers=headers,
            # Without a timeout a stalled registry would block the caller indefinitely.
            timeout=timeout,
        )
    except requests.RequestException:
        # An unreachable registry is an unhealthy registry: report it as such
        # so callers' retry logic runs instead of crashing on a network error.
        return False

    return response.ok
85+
86+
7587
def cint(x):
7688
"""Convert to integer"""
7789
if x is None:

0 commit comments

Comments
 (0)