Skip to content

Commit 013bb6c

Browse files
Enhance private cloud deployment support (#520)
1 parent 1b9246e commit 013bb6c

File tree

3 files changed

+56
-45
lines changed

3 files changed

+56
-45
lines changed

Diff for: README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -1831,9 +1831,9 @@ Options:
18311831
--master Run in master mode
18321832
--slave Run in slave mode
18331833
--token TEXT The master server token
1834-
--master-ip TEXT The master server ip address
1834+
--master-domain TEXT The master server domain
18351835
--master-port INTEGER The master server port
1836-
--slave-ip TEXT The slave server ip address
1836+
--slave-domain TEXT The slave server domain
18371837
--update Pull the latest image before starting
18381838
--no-update Do not update to the latest version
18391839
--compute TEXT Compute configuration to use

Diff for: lean/commands/private_cloud/start.py

+48-39
Original file line numberDiff line numberDiff line change
@@ -27,29 +27,18 @@
2727
from lean.constants import COMPUTE_MASTER, COMPUTE_SLAVE, COMPUTE_MESSAGING
2828

2929

30-
def get_free_port():
31-
from socket import socket
32-
for i in range(0, 3):
33-
try:
34-
port = 32787 + i
35-
with socket() as s:
36-
s.bind(('', port))
37-
return port
38-
except:
39-
pass
40-
return 0
41-
42-
43-
def deploy(ip: str, port: int, token: str, slave: bool, update: bool, no_update: bool,
30+
def deploy(target_master_domain: str, self_domain: str, port: int, token: str, slave: bool, update: bool, no_update: bool,
4431
image: str, lean_config: dict, extra_docker_config: str, counter: int = 0):
4532
logger = container.logger
4633

4734
compute_node_name = f"{COMPUTE_SLAVE}{counter}" if slave else COMPUTE_MASTER
4835
logger.info(f"Starting {compute_node_name}...")
4936
compute_directory = Path(f"~/.lean/compute/{compute_node_name}").expanduser()
5037
lean_config["node-name"] = compute_node_name
38+
5139
run_options = container.lean_runner.get_basic_docker_config_without_algo(lean_config, None, True, None, None,
5240
None, compute_directory)
41+
run_options["environment"]["AIRLOCK"] = compute_directory
5342
run_options["mounts"].append(Mount(target="/QuantConnect/platform-services/airlock",
5443
source=str(compute_directory), type="bind"))
5544
run_options["mounts"].append(Mount(target="/var/run/docker.sock", source="/var/run/docker.sock",
@@ -59,17 +48,31 @@ def deploy(ip: str, port: int, token: str, slave: bool, update: bool, no_update:
5948
type="bind", read_only=True))
6049
container.lean_runner.parse_extra_docker_config(run_options, loads(extra_docker_config))
6150

51+
if not image:
52+
image = "quantconnect/platform-services:latest"
53+
54+
is_domain = not self_domain.replace('.', '').isnumeric()
6255
if not slave:
63-
run_options["ports"]["9696"] = str(port)
64-
run_options["ports"]["9697"] = str(get_free_port())
56+
if not is_domain:
57+
run_options["ports"]["9696"] = str(port)
58+
run_options["ports"]["9697"] = str(0)
6559

6660
root_directory = container.lean_config_manager.get_cli_root_directory()
6761
run_options["volumes"][str(root_directory)] = {"bind": "/LeanCLIWorkspace", "mode": "rw"}
6862

63+
if is_domain:
64+
labels = {}
65+
for name, value in container.docker_manager.get_image_labels(image):
66+
if slave and name == "slave" or not slave and name == "master":
67+
for key, label in loads(value).items():
68+
labels[key] = label.replace("{{domain}}", self_domain)
69+
run_options["labels"] = labels
70+
6971
run_options["remove"] = False
7072
run_options["name"] = compute_node_name
7173
run_options["environment"]["MODE"] = str('slave') if slave else str('master')
72-
run_options["environment"]["IP"] = str(ip)
74+
run_options["environment"]["MASTER_DOMAIN"] = str(target_master_domain)
75+
run_options["environment"]["SELF_DOMAIN"] = str(self_domain)
7376
run_options["environment"]["PORT"] = str(port)
7477
run_options["environment"]["TOKEN"] = str(token)
7578
run_options["user"] = "root"
@@ -103,9 +106,9 @@ def get_ip_address():
103106
@option("--master", is_flag=True, default=False, help="Run in master mode")
104107
@option("--slave", is_flag=True, default=False, help="Run in slave mode")
105108
@option("--token", type=str, required=False, help="The master server token")
106-
@option("--master-ip", type=str, required=False, help="The master server ip address")
107-
@option("--master-port", type=int, required=False, default=0, help="The master server port")
108-
@option("--slave-ip", type=str, required=False, help="The slave server ip address")
109+
@option("--master-domain", type=str, required=False, help="The master server domain")
110+
@option("--master-port", type=int, required=False, default=443, help="The master server port")
111+
@option("--slave-domain", type=str, required=False, help="The slave server domain")
109112
@option("--update", is_flag=True, default=False, help="Pull the latest image before starting")
110113
@option("--no-update", is_flag=True, default=False, help="Do not update to the latest version")
111114
@option("--compute", type=str, required=False, help="Compute configuration to use")
@@ -115,8 +118,8 @@ def get_ip_address():
115118
def start(master: bool,
116119
slave: bool,
117120
token: str,
118-
master_ip: str,
119-
slave_ip: str,
121+
master_domain: str,
122+
slave_domain: str,
120123
master_port: int,
121124
update: bool,
122125
no_update: bool,
@@ -135,9 +138,9 @@ def start(master: bool,
135138
# just default to slave if none given
136139
slave = True
137140

138-
if not master_ip:
139-
master_ip = get_ip_address()
140-
logger.info(f"'--master-ip' was not provided using '{master_ip}'")
141+
if not master_domain:
142+
master_domain = get_ip_address()
143+
logger.info(f"'--master-domain' was not provided using '{master_domain}'")
141144

142145
str_mode = 'slave' if slave else 'master'
143146
logger.info(f'Start running in {str_mode} mode')
@@ -154,9 +157,7 @@ def start(master: bool,
154157

155158
if slave:
156159
if not token:
157-
raise RuntimeError(f"Master token is required when running as slave")
158-
if master_port == 0:
159-
raise RuntimeError(f"Master port is required when running as slave")
160+
raise RuntimeError(f"Master token '--token' is required when running as slave")
160161
else:
161162
if not token:
162163
from uuid import uuid4
@@ -166,41 +167,49 @@ def start(master: bool,
166167
if any(docker_container):
167168
names = [node.name for node in docker_container if node.status == 'running']
168169
if master and (COMPUTE_MASTER in names or COMPUTE_MESSAGING in names):
169-
raise RuntimeError(f"Private cloud nodes already running detected: {names}")
170+
raise RuntimeError(f"Private cloud nodes already running, please use '--stop'. Detected: {names}")
170171
logger.info(f"Running nodes: {names}")
171172

172173
container.temp_manager.delete_temporary_directories_when_done = False
173174
lean_config = container.lean_config_manager.get_complete_lean_config(None, None, None)
174175

176+
master_is_domain = not master_domain.replace('.', '').isnumeric()
175177
if master:
176-
deploy(master_ip, master_port, token, False, update, no_update, image, lean_config, extra_docker_config)
178+
master_port_option = f" --master-port {master_port}"
179+
if master_is_domain:
180+
slave_domain = master_domain
181+
master_port_option = ''
182+
deploy(master_domain, master_domain, master_port, token, False, update, no_update, image,
183+
lean_config, extra_docker_config)
177184
if master_port == 0:
178185
master_port = container.docker_manager.get_container_port(COMPUTE_MASTER, "9696/tcp")
179-
logger.info(f"Slaves can be added running: "
180-
f"lean private-cloud start --slave --master-ip {master_ip} --token \"{token}\" --master-port {master_port}")
186+
187+
logger.info(f"Slaves can be added running: lean private-cloud start --slave --master-domain {master_domain}"
188+
f" --slave-domain {{slave.domain}} --token \"{token}\"{master_port_option}")
181189

182190
compute_index = len(get_private_cloud_containers([COMPUTE_SLAVE]))
183191
if compute:
184192
logger.debug(f"Starting given compute configuration: {compute}")
185193

186-
if not slave_ip:
187-
logger.debug(f"'slave-ip' was not given will try to figure it out...")
194+
if not slave_domain:
195+
logger.debug(f"'slave-domain' was not given will try to figure it out...")
188196
retry_count = 0
189197
while retry_count < 10:
190198
retry_count += 1
191199
try:
192200
from requests import get
193-
resp = get(f'http://{master_ip}:{master_port}', stream=True)
194-
slave_ip = resp.raw._connection.sock.getsockname()[0]
201+
resp = get(f'http://{master_domain}:{master_port}', stream=True)
202+
slave_domain = resp.raw._connection.sock.getsockname()[0]
195203
break
196204
except Exception as e:
197205
from time import sleep
198206
sleep(1)
199207
pass
200-
lean_config["self-ip-address"] = slave_ip
201-
logger.info(f"Using ip address '{slave_ip}' as own")
208+
lean_config["self-ip-address"] = slave_domain
209+
logger.debug(f"Using address '{slave_domain}' as own")
202210

203211
for configuration in compute:
204212
lean_config["compute"] = configuration
205213
for i in range(compute_index, int(configuration["count"]) + compute_index):
206-
deploy(master_ip, master_port, token, True, update, no_update, image, lean_config, extra_docker_config, i)
214+
deploy(master_domain, slave_domain, master_port, token, True, update, no_update, image,
215+
lean_config, extra_docker_config, i)

Diff for: lean/components/docker/docker_manager.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,12 @@ def __init__(self, logger: Logger, temp_manager: TempManager, platform_manager:
3838
self._temp_manager = temp_manager
3939
self._platform_manager = platform_manager
4040

41-
def get_image_label(self, image: DockerImage, label: str, default: str) -> str:
42-
docker_image = self._get_docker_client().images.get(str(image))
41+
def get_image_labels(self, image: str):
    """Return the labels attached to a local docker image.

    The previous ``-> str`` return annotation was incorrect: the result is the
    ``items()`` view of the image's label mapping, which callers iterate as
    ``for name, value in ...``.

    :param image: the image reference handed to the docker client's ``images.get``
    :return: an iterable view of ``(label name, label value)`` pairs
    """
    docker_image = self._get_docker_client().images.get(image)
    return docker_image.labels.items()
4344

44-
for name, value in docker_image.labels.items():
45+
def get_image_label(self, image: DockerImage, label: str, default: str) -> str:
46+
for name, value in self.get_image_labels(str(image)):
4547
if name == label:
4648
self._logger.debug(f"Label '{label}' found in image '{image.name}', value {value}")
4749
return value
@@ -179,7 +181,7 @@ def run_image(self, image: DockerImage, **kwargs) -> bool:
179181
from time import sleep
180182
i = 0
181183
self._logger.info(f'Verifying deployment \'{container.name}\' is stable...')
182-
while i < 35:
184+
while i < 60:
183185
i += 1
184186
container.reload()
185187
if (container.status != "running" and container.attrs and "State" in container.attrs and "ExitCode"

0 commit comments

Comments
 (0)