Skip to content

Commit 40dde40

Browse files
[gNOI] Adding gNOI reset service.
1 parent 00393e3 commit 40dde40

File tree

2 files changed

+356
-2
lines changed

2 files changed

+356
-2
lines changed

host_modules/gnoi_reset.py

Lines changed: 353 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,353 @@
1+
"""gNOI reset module which performs factory reset."""
2+
3+
import json
4+
import logging
5+
import threading
6+
import time
7+
from host_modules import host_service
8+
from host_modules import infra_host
9+
10+
# Name under which this module is registered on the host service bus.
MOD_NAME = "gnoi_reset"

# The boot install commands are left empty because non-switch-linux switches
# do not have a boot count the way switch-linux switches do, so there is
# nothing to execute for them.
EXECUTE_BOOT_INSTALL_COMMAND = ""
GET_BOOT_INSTALL_VALUE_COMMAND = ""
# Commands run (best effort) to remove install artifacts before a reset.
EXECUTE_CLEANUP_COMMAND = []

# Seconds to wait for SONiC Host Service to be killed during a reboot. After
# the reboot command is issued, this module is expected to be killed within
# this window if the reboot succeeds; if it is still alive afterwards, the
# reboot is considered failed. Each container can take up to 20 seconds to be
# killed and there are 10 containers; adding a 1 minute buffer gives
# 10 * 20 + 60 = 260 seconds.
REBOOT_TIMEOUT = 260

EXECUTE_COLD_REBOOT_COMMAND = "sudo reboot"

logger = logging.getLogger(__name__)
30+
31+
32+
class GnoiReset(host_service.HostModule):
    """DBus endpoint that executes a factory reset and reports its outcome.

    A reset request is parsed, checked against any reset already in progress,
    optionally preceded by a best-effort cleanup of install artifacts, and
    then carried out by issuing a cold reboot from a separate thread.
    """

    def __init__(self, mod_name):
        """Initialize the reset state and register the module as `mod_name`."""
        # Protects is_reset_ongoing and reset_response, which are touched both
        # by the request handler and by the thread running execute_reboot().
        self.lock = threading.Lock()
        # True from the moment a reset is accepted until it is known to have
        # failed.
        self.is_reset_ongoing = False
        # Parsed request (a dict) of the most recent issue_reset() call.
        self.reset_request = {}
        # Most recently generated response, as a dict.
        self.reset_response = {}
        super().__init__(mod_name)

    def _set_reset_ongoing(self, ongoing: bool) -> None:
        """Update the reset-in-progress flag while holding the lock."""
        with self.lock:
            self.is_reset_ongoing = ongoing

    def populate_reset_response(
        self,
        reset_success=True,
        factory_os_unsupported=False,
        zero_fill_unsupported=False,
        detail="",
    ) -> tuple[int, str]:
        """Build, store and serialize the factory reset response.

        Args:
            reset_success: Whether the factory reset succeeded. When true, the
                remaining arguments are ignored.
            factory_os_unsupported: When true, the error is reported as
                "factory_os_unsupported". Takes precedence over
                zero_fill_unsupported.
            zero_fill_unsupported: When true (and factory_os_unsupported is
                false), the error is reported as "zero_fill_unsupported".
            detail: Error message attached to any error response. If neither
                of the two flags above is set, the error is reported as
                "other".

        Returns:
            A tuple of an integer status (0 on success, 1 on failure) and the
            response serialized as a JSON string, for example:

                (0, '{"reset_success": {}}')
                (1, '{"reset_error": {"other": true,
                                      "detail": "Previous reset is ongoing."}}')
        """
        if reset_success:
            response = {"reset_success": {}}
        else:
            reset_error = {}
            if factory_os_unsupported:
                reset_error["factory_os_unsupported"] = True
            elif zero_fill_unsupported:
                reset_error["zero_fill_unsupported"] = True
            else:
                reset_error["other"] = True
            reset_error["detail"] = detail
            response = {"reset_error": reset_error}

        # The context manager releases the lock even if serialization raises.
        with self.lock:
            self.reset_response = response
            response_data = json.dumps(response)
        return (0 if reset_success else 1), response_data

    def execute_reboot(self) -> None:
        """Execute the cold reboot; runs in the thread started by
        _execute_reboot().

        If the reboot command fails, the error is logged. If the command
        succeeds but this code is still running after REBOOT_TIMEOUT seconds,
        the reboot is considered failed and the critical state is raised. In
        both failure cases the reset-in-progress flag is cleared so that later
        reset requests are not rejected forever.
        """
        rc, stdout, stderr = infra_host.InfraHost._run_command(
            EXECUTE_COLD_REBOOT_COMMAND
        )
        if rc:
            logger.error(
                "%s: Cold reboot failed execution with stdout: %s, stderr: %s.",
                MOD_NAME,
                stdout,
                stderr,
            )
            self._set_reset_ongoing(False)
            return

        # A successful reboot is expected to kill this process during the
        # wait; reaching the lines below means the reboot did not happen.
        time.sleep(REBOOT_TIMEOUT)
        self._set_reset_ongoing(False)
        # NOTE(review): the class itself is passed as the first argument, as
        # in the original code -- confirm this matches raise_critical_state's
        # signature.
        infra_host.InfraHost.raise_critical_state(infra_host.InfraHost)

    def _check_reboot_in_progress(self) -> tuple[int, str]:
        """Check whether a previous reset is still in progress.

        Returns:
            (0, "") if no reset is in progress and a new one may be issued.
            Otherwise (1, <JSON error response>) as produced by
            populate_reset_response(), for example:

                (1, '{"reset_error": {"other": true,
                                      "detail": "Previous reset is ongoing."}}')
        """
        with self.lock:
            is_reset_ongoing = self.is_reset_ongoing

        # NOTE(review): GET_BOOT_INSTALL_VALUE_COMMAND is an empty string in
        # this module; how _run_command treats an empty command is not visible
        # here -- confirm this path behaves as intended on such platforms.
        rc, stdout, stderr = infra_host.InfraHost._run_command(
            GET_BOOT_INSTALL_VALUE_COMMAND
        )
        if rc or not stdout:
            logger.error(
                "%s: Failed to get boot install value with stdout: %s, stderr: %s",
                MOD_NAME,
                stdout,
                stderr,
            )
            # NOTE(review): kept from the original; this clears the flag even
            # if another reset set it -- confirm that is intended.
            self._set_reset_ongoing(False)
            return self.populate_reset_response(
                reset_success=False, detail="Failed to get the boot install value."
            )

        # Example of a valid google-specific platform stdout here is:
        # ["regionselect=a", "bootcount=0", "bootinstall=0"].
        try:
            boot_install = int(stdout[2].split("=")[1])
        except (ValueError, IndexError) as error:
            return self.populate_reset_response(
                reset_success=False,
                detail="Failed to get the boot install value with error: %s."
                % str(error),
            )

        # Return without issuing the reset if the previous reset is ongoing.
        if is_reset_ongoing or boot_install != 0:
            return self.populate_reset_response(
                reset_success=False, detail="Previous reset is ongoing."
            )

        return 0, ""

    def _parse_arguments(self, options) -> tuple[int, str]:
        """Parse and validate the given arguments into a reset request.

        On success the parsed request is stored in self.reset_request.

        Args:
            options: A JSON string of the StartRequest protobuf defined in
                factory_reset/reset.proto, or a list/tuple whose first element
                is that string.

        Returns:
            (0, "") if the request is valid. Otherwise (1, <JSON error
            response>) as produced by populate_reset_response().
        """
        self.reset_request = {}

        # issue_reset() is exported with in_signature="as", so the argument
        # may arrive as an array of strings rather than a single string.
        # NOTE(review): presumably the JSON payload is the first element --
        # confirm against the caller.
        raw_request = options
        if isinstance(options, (list, tuple)):
            raw_request = options[0] if options else ""

        try:
            request = json.loads(raw_request)
        except (TypeError, ValueError):
            # ValueError covers malformed JSON; TypeError covers payloads that
            # are not str/bytes at all.
            return self.populate_reset_response(
                reset_success=False,
                detail=(
                    "Failed to parse json formatted factory reset request "
                    "into python dict."
                ),
            )

        # Valid JSON that is not an object (e.g. a bare string or list) cannot
        # be looked up by field name.
        if not isinstance(request, dict):
            return self.populate_reset_response(
                reset_success=False,
                detail="Factory reset request is not a json object.",
            )
        self.reset_request = request

        # Reject the request if zero_fill is set.
        if request.get("zeroFill"):
            return self.populate_reset_response(
                reset_success=False,
                zero_fill_unsupported=True,
                detail="zero_fill operation is currently unsupported.",
            )

        # Issue a warning if retain_certs is set.
        if request.get("retainCerts"):
            logger.warning("%s: retain_certs is currently ignored.", MOD_NAME)

        return 0, ""

    def _cleanup_images(self) -> None:
        """Clean up the installed images, preparing for a factory reset."""
        logger.info("Cleaning up install images.")
        # Cleanup all install artifacts.
        for command in EXECUTE_CLEANUP_COMMAND:
            rc, stdout, stderr = infra_host.InfraHost._run_command(command)
            if rc:
                # Cleaning up artifacts is best effort, so continue on failure.
                logger.warning(
                    "%s: Command %s execution failed with stdout: %s, stderr: %s.",
                    MOD_NAME,
                    command,
                    stdout,
                    stderr,
                )

    def _execute_reboot(self) -> tuple[int, str]:
        """Issue the boot install command, then start the cold reboot thread.

        On either failure handled here, the reset-in-progress flag is cleared.

        Returns:
            (0, "") if the reboot thread was started. Otherwise (1, <JSON
            error response>) as produced by populate_reset_response().
        """
        # Issue the boot install command.
        rc, stdout, stderr = infra_host.InfraHost._run_command(
            EXECUTE_BOOT_INSTALL_COMMAND
        )
        if rc:
            logger.error(
                "%s: Boot count execution failed with stdout: %s, stderr: %s.",
                MOD_NAME,
                stdout,
                stderr,
            )
            self._set_reset_ongoing(False)
            return self.populate_reset_response(
                reset_success=False, detail="Boot count execution failed."
            )

        # Issue the cold reboot in a new thread so this call can return a
        # response while the reboot proceeds.
        try:
            reboot_thread = threading.Thread(target=self.execute_reboot)
            reboot_thread.start()
        except RuntimeError as error:
            logger.error(
                "%s: Failed to start thread to execute reboot: %s",
                MOD_NAME,
                error,
            )
            self._set_reset_ongoing(False)
            return self.populate_reset_response(
                reset_success=False,
                detail="Failed to start thread to execute reboot.",
            )

        return 0, ""

    @host_service.method(
        host_service.bus_name(MOD_NAME), in_signature="as", out_signature="is"
    )
    def issue_reset(self, options) -> tuple[int, str]:
        """Issue the factory reset.

        The following steps are performed sequentially:
            1. Parse and validate the request.
            2. Check that there is no other reset request ongoing.
            3. Clean up install artifacts if "factoryOs" is set.
            4. Issue the boot install command.
            5. Issue the cold reboot command in a new thread.

        Args:
            options: A JSON string of the StartRequest protobuf defined in
                factory_reset/reset.proto (see _parse_arguments for the
                accepted forms).

        Returns:
            A tuple of an integer and the StartResponse serialized as a JSON
            string. The integer is always 0, regardless of success or
            failure, to ensure that the FE consumes the response correctly;
            the outcome is carried by the JSON payload, for example:

                (0, '{"reset_success": {}}')
                (0, '{"reset_error": {"other": true,
                                      "detail": "Previous reset is ongoing."}}')
        """
        logger.info("%s: Issuing reset from back end.", MOD_NAME)

        # The status code of every step is overridden to 0 below so that the
        # FE consumes the response correctly.
        rc, resp = self._parse_arguments(options)
        if rc:
            return 0, resp

        rc, resp = self._check_reboot_in_progress()
        if rc:
            return 0, resp

        self._set_reset_ongoing(True)
        if self.reset_request.get("factoryOs"):
            self._cleanup_images()

        rc, resp = self._execute_reboot()
        if rc:
            return 0, resp

        return 0, self.populate_reset_response()[1]
349+
350+
351+
def register():
    """Return the host module class and the name it is registered under.

    Returns:
        A tuple ``(GnoiReset, MOD_NAME)``.
    """
    return GnoiReset, MOD_NAME

scripts/sonic-host-server

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import dbus.service
1212
import dbus.mainloop.glib
1313

1414
from gi.repository import GObject
15-
from host_modules import config_engine, gcu, host_service, showtech, systemd_service, file_service, image_service, docker_service, reboot
15+
from host_modules import config_engine, gcu, host_service, showtech, systemd_service, file_service, image_service, docker_service, reboot, gnoi_reset
1616

1717

1818
def register_dbus():
@@ -26,7 +26,8 @@ def register_dbus():
2626
'systemd': systemd_service.SystemdService('systemd'),
2727
'image_service': image_service.ImageService('image_service'),
2828
'docker_service': docker_service.DockerService('docker_service'),
29-
'file_stat': file_service.FileService('file')
29+
'file_stat': file_service.FileService('file'),
30+
'gnoi_reset': gnoi_reset.GnoiReset('gnoi_reset')
3031
}
3132
for mod_name, handler_class in mod_dict.items():
3233
handlers[mod_name] = handler_class

0 commit comments

Comments
 (0)