Skip to content

Commit 26ed3d8

Browse files
[gNOI] Adding gNOI reset service.
1 parent 00393e3 commit 26ed3d8

File tree

2 files changed

+400
-2
lines changed

2 files changed

+400
-2
lines changed

host_modules/gnoi_reset.py

Lines changed: 397 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,397 @@
1+
"""gNOI reset module which performs factory reset."""
2+
3+
import json
4+
import logging
5+
import threading
6+
import time
7+
import datetime
8+
import shlex
9+
import subprocess
10+
import re
11+
from host_modules import host_service
12+
13+
14+
MOD_NAME = "gnoi_reset"
15+
16+
# Platform-specific boot install commands. They are intentionally left empty:
17+
# we don't execute any boot install commands on non-switch-linux switches
# because they don't have a boot count the way switch-linux switches do.
18+
EXECUTE_BOOT_INSTALL_COMMAND = ""
19+
GET_BOOT_INSTALL_VALUE_COMMAND = ""
# Commands that GnoiReset._cleanup_images() runs one after another; an empty
# list means there is nothing to clean up.
20+
EXECUTE_CLEANUP_COMMAND = []
21+
22+
# Time (in seconds) to wait for the SONiC Host Service to be killed during a
23+
# reboot. After executing the reboot command we sleep for this long, expecting
24+
# the SONiC Host Service to be killed during the wait if the reboot succeeds.
25+
# If this module is still alive after the waiting period, we conclude that the
26+
# reboot has failed. Each container can take up to 20 seconds to get killed;
27+
# there are 10 containers in total, and adding a buffer of 1 minute gives
28+
# 10 * 20 + 60 = 260 seconds.
29+
REBOOT_TIMEOUT = 260
30+
31+
# Command line used by GnoiReset.execute_reboot() to trigger the cold reboot.
EXECUTE_COLD_REBOOT_COMMAND = "sudo reboot"
32+
33+
logger = logging.getLogger(__name__)
34+
35+
36+
class GnoiReset(host_service.HostModule):
    """DBus endpoint that executes the factory reset and reports its outcome.

    Attributes:
      lock: Guards `is_reset_ongoing` and `reset_response`, which are touched
        both by the request handler and by the background reboot thread.
      is_reset_ongoing: True from the moment a reset request is accepted until
        the reboot is known to have failed.
      reset_request: The most recently parsed StartRequest, as a dict.
      reset_response: The most recently generated StartResponse, as a dict.
    """

    def __init__(self, mod_name):
        self.lock = threading.Lock()
        self.is_reset_ongoing = False
        self.reset_request = {}
        self.reset_response = {}
        super(GnoiReset, self).__init__(mod_name)

    def populate_reset_response(
        self,
        reset_success=True,
        factory_os_unsupported=False,
        zero_fill_unsupported=False,
        detail="",
    ) -> tuple[int, str]:
        """Build, store and serialize the factory reset response.

        Args:
          reset_success: Whether the factory reset succeeded.
          factory_os_unsupported: On failure, report that restoring the
            factory OS is unsupported.
          zero_fill_unsupported: On failure, report that zero fill is
            unsupported. Ignored when factory_os_unsupported is set.
          detail: Human readable error message, attached to every error
            response.

        Returns:
          A (code, json) tuple. The code is 0 on success and 1 on failure; the
          json string follows the StartResponse protobuf of reset.proto, e.g.

            (0, '{"reset_success": {}}')
            (1, '{"reset_error": {"other": true,
                                  "detail": "Previous reset is ongoing."}}')
        """
        response = {}
        if reset_success:
            response["reset_success"] = {}
        else:
            reset_error = {}
            if factory_os_unsupported:
                reset_error["factory_os_unsupported"] = True
            elif zero_fill_unsupported:
                reset_error["zero_fill_unsupported"] = True
            else:
                reset_error["other"] = True
            reset_error["detail"] = detail
            response["reset_error"] = reset_error

        # `with` guarantees the lock is released even if serialization raises.
        with self.lock:
            self.reset_response = response
            response_data = json.dumps(response)
        return (0 if reset_success else 1), response_data

    def _set_reset_ongoing(self, ongoing) -> None:
        """Update the `is_reset_ongoing` flag while holding the lock."""
        with self.lock:
            self.is_reset_ongoing = ongoing

    @staticmethod
    def _run_command(cmd):
        """Execute a command directly, without going through a shell.

        Args:
          cmd: The command to execute, either as a single string (split with
            shlex) or as a list of already separated arguments.

        Returns:
          A (returncode, stdout_lines, stderr_lines) tuple. An empty command
          is a no-op reported as (0, None, None); a command that cannot be
          parsed or launched is reported as (1, None, None).
        """
        if not cmd:
            return (0, None, None)
        try:
            argv = shlex.split(cmd) if isinstance(cmd, str) else [str(arg) for arg in cmd]
            # No bufsize=1 here: line buffering is not supported for binary
            # pipes and only triggers a RuntimeWarning.
            proc = subprocess.Popen(
                argv,
                shell=False,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                close_fds=True,
            )
            output_stdout, output_stderr = proc.communicate()
        except (OSError, ValueError) as e:
            logger.error("%s: !Exception [%s] encountered while processing the "
                         "command : %s", MOD_NAME, str(e), str(cmd))
            return (1, None, None)

        # Undecodable bytes are replaced rather than turning an otherwise
        # successful command into a failure.
        list_stdout = [
            line.decode(errors="replace") for line in output_stdout.splitlines()
        ]
        list_stderr = [
            line.decode(errors="replace") for line in output_stderr.splitlines()
        ]
        return (proc.returncode, list_stdout, list_stderr)

    def raise_critical_state(self):
        """Raise critical state when the reboot fails.

        The command is obtained from `generate_critical_state_command`, which
        is not defined in this class.
        NOTE(review): presumably supplied by a base class or a platform
        specific subclass and callable without arguments -- confirm. When it
        is missing the failure is only logged.
        """
        build_command = getattr(self, "generate_critical_state_command", None)
        if not callable(build_command):
            logger.error(
                "%s: Unable to raise critical state: "
                "generate_critical_state_command is not available.",
                MOD_NAME,
            )
            return

        rc, stdout, stderr = self._run_command(build_command())
        if rc:
            logger.error("%s: Failed to raise critical state when reboot failed"
                         " with stdout: %s, stderr: %s", MOD_NAME, stdout,
                         stderr)

    def execute_reboot(self) -> None:
        """Execute the cold reboot; runs in a background thread.

        If the reboot command fails, the error is logged. If the command
        succeeds but this process is still alive REBOOT_TIMEOUT seconds later,
        the reboot is considered failed and the critical state is raised. In
        both cases the ongoing-reset flag is cleared so that a later request
        is not rejected forever.
        """
        rc, stdout, stderr = self._run_command(EXECUTE_COLD_REBOOT_COMMAND)
        if rc:
            logger.error(
                "%s: Cold reboot failed execution with stdout: %s, stderr: %s.",
                MOD_NAME,
                stdout,
                stderr,
            )
            self._set_reset_ongoing(False)
            return

        time.sleep(REBOOT_TIMEOUT)
        # Still running after the timeout: the reboot did not happen.
        self._set_reset_ongoing(False)
        self.raise_critical_state()

    def _check_reboot_in_progress(self) -> tuple[int, str]:
        """Check whether a previous reset is still in progress.

        Returns:
          (0, "") when a new reset may be started. Otherwise (1, json), where
          json is an error StartResponse as produced by
          populate_reset_response().
        """
        with self.lock:
            is_reset_ongoing = self.is_reset_ongoing

        # Return without issuing the reset if the previous reset is ongoing.
        if is_reset_ongoing:
            return self.populate_reset_response(
                reset_success=False, detail="Previous reset is ongoing."
            )

        # Platforms without a boot install command have nothing more to check.
        if not GET_BOOT_INSTALL_VALUE_COMMAND:
            return 0, ""

        rc, stdout, stderr = self._run_command(GET_BOOT_INSTALL_VALUE_COMMAND)
        if rc or not stdout:
            logger.error(
                "%s: Failed to get boot install value with stdout: %s, stderr: %s",
                MOD_NAME,
                stdout,
                stderr,
            )
            return self.populate_reset_response(
                reset_success=False, detail="Failed to get the boot install value."
            )

        # Example of a valid google-specific platform stdout here is:
        # ["regionselect=a", "bootcount=0", "bootinstall=0"].
        try:
            boot_install = int(stdout[2].split("=")[1])
        except (ValueError, IndexError) as error:
            return self.populate_reset_response(
                reset_success=False,
                detail="Failed to get the boot install value with error: %s."
                % str(error),
            )

        # A non-zero boot install value means an earlier reset is pending.
        if boot_install != 0:
            return self.populate_reset_response(
                reset_success=False, detail="Previous reset is ongoing."
            )

        return 0, ""

    def _parse_arguments(self, options) -> tuple[int, str]:
        """Parse and validate the given arguments into a reset request.

        Args:
          options: A json-style string of StartRequest protobuf defined in
            factory_reset/reset.proto.
            NOTE(review): issue_reset() declares the D-Bus input signature
            "as" (array of strings) while this parser expects one JSON
            string -- confirm what the caller actually sends.

        Returns:
          (0, "") when the request is valid and stored in `reset_request`.
          Otherwise (1, json), where json is an error StartResponse as
          produced by populate_reset_response().
        """
        self.reset_request = {}
        try:
            request = json.loads(options)
        except (TypeError, ValueError):
            # TypeError covers non-string input such as a list.
            request = None

        if not isinstance(request, dict):
            return self.populate_reset_response(
                reset_success=False,
                detail=(
                    "Failed to parse json formatted factory reset request "
                    "into python dict."
                ),
            )
        self.reset_request = request

        # Reject the request if zero_fill is set.
        if request.get("zeroFill"):
            return self.populate_reset_response(
                reset_success=False,
                zero_fill_unsupported=True,
                detail="zero_fill operation is currently unsupported.",
            )

        # Issue a warning if retain_certs is set.
        if request.get("retainCerts"):
            logger.warning("%s: retain_certs is currently ignored.", MOD_NAME)

        return 0, ""

    def _cleanup_images(self) -> None:
        """Clean up the installed images, preparing for a factory reset."""
        logger.info("Cleaning up install images.")
        # Cleanup all install artifacts.
        for command in EXECUTE_CLEANUP_COMMAND:
            rc, stdout, stderr = self._run_command(command)
            if rc:
                # Cleaning up artifacts is best effort, so continue on failure.
                logger.warning(
                    "%s: Command %s execution failed with stdout: %s, stderr: %s.",
                    MOD_NAME,
                    command,
                    stdout,
                    stderr,
                )

    def _execute_reboot(self) -> tuple[int, str]:
        """Issue the boot install command and start the cold reboot thread.

        Returns:
          (0, "") when the reboot thread was started. Otherwise (1, json),
          where json is an error StartResponse as produced by
          populate_reset_response(); the ongoing-reset flag is cleared in
          that case.
        """
        # Issue the boot install command.
        rc, stdout, stderr = self._run_command(EXECUTE_BOOT_INSTALL_COMMAND)
        if rc:
            logger.error(
                "%s: Boot count execution failed with stdout: %s, stderr: %s.",
                MOD_NAME,
                stdout,
                stderr,
            )
            self._set_reset_ongoing(False)
            return self.populate_reset_response(
                reset_success=False, detail="Boot count execution failed."
            )

        # Issue the cold reboot in a new thread so that the response can be
        # returned to the caller before the host goes down.
        try:
            reboot_thread = threading.Thread(target=self.execute_reboot)
            reboot_thread.start()
        except RuntimeError as error:
            logger.error(
                "%s: Failed to start thread to execute reboot: %s",
                MOD_NAME,
                str(error),
            )
            self._set_reset_ongoing(False)
            return self.populate_reset_response(
                reset_success=False,
                detail="Failed to start thread to execute reboot.",
            )

        return 0, ""

    @host_service.method(
        host_service.bus_name(MOD_NAME), in_signature="as", out_signature="is"
    )
    def issue_reset(self, options) -> tuple[int, str]:
        """Issue the factory reset.

        The following steps are performed sequentially:
          1. Parse and validate the request.
          2. Check that there is no other reset request ongoing.
          3. Clean up the installed images if factoryOs is requested.
          4. Issue the boot install command and then the cold reboot.

        Args:
          options: A json-style string of StartRequest protobuf defined in
            factory_reset/reset.proto.

        Returns:
          A (code, json) tuple where json follows the StartResponse protobuf
          defined in reset.proto. The code is always 0, regardless of success
          or failure, to ensure that the FE consumes the response correctly:

            (0, '{"reset_success": {}}')
            (0, '{"reset_error": {"other": true,
                                  "detail": "Previous reset is ongoing."}}')
        """
        logger.info("%s: Issuing reset from back end.", MOD_NAME)

        # The error code of every step is overridden to 0 so that the FE
        # consumes the response correctly.
        rc, resp = self._parse_arguments(options)
        if rc:
            return 0, resp

        rc, resp = self._check_reboot_in_progress()
        if rc:
            return 0, resp

        self._set_reset_ongoing(True)
        if self.reset_request.get("factoryOs"):
            self._cleanup_images()

        rc, resp = self._execute_reboot()
        if rc:
            return 0, resp

        return 0, self.populate_reset_response()[1]
393+
394+
395+
def register():
    """Return the GnoiReset class together with its module name.

    Returns:
      A (class, name) tuple: the GnoiReset class and MOD_NAME.
    """
    module_entry = (GnoiReset, MOD_NAME)
    return module_entry

0 commit comments

Comments
 (0)