|
1 | 1 | #!/usr/bin/python |
2 | | -# pylint: disable=line-too-long |
3 | | -# pylint: disable=C0103 |
4 | | -######################################################################## |
5 | | -# Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. |
6 | 2 | # |
7 | | -# Redistribution and use in source and binary forms, with or without |
8 | | -# modification, are permitted provided that the following conditions are met: |
| 3 | +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES |
| 4 | +# Copyright (c) 2020-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
| 5 | +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary |
9 | 6 | # |
10 | | -# 1. Redistributions of source code must retain the above copyright |
11 | | -# notice, this list of conditions and the following disclaimer. |
12 | | -# 2. Redistributions in binary form must reproduce the above copyright |
13 | | -# notice, this list of conditions and the following disclaimer in the |
14 | | -# documentation and/or other materials provided with the distribution. |
15 | | -# 3. Neither the names of the copyright holders nor the names of its |
16 | | -# contributors may be used to endorse or promote products derived from |
17 | | -# this software without specific prior written permission. |
18 | | -# |
19 | | -# Alternatively, this software may be distributed under the terms of the |
20 | | -# GNU General Public License ("GPL") version 2 as published by the Free |
21 | | -# Software Foundation. |
22 | | -# |
23 | | -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
24 | | -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
25 | | -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
26 | | -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
27 | | -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
28 | | -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
29 | | -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
30 | | -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
31 | | -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
32 | | -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
33 | | -# POSSIBILITY OF SUCH DAMAGE. |
| 7 | +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual |
| 8 | +# property and proprietary rights in and to this material, related |
| 9 | +# documentation and any modifications thereto. Any use, reproduction, |
| 10 | +# disclosure or distribution of this material and related documentation |
| 11 | +# without an express license agreement from NVIDIA CORPORATION or |
| 12 | +# its affiliates is strictly prohibited. |
34 | 13 | # |
35 | 14 |
|
36 | 15 | """ |
@@ -463,6 +442,8 @@ class CONST: |
463 | 442 | gmemory_snapshot = None |
464 | 443 | gmemory_snapshot_profiler = ObjectSnapshot(max_depth=16) |
465 | 444 |
|
| 445 | +_sig_condition_name = "-" |
| 446 | + |
466 | 447 | # ---------------------------------------------------------------------- |
467 | 448 |
|
468 | 449 |
|
@@ -3240,19 +3221,22 @@ def __init__(self, cmd_arg, tc_logger): |
3240 | 3221 | signal.signal(signal.SIGINT, self.sig_handler) |
3241 | 3222 | signal.signal(signal.SIGHUP, self.sig_handler) |
3242 | 3223 | self.exit = threading.Event() |
3243 | | - self.exit_flag = False |
3244 | 3224 |
|
3245 | 3225 | if not str2bool(self.sys_config.get("platform_support", 1)): |
3246 | 3226 | self.log.notice("Platform Board:'{}', SKU:'{}' is not supported.".format(self.board_type, self.sku), repeat=1) |
3247 | 3227 | self.log.notice("Set TC to idle.") |
3248 | 3228 | while True: |
3249 | 3229 | self.exit.wait(60) |
| 3230 | + if self.exit.is_set(): |
| 3231 | + return |
3250 | 3232 |
|
3251 | 3233 | if not self.is_pwm_exists(): |
3252 | 3234 | self.log.notice("Missing PWM control (probably ASIC driver not loaded). PWM control is required for TC run\nWaiting for ASIC init", repeat=1) |
3253 | 3235 | while not self.is_pwm_exists(): |
3254 | 3236 | self.log.notice("Wait...") |
3255 | 3237 | self.exit.wait(10) |
| 3238 | + if self.exit.is_set(): |
| 3239 | + return |
3256 | 3240 | self.log.notice("PWM control activated", repeat=1) |
3257 | 3241 |
|
3258 | 3242 | self.attention_fans_lst = get_dict_val_by_path(self.sys_config, [CONST.SYS_CONF_GENERAL_CONFIG_PARAM, CONST.SYS_CONF_FAN_STEADY_ATTENTION_ITEMS]) |
@@ -3290,19 +3274,25 @@ def __init__(self, cmd_arg, tc_logger): |
3290 | 3274 | else: |
3291 | 3275 | self.log.info("Set PWM successful") |
3292 | 3276 | break |
| 3277 | + if self.exit.is_set(): |
| 3278 | + return |
3293 | 3279 |
|
3294 | 3280 | if not self.is_fan_tacho_init(): |
3295 | 3281 | self.log.notice("Missing FAN tacho (probably ASIC not initialized yet). FANs is required for TC run\nWaiting for ASIC init", repeat=1) |
3296 | 3282 | while not self.is_fan_tacho_init(): |
3297 | 3283 | self.log.notice("Wait...") |
3298 | 3284 | self.exit.wait(10) |
| 3285 | + if self.exit.is_set(): |
| 3286 | + return |
3299 | 3287 |
|
3300 | 3288 | self.log.notice("Nvidia thermal control is waiting for configuration ({} sec).".format(CONST.THERMAL_WAIT_FOR_CONFIG), repeat=1) |
3301 | 3289 | timeout = current_milli_time() + 1000 * CONST.THERMAL_WAIT_FOR_CONFIG |
3302 | 3290 | while timeout > current_milli_time(): |
3303 | 3291 | if not self.write_pwm(self.pwm_target): |
3304 | 3292 | self.log.info("Set PWM failed. Possible SDK is not started") |
3305 | 3293 | self.exit.wait(2) |
| 3294 | + if self.exit.is_set(): |
| 3295 | + return |
3306 | 3296 |
|
3307 | 3297 | self._collect_hw_info() |
3308 | 3298 | self.amb_tmp = CONST.TEMP_INIT_VAL_DEF |
@@ -3935,13 +3925,12 @@ def sig_handler(self, sig, *_): |
3935 | 3925 | Signal handler for termination signals |
3936 | 3926 | """ |
3937 | 3927 | if sig in [signal.SIGTERM, signal.SIGINT, signal.SIGHUP]: |
3938 | | - self.exit_flag = True |
3939 | | - self.log.syslog_log(self.log.NOTICE, "Thermal control stopped by signal {}".format(sig)) |
3940 | | - self.log.stop() |
3941 | | - if self.sys_config.get("platform_support", 1): |
3942 | | - self.stop(reason="SIG {}".format(sig)) |
3943 | | - |
3944 | | - os._exit(0) |
| 3928 | + global _sig_condition_name |
| 3929 | + try: |
| 3930 | + _sig_condition_name = signal.Signals(sig).name |
| 3931 | + except (ValueError, AttributeError): |
| 3932 | + _sig_condition_name = str(sig) |
| 3933 | + self.exit.set() |
3945 | 3934 |
|
3946 | 3935 | # ---------------------------------------------------------------------- |
3947 | 3936 | def load_user_configuration(self, user_config_file_name): |
@@ -4373,7 +4362,7 @@ def run(self): |
4373 | 4362 | gmemory_snapshot = None |
4374 | 4363 |
|
4375 | 4364 | # main loop |
4376 | | - while not self.exit.is_set() or not self.exit_flag: |
| 4365 | + while not self.exit.is_set(): |
4377 | 4366 | try: |
4378 | 4367 | log_level = int(self.read_file(CONST.LOG_LEVEL_FILENAME)) |
4379 | 4368 | if log_level != self.cmd_arg["verbosity"]: |
@@ -4692,12 +4681,20 @@ class RawTextArgumentDefaultsHelpFormatter( |
4692 | 4681 | thermal_management = None |
4693 | 4682 | try: |
4694 | 4683 | thermal_management = ThermalManagement(args, logger) |
4695 | | - thermal_management.init() |
4696 | | - thermal_management.start(reason="init") |
4697 | | - thermal_management.run() |
| 4684 | + if not thermal_management.exit.is_set(): |
| 4685 | + thermal_management.init() |
| 4686 | + thermal_management.start(reason="init") |
| 4687 | + thermal_management.run() |
| 4688 | + |
| 4689 | + logger.notice("Thermal control stopped by signal {}".format(_sig_condition_name), repeat=1) |
| 4690 | + if (thermal_management is not None and |
| 4691 | + hasattr(thermal_management, 'sys_config') and |
| 4692 | + thermal_management.sys_config.get("platform_support", 1)): |
| 4693 | + thermal_management.stop(reason="SIG {}".format(_sig_condition_name)) |
| 4694 | + |
4698 | 4695 | except Exception as e: |
4699 | 4696 | logger.info(traceback.format_exc()) |
4700 | | - if thermal_management: |
| 4697 | + if thermal_management is not None: |
4701 | 4698 | thermal_management.stop(reason="crash ({})".format(str(e))) |
4702 | 4699 | sys.exit(1) |
4703 | 4700 |
|
|
0 commit comments