Skip to content

Commit ca4fba8

Browse files
committed
hw-mgmt: scripts: Fix race condition in fan direction initialization
Fan direction setup in chassis-events depends on the fanX_status symlinks created by thermal-events. Because these handlers run asynchronously, chassis-events may try to read the fan status before the symlinks exist. Fix: synchronize the two handlers with a fan_status_ready flag file. thermal-events sets the flag once the fanX_status symlinks are in place; if chassis-events finds the flag not set, it defers fan direction initialization to a background task that runs after a 5-second delay. Bug: 4859849 Signed-off-by: Oleksandr Shamray <oleksandrs@nvidia.com>
1 parent b981634 commit ca4fba8

File tree

2 files changed

+29
-11
lines changed

2 files changed

+29
-11
lines changed

usr/usr/bin/hw-management-chassis-events.sh

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22

33
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
4-
# Copyright (c) 2018-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
# Copyright (c) 2018-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
55
#
66
# Redistribution and use in source and binary forms, with or without
77
# modification, are permitted provided that the following conditions are met:
@@ -509,6 +509,21 @@ function set_fan_direction()
509509
esac
510510
}
511511

512+
# Set fan direction for all fans.
#
# Iterates over fan1..fanN, where N is read from "$config_path"/max_tachos,
# and calls `set_fan_direction fanX 1` for every fan whose
# "$thermal_path"/fanX_status symlink exists, is readable and contains 1.
# Finally removes the "$config_path"/fan_status_ready flag file.
#
# Globals:   config_path (read), thermal_path (read)
# Arguments: none
# Returns:   0 (all steps are best-effort)
function set_fan_direction_for_all_fans()
{
	# Keep loop state local: this function may be called from sourced
	# top-level code and must not clobber the caller's variables.
	local max_tachos=""
	local status
	local i

	# Declaration and assignment are separate so a failed read is not masked
	# by the exit status of `local`.
	if [ -r "$config_path"/max_tachos ]; then
		max_tachos=$(<"$config_path"/max_tachos)
	fi
	# Missing, empty or non-numeric tacho count: nothing to iterate over.
	if ! [[ "$max_tachos" =~ ^[0-9]+$ ]]; then
		max_tachos=0
	fi

	for ((i = 1; i <= max_tachos; i += 1)); do
		# -L: status must be a symlink; -r: skip dangling or unreadable
		# links instead of failing on the read.
		if [ -L "${thermal_path}"/fan"${i}"_status ] && [ -r "${thermal_path}"/fan"${i}"_status ]; then
			# check if fan status is set
			status=$(<"${thermal_path}"/fan"${i}"_status)
			# An empty or non-numeric status counts as "not set"; silence
			# the "integer expression expected" diagnostic from test.
			if [ "$status" -eq 1 ] 2>/dev/null; then
				set_fan_direction "fan${i}" 1
			fi
		fi
	done

	# The flag has been consumed; remove it (best-effort).
	rm -f "$config_path"/fan_status_ready || true
}
512527

513528
# Get FAN direction based on VPD PN field
514529
#
@@ -1159,14 +1174,16 @@ if [ "$1" == "add" ]; then
11591174
done
11601175
handle_cpld_versions "$cpld_num"
11611176
fi
1162-
for ((i=1; i<=$(<$config_path/max_tachos); i+=1)); do
1163-
if [ -L $thermal_path/fan"$i"_status ]; then
1164-
status=$(< $thermal_path/fan"$i"_status)
1165-
if [ "$status" -eq 1 ]; then
1166-
set_fan_direction fan"${i}" 1
1167-
fi
1168-
fi
1169-
done
1177+
1178+
# Set fan direction for all fans.
1179+
# Wait for fan status attributes to be created (indicated by fan_status_ready flag).
1180+
if [ -f "$config_path"/fan_status_ready ] && [ "$(< "$config_path"/fan_status_ready)" -eq 1 ]; then
1181+
set_fan_direction_for_all_fans
1182+
else
1183+
# Fan status attributes are not created yet; postpone fan direction initialization by 5 seconds in a background task.
1184+
nohup bash -c 'sleep 5 && source hw-management-chassis-events.sh && set_fan_direction_for_all_fans' >/dev/null 2>&1 &
1185+
fi
1186+
rm -f "$config_path"/fan_status_ready || true
11701187

11711188
# Handle linecard.
11721189
if [ "$linecard" -ne 0 ]; then

usr/usr/bin/hw-management-thermal-events.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
###########################################################################
44
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
5-
# Copyright (c) 2018-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5+
# Copyright (c) 2018-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
66
#
77
# Redistribution and use in source and binary forms, with or without
88
# modification, are permitted provided that the following conditions are met:
@@ -612,7 +612,6 @@ if [ "$1" == "add" ]; then
612612
(( fan_drwr_num++ ))
613613
fi
614614
done
615-
616615
if [ -f $config_path/fixed_fans_system ] && [ "$(< $config_path/fixed_fans_system)" = 1 ]; then
617616
get_fixed_fans_direction
618617
dir=$?
@@ -625,6 +624,8 @@ if [ "$1" == "add" ]; then
625624
else
626625
echo $fan_drwr_num > $config_path/fan_drwr_num
627626
fi
627+
# Set temporary flag in fs to indicate that fanX_status is set for all fans
628+
echo 1 > "$config_path"/fan_status_ready
628629

629630
for ((i=1; i<=max_psus; i+=1)); do
630631
if [ -f "$3""$4"/psu$i ]; then

0 commit comments

Comments
 (0)