diff --git a/Jenkinsfile b/Jenkinsfile
index 1c0438e4635..25792cb5199 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -189,7 +189,7 @@ pipeline {
// 'stages of this run (i.e. auto, auto_md_on_ssd, auto:-3DNAND, ' +
// '0000:81:00.0, etc.). Does not apply to MD on SSD stages.')
string(name: 'BuildType',
- defaultValue: '',
+ defaultValue: 'release',
description: 'Type of build. Passed to scons as BUILD_TYPE. (I.e. dev, release, debug, etc.). ' +
'Defaults to release on an RC or dev otherwise.')
string(name: 'TestRepeat',
@@ -205,12 +205,15 @@ pipeline {
'stages. Specifies the default provider to use the daos_server ' +
'config file when running functional tests (the launch.py ' +
'--provider argument; i.e. "ucx+dc_x", "ofi+verbs", "ofi+tcp")')
- booleanParam(name: 'CI_SKIP_CANCEL_PREV_BUILD',
+ booleanParam(name: 'CI_CANCEL_PREV_BUILD_SKIP',
defaultValue: false,
description: 'Do not cancel previous build.')
booleanParam(name: 'CI_BUILD_PACKAGES_ONLY',
defaultValue: false,
description: 'Only build RPM and DEB packages, Skip unit tests.')
+ string(name: 'CI_SCONS_ARGS',
+ defaultValue: '',
+ description: 'Arguments for scons when building DAOS')
string(name: 'CI_RPM_TEST_VERSION',
defaultValue: '',
description: 'Package version to use instead of building. example: 1.3.103-1, 1.2-2')
@@ -273,6 +276,9 @@ pipeline {
booleanParam(name: 'CI_TEST_LEAP15_RPMs',
defaultValue: true,
description: 'Run the Test RPMs on Leap 15 test stage')
+ booleanParam(name: 'CI_FUNCTIONAL_TEST_SKIP',
+ defaultValue: false,
+ description: 'Skip all functional test stages (Test)')
booleanParam(name: 'CI_MORE_FUNCTIONAL_PR_TESTS',
defaultValue: false,
description: 'Enable more distros for functional CI tests')
@@ -293,6 +299,9 @@ pipeline {
defaultValue: false,
description: 'Run the Functional on Ubuntu 20.04 test stage' +
' Requires CI_MORE_FUNCTIONAL_PR_TESTS')
+ booleanParam(name: 'CI_FUNCTIONAL_HARDWARE_TEST_SKIP',
+ defaultValue: false,
+ description: 'Skip Functional Hardware (Test Hardware) stage')
booleanParam(name: 'CI_medium_TEST',
defaultValue: true,
description: 'Run the Functional Hardware Medium test stage')
@@ -333,7 +342,7 @@ pipeline {
defaultValue: 'ci_nvme5',
description: 'Label to use for the Functional Hardware Medium (MD on SSD) stages')
string(name: 'FUNCTIONAL_HARDWARE_MEDIUM_VERBS_PROVIDER_LABEL',
- defaultValue: 'ci_nvme5',
+ defaultValue: 'ci_ofed5',
description: 'Label to use for 5 node Functional Hardware Medium Verbs Provider (MD on SSD) stages')
string(name: 'FUNCTIONAL_HARDWARE_MEDIUM_VMD_LABEL',
defaultValue: 'ci_vmd5',
@@ -407,7 +416,7 @@ pipeline {
stage('Cancel Previous Builds') {
when {
beforeAgent true
- expression { !paramsValue('CI_SKIP_CANCEL_PREV_BUILD', false) && !skipStage() }
+ expression { !paramsValue('CI_CANCEL_PREV_BUILD_SKIP', false) && !skipStage() }
}
steps {
cancelPreviousBuilds()
@@ -613,7 +622,7 @@ pipeline {
}
}
}
- stage('Build on EL 8') {
+ stage('Build on EL 8.8') {
when {
beforeAgent true
expression { !params.CI_el8_NOBUILD && !skipStage() }
@@ -834,7 +843,9 @@ pipeline {
stage('Test') {
when {
beforeAgent true
- expression { !skipStage() }
+ //expression { !paramsValue('CI_FUNCTIONAL_TEST_SKIP', false) && !skipStage() }
+ // Above not working, always skipping functional VM tests.
+ expression { !paramsValue('CI_FUNCTIONAL_TEST_SKIP', false) }
}
parallel {
stage('Functional on EL 8.8 with Valgrind') {
@@ -1098,7 +1109,7 @@ pipeline {
stage('Test Hardware') {
when {
beforeAgent true
- expression { !skipStage() }
+ expression { !paramsValue('CI_FUNCTIONAL_HARDWARE_TEST_SKIP', false) && !skipStage() }
}
steps {
script {
diff --git a/ci/functional/test_main.sh b/ci/functional/test_main.sh
index 56fe36f8571..3e0eed2e5c7 100755
--- a/ci/functional/test_main.sh
+++ b/ci/functional/test_main.sh
@@ -1,9 +1,10 @@
#!/bin/bash
#
-# Copyright 2020-2023 Intel Corporation.
+# Copyright 2020-2024 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
#
# SPDX-License-Identifier: BSD-2-Clause-Patent
-
+#
set -eux
if [ -z "$TEST_TAG" ]; then
@@ -47,6 +48,8 @@ test_cluster() {
NODELIST=${tnodes} \
BUILD_URL=\"${BUILD_URL:-Unknown in GHA}\" \
STAGE_NAME=\"$STAGE_NAME\" \
+ JENKINS_URL=\"${JENKINS_URL:-}\" \
+ DAOS_DEVOPS_EMAIL=\"${DAOS_DEVOPS_EMAIL:-}\" \
$(cat ci/functional/test_main_prep_node.sh)"
}
@@ -58,7 +61,11 @@ if ! test_cluster; then
if cluster_reboot; then
if test_cluster; then
hardware_ok=true
+ else
+ echo "Hardware test failed again after reboot"
fi
+ else
+ echo "Cluster reboot failed"
fi
else
hardware_ok=true
@@ -99,6 +106,7 @@ if "$hardware_ok"; then
FTEST_ARG=\"${FTEST_ARG:-}\" \
WITH_VALGRIND=\"${WITH_VALGRIND:-}\" \
STAGE_NAME=\"$STAGE_NAME\" \
+ HTTPS_PROXY=\"${HTTPS_PROXY:-}\" \
$(cat ci/functional/test_main_node.sh)"
else
./ftest.sh "$test_tag" "$tnodes" "$FTEST_ARG"
diff --git a/ci/functional/test_main_node.sh b/ci/functional/test_main_node.sh
index f86ad21f441..1a0b2e02794 100755
--- a/ci/functional/test_main_node.sh
+++ b/ci/functional/test_main_node.sh
@@ -1,5 +1,10 @@
#!/bin/bash
-
+#
+# Copyright 2020-2022 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
+#
+# SPDX-License-Identifier: BSD-2-Clause-Patent
+#
set -eux
DAOS_TEST_SHARED_DIR=$(mktemp -d -p /mnt/share/)
@@ -11,4 +16,5 @@ export REMOTE_ACCT=jenkins
export WITH_VALGRIND="$WITH_VALGRIND"
export STAGE_NAME="$STAGE_NAME"
-/usr/lib/daos/TESTING/ftest/ftest.sh "$TEST_TAG" "$TNODES" "$FTEST_ARG"
+HTTPS_PROXY="${HTTPS_PROXY:-}" /usr/lib/daos/TESTING/ftest/ftest.sh \
+ "$TEST_TAG" "$TNODES" "$FTEST_ARG"
diff --git a/ci/functional/test_main_prep_node.sh b/ci/functional/test_main_prep_node.sh
index 32993d114e4..d096737d24d 100755
--- a/ci/functional/test_main_prep_node.sh
+++ b/ci/functional/test_main_prep_node.sh
@@ -1,6 +1,7 @@
#!/bin/bash
#
# Copyright 2020-2023 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
#
# SPDX-License-Identifier: BSD-2-Clause-Patent
#
@@ -10,6 +11,11 @@ set -eux
: "${OPERATIONS_EMAIL:=}"
: "${STAGE_NAME:=Unknown}"
: "${BUILD_URL:=Unknown}"
+: "${JENKINS_URL:=https://jenkins.example.com}"
+domain1="${JENKINS_URL#*://}"
+mail_domain="${domain1%%/*}"
+: "${EMAIL_DOMAIN:=$mail_domain}"
+: "${DAOS_DEVOPS_EMAIL:="$HOSTNAME"@"$EMAIL_DOMAIN"}"
result=0
mail_message=''
@@ -22,6 +28,7 @@ testfails=0
myhost="${HOSTNAME%%.*}"
: "${NODELIST:=$myhost}"
mynodenum=0
+
# in order for junit test names to be consistent between test runs
# Need to use the position number of the host in the node list for
# the junit report.
@@ -42,7 +49,7 @@ function do_mail {
# shellcheck disable=SC2059
build_info="BUILD_URL = $BUILD_URL$nl STAGE = $STAGE_NAME$nl$nl"
mail -s "Hardware check failed after reboot!" \
- -r "$HOSTNAME"@intel.com "$OPERATIONS_EMAIL" \
+ -r "$DAOS_DEVOPS_EMAIL" "$OPERATIONS_EMAIL" \
<<< "$build_info$mail_message"
set -x
}
@@ -63,17 +70,27 @@ set +x
while IFS= read -r line; do
((opa_count++)) || true
done < <(lspci -mm | grep "Omni-Path")
+echo "Found $opa_count Omni-Path adapters."
if [ "$opa_count" -gt 0 ]; then
((ib_count=opa_count)) || true
fi
+last_pci_bus=''
while IFS= read -r line; do
+ pci_bus="${line%.*}"
+ if [ "$pci_bus" == "$last_pci_bus" ]; then
+ # We only use one interface on a dual interface HBA
+ # Fortunately lspci appears to group them together
+ continue
+ fi
+ last_pci_bus="$pci_bus"
mlnx_type="${line##*ConnectX-}"
mlnx_type="${mlnx_type%]*}"
if [ "$mlnx_type" -ge 6 ]; then
((hdr_count++)) || true
fi
-done < <(lspci -mm | grep "ConnectX")
+done < <(lspci -mm | grep "ConnectX" | grep -i "infiniband" )
+echo "Found $hdr_count Mellanox HDR adapters."
if [ "$hdr_count" -gt 0 ]; then
((ib_count=hdr_count)) || true
fi
@@ -85,33 +102,66 @@ if [ "$hdr_count" -gt 0 ] && [ "$opa_count" -gt 0 ]; then
$hdr_count Mellanox HDR ConnectX adapters,
and
$opa_count Omni-Path adapters.
-The Onmi-Path adapters will not be used."
+The Omni-Path adapters will not be used."
mail_message+="${nl}${ib_message}${nl}"
echo "$ib_message"
fi
set -x
+# Wait for at least the expected IB devices to show up.
+# in the case of dual port HBAs, not all IB devices will
+# show up.
+# For some unknown reason, sometimes IB devices will not show up
+# except in the lspci output unless an ip link set up command for
+# at least one device that should be present shows up.
+good_ibs=()
function do_wait_for_ib {
+ local ib_devs=("$@")
+ local working_ib
ib_timeout=300 # 5 minutes
retry_wait=10 # seconds
timeout=$((SECONDS + ib_timeout))
while [ "$SECONDS" -lt "$timeout" ]; do
- ip link set up "$1" || true
- sleep 2
- if ip addr show "$1" | grep "inet "; then
- return 0
- fi
- sleep ${retry_wait}
+ for ib_dev in "${ib_devs[@]}"; do
+ ip link set up "$ib_dev" || true
+ done
+ sleep 2
+ working_ib=0
+ good_ibs=()
+ for ib_dev in "${ib_devs[@]}"; do
+ if ip addr show "$ib_dev" | grep "inet "; then
+ good_ibs+=("$ib_dev")
+ ((working_ib++)) || true
+ fi
+ # With udev rules, the ib adapter name has the numa
+ # affinity in its name. On a single adapter system
+ # we do not have an easy way to know what that
+ # adapter name is in the case of a udev rule, so we have to try
+ # both possible names.
+ if [ "$working_ib" -ge "$ib_count" ]; then
+ return 0
+ fi
+ done
+ sleep ${retry_wait}
done
return 1
}
-# First check for infinband devices
-for i in $(seq 0 $((ib_count-1))); do
- ((testruns++)) || true
- testcases+=" ${nl}"
- iface="ib$i"
- if do_wait_for_ib "$iface"; then
+# Migrating to using udev rules for network devices
+if [ -e /etc/udev/rules.d/70-persistent-ipoib.rules ]; then
+ ib_list=('ib_cpu0_0' 'ib_cpu1_0')
+else
+ ib_list=('ib0')
+ if [ "$ib_count" -gt 1 ]; then
+ ib_list+=('ib1')
+ fi
+fi
+
+function check_ib_devices {
+ local ib_devs=("$@")
+ for iface in "${ib_devs[@]}"; do
+ ((testruns++)) || true
+ testcases+=" ${nl}"
set +x
if ! ip addr show "$iface" | grep "inet "; then
ib_message="$({
@@ -136,31 +186,31 @@ for i in $(seq 0 $((ib_count-1))); do
cat "/sys/class/net/$iface/device/numa_node"
fi
set -x
+ testcases+=" $nl"
+ done
+}
+
+
+# First check for InfiniBand devices
+if [ "$ib_count" -gt 0 ]; then
+ if do_wait_for_ib "${ib_list[@]}"; then
+ echo "Found at least $ib_count working devices in" "${ib_list[@]}"
+ # All good, generate Junit report
+ check_ib_devices "${good_ibs[@]}"
else
- ib_message="Failed to bring up interface $iface on $HOSTNAME. "
- mail_message+="${nl}${ib_message}${nl}"
- echo "$ib_message"
- ((testfails++)) || true
- testcases+="
-
- $nl"
- result=1
+ # Something wrong, generate Junit report and update e-mail
+ check_ib_devices "${ib_list[@]}"
fi
- testcases+=" $nl"
-done
+fi
# having -x just makes the console log harder to read.
-set +x
-if [ -e /sys/class/net/ib1 ]; then
- # now check for pmem & NVMe drives when ib1 is present.
+# set +x
+if [ "$ib_count" -ge 2 ]; then
+ # now check for pmem & NVMe drives when multiple ib are present.
# ipmctl show -dimm should show an even number of drives, all healthy
- dimm_count=0
- while IFS= read -r line; do
- if [[ "$line" != *"| Healthy "* ]]; then continue; fi
- ((dimm_count++)) || true
- done < <(ipmctl show -dimm)
+    dimm_count=$(ipmctl show -dimm | grep -c "| Healthy " || true)
if [ "$dimm_count" -eq 0 ] || [ $((dimm_count%2)) -ne 0 ]; then
- # Not fatal, the PMEM DIMM should be replaced when downtime can be
+ # May not be fatal, the PMEM DIMM should be replaced when downtime can be
# scheduled for this system.
dimm_message="FAIL: Wrong number $dimm_count healthy PMEM DIMMs seen."
mail_message+="$nl$dimm_message$nl$(ipmctl show -dimm)$nl"
@@ -184,7 +234,7 @@ if [ -e /sys/class/net/ib1 ]; then
testcases+="
$nl"
- result=1
+ result=3
else
echo "OK: Found $dimm_rcount DIMM PMEM regions."
fi
@@ -211,23 +261,15 @@ if [ -e /sys/class/net/ib1 ]; then
testcases+="
$nl"
- result=1
+ result=4
else
echo "OK: Even number ($nvme_count) of NVMe devices seen."
fi
testcases+=" $nl"
# All storage found by lspci should also be in lsblk report
- lsblk_nvme=0
- lsblk_pmem=0
- while IFS= read -r line; do
- if [[ "$line" = nvme* ]];then
- ((lsblk_nvme++)) || true
- fi
- if [[ "$line" = pmem* ]];then
- ((lsblk_pmem++)) || true
- fi
- done < <(lsblk)
+    lsblk_nvme=$(lsblk | grep -c "^nvme" || true)
+    lsblk_pmem=$(lsblk | grep -c "^pmem" || true)
((testruns++)) || true
testcases+=" ${nl}"
@@ -238,7 +280,7 @@ if [ -e /sys/class/net/ib1 ]; then
testcases+="
$nl"
- result=1
+ result=5
else
echo "OK: All $nvme_count NVMe devices are in lsblk report."
fi
@@ -253,7 +295,7 @@ if [ -e /sys/class/net/ib1 ]; then
testcases+="
$nl"
- result=1
+ result=6
else
echo "OK: All $dimm_rcount PMEM devices are in lsblk report."
fi
@@ -295,4 +337,8 @@ echo "$junit_xml" > "./hardware_prep_node_results.xml"
do_mail
+if [ "$result" -ne 0 ]; then
+ echo "Check failure $result"
+fi
+
exit $result
diff --git a/ci/gha_functions.sh b/ci/gha_functions.sh
index 7fbc8bcac28..d7234c457d9 100644
--- a/ci/gha_functions.sh
+++ b/ci/gha_functions.sh
@@ -233,7 +233,7 @@ test_test_tag_and_features() {
CP_FEATURES="foo bar" get_test_tags "-hw")" "always_passes,-hw always_fails,-hw"
}
-test_jenkins_curl() {
- JENKINS_URL="${JENKINS_URL:-https://build.hpdd.intel.com/}"
- assert_equals "$(QUIET=true VERBOSE=false jenkins_curl -X POST "${JENKINS_URL}api/xml" 3>&1 >/dev/null | tr -d '\r' | grep '^X-Content-Type-Options:')" "X-Content-Type-Options: nosniff"
-}
+#test_jenkins_curl() {
+# JENKINS_URL="${JENKINS_URL:-https://build.hpdd.intel.com/}"
+# assert_equals "$(QUIET=true VERBOSE=false jenkins_curl -X POST "${JENKINS_URL}api/xml" 3>&1 >/dev/null | tr -d '\r' | grep '^X-Content-Type-Options:')" "X-Content-Type-Options: nosniff"
+#}
diff --git a/ci/provisioning/post_provision_config.sh b/ci/provisioning/post_provision_config.sh
index 92575907ca8..14980c86a03 100755
--- a/ci/provisioning/post_provision_config.sh
+++ b/ci/provisioning/post_provision_config.sh
@@ -1,5 +1,10 @@
#!/bin/bash
-
+#
+# Copyright 2020-2023 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
+#
+# SPDX-License-Identifier: BSD-2-Clause-Patent
+#
set -eux
export PS4='+ ${HOSTNAME%%.*}:${BASH_SOURCE:+$BASH_SOURCE:}$LINENO:${FUNCNAME:+$FUNCNAME():} '
@@ -24,12 +29,38 @@ source ci/junit.sh
: "${MLNX_VER_NUM:=24.04-0.6.6.0}"
: "${DISTRO:=EL_7}"
-DSL_REPO_var="DAOS_STACK_${DISTRO}_LOCAL_REPO"
-DSG_REPO_var="DAOS_STACK_${DISTRO}_GROUP_REPO"
-DSA_REPO_var="DAOS_STACK_${DISTRO}_APPSTREAM_REPO"
retry_cmd 300 clush -B -S -l root -w "$NODESTRING" -c ci_key* --dest=/tmp/
+function create_host_file() {
+ local node_string="$1"
+ local output_file="${2:-./hosts}"
+ local input_file="${3:-}"
+ rm -rf "$output_file" 2>/dev/null
+ if [ -n "$input_file" ]; then
+ cp "$input_file" "$output_file"
+ fi
+ IFS=',' read -ra NODES <<< "$node_string"
+ for node in "${NODES[@]}"; do
+        ip_address=$(getent hosts "$node" | awk '{print $1}' | head -n 1)
+        long_name=$(getent hosts "$node" | awk '{print $2}' | head -n 1)
+ if [ -n "$ip_address" ] && [ -n "$long_name" ]; then
+ echo "$ip_address $long_name $node" >> "$output_file"
+ else
+ echo "ERROR: Could not resolve $node"
+ return 1
+ fi
+ done
+ return 0
+}
+
+if create_host_file "$NODESTRING" "./hosts" "/etc/hosts"; then
+ retry_cmd 300 clush -B -S -l root -w "$NODESTRING" -c ./hosts --dest=/etc/hosts
+else
+ echo "ERROR: Failed to create host file"
+fi
+
+
# shellcheck disable=SC2001
sanitized_commit_message="$(echo "$COMMIT_MESSAGE" | sed -e 's/\(["\$]\)/\\\1/g')"
@@ -42,9 +73,6 @@ if ! retry_cmd 2400 clush -B -S -l root -w "$NODESTRING" \
GPG_KEY_URLS=\"${GPG_KEY_URLS:-}\"
REPOSITORY_URL=\"${REPOSITORY_URL:-}\"
JENKINS_URL=\"${JENKINS_URL:-}\"
- DAOS_STACK_LOCAL_REPO=\"${!DSL_REPO_var}\"
- DAOS_STACK_GROUP_REPO=\"${!DSG_REPO_var:-}\"
- DAOS_STACK_EL_8_APPSTREAM_REPO=\"${!DSA_REPO_var:-}\"
DISTRO=\"$DISTRO\"
DAOS_STACK_RETRY_DELAY_SECONDS=\"$DAOS_STACK_RETRY_DELAY_SECONDS\"
DAOS_STACK_RETRY_COUNT=\"$DAOS_STACK_RETRY_COUNT\"
diff --git a/ci/provisioning/post_provision_config_common.sh b/ci/provisioning/post_provision_config_common.sh
index 06ad80b984a..d5300fd3a8f 100755
--- a/ci/provisioning/post_provision_config_common.sh
+++ b/ci/provisioning/post_provision_config_common.sh
@@ -1,5 +1,10 @@
#!/bin/bash
-
+#
+# Copyright 2021-2023 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
+#
+# SPDX-License-Identifier: BSD-2-Clause-Patent
+#
set -eux
repo_server_pragma=$(echo "$COMMIT_MESSAGE" | sed -ne '/^Repo-servers: */s/.*: *//p')
@@ -24,6 +29,7 @@ if [ -n "$repo_files_pr" ]; then
REPO_FILE_URL="${JENKINS_URL:-https://build.hpdd.intel.com/}job/daos-do/job/repo-files/job/$branch/$build_number/artifact/"
fi
+# shellcheck disable=SC1091
. /etc/os-release
# shellcheck disable=SC2034
EXCLUDE_UPGRADE=mercury,daos,daos-\*
diff --git a/ci/provisioning/post_provision_config_common_functions.sh b/ci/provisioning/post_provision_config_common_functions.sh
index 3edc30025b4..1feddc2e31b 100755
--- a/ci/provisioning/post_provision_config_common_functions.sh
+++ b/ci/provisioning/post_provision_config_common_functions.sh
@@ -1,5 +1,10 @@
#!/bin/bash
-
+#
+# Copyright 2022-2023 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
+#
+# SPDX-License-Identifier: BSD-2-Clause-Patent
+#
set -eux
: "${DAOS_STACK_RETRY_DELAY_SECONDS:=60}"
@@ -8,6 +13,11 @@ set -eux
: "${BUILD_URL:=Not_in_jenkins}"
: "${STAGE_NAME:=Unknown_Stage}"
: "${OPERATIONS_EMAIL:=$USER@localhost}"
+: "${JENKINS_URL:=https://jenkins.example.com}"
+domain1="${JENKINS_URL#*://}"
+mail_domain="${domain1%%/*}"
+: "${EMAIL_DOMAIN:=$mail_domain}"
+: "${DAOS_DEVOPS_EMAIL:="$HOSTNAME"@"$EMAIL_DOMAIN"}"
# functions common to more than one distro specific provisioning
url_to_repo() {
@@ -49,17 +59,6 @@ add_repo() {
fi
}
-add_group_repo() {
- local match="$1"
-
- add_repo "$match" "$DAOS_STACK_GROUP_REPO"
- group_repo_post
-}
-
-add_local_repo() {
- add_repo 'argobots' "$DAOS_STACK_LOCAL_REPO" false
-}
-
disable_gpg_check() {
local url="$1"
@@ -107,9 +106,6 @@ retry_dnf() {
# non-experimental one after trying twice with the experimental one
set_local_repo "${repo_servers[1]}"
dnf -y makecache
- if [ -n "${POWERTOOLSREPO:-}" ]; then
- POWERTOOLSREPO=${POWERTOOLSREPO/${repo_servers[0]}/${repo_servers[1]}}
- fi
fi
sleep "${RETRY_DELAY_SECONDS:-$DAOS_STACK_RETRY_DELAY_SECONDS}"
fi
@@ -117,6 +113,10 @@ retry_dnf() {
if [ "$rc" -ne 0 ]; then
send_mail "Command retry failed in $STAGE_NAME after $attempt attempts using ${repo_server:-nexus} as initial repo server " \
"Command: $*\nAttempts: $attempt\nStatus: $rc"
+ echo "Command retry failed in $STAGE_NAME after $attempt attempts using ${repo_server:-nexus} as initial repo server "
+ echo "Command: $*"
+ echo "Attempts: $attempt"
+ echo "Status: $rc"
fi
return 1
@@ -140,7 +140,7 @@ send_mail() {
echo "Host: $HOSTNAME"
echo ""
echo -e "$message"
- } 2>&1 | mail -s "$subject" -r "$HOSTNAME"@intel.com "$recipients"
+ } 2>&1 | mail -s "$subject" -r "$DAOS_DEVOPS_EMAIL" "$recipients"
set -x
}
@@ -186,6 +186,10 @@ retry_cmd() {
if [ "$rc" -ne 0 ]; then
send_mail "Command retry failed in $STAGE_NAME after $attempt attempts" \
"Command: $*\nAttempts: $attempt\nStatus: $rc"
+ echo "Command retry failed in $STAGE_NAME after $attempt attempts"
+ echo "Command: $*"
+ echo "Attempts: $attempt"
+ echo "Status: $rc"
fi
return 1
}
@@ -217,6 +221,10 @@ timeout_cmd() {
if [ "$rc" -ne 0 ]; then
send_mail "Command timeout failed in $STAGE_NAME after $attempt attempts" \
"Command: $*\nAttempts: $attempt\nStatus: $rc"
+ echo "Command timeout failed in $STAGE_NAME after $attempt attempts"
+ echo "Command: $*"
+ echo "Attempts: $attempt"
+ echo "Status: $rc"
fi
return "$rc"
}
@@ -229,6 +237,7 @@ fetch_repo_config() {
local repo_file="daos_ci-${ID}${VERSION_ID%%.*}-$repo_server"
local repopath="${REPOS_DIR}/$repo_file"
if ! curl -f -o "$repopath" "$REPO_FILE_URL$repo_file.repo"; then
+ echo "Failed to fetch repo file $REPO_FILE_URL$repo_file.repo"
return 1
fi
@@ -274,8 +283,13 @@ set_local_repo() {
# Disable the daos repo so that the Jenkins job repo or a PR-repos*: repo is
# used for daos packages
dnf -y config-manager \
- --disable daos-stack-daos-"${DISTRO_GENERIC}"-"${VERSION_ID%%.*}"-x86_64-stable-local-artifactory
+ --disable daos-stack-daos-"${DISTRO_GENERIC}"-"${VERSION_ID%%.*}"*-stable-local-artifactory
+ else
+ dnf -y config-manager \
+ --enable daos-stack-daos-"${DISTRO_GENERIC}"-"${VERSION_ID%%.*}"*-stable-local-artifactory
fi
+ dnf -y config-manager \
+ --enable daos-stack-deps-"${DISTRO_GENERIC}"-"${VERSION_ID%%.*}"*-stable-local-artifactory
fi
dnf repolist
@@ -290,6 +304,7 @@ update_repos() {
if ! fetch_repo_config "$repo_server"; then
# leave the existing on-image repo config alone if the repo fetch fails
send_mail "Fetch repo file for repo server \"$repo_server\" failed. Continuing on with in-image repos."
+ echo "Fetch repo file for repo server \"$repo_server\" failed. Continuing on with in-image repos."
return 1
fi
done
@@ -306,8 +321,12 @@ update_repos() {
# successfully grabbed them all, so replace the entire $REPOS_DIR
# content with them
+
+ # This is not working right on a second run.
+ # using a quick hack to stop deleting a critical repo
local file
for file in "$REPOS_DIR"/*.repo; do
+ [[ $file == *"artifactory"* ]] && continue
[ -e "$file" ] || break
# empty the file but keep it around so that updates don't recreate it
true > "$file"
@@ -395,6 +414,7 @@ post_provision_config_nodes() {
fi
if ! "${cmd[@]}"; then
dump_repos
+ echo "Failed to upgrade packages"
return 1
fi
@@ -413,35 +433,12 @@ post_provision_config_nodes() {
if ! retry_dnf 360 install "${inst_rpms[@]/%/${DAOS_VERSION:-}}"; then
rc=${PIPESTATUS[0]}
dump_repos
+ echo "Failed to install packages"
return "$rc"
fi
fi
if lspci | grep "ConnectX-6" && ! grep MOFED_VERSION /etc/do-release; then
- # Need this module file
- version="$(rpm -q --qf "%{version}" openmpi)"
- mkdir -p /etc/modulefiles/mpi/
- cat << EOF > /etc/modulefiles/mpi/mlnx_openmpi-x86_64
-#%Module 1.0
-#
-# OpenMPI module for use with 'environment-modules' package:
-#
-conflict mpi
-prepend-path PATH /usr/mpi/gcc/openmpi-$version/bin
-prepend-path LD_LIBRARY_PATH /usr/mpi/gcc/openmpi-$version/lib64
-prepend-path PKG_CONFIG_PATH /usr/mpi/gcc/openmpi-$version/lib64/pkgconfig
-prepend-path MANPATH /usr/mpi/gcc/openmpi-$version/share/man
-setenv MPI_BIN /usr/mpi/gcc/openmpi-$version/bin
-setenv MPI_SYSCONFIG /usr/mpi/gcc/openmpi-$version/etc
-setenv MPI_FORTRAN_MOD_DIR /usr/mpi/gcc/openmpi-$version/lib64
-setenv MPI_INCLUDE /usr/mpi/gcc/openmpi-$version/include
-setenv MPI_LIB /usr/mpi/gcc/openmpi-$version/lib64
-setenv MPI_MAN /usr/mpi/gcc/openmpi-$version/share/man
-setenv MPI_COMPILER openmpi-x86_64
-setenv MPI_SUFFIX _openmpi
-setenv MPI_HOME /usr/mpi/gcc/openmpi-$version
-EOF
-
printf 'MOFED_VERSION=%s\n' "$MLNX_VER_NUM" >> /etc/do-release
fi
diff --git a/ci/provisioning/post_provision_config_nodes.sh b/ci/provisioning/post_provision_config_nodes.sh
index e2f76f0d2da..14ac540d3a4 100644
--- a/ci/provisioning/post_provision_config_nodes.sh
+++ b/ci/provisioning/post_provision_config_nodes.sh
@@ -1,8 +1,21 @@
#!/bin/bash
-
+#
+# Copyright 2020-2023 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
+#
+# SPDX-License-Identifier: BSD-2-Clause-Patent
+#
set -eux
env > /root/last_run-env.txt
+
+# Need this fix earlier
+# For some reason sssd_common must be reinstalled
+# to fix up the restored image.
+if command -v dnf; then
+ bootstrap_dnf
+fi
+
if ! grep ":$MY_UID:" /etc/group; then
groupadd -g "$MY_UID" jenkins
fi
@@ -29,13 +42,14 @@ echo "jenkins ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/jenkins
# /scratch is needed on test nodes
mkdir -p /scratch
-mount "${DAOS_CI_INFO_DIR}" /scratch
+retry_cmd 2400 mount "${DAOS_CI_INFO_DIR}" /scratch
# defined in ci/functional/post_provision_config_nodes_.sh
# and catted to the remote node along with this script
if ! post_provision_config_nodes; then
- rc=${PIPESTATUS[0]}
- exit "$rc"
+ rc=${PIPESTATUS[0]}
+ echo "post_provision_config_nodes failed with rc=$rc"
+ exit "$rc"
fi
# Workaround to enable binding devices back to nvme or vfio-pci after they are unbound from vfio-pci
@@ -47,6 +61,80 @@ if lspci | grep -i nvme; then
daos_server nvme reset && rmmod vfio_pci && modprobe vfio_pci
fi
+# FOR now limit to 2 devices per CPU NUMA node
+: "${DAOS_CI_NVME_NUMA_LIMIT:=2}"
+
+function mount_nvme_drive {
+ local drive="$1"
+ file_system=$(file -sL "/dev/$drive")
+ if [[ "$file_system" != *"ext4 filesystem"* ]]; then
+ yes | mkfs -t ext4 "/dev/$drive"
+ fi
+ mkdir -p "/mnt/$drive"
+ mount "/dev/$drive" "/mnt/$drive"
+}
+
+
+nvme_class="/sys/class/nvme/"
+function nvme_limit {
+ set +x
+ if [ ! -d /sys/class/nvme ]; then
+ echo "No NVMe devices found"
+ return
+ fi
+ local numa0_devices=()
+ local numa1_devices=()
+    for nvme_path in "$nvme_class"*; do
+        [ -e "${nvme_path}/numa_node" ] || continue; nvme="$(basename "$nvme_path")n1"
+ numa_node="$(cat "${nvme_path}/numa_node")"
+ if mount | grep "$nvme"; then
+ continue
+ fi
+ if [ "$numa_node" -eq 0 ]; then
+ numa0_devices+=("$nvme")
+ else
+ numa1_devices+=("$nvme")
+ fi
+ done
+ echo numa0 "${numa0_devices[@]}"
+ echo numa1 "${numa1_devices[@]}"
+ if [ "${#numa0_devices[@]}" -gt 0 ] && [ "${#numa1_devices[@]}" -gt 0 ]; then
+ echo "balanced NVMe configuration possible"
+ nvme_count=0
+ for nvme in "${numa0_devices[@]}"; do
+ if [ "$nvme_count" -ge "${DAOS_CI_NVME_NUMA_LIMIT}" ]; then
+ mount_nvme_drive "$nvme"
+ else
+ ((nvme_count++)) || true
+ fi
+ done
+ nvme_count=0
+ for nvme in "${numa1_devices[@]}"; do
+ if [ "$nvme_count" -ge "${DAOS_CI_NVME_NUMA_LIMIT}" ]; then
+ mount_nvme_drive "$nvme"
+ else
+ ((nvme_count++)) || true
+ fi
+ done
+ else
+ echo "balanced NVMe configuration not possible"
+        ((needed = "$DAOS_CI_NVME_NUMA_LIMIT" + 1)) || true
+        nvme_count=0
+        for nvme in "${numa0_devices[@]}" "${numa1_devices[@]}"; do
+            if [ "$nvme_count" -ge "$needed" ]; then
+ mount_nvme_drive "$nvme"
+ else
+ ((nvme_count++)) || true
+ fi
+ done
+ fi
+ set -x
+}
+
+# Force only the desired number of NVMe devices to be seen by DAOS tests
+# by mounting the extra ones.
+nvme_limit
+
systemctl enable nfs-server.service
systemctl start nfs-server.service
sync
diff --git a/ci/provisioning/post_provision_config_nodes_EL_8.sh b/ci/provisioning/post_provision_config_nodes_EL_8.sh
index 6451bf332a9..27b35fb4b15 100644
--- a/ci/provisioning/post_provision_config_nodes_EL_8.sh
+++ b/ci/provisioning/post_provision_config_nodes_EL_8.sh
@@ -6,8 +6,18 @@
# SPDX-License-Identifier: BSD-2-Clause-Patent
bootstrap_dnf() {
+set +e
systemctl enable postfix.service
systemctl start postfix.service
+ postfix_start_exit=$?
+ if [ $postfix_start_exit -ne 0 ]; then
+ echo "WARNING: Postfix not started: $postfix_start_exit"
+ systemctl status postfix.service
+ journalctl -xe -u postfix.service
+ fi
+set -e
+ # Seems to be needed to fix some issues.
+ dnf -y reinstall sssd-common
}
group_repo_post() {
diff --git a/ci/provisioning/post_provision_config_nodes_LEAP_15.sh b/ci/provisioning/post_provision_config_nodes_LEAP_15.sh
index 2c7c66da133..2e4315f348c 100755
--- a/ci/provisioning/post_provision_config_nodes_LEAP_15.sh
+++ b/ci/provisioning/post_provision_config_nodes_LEAP_15.sh
@@ -1,8 +1,15 @@
#!/bin/bash
+#
+# Copyright 2021-2024 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
+#
+# SPDX-License-Identifier: BSD-2-Clause-Patent
bootstrap_dnf() {
rm -rf "$REPOS_DIR"
ln -s ../zypp/repos.d "$REPOS_DIR"
+ dnf -y remove lua-lmod
+ dnf -y install lua-lmod '--repo=*lua*' --repo '*network-cluster*'
}
group_repo_post() {
diff --git a/ci/provisioning/post_provision_config_nodes_UBUNTU_20_04.sh b/ci/provisioning/post_provision_config_nodes_UBUNTU_20_04.sh
index 484a678a0a8..0d9c3b618b8 100755
--- a/ci/provisioning/post_provision_config_nodes_UBUNTU_20_04.sh
+++ b/ci/provisioning/post_provision_config_nodes_UBUNTU_20_04.sh
@@ -1,4 +1,10 @@
#!/bin/bash
+#
+# Copyright 2020-2022 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
+#
+# SPDX-License-Identifier: BSD-2-Clause-Patent
+#
post_provision_config_nodes() {
# should we port this to Ubuntu or just consider $CONFIG_POWER_ONLY dead?
@@ -12,15 +18,7 @@ post_provision_config_nodes() {
# slurm-example-configs slurmctld slurm-slurmmd
#fi
codename=$(lsb_release -s -c)
- if [ -n "$DAOS_STACK_GROUP_REPO" ]; then
- add-apt-repository \
- "deb $REPOSITORY_URL/$DAOS_STACK_GROUP_REPO $codename"
- fi
-
- if [ -n "$DAOS_STACK_LOCAL_REPO" ]; then
- echo "deb [trusted=yes] $REPOSITORY_URL/$DAOS_STACK_LOCAL_REPO $codename main" >> /etc/apt/sources.list
- fi
-
+ echo "$codename"
if [ -n "$INST_REPOS" ]; then
for repo in $INST_REPOS; do
branch="master"
diff --git a/ci/storage/test_main_storage_prepare_node.sh b/ci/storage/test_main_storage_prepare_node.sh
index 0be5a33167c..f87333327b8 100755
--- a/ci/storage/test_main_storage_prepare_node.sh
+++ b/ci/storage/test_main_storage_prepare_node.sh
@@ -1,5 +1,10 @@
#!/bin/bash
-
+#
+# Copyright 2021-2023 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
+#
+# SPDX-License-Identifier: BSD-2-Clause-Patent
+#
set -eux
: "${STORAGE_PREP_OPT:=}"
@@ -21,12 +26,16 @@ else
;;
esac
dnf -y config-manager \
- --disable daos-stack-daos-"${DISTRO_GENERIC}"-"${VERSION_ID%%.*}"-x86_64-stable-local-artifactory
+ --disable daos-stack-daos-"${DISTRO_GENERIC}"-"${VERSION_ID%%.*}"*-stable-local-artifactory
fi
+# this needs to be made more generic in the future.
+dnf -y config-manager \
+ --enable daos-stack-deps-"${DISTRO_GENERIC}"-"${VERSION_ID%%.*}"*-stable-local-artifactory
+
dnf -y install ipmctl daos-server"$DAOS_PKG_VERSION"
-lspci | grep Mellanox
-lscpu | grep Virtualization
+lspci | grep Mellanox || true
+lscpu | grep Virtualization || true
lscpu | grep -E -e Socket -e NUMA
if command -v opainfo; then opainfo || true; fi
@@ -51,7 +60,12 @@ if ipmctl show -dimm; then
fi
fi
else
- if ip addr show ib1; then
+ counter=0
+ for ib in /sys/class/net/ib*; do
+ ((counter++)) || true
+ ip addr show "$ib"
+ done
+    if [ "$counter" -ge 2 ]; then
# All of our CI nodes with two ib adapters should have PMEM DIMMs
echo 'No PMEM DIMM devices found on CI node!'
exit 1
diff --git a/ci/unit/test_main.sh b/ci/unit/test_main.sh
index 4c5922013fe..5ad364b7d16 100755
--- a/ci/unit/test_main.sh
+++ b/ci/unit/test_main.sh
@@ -1,5 +1,10 @@
#!/bin/bash
-
+#
+# Copyright 2020-2023 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
+#
+# SPDX-License-Identifier: BSD-2-Clause-Patent
+#
# This is the script used for running unit testing
# run_utest.py and run_utest.py with memcheck stages on the CI
set -uex
@@ -30,8 +35,6 @@ if $USE_BULLSEYE; then
rm -rf bullseye
mkdir -p bullseye
tar -C bullseye --strip-components=1 -xf bullseye.tar
-else
- BULLSEYE=
fi
NODE=${NODELIST%%,*}
@@ -43,6 +46,6 @@ rsync -rlpt -z -e "ssh $SSH_KEY_ARGS" . jenkins@"$NODE":build/
ssh -tt "$SSH_KEY_ARGS" jenkins@"$NODE" "HOSTNAME=$HOSTNAME \
HOSTPWD=$PWD \
WITH_VALGRIND=$WITH_VALGRIND \
- BULLSEYE=$BULLSEYE \
+ HTTPS_PROXY=\"${HTTPS_PROXY:-}\" \
BDEV_TEST=$BDEV_TEST \
./build/ci/unit/test_main_node.sh"
diff --git a/ci/unit/test_main_node.sh b/ci/unit/test_main_node.sh
index a14b1fc3880..65f6e859eba 100755
--- a/ci/unit/test_main_node.sh
+++ b/ci/unit/test_main_node.sh
@@ -1,5 +1,10 @@
#!/bin/bash
-
+#
+# Copyright 2020-2023 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
+#
+# SPDX-License-Identifier: BSD-2-Clause-Patent
+#
# This is a script to be run by the ci/unit/test_main.sh to run a test
# on a CI node.
@@ -21,6 +26,7 @@ sudo mount --bind build "${SL_SRC_DIR}"
log_prefix="unit_test"
+: "${BULLSEYE:=}"
if [ -n "$BULLSEYE" ]; then
pushd "${SL_SRC_DIR}/bullseye"
set +x
@@ -47,6 +53,7 @@ sudo ln -sf "$SL_PREFIX/share/spdk/scripts/common.sh" /usr/share/spdk/scripts/
sudo ln -s "$SL_PREFIX/include" /usr/share/spdk/include
# set CMOCKA envs here
+: "${WITH_VALGRIND:=}"
export CMOCKA_MESSAGE_OUTPUT=xml
if [[ -z ${WITH_VALGRIND} ]]; then
export CMOCKA_XML_FILE="${SL_SRC_DIR}/test_results/%g.xml"
@@ -86,5 +93,5 @@ pip install --requirement requirements-utest.txt
pip install /opt/daos/lib/daos/python/
-utils/run_utest.py $RUN_TEST_VALGRIND --no-fail-on-error $VDB_ARG --log_dir="$test_log_dir" \
- $SUDO_ARG
+HTTPS_PROXY="${HTTPS_PROXY:-}" utils/run_utest.py $RUN_TEST_VALGRIND \
+ --no-fail-on-error $VDB_ARG --log_dir="$test_log_dir" $SUDO_ARG
diff --git a/ftest.sh b/ftest.sh
index 0140c7ed098..03947a83891 100755
--- a/ftest.sh
+++ b/ftest.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# /*
# * (C) Copyright 2016-2022 Intel Corporation.
-# * (C) Copyright 2025 Hewlett Packard Enterprise Development LP
+# * Copyright 2025 Hewlett Packard Enterprise Development LP
# *
# * SPDX-License-Identifier: BSD-2-Clause-Patent
# */
@@ -113,6 +113,7 @@ args="${1:-quick}"
shift || true
args+=" $*"
+_HTTPS_PROXY=${HTTPS_PROXY:-}
# shellcheck disable=SC2029
# shellcheck disable=SC2086
if ! ssh -A $SSH_KEY_ARGS ${REMOTE_ACCT:-jenkins}@"${nodes[0]}" \
@@ -128,6 +129,7 @@ if ! ssh -A $SSH_KEY_ARGS ${REMOTE_ACCT:-jenkins}@"${nodes[0]}" \
LAUNCH_OPT_ARGS=\"$LAUNCH_OPT_ARGS\"
WITH_VALGRIND=\"$WITH_VALGRIND\"
STAGE_NAME=\"$STAGE_NAME\"
+ HTTPS_PROXY=\"$_HTTPS_PROXY\"
$(sed -e '1,/^$/d' "$SCRIPT_LOC"/main.sh)"; then
rc=${PIPESTATUS[0]}
if ${SETUP_ONLY:-false}; then
diff --git a/site_scons/env_modules.py b/site_scons/env_modules.py
index b7a20bf769b..f6e4c58dd76 100644
--- a/site_scons/env_modules.py
+++ b/site_scons/env_modules.py
@@ -1,4 +1,5 @@
# Copyright 2019-2023 Intel Corporation
+# Copyright 2025 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -34,8 +35,7 @@ class _env_module(): # pylint: disable=invalid-name
env_module_init = None
_mpi_map = {"mpich": ['mpi/mpich-x86_64', 'gnu-mpich'],
- "openmpi": ['mpi/mlnx_openmpi-x86_64', 'mpi/openmpi3-x86_64',
- 'gnu-openmpi', 'mpi/openmpi-x86_64']}
+ "openmpi": ['mpi/openmpi3-x86_64', 'gnu-openmpi', 'mpi/openmpi-x86_64']}
def __init__(self, silent=False):
"""Load Modules for initializing environment variables"""
diff --git a/src/tests/ftest/dfuse/bash.py b/src/tests/ftest/dfuse/bash.py
index 964d0295954..eb5897d1aa9 100644
--- a/src/tests/ftest/dfuse/bash.py
+++ b/src/tests/ftest/dfuse/bash.py
@@ -1,5 +1,6 @@
"""
(C) Copyright 2020-2024 Intel Corporation.
+ Copyright 2025 Hewlett Packard Enterprise Development LP
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
@@ -112,7 +113,7 @@ def run_bashcmd(self, il_lib=None, compatible_mode=False):
# f'more {fuse_root_dir}/src.c', # more hangs over ssh somehow
f"dos2unix {fuse_root_dir}/src.c",
f"gcc -o {fuse_root_dir}/output {fuse_root_dir}/src.c",
- f"valgrind size {fuse_root_dir}/output",
+ f'export DEBUGINFOD_URLS=""; valgrind size {fuse_root_dir}/output',
f"readelf -s {fuse_root_dir}/output",
f"strip -s {fuse_root_dir}/output",
f"g++ -o {fuse_root_dir}/output {fuse_root_dir}/src.c",
@@ -136,8 +137,16 @@ def run_bashcmd(self, il_lib=None, compatible_mode=False):
'fio --readwrite=randwrite --name=test --size="2M" --directory '
f'{fuse_root_dir}/ --bs=1M --numjobs="1" --ioengine=libaio --iodepth=16'
'--group_reporting --exitall_on_error --continue_on_error=none',
- f'curl "https://www.google.com" -o {fuse_root_dir}/download.html',
]
+ # If set, use the HTTPS_PROXY for curl command
+ https_proxy = os.environ.get('HTTPS_PROXY')
+ if https_proxy:
+ proxy_option = f'--proxy "{https_proxy}"'
+ else:
+ proxy_option = ''
+ cmd = f'curl "https://www.google.com" -o {fuse_root_dir}/download.html {proxy_option}'
+ commands.append(cmd)
+
for cmd in commands:
self.log_step(f'Running command: {cmd}')
result = run_remote(self.log, dfuse_hosts, env_str + cmd)
diff --git a/src/tests/ftest/process_core_files.py b/src/tests/ftest/process_core_files.py
index 47fbf7a4ef4..60d94334a03 100644
--- a/src/tests/ftest/process_core_files.py
+++ b/src/tests/ftest/process_core_files.py
@@ -1,5 +1,6 @@
"""
(C) Copyright 2022-2024 Intel Corporation.
+ Copyright 2025 Hewlett Packard Enterprise Development LP
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
@@ -264,6 +265,8 @@ def install_debuginfo_packages(self):
cmds = []
# -debuginfo packages that don't get installed with debuginfo-install
+ self.log.debug("Installing -debuginfo packages that don't get installed",
+ " with debuginfo-install")
for pkg in ['systemd', 'ndctl', 'mercury', 'hdf5',
'libabt0' if "suse" in self.distro_info.name.lower() else "argobots",
'libfabric', 'hdf5-vol-daos', 'hdf5-vol-daos-mpich',
@@ -279,6 +282,7 @@ def install_debuginfo_packages(self):
cmds.append(["sudo", "rm", "-f", path])
if self.USE_DEBUGINFO_INSTALL:
+ self.log.debug("self.USE_DEBUGINFO_INSTALL")
dnf_args = ["--nobest", "--exclude", "ompi-debuginfo"]
if os.getenv("TEST_RPMS", 'false') == 'true':
if "suse" in self.distro_info.name.lower():
@@ -311,9 +315,11 @@ def install_debuginfo_packages(self):
# yum_base.processTransaction(rpmDisplay=yum.rpmtrans.NoOutputCallBack())
# Now install a few pkgs that debuginfo-install wouldn't
+ self.log.debug("Now install a few pkgs that debuginfo-install wouldn't")
cmd = ["sudo", "dnf", "-y"]
if self.is_el() or "suse" in self.distro_info.name.lower():
cmd.append("--enablerepo=*debug*")
+ cmd.append("--disablerepo='epel-*'")
cmd.append("install")
for pkg in install_pkgs:
try:
@@ -325,7 +331,7 @@ def install_debuginfo_packages(self):
retry = False
for cmd in cmds:
- if not run_local(self.log, " ".join(cmd)).passed:
+ if not run_local(self.log, " ".join(cmd), True, 120).passed:
# got an error, so abort this list of commands and re-run
# it with a dnf clean, makecache first
retry = True
@@ -335,11 +341,13 @@ def install_debuginfo_packages(self):
cmd_prefix = ["sudo", "dnf"]
if self.is_el() or "suse" in self.distro_info.name.lower():
cmd_prefix.append("--enablerepo=*debug*")
+ cmd_prefix.append("--disablerepo='epel-*'")
cmds.insert(0, cmd_prefix + ["clean", "all"])
cmds.insert(1, cmd_prefix + ["makecache"])
for cmd in cmds:
if not run_local(self.log, " ".join(cmd)).passed:
break
+ self.log.info("Installing debuginfo packages for stacktrace creation - DONE")
def is_el(self):
"""Determine if the distro is EL based.
diff --git a/src/tests/ftest/scripts/main.sh b/src/tests/ftest/scripts/main.sh
index 706d622479b..307198717cb 100755
--- a/src/tests/ftest/scripts/main.sh
+++ b/src/tests/ftest/scripts/main.sh
@@ -2,6 +2,7 @@
# shellcheck disable=SC1113
# /*
# * (C) Copyright 2016-2024 Intel Corporation.
+# * Copyright 2025 Hewlett Packard Enterprise Development LP
# *
# * SPDX-License-Identifier: BSD-2-Clause-Patent
# */
@@ -89,6 +90,10 @@ export TEST_RPMS
export DAOS_BASE
export DAOS_TEST_APP_SRC=${DAOS_TEST_APP_SRC:-"/scratch/daos_test/apps"}
export DAOS_TEST_APP_DIR=${DAOS_TEST_APP_DIR:-"${DAOS_TEST_SHARED_DIR}/daos_test/apps"}
+if [ -n "$HTTPS_PROXY" ]; then
+ # shellcheck disable=SC2154
+ export HTTPS_PROXY="${HTTPS_PROXY:-""}"
+fi
launch_node_args="-ts ${TEST_NODES}"
if [ "${STAGE_NAME}" == "Functional Hardware 24" ]; then
diff --git a/utils/rpms/daos.spec b/utils/rpms/daos.spec
index 95b575aad1a..86ab0fe5b40 100644
--- a/utils/rpms/daos.spec
+++ b/utils/rpms/daos.spec
@@ -23,7 +23,7 @@
Name: daos
Version: 2.6.3
-Release: 7%{?relval}%{?dist}
+Release: 8%{?relval}%{?dist}
Summary: DAOS Storage Engine
License: BSD-2-Clause-Patent
@@ -232,11 +232,12 @@ Requires: lbzip2
Requires: attr
Requires: ior
Requires: go >= 1.21
+# Require lmod fix for https://github.com/TACC/Lmod/issues/687
%if (0%{?suse_version} >= 1315)
-Requires: lua-lmod
+Requires: lua-lmod >= 8.7.36
Requires: libcapstone-devel
%else
-Requires: Lmod
+Requires: Lmod >= 8.7.36
Requires: capstone-devel
%endif
%if (0%{?rhel} >= 8)
@@ -263,6 +264,7 @@ Requires: hdf5-%{openmpi}-tests
Requires: hdf5-vol-daos-%{openmpi}-tests
Requires: MACSio-%{openmpi}
Requires: simul-%{openmpi}
+Requires: %{openmpi}
%description client-tests-openmpi
This is the package needed to run the DAOS client test suite openmpi tools
@@ -273,14 +275,14 @@ BuildArch: noarch
Requires: %{name}-client-tests%{?_isa} = %{version}-%{release}
Requires: mpifileutils-mpich
Requires: testmpio
-Requires: mpich
+Requires: mpich = 4.1~a1
Requires: ior
Requires: hdf5-mpich-tests
Requires: hdf5-vol-daos-mpich-tests
Requires: MACSio-mpich
Requires: simul-mpich
Requires: romio-tests
-Requires: python3-mpi4py-tests
+Requires: python3-mpi4py-tests >= 3.1.6
%description client-tests-mpich
This is the package needed to run the DAOS client test suite mpich tools
@@ -630,6 +632,13 @@ getent passwd daos_agent >/dev/null || useradd -s /sbin/nologin -r -g daos_agent
# No files in a shim package
%changelog
+* Mon May 12 2025 Tomasz Gromadzki 2.6.3-8
+- Bump lua-lmod version to >=8.7.36
+- Bump lmod version to >=8.7.36
+- Bump mpich version to 4.1~a1
+- Bump python3-mpi4py-tests version to >= 3.1.6
+- Add openmpi requirement for daos-client-tests on Leap.
+
* Fri Apr 11 2025 Jeff Olivier 2.6.3-7
- Remove raft as external dependency
diff --git a/utils/run_utest.py b/utils/run_utest.py
index 1835f230e36..a555e9f8203 100755
--- a/utils/run_utest.py
+++ b/utils/run_utest.py
@@ -1,6 +1,8 @@
#!/usr/bin/env python3
"""
- (C) Copyright 2023-2024 Intel Corporation.
+ Copyright 2023-2024 Intel Corporation.
+ Copyright 2025 Hewlett Packard Enterprise Development LP
+ All rights reserved.
SPDX-License-Identifier: BSD-2-Clause-Patent
@@ -357,6 +359,11 @@ def __init__(self, suite, config, path_info, args):
if self.needs_aio():
self.env["VOS_BDEV_CLASS"] = "AIO"
+ # If set, retain the HTTPS_PROXY for valgrind
+ http_proxy = os.environ.get('HTTPS_PROXY')
+ if http_proxy:
+ self.env['HTTPS_PROXY'] = http_proxy
+
def log_dir(self):
"""Return the log directory"""
return os.path.join(self.path_info["LOG_DIR"], self.name)