diff --git a/Jenkinsfile b/Jenkinsfile index 1c0438e4635..25792cb5199 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -189,7 +189,7 @@ pipeline { // 'stages of this run (i.e. auto, auto_md_on_ssd, auto:-3DNAND, ' + // '0000:81:00.0, etc.). Does not apply to MD on SSD stages.') string(name: 'BuildType', - defaultValue: '', + defaultValue: 'release', description: 'Type of build. Passed to scons as BUILD_TYPE. (I.e. dev, release, debug, etc.). ' + 'Defaults to release on an RC or dev otherwise.') string(name: 'TestRepeat', @@ -205,12 +205,15 @@ pipeline { 'stages. Specifies the default provider to use the daos_server ' + 'config file when running functional tests (the launch.py ' + '--provider argument; i.e. "ucx+dc_x", "ofi+verbs", "ofi+tcp")') - booleanParam(name: 'CI_SKIP_CANCEL_PREV_BUILD', + booleanParam(name: 'CI_CANCEL_PREV_BUILD_SKIP', defaultValue: false, description: 'Do not cancel previous build.') booleanParam(name: 'CI_BUILD_PACKAGES_ONLY', defaultValue: false, description: 'Only build RPM and DEB packages, Skip unit tests.') + string(name: 'CI_SCONS_ARGS', + defaultValue: '', + description: 'Arguments for scons when building DAOS') string(name: 'CI_RPM_TEST_VERSION', defaultValue: '', description: 'Package version to use instead of building. 
example: 1.3.103-1, 1.2-2') @@ -273,6 +276,9 @@ pipeline { booleanParam(name: 'CI_TEST_LEAP15_RPMs', defaultValue: true, description: 'Run the Test RPMs on Leap 15 test stage') + booleanParam(name: 'CI_FUNCTIONAL_TEST_SKIP', + defaultValue: false, + description: 'Skip all functional test stages (Test)') booleanParam(name: 'CI_MORE_FUNCTIONAL_PR_TESTS', defaultValue: false, description: 'Enable more distros for functional CI tests') @@ -293,6 +299,9 @@ pipeline { defaultValue: false, description: 'Run the Functional on Ubuntu 20.04 test stage' + ' Requires CI_MORE_FUNCTIONAL_PR_TESTS') + booleanParam(name: 'CI_FUNCTIONAL_HARDWARE_TEST_SKIP', + defaultValue: false, + description: 'Skip Functional Hardware (Test Hardware) stage') booleanParam(name: 'CI_medium_TEST', defaultValue: true, description: 'Run the Functional Hardware Medium test stage') @@ -333,7 +342,7 @@ pipeline { defaultValue: 'ci_nvme5', description: 'Label to use for the Functional Hardware Medium (MD on SSD) stages') string(name: 'FUNCTIONAL_HARDWARE_MEDIUM_VERBS_PROVIDER_LABEL', - defaultValue: 'ci_nvme5', + defaultValue: 'ci_ofed5', description: 'Label to use for 5 node Functional Hardware Medium Verbs Provider (MD on SSD) stages') string(name: 'FUNCTIONAL_HARDWARE_MEDIUM_VMD_LABEL', defaultValue: 'ci_vmd5', @@ -407,7 +416,7 @@ pipeline { stage('Cancel Previous Builds') { when { beforeAgent true - expression { !paramsValue('CI_SKIP_CANCEL_PREV_BUILD', false) && !skipStage() } + expression { !paramsValue('CI_CANCEL_PREV_BUILD_SKIP', false) && !skipStage() } } steps { cancelPreviousBuilds() @@ -613,7 +622,7 @@ pipeline { } } } - stage('Build on EL 8') { + stage('Build on EL 8.8') { when { beforeAgent true expression { !params.CI_el8_NOBUILD && !skipStage() } @@ -834,7 +843,9 @@ pipeline { stage('Test') { when { beforeAgent true - expression { !skipStage() } + //expression { !paramsValue('CI_FUNCTIONAL_TEST_SKIP', false) && !skipStage() } + // Above not working, always skipping functional VM tests. 
+ expression { !paramsValue('CI_FUNCTIONAL_TEST_SKIP', false) } } parallel { stage('Functional on EL 8.8 with Valgrind') { @@ -1098,7 +1109,7 @@ pipeline { stage('Test Hardware') { when { beforeAgent true - expression { !skipStage() } + expression { !paramsValue('CI_FUNCTIONAL_HARDWARE_TEST_SKIP', false) && !skipStage() } } steps { script { diff --git a/ci/functional/test_main.sh b/ci/functional/test_main.sh index 56fe36f8571..3e0eed2e5c7 100755 --- a/ci/functional/test_main.sh +++ b/ci/functional/test_main.sh @@ -1,9 +1,10 @@ #!/bin/bash # -# Copyright 2020-2023 Intel Corporation. +# Copyright 2020-2024 Intel Corporation. +# Copyright 2025 Hewlett Packard Enterprise Development LP # # SPDX-License-Identifier: BSD-2-Clause-Patent - +# set -eux if [ -z "$TEST_TAG" ]; then @@ -47,6 +48,8 @@ test_cluster() { NODELIST=${tnodes} \ BUILD_URL=\"${BUILD_URL:-Unknown in GHA}\" \ STAGE_NAME=\"$STAGE_NAME\" \ + JENKINS_URL=\"${JENKINS_URL:-}\" \ + DAOS_DEVOPS_EMAIL=\"${DAOS_DEVOPS_EMAIL:-}\" \ $(cat ci/functional/test_main_prep_node.sh)" } @@ -58,7 +61,11 @@ if ! test_cluster; then if cluster_reboot; then if test_cluster; then hardware_ok=true + else + echo "Hardware test failed again after reboot" fi + else + echo "Cluster reboot failed" fi else hardware_ok=true @@ -99,6 +106,7 @@ if "$hardware_ok"; then FTEST_ARG=\"${FTEST_ARG:-}\" \ WITH_VALGRIND=\"${WITH_VALGRIND:-}\" \ STAGE_NAME=\"$STAGE_NAME\" \ + HTTPS_PROXY=\"${HTTPS_PROXY:-}\" \ $(cat ci/functional/test_main_node.sh)" else ./ftest.sh "$test_tag" "$tnodes" "$FTEST_ARG" diff --git a/ci/functional/test_main_node.sh b/ci/functional/test_main_node.sh index f86ad21f441..1a0b2e02794 100755 --- a/ci/functional/test_main_node.sh +++ b/ci/functional/test_main_node.sh @@ -1,5 +1,10 @@ #!/bin/bash - +# +# Copyright 2020-2022 Intel Corporation. 
+# Copyright 2025 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# set -eux DAOS_TEST_SHARED_DIR=$(mktemp -d -p /mnt/share/) @@ -11,4 +16,5 @@ export REMOTE_ACCT=jenkins export WITH_VALGRIND="$WITH_VALGRIND" export STAGE_NAME="$STAGE_NAME" -/usr/lib/daos/TESTING/ftest/ftest.sh "$TEST_TAG" "$TNODES" "$FTEST_ARG" +HTTPS_PROXY="${HTTPS_PROXY:-}" /usr/lib/daos/TESTING/ftest/ftest.sh \ + "$TEST_TAG" "$TNODES" "$FTEST_ARG" diff --git a/ci/functional/test_main_prep_node.sh b/ci/functional/test_main_prep_node.sh index 32993d114e4..d096737d24d 100755 --- a/ci/functional/test_main_prep_node.sh +++ b/ci/functional/test_main_prep_node.sh @@ -1,6 +1,7 @@ #!/bin/bash # # Copyright 2020-2023 Intel Corporation. +# Copyright 2025 Hewlett Packard Enterprise Development LP # # SPDX-License-Identifier: BSD-2-Clause-Patent # @@ -10,6 +11,11 @@ set -eux : "${OPERATIONS_EMAIL:=}" : "${STAGE_NAME:=Unknown}" : "${BUILD_URL:=Unknown}" +: "${JENKINS_URL:=https://jenkins.example.com}" +domain1="${JENKINS_URL#https://}" +mail_domain="${domain1%%/*}" +: "${EMAIL_DOMAIN:=$mail_domain}" +: "${DAOS_DEVOPS_EMAIL:="$HOSTNAME"@"$EMAIL_DOMAIN"}" result=0 mail_message='' @@ -22,6 +28,7 @@ testfails=0 myhost="${HOSTNAME%%.*}" : "${NODELIST:=$myhost}" mynodenum=0 + # in order for junit test names to be consistent between test runs # Need to use the position number of the host in the node list for # the junit report. @@ -42,7 +49,7 @@ function do_mail { # shellcheck disable=SC2059 build_info="BUILD_URL = $BUILD_URL$nl STAGE = $STAGE_NAME$nl$nl" mail -s "Hardware check failed after reboot!" \ - -r "$HOSTNAME"@intel.com "$OPERATIONS_EMAIL" \ + -r "$DAOS_DEVOPS_EMAIL" "$OPERATIONS_EMAIL" \ <<< "$build_info$mail_message" set -x } @@ -63,17 +70,27 @@ set +x while IFS= read -r line; do ((opa_count++)) || true done < <(lspci -mm | grep "Omni-Path") +echo "Found $opa_count Omni-Path adapters." 
if [ "$opa_count" -gt 0 ]; then ((ib_count=opa_count)) || true fi +last_pci_bus='' while IFS= read -r line; do + pci_bus="${line%.*}" + if [ "$pci_bus" == "$last_pci_bus" ]; then + # We only use one interface on a dual interface HBA + # Fortunately lspci appears to group them together + continue + fi + last_pci_bus="$pci_bus" mlnx_type="${line##*ConnectX-}" mlnx_type="${mlnx_type%]*}" if [ "$mlnx_type" -ge 6 ]; then ((hdr_count++)) || true fi -done < <(lspci -mm | grep "ConnectX") +done < <(lspci -mm | grep "ConnectX" | grep -i "infiniband" ) +echo "Found $hdr_count Mellanox HDR adapters." if [ "$hdr_count" -gt 0 ]; then ((ib_count=hdr_count)) || true fi @@ -85,33 +102,66 @@ if [ "$hdr_count" -gt 0 ] && [ "$opa_count" -gt 0 ]; then $hdr_count Mellanox HDR ConnectX adapters, and $opa_count Omni-Path adapters. -The Onmi-Path adapters will not be used." +The Omni-Path adapters will not be used." mail_message+="${nl}${ib_message}${nl}" echo "$ib_message" fi set -x +# Wait for at least the expected IB devices to show up. +# in the case of dual port HBAs, not all IB devices will +# show up. +# For some unknown reason, sometimes IB devices will not show up +# except in the lspci output unless an ip link set up command for +# at least one device that should be present shows up. +good_ibs=() function do_wait_for_ib { + local ib_devs=("$@") + local working_ib ib_timeout=300 # 5 minutes retry_wait=10 # seconds timeout=$((SECONDS + ib_timeout)) while [ "$SECONDS" -lt "$timeout" ]; do - ip link set up "$1" || true - sleep 2 - if ip addr show "$1" | grep "inet "; then - return 0 - fi - sleep ${retry_wait} + for ib_dev in "${ib_devs[@]}"; do + ip link set up "$ib_dev" || true + done + sleep 2 + working_ib=0 + good_ibs=() + for ib_dev in "${ib_devs[@]}"; do + if ip addr show "$ib_dev" | grep "inet "; then + good_ibs+=("$ib_dev") + ((working_ib++)) || true + fi + # With udev rules, the ib adapter name has the numa + # affinity in its name. 
On a single adapter system + # we do not have an easy way to know what that + # adapter name is in the case of a udev rule, so we have to try + # both possible names. + if [ "$working_ib" -ge "$ib_count" ]; then + return 0 + fi + done + sleep ${retry_wait} done return 1 } -# First check for infinband devices -for i in $(seq 0 $((ib_count-1))); do - ((testruns++)) || true - testcases+=" ${nl}" - iface="ib$i" - if do_wait_for_ib "$iface"; then +# Migrating to using udev rules for network devices +if [ -e /etc/udev/rules.d/70-persistent-ipoib.rules ]; then + ib_list=('ib_cpu0_0' 'ib_cpu1_0') +else + ib_list=('ib0') + if [ "$ib_count" -gt 1 ]; then + ib_list+=('ib1') + fi +fi + +function check_ib_devices { + local ib_devs=("$@") + for iface in "${ib_devs[@]}"; do + ((testruns++)) || true + testcases+=" ${nl}" set +x if ! ip addr show "$iface" | grep "inet "; then ib_message="$({ @@ -136,31 +186,31 @@ for i in $(seq 0 $((ib_count-1))); do cat "/sys/class/net/$iface/device/numa_node" fi set -x + testcases+=" $nl" + done +} + + +# First check for InfiniBand devices +if [ "$ib_count" -gt 0 ]; then + if do_wait_for_ib "${ib_list[@]}"; then + echo "Found at least $ib_count working devices in" "${ib_list[@]}" + # All good, generate Junit report + check_ib_devices "${good_ibs[@]}" else - ib_message="Failed to bring up interface $iface on $HOSTNAME. " - mail_message+="${nl}${ib_message}${nl}" - echo "$ib_message" - ((testfails++)) || true - testcases+=" - - $nl" - result=1 + # Something wrong, generate Junit report and update e-mail + check_ib_devices "${ib_list[@]}" fi - testcases+=" $nl" -done +fi # having -x just makes the console log harder to read. -set +x -if [ -e /sys/class/net/ib1 ]; then - # now check for pmem & NVMe drives when ib1 is present. +# set +x +if [ "$ib_count" -ge 2 ]; then + # now check for pmem & NVMe drives when multiple ib are present. 
# ipmctl show -dimm should show an even number of drives, all healthy - dimm_count=0 - while IFS= read -r line; do - if [[ "$line" != *"| Healthy "* ]]; then continue; fi - ((dimm_count++)) || true - done < <(ipmctl show -dimm) + dimm_count=$(ipmctl show -dimm | grep Healthy -c) if [ "$dimm_count" -eq 0 ] || [ $((dimm_count%2)) -ne 0 ]; then - # Not fatal, the PMEM DIMM should be replaced when downtime can be + # May not be fatal, the PMEM DIMM should be replaced when downtime can be # scheduled for this system. dimm_message="FAIL: Wrong number $dimm_count healthy PMEM DIMMs seen." mail_message+="$nl$dimm_message$nl$(ipmctl show -dimm)$nl" @@ -184,7 +234,7 @@ if [ -e /sys/class/net/ib1 ]; then testcases+=" $nl" - result=1 + result=3 else echo "OK: Found $dimm_rcount DIMM PMEM regions." fi @@ -211,23 +261,15 @@ if [ -e /sys/class/net/ib1 ]; then testcases+=" $nl" - result=1 + result=4 else echo "OK: Even number ($nvme_count) of NVMe devices seen." fi testcases+=" $nl" # All storage found by lspci should also be in lsblk report - lsblk_nvme=0 - lsblk_pmem=0 - while IFS= read -r line; do - if [[ "$line" = nvme* ]];then - ((lsblk_nvme++)) || true - fi - if [[ "$line" = pmem* ]];then - ((lsblk_pmem++)) || true - fi - done < <(lsblk) + lsblk_nvme=$(lsblk | grep nvme -c) + lsblk_pmem=$(lsblk | grep pmem -c) ((testruns++)) || true testcases+=" ${nl}" @@ -238,7 +280,7 @@ if [ -e /sys/class/net/ib1 ]; then testcases+=" $nl" - result=1 + result=5 else echo "OK: All $nvme_count NVMe devices are in lsblk report." fi @@ -253,7 +295,7 @@ if [ -e /sys/class/net/ib1 ]; then testcases+=" $nl" - result=1 + result=6 else echo "OK: All $dimm_rcount PMEM devices are in lsblk report." 
fi @@ -295,4 +337,8 @@ echo "$junit_xml" > "./hardware_prep_node_results.xml" do_mail +if [ "$result" -ne 0 ]; then + echo "Check failure $result" +fi + exit $result diff --git a/ci/gha_functions.sh b/ci/gha_functions.sh index 7fbc8bcac28..d7234c457d9 100644 --- a/ci/gha_functions.sh +++ b/ci/gha_functions.sh @@ -233,7 +233,7 @@ test_test_tag_and_features() { CP_FEATURES="foo bar" get_test_tags "-hw")" "always_passes,-hw always_fails,-hw" } -test_jenkins_curl() { - JENKINS_URL="${JENKINS_URL:-https://build.hpdd.intel.com/}" - assert_equals "$(QUIET=true VERBOSE=false jenkins_curl -X POST "${JENKINS_URL}api/xml" 3>&1 >/dev/null | tr -d '\r' | grep '^X-Content-Type-Options:')" "X-Content-Type-Options: nosniff" -} +#test_jenkins_curl() { +# JENKINS_URL="${JENKINS_URL:-https://build.hpdd.intel.com/}" +# assert_equals "$(QUIET=true VERBOSE=false jenkins_curl -X POST "${JENKINS_URL}api/xml" 3>&1 >/dev/null | tr -d '\r' | grep '^X-Content-Type-Options:')" "X-Content-Type-Options: nosniff" +#} diff --git a/ci/provisioning/post_provision_config.sh b/ci/provisioning/post_provision_config.sh index 92575907ca8..14980c86a03 100755 --- a/ci/provisioning/post_provision_config.sh +++ b/ci/provisioning/post_provision_config.sh @@ -1,5 +1,10 @@ #!/bin/bash - +# +# Copyright 2020-2023 Intel Corporation. 
+# Copyright 2025 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# set -eux export PS4='+ ${HOSTNAME%%.*}:${BASH_SOURCE:+$BASH_SOURCE:}$LINENO:${FUNCNAME:+$FUNCNAME():} ' @@ -24,12 +29,38 @@ source ci/junit.sh : "${MLNX_VER_NUM:=24.04-0.6.6.0}" : "${DISTRO:=EL_7}" -DSL_REPO_var="DAOS_STACK_${DISTRO}_LOCAL_REPO" -DSG_REPO_var="DAOS_STACK_${DISTRO}_GROUP_REPO" -DSA_REPO_var="DAOS_STACK_${DISTRO}_APPSTREAM_REPO" retry_cmd 300 clush -B -S -l root -w "$NODESTRING" -c ci_key* --dest=/tmp/ +function create_host_file() { + local node_string="$1" + local output_file="${2:-./hosts}" + local input_file="${3:-}" + rm -rf "$output_file" 2>/dev/null + if [ -n "$input_file" ]; then + cp "$input_file" "$output_file" + fi + IFS=',' read -ra NODES <<< "$node_string" + for node in "${NODES[@]}"; do + ip_address=$(nslookup "$node" 2>/dev/null | awk '/^Address: / {print $2}' | head -n 1) + long_name=$(nslookup "$node" 2>/dev/null | awk '/^Name:/ {print $2}' | head -n 1) + if [ -n "$ip_address" ] && [ -n "$long_name" ]; then + echo "$ip_address $long_name $node" >> "$output_file" + else + echo "ERROR: Could not resolve $node" + return 1 + fi + done + return 0 +} + +if create_host_file "$NODESTRING" "./hosts" "/etc/hosts"; then + retry_cmd 300 clush -B -S -l root -w "$NODESTRING" -c ./hosts --dest=/etc/hosts +else + echo "ERROR: Failed to create host file" +fi + + # shellcheck disable=SC2001 sanitized_commit_message="$(echo "$COMMIT_MESSAGE" | sed -e 's/\(["\$]\)/\\\1/g')" @@ -42,9 +73,6 @@ if ! 
retry_cmd 2400 clush -B -S -l root -w "$NODESTRING" \ GPG_KEY_URLS=\"${GPG_KEY_URLS:-}\" REPOSITORY_URL=\"${REPOSITORY_URL:-}\" JENKINS_URL=\"${JENKINS_URL:-}\" - DAOS_STACK_LOCAL_REPO=\"${!DSL_REPO_var}\" - DAOS_STACK_GROUP_REPO=\"${!DSG_REPO_var:-}\" - DAOS_STACK_EL_8_APPSTREAM_REPO=\"${!DSA_REPO_var:-}\" DISTRO=\"$DISTRO\" DAOS_STACK_RETRY_DELAY_SECONDS=\"$DAOS_STACK_RETRY_DELAY_SECONDS\" DAOS_STACK_RETRY_COUNT=\"$DAOS_STACK_RETRY_COUNT\" diff --git a/ci/provisioning/post_provision_config_common.sh b/ci/provisioning/post_provision_config_common.sh index 06ad80b984a..d5300fd3a8f 100755 --- a/ci/provisioning/post_provision_config_common.sh +++ b/ci/provisioning/post_provision_config_common.sh @@ -1,5 +1,10 @@ #!/bin/bash - +# +# Copyright 2021-2023 Intel Corporation. +# Copyright 2025 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# set -eux repo_server_pragma=$(echo "$COMMIT_MESSAGE" | sed -ne '/^Repo-servers: */s/.*: *//p') @@ -24,6 +29,7 @@ if [ -n "$repo_files_pr" ]; then REPO_FILE_URL="${JENKINS_URL:-https://build.hpdd.intel.com/}job/daos-do/job/repo-files/job/$branch/$build_number/artifact/" fi +# shellcheck disable=SC1091 . /etc/os-release # shellcheck disable=SC2034 EXCLUDE_UPGRADE=mercury,daos,daos-\* diff --git a/ci/provisioning/post_provision_config_common_functions.sh b/ci/provisioning/post_provision_config_common_functions.sh index 3edc30025b4..1feddc2e31b 100755 --- a/ci/provisioning/post_provision_config_common_functions.sh +++ b/ci/provisioning/post_provision_config_common_functions.sh @@ -1,5 +1,10 @@ #!/bin/bash - +# +# Copyright 2022-2023 Intel Corporation. 
+# Copyright 2025 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# set -eux : "${DAOS_STACK_RETRY_DELAY_SECONDS:=60}" @@ -8,6 +13,11 @@ set -eux : "${BUILD_URL:=Not_in_jenkins}" : "${STAGE_NAME:=Unknown_Stage}" : "${OPERATIONS_EMAIL:=$USER@localhost}" +: "${JENKINS_URL:=https://jenkins.example.com}" +domain1="${JENKINS_URL#https://}" +mail_domain="${domain1%%/*}" +: "${EMAIL_DOMAIN:=$mail_domain}" +: "${DAOS_DEVOPS_EMAIL:="$HOSTNAME"@"$EMAIL_DOMAIN"}" # functions common to more than one distro specific provisioning url_to_repo() { @@ -49,17 +59,6 @@ add_repo() { fi } -add_group_repo() { - local match="$1" - - add_repo "$match" "$DAOS_STACK_GROUP_REPO" - group_repo_post -} - -add_local_repo() { - add_repo 'argobots' "$DAOS_STACK_LOCAL_REPO" false -} - disable_gpg_check() { local url="$1" @@ -107,9 +106,6 @@ retry_dnf() { # non-experimental one after trying twice with the experimental one set_local_repo "${repo_servers[1]}" dnf -y makecache - if [ -n "${POWERTOOLSREPO:-}" ]; then - POWERTOOLSREPO=${POWERTOOLSREPO/${repo_servers[0]}/${repo_servers[1]}} - fi fi sleep "${RETRY_DELAY_SECONDS:-$DAOS_STACK_RETRY_DELAY_SECONDS}" fi @@ -117,6 +113,10 @@ retry_dnf() { if [ "$rc" -ne 0 ]; then send_mail "Command retry failed in $STAGE_NAME after $attempt attempts using ${repo_server:-nexus} as initial repo server " \ "Command: $*\nAttempts: $attempt\nStatus: $rc" + echo "Command retry failed in $STAGE_NAME after $attempt attempts using ${repo_server:-nexus} as initial repo server " + echo "Command: $*" + echo "Attempts: $attempt" + echo "Status: $rc" fi return 1 @@ -140,7 +140,7 @@ send_mail() { echo "Host: $HOSTNAME" echo "" echo -e "$message" - } 2>&1 | mail -s "$subject" -r "$HOSTNAME"@intel.com "$recipients" + } 2>&1 | mail -s "$subject" -r "$DAOS_DEVOPS_EMAIL" "$recipients" set -x } @@ -186,6 +186,10 @@ retry_cmd() { if [ "$rc" -ne 0 ]; then send_mail "Command retry failed in $STAGE_NAME after $attempt attempts" \ "Command: 
$*\nAttempts: $attempt\nStatus: $rc" + echo "Command retry failed in $STAGE_NAME after $attempt attempts" + echo "Command: $*" + echo "Attempts: $attempt" + echo "Status: $rc" fi return 1 } @@ -217,6 +221,10 @@ timeout_cmd() { if [ "$rc" -ne 0 ]; then send_mail "Command timeout failed in $STAGE_NAME after $attempt attempts" \ "Command: $*\nAttempts: $attempt\nStatus: $rc" + echo "Command timeout failed in $STAGE_NAME after $attempt attempts" + echo "Command: $*" + echo "Attempts: $attempt" + echo "Status: $rc" fi return "$rc" } @@ -229,6 +237,7 @@ fetch_repo_config() { local repo_file="daos_ci-${ID}${VERSION_ID%%.*}-$repo_server" local repopath="${REPOS_DIR}/$repo_file" if ! curl -f -o "$repopath" "$REPO_FILE_URL$repo_file.repo"; then + echo "Failed to fetch repo file $REPO_FILE_URL$repo_file.repo" return 1 fi @@ -274,8 +283,13 @@ set_local_repo() { # Disable the daos repo so that the Jenkins job repo or a PR-repos*: repo is # used for daos packages dnf -y config-manager \ - --disable daos-stack-daos-"${DISTRO_GENERIC}"-"${VERSION_ID%%.*}"-x86_64-stable-local-artifactory + --disable daos-stack-daos-"${DISTRO_GENERIC}"-"${VERSION_ID%%.*}"*-stable-local-artifactory + else + dnf -y config-manager \ + --enable daos-stack-daos-"${DISTRO_GENERIC}"-"${VERSION_ID%%.*}"*-stable-local-artifactory fi + dnf -y config-manager \ + --enable daos-stack-deps-"${DISTRO_GENERIC}"-"${VERSION_ID%%.*}"*-stable-local-artifactory fi dnf repolist @@ -290,6 +304,7 @@ update_repos() { if ! fetch_repo_config "$repo_server"; then # leave the existing on-image repo config alone if the repo fetch fails send_mail "Fetch repo file for repo server \"$repo_server\" failed. Continuing on with in-image repos." + echo "Fetch repo file for repo server \"$repo_server\" failed. Continuing on with in-image repos." return 1 fi done @@ -306,8 +321,12 @@ update_repos() { # successfully grabbed them all, so replace the entire $REPOS_DIR # content with them + + # This is not working right on a second run. 
+ # using a quick hack to stop deleting a critical repo local file for file in "$REPOS_DIR"/*.repo; do + [[ $file == *"artifactory"* ]] && continue [ -e "$file" ] || break # empty the file but keep it around so that updates don't recreate it true > "$file" @@ -395,6 +414,7 @@ post_provision_config_nodes() { fi if ! "${cmd[@]}"; then dump_repos + echo "Failed to upgrade packages" return 1 fi @@ -413,35 +433,12 @@ post_provision_config_nodes() { if ! retry_dnf 360 install "${inst_rpms[@]/%/${DAOS_VERSION:-}}"; then rc=${PIPESTATUS[0]} dump_repos + echo "Failed to install packages" return "$rc" fi fi if lspci | grep "ConnectX-6" && ! grep MOFED_VERSION /etc/do-release; then - # Need this module file - version="$(rpm -q --qf "%{version}" openmpi)" - mkdir -p /etc/modulefiles/mpi/ - cat << EOF > /etc/modulefiles/mpi/mlnx_openmpi-x86_64 -#%Module 1.0 -# -# OpenMPI module for use with 'environment-modules' package: -# -conflict mpi -prepend-path PATH /usr/mpi/gcc/openmpi-$version/bin -prepend-path LD_LIBRARY_PATH /usr/mpi/gcc/openmpi-$version/lib64 -prepend-path PKG_CONFIG_PATH /usr/mpi/gcc/openmpi-$version/lib64/pkgconfig -prepend-path MANPATH /usr/mpi/gcc/openmpi-$version/share/man -setenv MPI_BIN /usr/mpi/gcc/openmpi-$version/bin -setenv MPI_SYSCONFIG /usr/mpi/gcc/openmpi-$version/etc -setenv MPI_FORTRAN_MOD_DIR /usr/mpi/gcc/openmpi-$version/lib64 -setenv MPI_INCLUDE /usr/mpi/gcc/openmpi-$version/include -setenv MPI_LIB /usr/mpi/gcc/openmpi-$version/lib64 -setenv MPI_MAN /usr/mpi/gcc/openmpi-$version/share/man -setenv MPI_COMPILER openmpi-x86_64 -setenv MPI_SUFFIX _openmpi -setenv MPI_HOME /usr/mpi/gcc/openmpi-$version -EOF - printf 'MOFED_VERSION=%s\n' "$MLNX_VER_NUM" >> /etc/do-release fi diff --git a/ci/provisioning/post_provision_config_nodes.sh b/ci/provisioning/post_provision_config_nodes.sh index e2f76f0d2da..14ac540d3a4 100644 --- a/ci/provisioning/post_provision_config_nodes.sh +++ b/ci/provisioning/post_provision_config_nodes.sh @@ -1,8 +1,21 @@ #!/bin/bash - 
+# +# Copyright 2020-2023 Intel Corporation. +# Copyright 2025 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# set -eux env > /root/last_run-env.txt + +# Need this fix earlier +# For some reason sssd_common must be reinstalled +# to fix up the restored image. +if command -v dnf; then + bootstrap_dnf +fi + if ! grep ":$MY_UID:" /etc/group; then groupadd -g "$MY_UID" jenkins fi @@ -29,13 +42,14 @@ echo "jenkins ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/jenkins # /scratch is needed on test nodes mkdir -p /scratch -mount "${DAOS_CI_INFO_DIR}" /scratch +retry_cmd 2400 mount "${DAOS_CI_INFO_DIR}" /scratch # defined in ci/functional/post_provision_config_nodes_.sh # and catted to the remote node along with this script if ! post_provision_config_nodes; then - rc=${PIPESTATUS[0]} - exit "$rc" + rc=${PIPESTATUS[0]} + echo "post_provision_config_nodes failed with rc=$rc" + exit "$rc" fi # Workaround to enable binding devices back to nvme or vfio-pci after they are unbound from vfio-pci @@ -47,6 +61,80 @@ if lspci | grep -i nvme; then daos_server nvme reset && rmmod vfio_pci && modprobe vfio_pci fi +# FOR now limit to 2 devices per CPU NUMA node +: "${DAOS_CI_NVME_NUMA_LIMIT:=2}" + +function mount_nvme_drive { + local drive="$1" + file_system=$(file -sL "/dev/$drive") + if [[ "$file_system" != *"ext4 filesystem"* ]]; then + yes | mkfs -t ext4 "/dev/$drive" + fi + mkdir -p "/mnt/$drive" + mount "/dev/$drive" "/mnt/$drive" +} + + +nvme_class="/sys/class/nvme/" +function nvme_limit { + set +x + if [ ! 
-d /sys/class/nvme ]; then + echo "No NVMe devices found" + return + fi + local numa0_devices=() + local numa1_devices=() + for nvme_path in "$nvme_class"*; do + nvme="$(basename "$nvme_path")n1" + numa_node="$(cat "${nvme_path}/numa_node")" + if mount | grep "$nvme"; then + continue + fi + if [ "$numa_node" -eq 0 ]; then + numa0_devices+=("$nvme") + else + numa1_devices+=("$nvme") + fi + done + echo numa0 "${numa0_devices[@]}" + echo numa1 "${numa1_devices[@]}" + if [ "${#numa0_devices[@]}" -gt 0 ] && [ "${#numa1_devices[@]}" -gt 0 ]; then + echo "balanced NVMe configuration possible" + nvme_count=0 + for nvme in "${numa0_devices[@]}"; do + if [ "$nvme_count" -ge "${DAOS_CI_NVME_NUMA_LIMIT}" ]; then + mount_nvme_drive "$nvme" + else + ((nvme_count++)) || true + fi + done + nvme_count=0 + for nvme in "${numa1_devices[@]}"; do + if [ "$nvme_count" -ge "${DAOS_CI_NVME_NUMA_LIMIT}" ]; then + mount_nvme_drive "$nvme" + else + ((nvme_count++)) || true + fi + done + else + echo "balanced NVMe configuration not possible" + ((needed = "$DAOS_CI_NVME_NUMA_LIMIT" + 1)) || true + nvme_count=0 + for nvme in "${numa0_devices[@]}" "${numa1_devices[@]}"; do + if [ "$nvme_count" -ge "$needed" ]; then + mount_nvme_drive "$nvme" + else + ((nvme_count++)) || true + fi + done + fi + set -x +} + +# Force only the desired number of NVMe devices to be seen by DAOS tests +# by mounting the extra ones. +nvme_limit + systemctl enable nfs-server.service systemctl start nfs-server.service sync diff --git a/ci/provisioning/post_provision_config_nodes_EL_8.sh b/ci/provisioning/post_provision_config_nodes_EL_8.sh index 6451bf332a9..27b35fb4b15 100644 --- a/ci/provisioning/post_provision_config_nodes_EL_8.sh +++ b/ci/provisioning/post_provision_config_nodes_EL_8.sh @@ -6,8 +6,18 @@ # SPDX-License-Identifier: BSD-2-Clause-Patent bootstrap_dnf() { +set +e systemctl enable postfix.service systemctl start postfix.service + postfix_start_exit=$? 
+ if [ $postfix_start_exit -ne 0 ]; then + echo "WARNING: Postfix not started: $postfix_start_exit" + systemctl status postfix.service + journalctl -xe -u postfix.service + fi +set -e + # Seems to be needed to fix some issues. + dnf -y reinstall sssd-common } group_repo_post() { diff --git a/ci/provisioning/post_provision_config_nodes_LEAP_15.sh b/ci/provisioning/post_provision_config_nodes_LEAP_15.sh index 2c7c66da133..2e4315f348c 100755 --- a/ci/provisioning/post_provision_config_nodes_LEAP_15.sh +++ b/ci/provisioning/post_provision_config_nodes_LEAP_15.sh @@ -1,8 +1,15 @@ #!/bin/bash +# +# Copyright 2021-2024 Intel Corporation. +# Copyright 2025 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent bootstrap_dnf() { rm -rf "$REPOS_DIR" ln -s ../zypp/repos.d "$REPOS_DIR" + dnf -y remove lua-lmod + dnf -y install lua-lmod '--repo=*lua*' --repo '*network-cluster*' } group_repo_post() { diff --git a/ci/provisioning/post_provision_config_nodes_UBUNTU_20_04.sh b/ci/provisioning/post_provision_config_nodes_UBUNTU_20_04.sh index 484a678a0a8..0d9c3b618b8 100755 --- a/ci/provisioning/post_provision_config_nodes_UBUNTU_20_04.sh +++ b/ci/provisioning/post_provision_config_nodes_UBUNTU_20_04.sh @@ -1,4 +1,10 @@ #!/bin/bash +# +# Copyright 2020-2022 Intel Corporation. +# Copyright 2025 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# post_provision_config_nodes() { # should we port this to Ubuntu or just consider $CONFIG_POWER_ONLY dead? 
@@ -12,15 +18,7 @@ post_provision_config_nodes() { # slurm-example-configs slurmctld slurm-slurmmd #fi codename=$(lsb_release -s -c) - if [ -n "$DAOS_STACK_GROUP_REPO" ]; then - add-apt-repository \ - "deb $REPOSITORY_URL/$DAOS_STACK_GROUP_REPO $codename" - fi - - if [ -n "$DAOS_STACK_LOCAL_REPO" ]; then - echo "deb [trusted=yes] $REPOSITORY_URL/$DAOS_STACK_LOCAL_REPO $codename main" >> /etc/apt/sources.list - fi - + echo "$codename" if [ -n "$INST_REPOS" ]; then for repo in $INST_REPOS; do branch="master" diff --git a/ci/storage/test_main_storage_prepare_node.sh b/ci/storage/test_main_storage_prepare_node.sh index 0be5a33167c..f87333327b8 100755 --- a/ci/storage/test_main_storage_prepare_node.sh +++ b/ci/storage/test_main_storage_prepare_node.sh @@ -1,5 +1,10 @@ #!/bin/bash - +# +# Copyright 2021-2023 Intel Corporation. +# Copyright 2025 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# set -eux : "${STORAGE_PREP_OPT:=}" @@ -21,12 +26,16 @@ else ;; esac dnf -y config-manager \ - --disable daos-stack-daos-"${DISTRO_GENERIC}"-"${VERSION_ID%%.*}"-x86_64-stable-local-artifactory + --disable daos-stack-daos-"${DISTRO_GENERIC}"-"${VERSION_ID%%.*}"*-stable-local-artifactory fi +# this needs to be made more generic in the future. +dnf -y config-manager \ + --enable daos-stack-deps-"${DISTRO_GENERIC}"-"${VERSION_ID%%.*}"*-stable-local-artifactory + dnf -y install ipmctl daos-server"$DAOS_PKG_VERSION" -lspci | grep Mellanox -lscpu | grep Virtualization +lspci | grep Mellanox || true +lscpu | grep Virtualization || true lscpu | grep -E -e Socket -e NUMA if command -v opainfo; then opainfo || true; fi @@ -51,7 +60,12 @@ if ipmctl show -dimm; then fi fi else - if ip addr show ib1; then + counter=0 + for ib in /sys/class/net/ib*; do + ((counter++)) || true + ip addr show "$ib" + done + if [ "$counter" -ge 2 ]; then # All of our CI nodes with two ib adapters should have PMEM DIMMs echo 'No PMEM DIMM devices found on CI node!' 
exit 1 diff --git a/ci/unit/test_main.sh b/ci/unit/test_main.sh index 4c5922013fe..5ad364b7d16 100755 --- a/ci/unit/test_main.sh +++ b/ci/unit/test_main.sh @@ -1,5 +1,10 @@ #!/bin/bash - +# +# Copyright 2020-2023 Intel Corporation. +# Copyright 2025 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# # This is the script used for running unit testing # run_utest.py and run_utest.py with memcheck stages on the CI set -uex @@ -30,8 +35,6 @@ if $USE_BULLSEYE; then rm -rf bullseye mkdir -p bullseye tar -C bullseye --strip-components=1 -xf bullseye.tar -else - BULLSEYE= fi NODE=${NODELIST%%,*} @@ -43,6 +46,6 @@ rsync -rlpt -z -e "ssh $SSH_KEY_ARGS" . jenkins@"$NODE":build/ ssh -tt "$SSH_KEY_ARGS" jenkins@"$NODE" "HOSTNAME=$HOSTNAME \ HOSTPWD=$PWD \ WITH_VALGRIND=$WITH_VALGRIND \ - BULLSEYE=$BULLSEYE \ + HTTPS_PROXY=\"${HTTPS_PROXY:-}\" \ BDEV_TEST=$BDEV_TEST \ ./build/ci/unit/test_main_node.sh" diff --git a/ci/unit/test_main_node.sh b/ci/unit/test_main_node.sh index a14b1fc3880..65f6e859eba 100755 --- a/ci/unit/test_main_node.sh +++ b/ci/unit/test_main_node.sh @@ -1,5 +1,10 @@ #!/bin/bash - +# +# Copyright 2020-2023 Intel Corporation. +# Copyright 2025 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# # This is a script to be run by the ci/unit/test_main.sh to run a test # on a CI node. 
@@ -21,6 +26,7 @@ sudo mount --bind build "${SL_SRC_DIR}" log_prefix="unit_test" +: "${BULLSEYE:=}" if [ -n "$BULLSEYE" ]; then pushd "${SL_SRC_DIR}/bullseye" set +x @@ -47,6 +53,7 @@ sudo ln -sf "$SL_PREFIX/share/spdk/scripts/common.sh" /usr/share/spdk/scripts/ sudo ln -s "$SL_PREFIX/include" /usr/share/spdk/include # set CMOCKA envs here +: "${WITH_VALGRIND:=}" export CMOCKA_MESSAGE_OUTPUT=xml if [[ -z ${WITH_VALGRIND} ]]; then export CMOCKA_XML_FILE="${SL_SRC_DIR}/test_results/%g.xml" @@ -86,5 +93,5 @@ pip install --requirement requirements-utest.txt pip install /opt/daos/lib/daos/python/ -utils/run_utest.py $RUN_TEST_VALGRIND --no-fail-on-error $VDB_ARG --log_dir="$test_log_dir" \ - $SUDO_ARG +HTTPS_PROXY="${HTTPS_PROXY:-}" utils/run_utest.py $RUN_TEST_VALGRIND \ + --no-fail-on-error $VDB_ARG --log_dir="$test_log_dir" $SUDO_ARG diff --git a/ftest.sh b/ftest.sh index 0140c7ed098..03947a83891 100755 --- a/ftest.sh +++ b/ftest.sh @@ -1,7 +1,7 @@ #!/bin/bash # /* # * (C) Copyright 2016-2022 Intel Corporation. -# * (C) Copyright 2025 Hewlett Packard Enterprise Development LP +# * Copyright 2025 Hewlett Packard Enterprise Development LP # * # * SPDX-License-Identifier: BSD-2-Clause-Patent # */ @@ -113,6 +113,7 @@ args="${1:-quick}" shift || true args+=" $*" +_HTTPS_PROXY=${HTTPS_PROXY:-} # shellcheck disable=SC2029 # shellcheck disable=SC2086 if ! ssh -A $SSH_KEY_ARGS ${REMOTE_ACCT:-jenkins}@"${nodes[0]}" \ @@ -128,6 +129,7 @@ if ! 
ssh -A $SSH_KEY_ARGS ${REMOTE_ACCT:-jenkins}@"${nodes[0]}" \ LAUNCH_OPT_ARGS=\"$LAUNCH_OPT_ARGS\" WITH_VALGRIND=\"$WITH_VALGRIND\" STAGE_NAME=\"$STAGE_NAME\" + HTTPS_PROXY=\"$_HTTPS_PROXY\" $(sed -e '1,/^$/d' "$SCRIPT_LOC"/main.sh)"; then rc=${PIPESTATUS[0]} if ${SETUP_ONLY:-false}; then diff --git a/site_scons/env_modules.py b/site_scons/env_modules.py index b7a20bf769b..f6e4c58dd76 100644 --- a/site_scons/env_modules.py +++ b/site_scons/env_modules.py @@ -1,4 +1,5 @@ # Copyright 2019-2023 Intel Corporation +# Copyright 2025 Hewlett Packard Enterprise Development LP # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -34,8 +35,7 @@ class _env_module(): # pylint: disable=invalid-name env_module_init = None _mpi_map = {"mpich": ['mpi/mpich-x86_64', 'gnu-mpich'], - "openmpi": ['mpi/mlnx_openmpi-x86_64', 'mpi/openmpi3-x86_64', - 'gnu-openmpi', 'mpi/openmpi-x86_64']} + "openmpi": ['mpi/openmpi3-x86_64', 'gnu-openmpi', 'mpi/openmpi-x86_64']} def __init__(self, silent=False): """Load Modules for initializing environment variables""" diff --git a/src/tests/ftest/dfuse/bash.py b/src/tests/ftest/dfuse/bash.py index 964d0295954..eb5897d1aa9 100644 --- a/src/tests/ftest/dfuse/bash.py +++ b/src/tests/ftest/dfuse/bash.py @@ -1,5 +1,6 @@ """ (C) Copyright 2020-2024 Intel Corporation. 
+ Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -112,7 +113,7 @@ def run_bashcmd(self, il_lib=None, compatible_mode=False): # f'more {fuse_root_dir}/src.c', # more hangs over ssh somehow f"dos2unix {fuse_root_dir}/src.c", f"gcc -o {fuse_root_dir}/output {fuse_root_dir}/src.c", - f"valgrind size {fuse_root_dir}/output", + f'export DEBUGINFOD_URLS=""; valgrind size {fuse_root_dir}/output', f"readelf -s {fuse_root_dir}/output", f"strip -s {fuse_root_dir}/output", f"g++ -o {fuse_root_dir}/output {fuse_root_dir}/src.c", @@ -136,8 +137,16 @@ def run_bashcmd(self, il_lib=None, compatible_mode=False): 'fio --readwrite=randwrite --name=test --size="2M" --directory ' f'{fuse_root_dir}/ --bs=1M --numjobs="1" --ioengine=libaio --iodepth=16' '--group_reporting --exitall_on_error --continue_on_error=none', - f'curl "https://www.google.com" -o {fuse_root_dir}/download.html', ] + # If set, use the HTTPS_PROXY for curl command + https_proxy = os.environ.get('HTTPS_PROXY') + if https_proxy: + proxy_option = f'--proxy "{https_proxy}"' + else: + proxy_option = '' + cmd = f'curl "https://www.google.com" -o {fuse_root_dir}/download.html {proxy_option}' + commands.append(cmd) + for cmd in commands: self.log_step(f'Running command: {cmd}') result = run_remote(self.log, dfuse_hosts, env_str + cmd) diff --git a/src/tests/ftest/process_core_files.py b/src/tests/ftest/process_core_files.py index 47fbf7a4ef4..60d94334a03 100644 --- a/src/tests/ftest/process_core_files.py +++ b/src/tests/ftest/process_core_files.py @@ -1,5 +1,6 @@ """ (C) Copyright 2022-2024 Intel Corporation. 
+ Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -264,6 +265,8 @@ def install_debuginfo_packages(self): cmds = [] # -debuginfo packages that don't get installed with debuginfo-install + self.log.debug("Installing -debuginfo packages that don't get installed%s", + " with debuginfo-install") for pkg in ['systemd', 'ndctl', 'mercury', 'hdf5', 'libabt0' if "suse" in self.distro_info.name.lower() else "argobots", 'libfabric', 'hdf5-vol-daos', 'hdf5-vol-daos-mpich', @@ -279,6 +282,7 @@ def install_debuginfo_packages(self): cmds.append(["sudo", "rm", "-f", path]) if self.USE_DEBUGINFO_INSTALL: + self.log.debug("self.USE_DEBUGINFO_INSTALL") dnf_args = ["--nobest", "--exclude", "ompi-debuginfo"] if os.getenv("TEST_RPMS", 'false') == 'true': if "suse" in self.distro_info.name.lower(): @@ -311,9 +315,11 @@ def install_debuginfo_packages(self): # yum_base.processTransaction(rpmDisplay=yum.rpmtrans.NoOutputCallBack()) # Now install a few pkgs that debuginfo-install wouldn't + self.log.debug("Now install a few pkgs that debuginfo-install wouldn't") cmd = ["sudo", "dnf", "-y"] if self.is_el() or "suse" in self.distro_info.name.lower(): cmd.append("--enablerepo=*debug*") + cmd.append("--disablerepo='epel-*'") cmd.append("install") for pkg in install_pkgs: try: @@ -325,7 +331,7 @@ def install_debuginfo_packages(self): retry = False for cmd in cmds: - if not run_local(self.log, " ".join(cmd)).passed: + if not run_local(self.log, " ".join(cmd), True, 120).passed: # got an error, so abort this list of commands and re-run # it with a dnf clean, makecache first retry = True @@ -335,11 +341,13 @@ def install_debuginfo_packages(self): cmd_prefix = ["sudo", "dnf"] if self.is_el() or "suse" in self.distro_info.name.lower(): cmd_prefix.append("--enablerepo=*debug*") + cmd_prefix.append("--disablerepo='epel-*'") cmds.insert(0, cmd_prefix + ["clean", "all"]) cmds.insert(1, cmd_prefix + ["makecache"]) for cmd in cmds: if not
run_local(self.log, " ".join(cmd)).passed: break + self.log.info("Installing debuginfo packages for stacktrace creation - DONE") def is_el(self): """Determine if the distro is EL based. diff --git a/src/tests/ftest/scripts/main.sh b/src/tests/ftest/scripts/main.sh index 706d622479b..307198717cb 100755 --- a/src/tests/ftest/scripts/main.sh +++ b/src/tests/ftest/scripts/main.sh @@ -2,6 +2,7 @@ # shellcheck disable=SC1113 # /* # * (C) Copyright 2016-2024 Intel Corporation. +# * Copyright 2025 Hewlett Packard Enterprise Development LP # * # * SPDX-License-Identifier: BSD-2-Clause-Patent # */ @@ -89,6 +90,10 @@ export TEST_RPMS export DAOS_BASE export DAOS_TEST_APP_SRC=${DAOS_TEST_APP_SRC:-"/scratch/daos_test/apps"} export DAOS_TEST_APP_DIR=${DAOS_TEST_APP_DIR:-"${DAOS_TEST_SHARED_DIR}/daos_test/apps"} +if [ -n "$HTTPS_PROXY" ]; then + # shellcheck disable=SC2154 + export HTTPS_PROXY="${HTTPS_PROXY:-""}" +fi launch_node_args="-ts ${TEST_NODES}" if [ "${STAGE_NAME}" == "Functional Hardware 24" ]; then diff --git a/utils/rpms/daos.spec b/utils/rpms/daos.spec index 95b575aad1a..86ab0fe5b40 100644 --- a/utils/rpms/daos.spec +++ b/utils/rpms/daos.spec @@ -23,7 +23,7 @@ Name: daos Version: 2.6.3 -Release: 7%{?relval}%{?dist} +Release: 8%{?relval}%{?dist} Summary: DAOS Storage Engine License: BSD-2-Clause-Patent @@ -232,11 +232,12 @@ Requires: lbzip2 Requires: attr Requires: ior Requires: go >= 1.21 +# Require lmod fix for https://github.com/TACC/Lmod/issues/687 %if (0%{?suse_version} >= 1315) -Requires: lua-lmod +Requires: lua-lmod >= 8.7.36 Requires: libcapstone-devel %else -Requires: Lmod +Requires: Lmod >= 8.7.36 Requires: capstone-devel %endif %if (0%{?rhel} >= 8) @@ -263,6 +264,7 @@ Requires: hdf5-%{openmpi}-tests Requires: hdf5-vol-daos-%{openmpi}-tests Requires: MACSio-%{openmpi} Requires: simul-%{openmpi} +Requires: %{openmpi} %description client-tests-openmpi This is the package needed to run the DAOS client test suite openmpi tools @@ -273,14 +275,14 @@ BuildArch: 
noarch Requires: %{name}-client-tests%{?_isa} = %{version}-%{release} Requires: mpifileutils-mpich Requires: testmpio -Requires: mpich +Requires: mpich = 4.1~a1 Requires: ior Requires: hdf5-mpich-tests Requires: hdf5-vol-daos-mpich-tests Requires: MACSio-mpich Requires: simul-mpich Requires: romio-tests -Requires: python3-mpi4py-tests +Requires: python3-mpi4py-tests >= 3.1.6 %description client-tests-mpich This is the package needed to run the DAOS client test suite mpich tools @@ -630,6 +632,13 @@ getent passwd daos_agent >/dev/null || useradd -s /sbin/nologin -r -g daos_agent # No files in a shim package %changelog +* Mon May 12 2025 Tomasz Gromadzki 2.6.3-8 +- Bump lua-lmod version to >=8.7.36 +- Bump lmod version to >=8.7.36 +- Bump mpich version to 4.1~a1 +- Bump python3-mpi4py-tests version to >= 3.1.6 +- Add openmpi requirement for daos-client-tests on Leap. + * Fri Apr 11 2025 Jeff Olivier 2.6.3-7 - Remove raft as external dependency diff --git a/utils/run_utest.py b/utils/run_utest.py index 1835f230e36..a555e9f8203 100755 --- a/utils/run_utest.py +++ b/utils/run_utest.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 """ - (C) Copyright 2023-2024 Intel Corporation. + Copyright 2023-2024 Intel Corporation. + Copyright 2025 Hewlett Packard Enterprise Development LP + All rights reserved. SPDX-License-Identifier: BSD-2-Clause-Patent @@ -357,6 +359,11 @@ def __init__(self, suite, config, path_info, args): if self.needs_aio(): self.env["VOS_BDEV_CLASS"] = "AIO" + # If set, retain the HTTPS_PROXY for valgrind + http_proxy = os.environ.get('HTTPS_PROXY') + if http_proxy: + self.env['HTTPS_PROXY'] = http_proxy + def log_dir(self): """Return the log directory""" return os.path.join(self.path_info["LOG_DIR"], self.name)