This repository was archived by the owner on Apr 16, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathofed.cpp
More file actions
131 lines (110 loc) · 4.71 KB
/
ofed.cpp
File metadata and controls
131 lines (110 loc) · 4.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*
* Copyright 2022 Vinícius Ferrão <vinicius@ferrao.net.br>
* SPDX-License-Identifier: Apache-2.0
*/
#include <fmt/core.h>
#include <cloysterhpc/cloyster.h>
#include <cloysterhpc/functions.h>
#include <cloysterhpc/ofed.h>
#include <cloysterhpc/services/options.h>
#include <cloysterhpc/services/osservice.h>
#include <cloysterhpc/services/repos.h>
#include <stdexcept>
#include <utility>
using cloyster::functions::IRunner;
/// Store the OFED flavor that installed() and install() will act upon.
void OFED::setKind(Kind kind)
{
    this->m_kind = kind;
}
/// Report the OFED flavor currently stored in this object.
OFED::Kind OFED::getKind() const
{
    return this->m_kind;
}
bool OFED::installed() const
{
const auto opts = cloyster::Singleton<cloyster::services::Options>::get();
if (opts->shouldForce("infiniband-install")) {
return false;
}
// Return false so the installation runs on dry run
if (opts->dryRun) {
return false;
}
auto runner = cloyster::Singleton<IRunner>::get();
switch (m_kind) {
case OFED::Kind::Mellanox:
return runner->executeCommand("rpm -q doca-ofed") == 0;
case OFED::Kind::Inbox:
return runner->executeCommand(
"dnf group info \"Infiniband Support\"")
== 0;
case OFED::Kind::Oracle:
throw std::logic_error("Not implemented");
}
std::unreachable();
}
/**
 * Install the InfiniBand stack for the configured OFED kind.
 *
 * Nothing is done on dry run, or when installed() already reports true
 * (use `--force infiniband-install` to bypass that check).
 *
 * - Kind::Inbox: installs the "Infiniband Support" package group through
 *   the OS service.
 * - Kind::Mellanox: enables the "doca" repository, installs the kernel,
 *   kernel-devel and doca-extra packages, optionally builds the DOCA kernel
 *   support RPM, installs the newest RPM found under /tmp/DOCA*, then
 *   installs doca-ofed and mlnx-fw-updater and restarts openibd.
 * - Kind::Oracle: not supported.
 *
 * @throws std::logic_error for Kind::Oracle.
 * @throws std::runtime_error when no DOCA RPM can be located under
 *         /tmp/DOCA* (e.g. the driver build was skipped and never ran).
 *
 * NOTE(review): checkCommand presumably raises on a failing command while
 * executeCommand only returns the exit status -- confirm against IRunner.
 */
void OFED::install() const
{
    const auto opts = cloyster::Singleton<cloyster::services::Options>::get();

    if (opts->dryRun) {
        LOG_WARN("Dry-Run: Skiping OFED installation");
        return;
    }

    // Idempotency check
    if (installed()) {
        LOG_WARN("Inifiniband already installed, skipping, use `--force "
                 "infiniband-install` to force");
        return;
    }

    switch (m_kind) {
        case OFED::Kind::Inbox:
            cloyster::Singleton<cloyster::services::IOSService>::get()
                ->groupInstall("Infiniband Support");
            break;

        case OFED::Kind::Mellanox: {
            auto runner
                = cloyster::Singleton<cloyster::services::IRunner>::get();
            auto repoManager = cloyster::Singleton<
                cloyster::services::repos::RepoManager>::get();
            auto osService
                = cloyster::Singleton<cloyster::services::IOSService>::get();

            // Only this flavor needs the target kernel version, so the
            // cluster model is consulted here instead of on every call.
            // NOTE(review): getNodes()[0] assumes at least one node is
            // defined -- confirm the caller guarantees it.
            const auto cluster
                = cloyster::Singleton<cloyster::models::Cluster>::get();
            const auto osinfo = cluster->getNodes()[0].getOS();
            const std::string kernelVersion = std::string(osinfo.getKernel());

            repoManager->enable("doca");

            // Install the required packages
            runner->checkCommand("dnf makecache --repo=doca");
            runner->checkCommand(
                fmt::format("dnf -y install kernel-{kernelVersion} "
                            "kernel-devel-{kernelVersion} doca-extra",
                    fmt::arg("kernelVersion", kernelVersion)));

            if (osService->getKernelRunning() != kernelVersion) {
                LOG_WARN("New kernel installed! Rebooting after the "
                         "installation finishes is advised!");
            }

            LOG_INFO("Compiling OFED DOCA drivers, this may take a while, use "
                     "`--skip compile-doca-driver` to skip");

            // Run the Mellanox script, this generates an RPM at tmp.
            //
            // Use the kernel-devel version instead of the booted kernel
            // version, this is to handle the case where a new kernel is
            // installed but no reboot was done yet. After compiling the
            // drivers the headnode should be rebooted to reload the new
            // kernel. The driver may support weak updates modules and load
            // without need for reboot.
            if (!opts->shouldSkip("compile-doca-driver")) {
                runner->checkCommand(fmt::format(
                    "/opt/mellanox/doca/tools/doca-kernel-support -k {}",
                    kernelVersion));
            }

            // Get the last rpm in /tmp/DOCA*/ folder
            // On dry-run the below command will not run so we
            // cannot get the output of it
            auto generatedRpms = runner->checkOutput(
                "bash -c \"find /tmp/DOCA*/ -name '*.rpm' -printf '%T@ %p\n' | "
                "sort -nk1 | tail -1 | awk '{print $2}'\"");

            // At least one line is required. This must be a real runtime
            // check: an assert() vanishes under NDEBUG and would let the
            // indexing below read past an empty result.
            if (generatedRpms.empty()) {
                throw std::runtime_error(
                    "No DOCA kernel support RPM found under /tmp/DOCA*, "
                    "cannot install the OFED kernel modules");
            }

            // Install the (last) generated rpm
            runner->executeCommand(
                fmt::format("dnf install -y {}", generatedRpms[0]));

            runner->checkCommand(R"(dnf makecache --repo=doca*)");
            runner->checkCommand("dnf install -y doca-ofed mlnx-fw-updater");
            runner->executeCommand("systemctl restart openibd");
        } break;

        case OFED::Kind::Oracle:
            throw std::logic_error("Oracle RDMA release is not yet supported");
    }
}