Skip to content
This repository was archived by the owner on Apr 16, 2026. It is now read-only.

Commit 5f56f16

Browse files
committed
Implement DOCA kernel driver installation into the netboot image
1 parent 73ed389 commit 5f56f16

9 files changed

Lines changed: 129 additions & 105 deletions

File tree

include/cloysterhpc/functions.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ void copyFile(std::filesystem::path source, std::filesystem::path destination);
228228
* @param data The contents of the file to install
229229
*/
230230
void installFile(const std::filesystem::path& path, std::istream& data);
231+
void installFile(const std::filesystem::path& path, std::string&& data);
231232

232233
} // namespace cloyster
233234

include/cloysterhpc/ofed.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
#ifndef CLOYSTERHPC_OFED_H_
77
#define CLOYSTERHPC_OFED_H_
88

9-
#include "services/repos.h"
109
#include <string>
11-
1210
#include <utility>
1311

12+
#include <cloysterhpc/services/repos.h>
13+
1414

1515
/**
1616
* @class OFED
@@ -51,7 +51,7 @@ class OFED {
5151
* This method installs the appropriate OFED components based on the
5252
* specified kind.
5353
*/
54-
void install(cloyster::services::repos::RepoManager& repoManager) const;
54+
void install() const;
5555
};
5656

5757
#endif // CLOYSTERHPC_OFED_H_

include/cloysterhpc/services/shell.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ class Shell final : public Execution {
145145
*
146146
* This function sets up the InfiniBand interconnect settings.
147147
*/
148-
void configureInfiniband();
149-
void configureMailSystem();
148+
static void configureInfiniband();
149+
static void configureMailSystem();
150150

151151
/**
152152
* @brief Removes memory lock limits.

src/functions.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,4 +374,10 @@ void installFile(const std::filesystem::path& path, std::istream& data)
374374
fil << data.rdbuf();
375375
}
376376

377+
void installFile(const std::filesystem::path& path, std::string&& data)
378+
{
379+
std::istringstream stringData(std::move(data));
380+
installFile(path, stringData);
381+
}
382+
377383
}; // namespace cloyster

src/main.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,13 +147,15 @@ int main(int argc, const char** argv)
147147
}
148148
}());
149149

150+
#ifndef NDEBUG
150151
if (!loadConfFile.empty()) {
151152
LOG_INFO("Loading file {}", loadConfFile);
152153
auto file = cloyster::services::files::KeyFile(loadConfFile);
153154
LOG_INFO("Groups: {}", fmt::join(file.getGroups(), ","));
154155
LOG_INFO("Contents: {}", file.toData());
155156
return EXIT_SUCCESS;
156157
}
158+
#endif
157159

158160
if (cloyster::showVersion) {
159161
fmt::print("{}: Version {}\n", productName, productVersion);

src/ofed.cpp

Lines changed: 36 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
*/
55

66
#include <cloysterhpc/functions.h>
7+
#include <cloysterhpc/services/repos.h>
78
#include <cloysterhpc/ofed.h>
89
#include <utility>
910

@@ -48,47 +49,16 @@ std::string headnodeDistroName()
4849
std::unreachable();
4950
}
5051

51-
void installMellanoxDoca(cloyster::services::repos::RepoManager& repoManager, const OFED& ofed)
52+
void installMellanoxDoca(const OFED& ofed)
5253
{
53-
auto runner = cloyster::Singleton<cloyster::services::BaseRunner>::get();
54-
55-
if (runner->executeCommand("modprobe mlx5_core") == 0) {
56-
LOG_WARN("mlx5_core module loaded, skiping DOCA setup");
57-
return;
58-
}
59-
60-
// @FIMXE deduce "rockylinux9.2"
61-
auto repoData = docaRepoTemplate(ofed.getVersion(), headnodeDistroName());
62-
std::filesystem::path path = "/etc/yum.repos.d/mlx-doca.repo";
63-
64-
// Install the repository and enable it
65-
cloyster::installFile(path, repoData);
66-
repoManager.install(path);
67-
repoManager.enable("doca");
68-
69-
// Install the required packages
70-
runner->executeCommand("dnf makecache");
71-
runner->executeCommand("dnf install –y kernel kernel-devel doca-extra");
72-
73-
// Run the Mellanox script, this generates an RPM at tmp
74-
assert(runner->executeCommand("/opt/mellanox/doca/tools/doca-kernel-support -k $(rpm -q --qf \"%{VERSION}-%{RELEASE}.%{ARCH}\n\" kernel-devel") == 0);
75-
76-
// Install the generated rpm
77-
runner->executeCommand("rpm -ivh $(find /tmp/DOCA.*/ -name '*.rpm' -printf \"%T@ %p\n\" | sort -nrk1 | tail -1 | awk '{print $2}')");
78-
79-
runner->executeCommand("dnf makecache");
80-
runner->executeCommand("dnf install –y kernel kernel-devel doca-extra");
81-
if (runner->executeCommand("lsmod | grep mlx5_core") != 0) {
82-
runner->executeCommand("modprobe mlx_core");
83-
}
8454
}
8555
};
8656

8757
/**
 * @brief Stores the given kind in this OFED object.
 * @param kind The value assigned to m_kind.
 */
void OFED::setKind(Kind kind)
{
    m_kind = kind;
}
8858

8959
/**
 * @brief Reports the kind currently stored in this OFED object.
 * @return The value of m_kind.
 */
OFED::Kind OFED::getKind() const
{
    return m_kind;
}
9060

91-
void OFED::install(cloyster::services::repos::RepoManager& repoManager) const
61+
void OFED::install() const
9262
{
9363
switch (m_kind) {
9464
case OFED::Kind::Inbox:
@@ -97,8 +67,40 @@ void OFED::install(cloyster::services::repos::RepoManager& repoManager) const
9767
break;
9868

9969
case OFED::Kind::Mellanox:
100-
installMellanoxDoca(repoManager, *this);
70+
{
    // Mellanox flow: register the DOCA repository, install the packages that
    // provide the vendor build script, build the kernel-specific driver RPM
    // with it, install that RPM and make sure mlx5_core ends up loaded.
    auto runner = cloyster::Singleton<cloyster::services::BaseRunner>::get();
    auto repoManager = cloyster::Singleton<cloyster::services::repos::RepoManager>::get();

    // modprobe exiting with 0 is treated as "driver already available".
    if (runner->executeCommand("modprobe mlx5_core") == 0) {
        LOG_WARN("mlx5_core module loaded, skiping DOCA setup");
        return;
    }

    auto repoData = docaRepoTemplate(getVersion(), headnodeDistroName());
    std::filesystem::path path = "/etc/yum.repos.d/mlx-doca.repo";

    // Install the repository and enable it
    cloyster::installFile(path, repoData);
    repoManager->install(path);
    repoManager->enable("doca");

    // Install the required packages. The option must be spelled with an
    // ASCII hyphen ("-y"); an en dash is not parsed by dnf as an option.
    runner->executeCommand("dnf makecache");
    runner->executeCommand("dnf install -y kernel kernel-devel doca-extra");

    // Run the Mellanox script, this generates an RPM at tmp. The command is
    // executed outside of assert() so that it still runs when NDEBUG is
    // defined; the assert keeps the hard stop for debug builds.
    const auto kernelSupportStatus = runner->executeCommand("/opt/mellanox/doca/tools/doca-kernel-support -k $(rpm -q --qf \"%{VERSION}-%{RELEASE}.%{ARCH}\n\" kernel-devel)");
    assert(kernelSupportStatus == 0);
    if (kernelSupportStatus != 0) {
        // Without the generated RPM the remaining steps cannot succeed.
        LOG_WARN("doca-kernel-support failed, skipping DOCA driver installation");
        return;
    }

    // Install the (last) generated rpm: the list is sorted newest first by
    // modification time, so the first entry is the most recent build.
    runner->executeCommand("rpm -ivh $(find /tmp/DOCA.*/ -name '*.rpm' -printf \"%T@ %p\n\" | sort -nrk1 | head -1 | awk '{print $2}')");

    runner->executeCommand("dnf makecache");
    runner->executeCommand("dnf install -y kernel kernel-devel doca-extra");

    // Load the driver if it is still not present after the installation.
    if (runner->executeCommand("lsmod | grep mlx5_core") != 0) {
        runner->executeCommand("modprobe mlx5_core");
    }
}
102104
break;
103105

104106
case OFED::Kind::Oracle:

src/services/repos.cpp

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <algorithm>
77
#include <filesystem>
88
#include <functional>
9+
#include <glibmm/keyfile.h>
910
#include <memory>
1011
#include <ranges>
1112
#include <sstream>
@@ -68,8 +69,8 @@ class RPMRepository final : public IRepository {
6869
std::string m_name;
6970
std::optional<std::string> m_baseurl;
7071
std::optional<std::string> m_metalink;
72+
std::optional<std::string> m_gpgkey;
7173
bool m_gpgcheck = true;
72-
std::string m_gpgkey;
7374
std::filesystem::path m_source;
7475
std::string m_group;
7576

@@ -102,7 +103,7 @@ class RPMRepository final : public IRepository {
102103
return m_metalink;
103104
};
104105
/** @brief Returns the stored gpgcheck flag (m_gpgcheck). */
[[nodiscard]] bool gpgcheck() const
{
    return m_gpgcheck;
}
105-
[[nodiscard]] std::string gpgkey() const { return m_gpgkey; };
106+
/**
 * @brief Returns a copy of the stored GPG key.
 * @return The value of m_gpgkey; std::nullopt when no key has been assigned.
 */
[[nodiscard]] std::optional<std::string> gpgkey() const
{
    return m_gpgkey;
}
106107

107108
/**
 * @brief Sets the repository id.
 * @param value The new id; taken by value and moved into m_id so that no
 *              additional copy of the string is made.
 */
void id(std::string value) override { m_id = std::move(value); }
108109
/**
 * @brief Stores whether the repository is enabled.
 * @param enabled The value assigned to m_enabled.
 */
void enabled(bool enabled) override
{
    m_enabled = enabled;
}
@@ -119,7 +120,7 @@ class RPMRepository final : public IRepository {
119120
m_metalink = std::move(metalink);
120121
};
121122
/**
 * @brief Stores the gpgcheck flag.
 * @param gpgcheck The value assigned to m_gpgcheck.
 */
void gpgcheck(bool gpgcheck)
{
    m_gpgcheck = gpgcheck;
}
122-
void gpgkey(std::string gpgkey) { m_gpgkey = std::move(gpgkey); };
123+
/**
 * @brief Stores the GPG key of the repository.
 * @param gpgkey The new key, or std::nullopt for none; moved into m_gpgkey.
 */
void gpgkey(std::optional<std::string> gpgkey)
{
    m_gpgkey = std::move(gpgkey);
}
123124

124125
void valid() const;
125126

@@ -171,7 +172,7 @@ class RPMRepositoryParser final {
171172
auto baseurl = file.getStringOpt(repogroup, "baseurl");
172173
auto enabled = file.getBoolean(repogroup, "enabled");
173174
auto gpgcheck = file.getBoolean(repogroup, "gpgcheck");
174-
auto gpgkey = file.getString(repogroup, "gpgkey");
175+
auto gpgkey = file.getStringOpt(repogroup, "gpgkey");
175176

176177
RPMRepository repo;
177178
repo.group(repogroup);
@@ -282,19 +283,8 @@ class RPMRepoManager final {
282283
void install(std::filesystem::directory_iterator&& dirIter)
283284
{
284285
for (const auto& fil : std::move(dirIter)) {
285-
auto fname = fil.path().filename().string();
286-
// Return true if the repository should not be loaded
287-
constexpr auto blacklisted = [](const std::string& repo) {
288-
if (repo.starts_with("doca-kernel-")) {
289-
// @FIXME: This is the repositories created by the doca scripts
290-
// Skipping them for now because they break the glib parser
291-
LOG_DEBUG("Skipping DOCA local repositories {}", repo);
292-
return true; // doca repositories break glib parser
293-
}
294-
295-
return false;
296-
};
297-
if (fname.ends_with(".repo") && !blacklisted(fname)) {
286+
std::string fname = fil.path().filename().string();
287+
if (fname.ends_with(".repo")) {
298288
install(fil);
299289
}
300290
}

src/services/shell.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -380,10 +380,9 @@ void Shell::configureMailSystem()
380380

381381
void Shell::configureInfiniband()
382382
{
383-
auto repos = cloyster::Singleton<repos::RepoManager>::get();
384383
if (const auto& ofed = cluster()->getOFED()) {
385384
LOG_INFO("Setting up Infiniband support")
386-
ofed->install(*repos); // shared pointer
385+
ofed->install(); // shared pointer
387386
}
388387
}
389388

0 commit comments

Comments
 (0)