Skip to content
This repository was archived by the owner on Apr 16, 2026. It is now read-only.

Commit a3e7612

Browse files
committed
Implement DOCA kernel driver installation into the netboot image
1 parent 73ed389 commit a3e7612

12 files changed

Lines changed: 163 additions & 151 deletions

File tree

include/cloysterhpc/functions.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ void copyFile(std::filesystem::path source, std::filesystem::path destination);
228228
* @param data The contents of the file to install
229229
*/
230230
void installFile(const std::filesystem::path& path, std::istream& data);
231+
void installFile(const std::filesystem::path& path, std::string&& data);
231232

232233
} // namespace cloyster
233234

@@ -289,7 +290,7 @@ std::string enumToString(T enumValue)
289290
*/
290291
template <typename T>
291292
requires std::is_enum_v<T>
292-
std::optional<T> enumOfStringOpt(const std::string& str)
293+
std::optional<T> enumOfStringOpt(std::string_view str)
293294
{
294295
return magic_enum::enum_cast<T>(str, magic_enum::case_insensitive);
295296
}

include/cloysterhpc/models/answerfile.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ class AnswerFile {
159159
};
160160

161161
struct AFOFED {
162-
std::string kind = static_cast<std::string>(magic_enum::enum_name(OFED::Kind::Inbox));
162+
std::string kind = cloyster::utils::enumToString(OFED::Kind::Inbox);
163163
std::string version = "latest";
164164
bool enabled = false;
165165
};

include/cloysterhpc/ofed.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
#ifndef CLOYSTERHPC_OFED_H_
77
#define CLOYSTERHPC_OFED_H_
88

9-
#include "services/repos.h"
109
#include <string>
11-
1210
#include <utility>
1311

12+
#include <cloysterhpc/services/repos.h>
13+
1414

1515
/**
1616
* @class OFED
@@ -51,7 +51,7 @@ class OFED {
5151
* This method installs the appropriate OFED components based on the
5252
* specified kind.
5353
*/
54-
void install(cloyster::services::repos::RepoManager& repoManager) const;
54+
void install() const;
5555
};
5656

5757
#endif // CLOYSTERHPC_OFED_H_

include/cloysterhpc/services/shell.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ class Shell final : public Execution {
145145
*
146146
* This function sets up the InfiniBand interconnect settings.
147147
*/
148-
void configureInfiniband();
149-
void configureMailSystem();
148+
static void configureInfiniband();
149+
static void configureMailSystem();
150150

151151
/**
152152
* @brief Removes memory lock limits.

src/functions.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,4 +374,10 @@ void installFile(const std::filesystem::path& path, std::istream& data)
374374
fil << data.rdbuf();
375375
}
376376

377+
void installFile(const std::filesystem::path& path, std::string&& data)
378+
{
379+
std::istringstream stringData(std::move(data));
380+
installFile(path, stringData);
381+
}
382+
377383
}; // namespace cloyster

src/main.cpp

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -29,32 +29,33 @@
2929
namespace {
3030
void initializeSingletons(auto&& cluster)
3131
{
32-
using cloyster::models::Cluster;
33-
cloyster::Singleton<Cluster>::init(std::forward<decltype(cluster)>(cluster));
32+
using cloyster::models::Cluster;
33+
cloyster::Singleton<Cluster>::init(
34+
std::forward<decltype(cluster)>(cluster));
3435

35-
cloyster::Singleton<cloyster::services::BaseRunner>::init([](){
36-
using cloyster::services::BaseRunner;
37-
using cloyster::services::DryRunner;
38-
using cloyster::services::Runner;
36+
cloyster::Singleton<cloyster::services::BaseRunner>::init([](){
37+
using cloyster::services::BaseRunner;
38+
using cloyster::services::DryRunner;
39+
using cloyster::services::Runner;
3940

40-
if (cloyster::dryRun) {
41-
return cloyster::makeUniqueDerived<BaseRunner, DryRunner>();
42-
}
41+
if (cloyster::dryRun) {
42+
return cloyster::makeUniqueDerived<BaseRunner, DryRunner>();
43+
}
4344

44-
return cloyster::makeUniqueDerived<BaseRunner, Runner>();
45-
});
45+
return cloyster::makeUniqueDerived<BaseRunner, Runner>();
46+
});
4647

47-
using cloyster::services::repos::RepoManager;
48-
cloyster::Singleton<RepoManager>::init([]() {
49-
auto clusterPtr = cloyster::Singleton<Cluster>::get();
50-
const auto& osinfo = clusterPtr->getHeadnode().getOS();
51-
return std::make_unique<RepoManager>(osinfo);
52-
});
48+
using cloyster::services::repos::RepoManager;
49+
cloyster::Singleton<RepoManager>::init([]() {
50+
auto clusterPtr = cloyster::Singleton<Cluster>::get();
51+
const auto& osinfo = clusterPtr->getHeadnode().getOS();
52+
return std::make_unique<RepoManager>(osinfo);
53+
});
5354

54-
cloyster::Singleton<MessageBus>::init([]() {
55-
return cloyster::makeUniqueDerived<MessageBus, DBusClient>(
56-
"org.freedesktop.systemd1", "/org/freedesktop/systemd1");
57-
});
55+
cloyster::Singleton<MessageBus>::init([]() {
56+
return cloyster::makeUniqueDerived<MessageBus, DBusClient>(
57+
"org.freedesktop.systemd1", "/org/freedesktop/systemd1");
58+
});
5859
}
5960
}; // anonymous namespace
6061

@@ -73,9 +74,6 @@ int main(int argc, const char** argv)
7374

7475
app.add_flag("-t, --tui", cloyster::enableTUI, "Enable TUI");
7576

76-
#if 0
77-
app.add_flag("-c, --cli", cloyster::enableCLI, "Enable CLI");
78-
#endif
7977

8078
cloyster::logLevelInput
8179
= fmt::format("{}", magic_enum::enum_name(Log::Level::Info));
@@ -147,13 +145,15 @@ int main(int argc, const char** argv)
147145
}
148146
}());
149147

148+
#ifndef NDEBUG
150149
if (!loadConfFile.empty()) {
151150
LOG_INFO("Loading file {}", loadConfFile);
152151
auto file = cloyster::services::files::KeyFile(loadConfFile);
153152
LOG_INFO("Groups: {}", fmt::join(file.getGroups(), ","));
154153
LOG_INFO("Contents: {}", file.toData());
155154
return EXIT_SUCCESS;
156155
}
156+
#endif
157157

158158
if (cloyster::showVersion) {
159159
fmt::print("{}: Version {}\n", productName, productVersion);

src/models/cluster.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -322,8 +322,7 @@ void Cluster::printData()
322322
LOG_DEBUG("DomainName: {}", getDomainName());
323323
LOG_DEBUG("FQDN: {}", this->m_headnode.getFQDN());
324324
if (m_ofed) {
325-
auto ofed = m_ofed.value();
326-
LOG_DEBUG("OFED: {} {}", utils::enumToString(ofed.getKind()), ofed.getVersion());
325+
LOG_DEBUG("OFED: {} {}", utils::enumToString(m_ofed->getKind()), m_ofed->getVersion());
327326
}
328327

329328
printNetworks(m_network);
@@ -593,19 +592,20 @@ void Cluster::fillData(const std::filesystem::path& answerfilePath)
593592
"{0}.{1}", this->m_headnode.getHostname(), getDomainName()));
594593

595594
if (answerfil.ofed.enabled) {
595+
// Install the configured OFED variant
596596
LOG_DEBUG("Loading OFED {}", answerfil.ofed.kind);
597597
auto kind = utils::enumOfStringOpt<OFED::Kind>(answerfil.ofed.kind);
598598
if (!kind) {
599599
throw std::runtime_error(
600-
fmt::format("Invalid OFED kind, expected one of {}, found {}",
600+
fmt::format("Invalid OFED kind, expected one of {}, found {}. Edit the anwerfile {} [ofed] sectino and try again.",
601+
cloyster::answerfile,
601602
fmt::join(magic_enum::enum_names<OFED::Kind>(), ", "),
602603
answerfil.ofed.kind
603604
));
604605
}
605-
auto version = answerfil.ofed.version;
606-
setOFED(kind.value(), version);
606+
setOFED(kind.value(), answerfil.ofed.version);
607607
} else {
608-
// @FIXME: Is this correct? It installs the Inbox infiniband stack by default
608+
// Install Inbox OFED by default
609609
setOFED(OFED::Kind::Inbox);
610610
}
611611

src/models/os.cpp

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
*/
55

66
#include <cloysterhpc/cloyster.h>
7+
#include <cloysterhpc/functions.h>
78
#include <cloysterhpc/models/os.h>
89
#include <cloysterhpc/services/dnf.h>
910
#include <cloysterhpc/services/package_manager.h>
@@ -161,20 +162,9 @@ void OS::setDistro(OS::Distro distro) { m_distro = distro; }
161162

162163
void OS::setDistro(std::string_view distro)
163164
{
164-
// This code block is left for future reference, if an insensitive
165-
// comparison in magic_enum would be implemented it may easily replace the
166-
// lambda block. Reference: https://github.com/Neargye/magic_enum/pull/139
167-
168-
#if 1
169-
if (const auto& rv
170-
= magic_enum::enum_cast<Distro>(distro, magic_enum::case_insensitive)) {
171-
#else
172-
if (const auto &rv
173-
= magic_enum::enum_cast<Distro>(distro, [](char lhs, char rhs) {
174-
return std::tolower(lhs) == std::tolower(rhs);
175-
}))
176-
#endif
177-
setDistro(rv.value());
165+
if (const auto& rval
166+
= cloyster::utils::enumOfStringOpt<OS::Distro>(std::string(distro))) {
167+
setDistro(rval.value());
178168
} else {
179169
throw std::runtime_error(
180170
fmt::format("Unsupported Distribution: {}", distro));

src/ofed.cpp

Lines changed: 36 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
*/
55

66
#include <cloysterhpc/functions.h>
7+
#include <cloysterhpc/services/repos.h>
78
#include <cloysterhpc/ofed.h>
89
#include <utility>
910

@@ -48,47 +49,16 @@ std::string headnodeDistroName()
4849
std::unreachable();
4950
}
5051

51-
void installMellanoxDoca(cloyster::services::repos::RepoManager& repoManager, const OFED& ofed)
52+
void installMellanoxDoca(const OFED& ofed)
5253
{
53-
auto runner = cloyster::Singleton<cloyster::services::BaseRunner>::get();
54-
55-
if (runner->executeCommand("modprobe mlx5_core") == 0) {
56-
LOG_WARN("mlx5_core module loaded, skiping DOCA setup");
57-
return;
58-
}
59-
60-
// @FIMXE deduce "rockylinux9.2"
61-
auto repoData = docaRepoTemplate(ofed.getVersion(), headnodeDistroName());
62-
std::filesystem::path path = "/etc/yum.repos.d/mlx-doca.repo";
63-
64-
// Install the repository and enable it
65-
cloyster::installFile(path, repoData);
66-
repoManager.install(path);
67-
repoManager.enable("doca");
68-
69-
// Install the required packages
70-
runner->executeCommand("dnf makecache");
71-
runner->executeCommand("dnf install –y kernel kernel-devel doca-extra");
72-
73-
// Run the Mellanox script, this generates an RPM at tmp
74-
assert(runner->executeCommand("/opt/mellanox/doca/tools/doca-kernel-support -k $(rpm -q --qf \"%{VERSION}-%{RELEASE}.%{ARCH}\n\" kernel-devel") == 0);
75-
76-
// Install the generated rpm
77-
runner->executeCommand("rpm -ivh $(find /tmp/DOCA.*/ -name '*.rpm' -printf \"%T@ %p\n\" | sort -nrk1 | tail -1 | awk '{print $2}')");
78-
79-
runner->executeCommand("dnf makecache");
80-
runner->executeCommand("dnf install –y kernel kernel-devel doca-extra");
81-
if (runner->executeCommand("lsmod | grep mlx5_core") != 0) {
82-
runner->executeCommand("modprobe mlx_core");
83-
}
8454
}
8555
};
8656

8757
void OFED::setKind(Kind kind) { m_kind = kind; }
8858

8959
OFED::Kind OFED::getKind() const { return m_kind; }
9060

91-
void OFED::install(cloyster::services::repos::RepoManager& repoManager) const
61+
void OFED::install() const
9262
{
9363
switch (m_kind) {
9464
case OFED::Kind::Inbox:
@@ -97,8 +67,40 @@ void OFED::install(cloyster::services::repos::RepoManager& repoManager) const
9767
break;
9868

9969
case OFED::Kind::Mellanox:
100-
installMellanoxDoca(repoManager, *this);
70+
{
71+
auto runner = cloyster::Singleton<cloyster::services::BaseRunner>::get();
72+
auto repoManager = cloyster::Singleton<cloyster::services::repos::RepoManager>::get();
73+
74+
if (runner->executeCommand("modprobe mlx5_core") == 0) {
75+
LOG_WARN("mlx5_core module loaded, skiping DOCA setup");
76+
return;
77+
}
78+
79+
auto repoData = docaRepoTemplate(getVersion(), headnodeDistroName());
80+
std::filesystem::path path = "/etc/yum.repos.d/mlx-doca.repo";
81+
82+
// Install the repository and enable it
83+
cloyster::installFile(path, repoData);
84+
repoManager->install(path);
85+
repoManager->enable("doca");
86+
87+
// Install the required packages
88+
runner->executeCommand("dnf makecache");
89+
runner->executeCommand("dnf install –y kernel kernel-devel doca-extra");
90+
91+
// Run the Mellanox script, this generates an RPM at tmp
92+
assert(runner->executeCommand("/opt/mellanox/doca/tools/doca-kernel-support -k $(rpm -q --qf \"%{VERSION}-%{RELEASE}.%{ARCH}\n\" kernel-devel") == 0);
93+
94+
// Install the (last) generated rpm
95+
runner->executeCommand("rpm -ivh $(find /tmp/DOCA.*/ -name '*.rpm' -printf \"%T@ %p\n\" | sort -nrk1 | tail -1 | awk '{print $2}')");
96+
97+
runner->executeCommand("dnf makecache");
98+
runner->executeCommand("dnf install –y kernel kernel-devel doca-extra");
99+
if (runner->executeCommand("lsmod | grep mlx5_core") != 0) {
100+
runner->executeCommand("modprobe mlx_core");
101+
}
101102

103+
}
102104
break;
103105

104106
case OFED::Kind::Oracle:

src/services/repos.cpp

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <algorithm>
77
#include <filesystem>
88
#include <functional>
9+
#include <glibmm/keyfile.h>
910
#include <memory>
1011
#include <ranges>
1112
#include <sstream>
@@ -68,8 +69,8 @@ class RPMRepository final : public IRepository {
6869
std::string m_name;
6970
std::optional<std::string> m_baseurl;
7071
std::optional<std::string> m_metalink;
72+
std::optional<std::string> m_gpgkey;
7173
bool m_gpgcheck = true;
72-
std::string m_gpgkey;
7374
std::filesystem::path m_source;
7475
std::string m_group;
7576

@@ -102,7 +103,7 @@ class RPMRepository final : public IRepository {
102103
return m_metalink;
103104
};
104105
[[nodiscard]] bool gpgcheck() const { return m_gpgcheck; };
105-
[[nodiscard]] std::string gpgkey() const { return m_gpgkey; };
106+
[[nodiscard]] std::optional<std::string> gpgkey() const { return m_gpgkey; };
106107

107108
void id(std::string value) override { m_id = value; };
108109
void enabled(bool enabled) override { m_enabled = enabled; };
@@ -119,7 +120,7 @@ class RPMRepository final : public IRepository {
119120
m_metalink = std::move(metalink);
120121
};
121122
void gpgcheck(bool gpgcheck) { m_gpgcheck = gpgcheck; };
122-
void gpgkey(std::string gpgkey) { m_gpgkey = std::move(gpgkey); };
123+
void gpgkey(std::optional<std::string> gpgkey) { m_gpgkey = std::move(gpgkey); };
123124

124125
void valid() const;
125126

@@ -171,7 +172,7 @@ class RPMRepositoryParser final {
171172
auto baseurl = file.getStringOpt(repogroup, "baseurl");
172173
auto enabled = file.getBoolean(repogroup, "enabled");
173174
auto gpgcheck = file.getBoolean(repogroup, "gpgcheck");
174-
auto gpgkey = file.getString(repogroup, "gpgkey");
175+
auto gpgkey = file.getStringOpt(repogroup, "gpgkey");
175176

176177
RPMRepository repo;
177178
repo.group(repogroup);
@@ -282,19 +283,8 @@ class RPMRepoManager final {
282283
void install(std::filesystem::directory_iterator&& dirIter)
283284
{
284285
for (const auto& fil : std::move(dirIter)) {
285-
auto fname = fil.path().filename().string();
286-
// Return true if the repository should not be loaded
287-
constexpr auto blacklisted = [](const std::string& repo) {
288-
if (repo.starts_with("doca-kernel-")) {
289-
// @FIXME: This is the repositories created by the doca scripts
290-
// Skipping them for now because they break the glib parser
291-
LOG_DEBUG("Skipping DOCA local repositories {}", repo);
292-
return true; // doca repositories break glib parser
293-
}
294-
295-
return false;
296-
};
297-
if (fname.ends_with(".repo") && !blacklisted(fname)) {
286+
std::string fname = fil.path().filename().string();
287+
if (fname.ends_with(".repo")) {
298288
install(fil);
299289
}
300290
}

0 commit comments

Comments
 (0)