Skip to content
This repository was archived by the owner on Apr 16, 2026. It is now read-only.

Commit 259cc22

Browse files
committed
Implement DOCA kernel driver installation into the netboot image
1 parent 73ed389 commit 259cc22

5 files changed

Lines changed: 93 additions & 64 deletions

File tree

include/cloysterhpc/functions.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ void copyFile(std::filesystem::path source, std::filesystem::path destination);
228228
* @param data The contents of the file to install
229229
*/
230230
void installFile(const std::filesystem::path& path, std::istream& data);
231+
/**
 * @brief Install a file whose contents are supplied as an in-memory string.
 *
 * Convenience overload: the string is moved into a std::istringstream and
 * forwarded to the std::istream overload of installFile.
 *
 * @param path Path of the file to install
 * @param data The contents of the file to install (moved from by the call)
 */
void installFile(const std::filesystem::path& path, std::string&& data);
231232

232233
} // namespace cloyster
233234

src/functions.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,4 +374,10 @@ void installFile(const std::filesystem::path& path, std::istream& data)
374374
fil << data.rdbuf();
375375
}
376376

377+
void installFile(const std::filesystem::path& path, std::string&& data)
378+
{
379+
std::istringstream stringData(std::move(data));
380+
installFile(path, stringData);
381+
}
382+
377383
}; // namespace cloyster

src/main.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,13 +147,15 @@ int main(int argc, const char** argv)
147147
}
148148
}());
149149

150+
#ifndef NDEBUG
150151
if (!loadConfFile.empty()) {
151152
LOG_INFO("Loading file {}", loadConfFile);
152153
auto file = cloyster::services::files::KeyFile(loadConfFile);
153154
LOG_INFO("Groups: {}", fmt::join(file.getGroups(), ","));
154155
LOG_INFO("Contents: {}", file.toData());
155156
return EXIT_SUCCESS;
156157
}
158+
#endif
157159

158160
if (cloyster::showVersion) {
159161
fmt::print("{}: Version {}\n", productName, productVersion);

src/services/repos.cpp

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <algorithm>
77
#include <filesystem>
88
#include <functional>
9+
#include <glibmm/keyfile.h>
910
#include <memory>
1011
#include <ranges>
1112
#include <sstream>
@@ -68,8 +69,8 @@ class RPMRepository final : public IRepository {
6869
std::string m_name;
6970
std::optional<std::string> m_baseurl;
7071
std::optional<std::string> m_metalink;
72+
std::optional<std::string> m_gpgkey;
7173
bool m_gpgcheck = true;
72-
std::string m_gpgkey;
7374
std::filesystem::path m_source;
7475
std::string m_group;
7576

@@ -102,7 +103,7 @@ class RPMRepository final : public IRepository {
102103
return m_metalink;
103104
};
104105
[[nodiscard]] bool gpgcheck() const { return m_gpgcheck; };
105-
[[nodiscard]] std::string gpgkey() const { return m_gpgkey; };
106+
[[nodiscard]] std::optional<std::string> gpgkey() const { return m_gpgkey; };
106107

107108
void id(std::string value) override { m_id = value; };
108109
void enabled(bool enabled) override { m_enabled = enabled; };
@@ -119,7 +120,7 @@ class RPMRepository final : public IRepository {
119120
m_metalink = std::move(metalink);
120121
};
121122
void gpgcheck(bool gpgcheck) { m_gpgcheck = gpgcheck; };
122-
void gpgkey(std::string gpgkey) { m_gpgkey = std::move(gpgkey); };
123+
void gpgkey(std::optional<std::string> gpgkey) { m_gpgkey = std::move(gpgkey); };
123124

124125
void valid() const;
125126

@@ -171,7 +172,7 @@ class RPMRepositoryParser final {
171172
auto baseurl = file.getStringOpt(repogroup, "baseurl");
172173
auto enabled = file.getBoolean(repogroup, "enabled");
173174
auto gpgcheck = file.getBoolean(repogroup, "gpgcheck");
174-
auto gpgkey = file.getString(repogroup, "gpgkey");
175+
auto gpgkey = file.getStringOpt(repogroup, "gpgkey");
175176

176177
RPMRepository repo;
177178
repo.group(repogroup);
@@ -282,19 +283,8 @@ class RPMRepoManager final {
282283
void install(std::filesystem::directory_iterator&& dirIter)
283284
{
284285
for (const auto& fil : std::move(dirIter)) {
285-
auto fname = fil.path().filename().string();
286-
// Return true if the repository should not be loaded
287-
constexpr auto blacklisted = [](const std::string& repo) {
288-
if (repo.starts_with("doca-kernel-")) {
289-
// @FIXME: This is the repositories created by the doca scripts
290-
// Skipping them for now because they break the glib parser
291-
LOG_DEBUG("Skipping DOCA local repositories {}", repo);
292-
return true; // doca repositories break glib parser
293-
}
294-
295-
return false;
296-
};
297-
if (fname.ends_with(".repo") && !blacklisted(fname)) {
286+
std::string fname = fil.path().filename().string();
287+
if (fname.ends_with(".repo")) {
298288
install(fil);
299289
}
300290
}

src/services/xcat.cpp

Lines changed: 77 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,34 @@ using cloyster::models::Cluster;
2424

2525
inline auto cluster() { return cloyster::Singleton<Cluster>::get(); }
2626

27+
// Returns the distribution name with the version, e.g., rocky9.5
28+
std::string getOSImageDistroVersion()
29+
{
30+
using cloyster::models::OS;
31+
std::string osimage;
32+
33+
switch (cluster()->getDiskImage().getDistro()) {
34+
case OS::Distro::RHEL:
35+
osimage += "rhels";
36+
osimage += cluster()->getNodes()[0].getOS().getVersion();
37+
break;
38+
case OS::Distro::OL:
39+
osimage += "ol";
40+
osimage += cluster()->getNodes()[0].getOS().getVersion();
41+
osimage += ".0";
42+
break;
43+
case OS::Distro::Rocky:
44+
osimage += "rocky";
45+
osimage += cluster()->getNodes()[0].getOS().getVersion();
46+
break;
47+
case OS::Distro::AlmaLinux:
48+
osimage += "alma";
49+
osimage += cluster()->getNodes()[0].getOS().getVersion();
50+
break;
51+
}
52+
return osimage;
2753
}
54+
}; // namespace{}
2855

2956
namespace cloyster::services {
3057

@@ -49,8 +76,9 @@ XCAT::XCAT()
4976

5077
void XCAT::installPackages()
5178
{
52-
cluster()->getHeadnode().getOS().packageManager()->install("initscripts");
53-
cluster()->getHeadnode().getOS().packageManager()->install("xCAT");
79+
auto packageManager = cluster()->getHeadnode().getOS().packageManager();
80+
packageManager->install("initscripts");
81+
packageManager->install("xCAT");
5482
}
5583

5684
void XCAT::patchInstall()
@@ -178,16 +206,56 @@ void XCAT::configureTimeService()
178206

179207
void XCAT::configureInfiniband()
180208
{
181-
if (const auto& ofed = cluster()->getOFED())
209+
LOG_INFO("[xCAT] Configuring infiniband");
210+
if (const auto& ofed = cluster()->getOFED()) {
182211
switch (ofed->getKind()) {
183212
case OFED::Kind::Inbox:
184213
m_stateless.otherpkgs.emplace_back("@infiniband");
185214

186215
break;
187216

188217
case OFED::Kind::Mellanox:
189-
throw std::logic_error("@TODO MLNX OFED is not yet supported");
190-
218+
{
    m_stateless.otherpkgs.emplace_back("kmod-mlnx-ofa_kernel");

    // Serve the DOCA kernel-module RPMs over HTTP (httpd alias "/rpmrepo")
    // and add that URL to the osimage's otherpkgdir so the packages can be
    // pulled into the netboot image.
    //
    // The install sketch:
    // rpm -qa | grep kmod-mlnx >> /install/custom/netboot/compute.otherpkglist
    // mkdir -p /install/post/otherpkgs/rocky9.5/x86_64/
    // cp -v /usr/share/doca-host-25.01-0.6.0.0/Modules/5.14.0-503.29.1.el9_5.x86_64/*.rpm
    //     /install/post/otherpkgs/rocky9.5/x86_64/
    // createrepo /install/post/otherpkgs/rocky9.5/x86_64/
    // genimage rocky9.5-x86_64-netboot-compute
    // packimage rocky9.5-x86_64-netboot-compute
    // - Create /etc/httpd/conf.d/rpmrepo.conf
    // - cp -a /usr/share/doca-host-25.01-0.6.0.0/Modules/5.14.0-503.29.1.el9_5.x86_64 rpmrepo
    // chmod -R 755 /var/www/html/rpmrepo
    // chdef -t osimage rocky9.5-x86_64-netboot-compute --plus otherpkgdir=http://$(hostname)/rpmrepo
    auto runner = cloyster::Singleton<BaseRunner>::get();
    const auto arch = cloyster::utils::enumToString(
        cluster()->getNodes()[0].getOS().getArch());
    const auto repoFolder = std::string_view("/var/www/html/rpmrepo");

    // The repository directory must exist before the RPMs are copied:
    // `cp` with several sources fails when the target is not a directory.
    runner->executeCommand(fmt::format("mkdir -p {}", repoFolder));

    // httpd loads drop-in configuration from /etc/httpd/conf.d (see the
    // sketch above); the file was previously written to the non-existent
    // /etc/httpd.d/conf.d directory.
    cloyster::installFile(
        "/etc/httpd/conf.d/rpmrepo.conf",
        fmt::format(
            R"(Alias "/rpmrepo" "{0}"
<Directory "{0}">
Options +Indexes +FollowSymLinks
AllowOverride None
Require all granted
IndexOptions FancyIndexing VersionSort NameWidth=* HTMLTable Charset=UTF-8
</Directory>
)", repoFolder));
    runner->executeCommand("apachectl configtest");
    runner->executeCommand("systemctl restart httpd");

    // bash -c is needed so the shell expands the globs in the source path.
    runner->executeCommand(
        fmt::format("bash -c \"cp -v /usr/share/doca-host-*/Modules/*.{}/*.rpm {}\"",
            arch, repoFolder));
    runner->executeCommand(
        fmt::format("createrepo {}", repoFolder));

    // bash -c is needed so $(hostname) is substituted by the shell.
    runner->executeCommand(
        fmt::format("bash -c \"chdef -t osimage {} --plus otherpkgdir=http://$(hostname)/rpmrepo\"",
            m_stateless.osimage));
}
191259
break;
192260

193261
case OFED::Kind::Oracle:
@@ -196,6 +264,7 @@ void XCAT::configureInfiniband()
196264

197265
break;
198266
}
267+
}
199268
}
200269

201270
void XCAT::configureSLURM()
@@ -514,27 +583,7 @@ void XCAT::resetNodes() { cloyster::runCommand("rpower compute reset"); }
514583

515584
void XCAT::generateOSImageName(ImageType imageType, NodeType nodeType)
516585
{
517-
std::string osimage;
518-
519-
switch (cluster()->getDiskImage().getDistro()) {
520-
case OS::Distro::RHEL:
521-
osimage += "rhels";
522-
osimage += cluster()->getNodes()[0].getOS().getVersion();
523-
break;
524-
case OS::Distro::OL:
525-
osimage += "ol";
526-
osimage += cluster()->getNodes()[0].getOS().getVersion();
527-
osimage += ".0";
528-
break;
529-
case OS::Distro::Rocky:
530-
osimage += "rocky";
531-
osimage += cluster()->getNodes()[0].getOS().getVersion();
532-
break;
533-
case OS::Distro::AlmaLinux:
534-
osimage += "alma";
535-
osimage += cluster()->getNodes()[0].getOS().getVersion();
536-
break;
537-
}
586+
std::string osimage = getOSImageDistroVersion();
538587
osimage += "-";
539588

540589
switch (cluster()->getNodes()[0].getOS().getArch()) {
@@ -578,27 +627,7 @@ void XCAT::generateOSImagePath(ImageType imageType, NodeType nodeType)
578627
}
579628

580629
std::filesystem::path chroot = "/install/netboot/";
581-
582-
switch (cluster()->getNodes()[0].getOS().getDistro()) {
583-
case OS::Distro::RHEL:
584-
chroot += "rhels";
585-
chroot += cluster()->getNodes()[0].getOS().getVersion();
586-
break;
587-
case OS::Distro::OL:
588-
chroot += "ol";
589-
chroot += cluster()->getNodes()[0].getOS().getVersion();
590-
chroot += ".0";
591-
break;
592-
case OS::Distro::Rocky:
593-
chroot += "rocky";
594-
chroot += cluster()->getNodes()[0].getOS().getVersion();
595-
break;
596-
case OS::Distro::AlmaLinux:
597-
chroot += "alma";
598-
chroot += cluster()->getNodes()[0].getOS().getVersion();
599-
break;
600-
}
601-
630+
chroot += getOSImageDistroVersion();
602631
chroot += "/";
603632

604633
switch (cluster()->getNodes()[0].getOS().getArch()) {
@@ -655,6 +684,7 @@ void XCAT::installRepositories()
655684
}
656685
}
657686

687+
658688
[[deprecated("Refactoring RepoManager, replace the function with the same name "
659689
"in repo manager")]]
660690
std::vector<std::string> XCAT::getxCATOSImageRepos() const

0 commit comments

Comments
 (0)