Skip to content

Commit aa48433

Browse files
daemon: Add socket activation via /run/rpm-ostreed.socket
For historical reasons, the daemon ends up doing a lot of initialization before even claiming the DBus name. For example, it calls `ostree_sysroot_load()`. We also end up scanning the RPM database, and actually parse all the GPG keys in `/etc/pki/rpm-gpg` etc. This causes two related problems: - By doing all this work before claiming the bus name, we race against the (pretty low) DBus service timeout of 25s. - If something hard fails at initialization, the client can't easily see the error because it just appears in the systemd journal. The client will just see a service timeout. The solution to this is to adopt systemd socket activation. There's a new `rpm-ostreed.socket` and the daemon can be activated that way. The client (when run as root, the socket is only accessible to root right now) connects, which will activate the daemon and attempt initialization - without claiming the DBus name yet. If something goes wrong here, the daemon will reply to the client that activated it with the error, and then also exit with failure. On success, everything continues as normal, including claiming the DBus name. Note that this also logically replaces the code that does explicit `systemctl start rpm-ostreed` invocations. After this patch: ``` $ systemctl stop rpm-ostreed $ umount /boot $ rpm-ostree status error: Couldn't start daemon: Error setting up sysroot: loading sysroot: Unexpected state: /run/ostree-booted found, but no /boot/loader directory ``` Co-authored-by: Colin Walters <[email protected]>
1 parent 1b2c90d commit aa48433

15 files changed

+368
-54
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ cap-std-ext = "2.0"
5151
cap-std = { version = "1.0.3", features = ["fs_utf8"] }
5252
containers-image-proxy = "0.5.1"
5353
# Explicitly force on libc
54-
rustix = { version = "0.37", features = ["use-libc"] }
54+
rustix = { version = "0.37", features = ["use-libc", "net"] }
5555
cap-primitives = "1.0.3"
5656
cap-tempfile = "1.0.15"
5757
chrono = { version = "0.4.26", features = ["serde"] }

Makefile-daemon.am

+4-3
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,15 @@ systemdunit_service_file_names = \
6363
$(NULL)
6464

6565
systemdunit_service_files = $(addprefix $(srcdir)/src/daemon/,$(systemdunit_service_file_names))
66-
systemdunit_timer_files = \
66+
systemdunit_other_files = \
6767
$(srcdir)/src/daemon/rpm-ostreed-automatic.timer \
6868
$(srcdir)/src/daemon/rpm-ostree-countme.timer \
69+
$(srcdir)/src/daemon/rpm-ostreed.socket \
6970
$(NULL)
7071

7172
systemdunit_DATA = \
7273
$(systemdunit_service_files) \
73-
$(systemdunit_timer_files) \
74+
$(systemdunit_other_files) \
7475
$(NULL)
7576

7677
systemdunitdir = $(prefix)/lib/systemd/system/
@@ -103,7 +104,7 @@ EXTRA_DIST += \
103104
$(sysconf_DATA) \
104105
$(service_in_files) \
105106
$(systemdunit_service_in_files) \
106-
$(systemdunit_timer_files) \
107+
$(systemdunit_other_files) \
107108
$(NULL)
108109

109110
CLEANFILES += \

rpmostree-cxxrs.cxx

+49
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "rpmostree-package-variants.h"
99
#include "rpmostree-rpm-util.h"
1010
#include "rpmostree-util.h"
11+
#include "rpmostreed-daemon.hpp"
1112
#include "rpmostreemain.h"
1213
#include "src/libpriv/rpmostree-cxxrs-prelude.h"
1314
#include <algorithm>
@@ -2195,6 +2196,10 @@ extern "C"
21952196
::rust::Str opt_deploy_id, ::rust::Str opt_os_name,
21962197
::rpmostreecxx::OstreeDeployment **return$) noexcept;
21972198

2199+
::rust::repr::PtrLen rpmostreecxx$cxxbridge1$daemon_main (bool debug) noexcept;
2200+
2201+
void rpmostreecxx$cxxbridge1$daemon_terminate () noexcept;
2202+
21982203
::rust::repr::PtrLen rpmostreecxx$cxxbridge1$daemon_sanitycheck_environment (
21992204
const ::rpmostreecxx::OstreeSysroot &sysroot) noexcept;
22002205

@@ -2970,6 +2975,34 @@ extern "C"
29702975
return throw$;
29712976
}
29722977

2978+
::rust::repr::PtrLen
2979+
rpmostreecxx$cxxbridge1$daemon_init_inner (bool debug) noexcept
2980+
{
2981+
void (*daemon_init_inner$) (bool) = ::rpmostreecxx::daemon_init_inner;
2982+
::rust::repr::PtrLen throw$;
2983+
::rust::behavior::trycatch (
2984+
[&] {
2985+
daemon_init_inner$ (debug);
2986+
throw$.ptr = nullptr;
2987+
},
2988+
::rust::detail::Fail (throw$));
2989+
return throw$;
2990+
}
2991+
2992+
::rust::repr::PtrLen
2993+
rpmostreecxx$cxxbridge1$daemon_main_inner () noexcept
2994+
{
2995+
void (*daemon_main_inner$) () = ::rpmostreecxx::daemon_main_inner;
2996+
::rust::repr::PtrLen throw$;
2997+
::rust::behavior::trycatch (
2998+
[&] {
2999+
daemon_main_inner$ ();
3000+
throw$.ptr = nullptr;
3001+
},
3002+
::rust::detail::Fail (throw$));
3003+
return throw$;
3004+
}
3005+
29733006
::rust::repr::PtrLen
29743007
rpmostreecxx$cxxbridge1$client_require_root () noexcept
29753008
{
@@ -4038,6 +4071,22 @@ deployment_get_base (::rpmostreecxx::OstreeSysroot &sysroot, ::rust::Str opt_dep
40384071
return ::std::move (return$.value);
40394072
}
40404073

4074+
void
4075+
daemon_main (bool debug)
4076+
{
4077+
::rust::repr::PtrLen error$ = rpmostreecxx$cxxbridge1$daemon_main (debug);
4078+
if (error$.ptr)
4079+
{
4080+
throw ::rust::impl< ::rust::Error>::error (error$);
4081+
}
4082+
}
4083+
4084+
void
4085+
daemon_terminate () noexcept
4086+
{
4087+
rpmostreecxx$cxxbridge1$daemon_terminate ();
4088+
}
4089+
40414090
void
40424091
daemon_sanitycheck_environment (const ::rpmostreecxx::OstreeSysroot &sysroot)
40434092
{

rpmostree-cxxrs.h

+5
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "rpmostree-package-variants.h"
99
#include "rpmostree-rpm-util.h"
1010
#include "rpmostree-util.h"
11+
#include "rpmostreed-daemon.hpp"
1112
#include "rpmostreemain.h"
1213
#include "src/libpriv/rpmostree-cxxrs-prelude.h"
1314
#include <algorithm>
@@ -1843,6 +1844,10 @@ ::rpmostreecxx::OstreeDeployment *deployment_get_base (::rpmostreecxx::OstreeSys
18431844
::rust::Str opt_deploy_id,
18441845
::rust::Str opt_os_name);
18451846

1847+
void daemon_main (bool debug);
1848+
1849+
void daemon_terminate () noexcept;
1850+
18461851
void daemon_sanitycheck_environment (const ::rpmostreecxx::OstreeSysroot &sysroot);
18471852

18481853
::rust::String deployment_generate_id (const ::rpmostreecxx::OstreeDeployment &deployment) noexcept;

rust/src/client.rs

+56-31
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use crate::core::OSTREE_BOOTED;
66
use crate::cxxrsutil::*;
77
use crate::ffi::SystemHostType;
88
use crate::utils;
9-
use anyhow::{anyhow, Result};
9+
use anyhow::{anyhow, Context, Result};
1010
use fn_error_context::context;
1111
use gio::prelude::*;
1212
use ostree_ext::{gio, glib};
@@ -49,7 +49,8 @@ impl ClientConnection {
4949
SYSROOT_PATH,
5050
"org.projectatomic.rpmostree1.Sysroot",
5151
gio::Cancellable::NONE,
52-
)?;
52+
)
53+
.context("Initializing sysroot proxy")?;
5354
// Today the daemon mode requires running inside a booted deployment.
5455
let booted = sysroot_proxy
5556
.cached_property("Booted")
@@ -156,46 +157,70 @@ pub(crate) fn client_handle_fd_argument(
156157
}
157158
}
158159

159-
/// Explicitly ensure the daemon is started via systemd, if possible.
160-
///
161-
/// This works around bugs from DBus activation, see
162-
/// https://github.com/coreos/rpm-ostree/pull/2932
163-
///
164-
/// Basically we load too much data before claiming the bus name,
165-
/// and dbus doesn't give us a useful error. Instead, let's talk
166-
/// to systemd directly and use its client tools to scrape errors.
167-
///
168-
/// What we really should do probably is use native socket activation.
160+
/// Connect to the client socket and ensure the daemon is initialized;
161+
/// this avoids DBus and ensures that we get any early startup errors
162+
/// returned cleanly.
163+
#[context("Starting daemon via socket")]
164+
fn start_daemon_via_socket() -> Result<()> {
165+
use cap_std::io_lifetimes::IntoSocketlike;
166+
167+
futures::executor::block_on(async {
168+
let c = tokio::net::UnixStream::connect("/run/rpm-ostree/client.sock").await?;
169+
Ok::<_, anyhow::Error>(c)
170+
})
171+
.context("Connecting")?;
172+
173+
let address = sockaddr()?;
174+
let socket = rustix::net::socket(
175+
rustix::net::AddressFamily::UNIX,
176+
rustix::net::SocketType::STREAM,
177+
rustix::net::Protocol::from_raw(0),
178+
)?;
179+
let addr = crate::client::sockaddr()?;
180+
tracing::debug!("Starting daemon via {address:?}");
181+
rustix::net::connect_unix(&socket, &addr)
182+
.with_context(|| anyhow!("Failed to connect to {address:?}"))?;
183+
let socket = socket.into_socketlike();
184+
crate::daemon::write_message(
185+
&socket,
186+
crate::daemon::SocketMessage::ClientHello {
187+
selfid: crate::core::self_id()?,
188+
},
189+
)
190+
.context("Writing ClientHello")?;
191+
rustix::net::shutdown(&socket, rustix::net::Shutdown::Write).expect("shutdown");
192+
let resp = crate::daemon::recv_message(&socket)?;
193+
match resp {
194+
crate::daemon::SocketMessage::ServerOk => Ok(()),
195+
crate::daemon::SocketMessage::ServerError { msg } => {
196+
Err(anyhow!("server error: {msg}").into())
197+
}
198+
o => Err(anyhow!("unexpected message: {o:?}").into()),
199+
}
200+
}
201+
202+
/// Returns the address of the client socket.
203+
pub(crate) fn sockaddr() -> Result<rustix::net::SocketAddrUnix> {
204+
rustix::net::SocketAddrUnix::new("/run/rpm-ostree/client.sock").map_err(anyhow::Error::msg)
205+
}
206+
169207
pub(crate) fn client_start_daemon() -> CxxResult<()> {
170-
let service = "rpm-ostreed.service";
171-
// Assume non-root can't use systemd right now.
208+
// systemctl and socket paths only work for root right now; in the future
209+
// the socket may be opened up.
172210
if rustix::process::getuid().as_raw() != 0 {
173211
return Ok(());
174212
}
175-
// Unfortunately, RHEL8 systemd will count "systemctl start"
176-
// invocations against the restart limit, so query the status
177-
// first.
178-
let activeres = Command::new("systemctl")
179-
.args(&["is-active", "rpm-ostreed"])
180-
.output()?;
181-
// Explicitly don't check the error return value, we don't want to
182-
// hard fail on it.
183-
if String::from_utf8_lossy(&activeres.stdout).starts_with("active") {
184-
// It's active, we're done. Note that while this is a race
185-
// condition, that's fine because it will be handled by DBus
186-
// activation.
187-
return Ok(());
188-
}
213+
let socket = "rpm-ostreed.socket";
189214
let res = Command::new("systemctl")
190-
.args(&["--no-ask-password", "start", service])
215+
.args(&["--no-ask-password", "start", socket])
191216
.status()?;
192217
if !res.success() {
193218
let _ = Command::new("systemctl")
194-
.args(&["--no-pager", "status", service])
219+
.args(&["--no-pager", "status", socket])
195220
.status();
196221
return Err(anyhow!("{}", res).into());
197222
}
198-
Ok(())
223+
return start_daemon_via_socket().map_err(Into::into);
199224
}
200225

201226
/// Convert the GVariant parameters from the DownloadProgress DBus API to a human-readable English string.

rust/src/core.rs

+8
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,14 @@ fn stage_container_rpm_files(rpms: Vec<File>) -> CxxResult<Vec<String>> {
350350
Ok(r)
351351
}
352352

353+
/// Return an opaque identifier for the current executing binary. This can
354+
/// be passed via IPC to verify that client and server are running the same code.
355+
pub(crate) fn self_id() -> Result<String> {
356+
use std::os::unix::fs::MetadataExt;
357+
let metadata = std::fs::metadata("/proc/self/exe").context("Failed to read /proc/self/exe")?;
358+
Ok(format!("dev={};inode={}", metadata.dev(), metadata.ino()))
359+
}
360+
353361
#[cfg(test)]
354362
mod test {
355363
use super::*;

0 commit comments

Comments
 (0)