Skip to content

Commit c993732

Browse files
daemon: Add socket activation via /run/rpm-ostreed.socket
For historical reasons, the daemon ends up doing a lot of initialization before even claiming the DBus name. For example, it calls `ostree_sysroot_load()`. We also end up scanning the RPM database, and actually parse all the GPG keys in `/etc/pki/rpm-gpg` etc. This causes two related problems: - By doing all this work before claiming the bus name, we race against the (pretty low) DBus service timeout of 25s. - If something hard fails at initialization, the client can't easily see the error because it just appears in the systemd journal. The client will just see a service timeout. The solution to this is to adopt systemd socket activation. There's a new `rpm-ostreed.socket` and the daemon can be activated that way. The client (when run as root, the socket is only accessible to root right now) connects, which will activate the daemon and attempt initialization - without claiming the DBus name yet. If something goes wrong here, the daemon will reply to the client that activated it with the error, and then also exit with failure. On success, everything continues as normal, including claiming the DBus name. Note that this also logically replaces the code that does explicit `systemctl start rpm-ostreed` invocations. After this patch: ``` $ systemctl stop rpm-ostreed $ umount /boot $ rpm-ostree status error: Couldn't start daemon: Error setting up sysroot: loading sysroot: Unexpected state: /run/ostree-booted found, but no /boot/loader directory ``` Co-authored-by: Colin Walters <[email protected]>
1 parent 1d4e1f5 commit c993732

15 files changed

+371
-61
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ cap-std-ext = "2.0"
5151
cap-std = { version = "1.0.3", features = ["fs_utf8"] }
5252
containers-image-proxy = "0.5.1"
5353
# Explicitly force on libc
54-
rustix = { version = "0.37", features = ["use-libc"] }
54+
rustix = { version = "0.37", features = ["use-libc", "net"] }
5555
cap-primitives = "1.0.3"
5656
chrono = { version = "0.4.26", features = ["serde"] }
5757
clap = { version = "4.3", features = ["derive"] }

Makefile-daemon.am

+4-3
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,15 @@ systemdunit_service_file_names = \
6363
$(NULL)
6464

6565
systemdunit_service_files = $(addprefix $(srcdir)/src/daemon/,$(systemdunit_service_file_names))
66-
systemdunit_timer_files = \
66+
systemdunit_other_files = \
6767
$(srcdir)/src/daemon/rpm-ostreed-automatic.timer \
6868
$(srcdir)/src/daemon/rpm-ostree-countme.timer \
69+
$(srcdir)/src/daemon/rpm-ostreed.socket \
6970
$(NULL)
7071

7172
systemdunit_DATA = \
7273
$(systemdunit_service_files) \
73-
$(systemdunit_timer_files) \
74+
$(systemdunit_other_files) \
7475
$(NULL)
7576

7677
systemdunitdir = $(prefix)/lib/systemd/system/
@@ -103,7 +104,7 @@ EXTRA_DIST += \
103104
$(sysconf_DATA) \
104105
$(service_in_files) \
105106
$(systemdunit_service_in_files) \
106-
$(systemdunit_timer_files) \
107+
$(systemdunit_other_files) \
107108
$(NULL)
108109

109110
CLEANFILES += \

rpmostree-cxxrs.cxx

+49
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "rpmostree-package-variants.h"
99
#include "rpmostree-rpm-util.h"
1010
#include "rpmostree-util.h"
11+
#include "rpmostreed-daemon.hpp"
1112
#include "rpmostreemain.h"
1213
#include "src/libpriv/rpmostree-cxxrs-prelude.h"
1314
#include <algorithm>
@@ -2195,6 +2196,10 @@ extern "C"
21952196
::rust::Str opt_deploy_id, ::rust::Str opt_os_name,
21962197
::rpmostreecxx::OstreeDeployment **return$) noexcept;
21972198

2199+
::rust::repr::PtrLen rpmostreecxx$cxxbridge1$daemon_main (bool debug) noexcept;
2200+
2201+
void rpmostreecxx$cxxbridge1$daemon_terminate () noexcept;
2202+
21982203
::rust::repr::PtrLen rpmostreecxx$cxxbridge1$daemon_sanitycheck_environment (
21992204
const ::rpmostreecxx::OstreeSysroot &sysroot) noexcept;
22002205

@@ -2970,6 +2975,34 @@ extern "C"
29702975
return throw$;
29712976
}
29722977

2978+
::rust::repr::PtrLen
2979+
rpmostreecxx$cxxbridge1$daemon_init_inner (bool debug) noexcept
2980+
{
2981+
void (*daemon_init_inner$) (bool) = ::rpmostreecxx::daemon_init_inner;
2982+
::rust::repr::PtrLen throw$;
2983+
::rust::behavior::trycatch (
2984+
[&] {
2985+
daemon_init_inner$ (debug);
2986+
throw$.ptr = nullptr;
2987+
},
2988+
::rust::detail::Fail (throw$));
2989+
return throw$;
2990+
}
2991+
2992+
::rust::repr::PtrLen
2993+
rpmostreecxx$cxxbridge1$daemon_main_inner () noexcept
2994+
{
2995+
void (*daemon_main_inner$) () = ::rpmostreecxx::daemon_main_inner;
2996+
::rust::repr::PtrLen throw$;
2997+
::rust::behavior::trycatch (
2998+
[&] {
2999+
daemon_main_inner$ ();
3000+
throw$.ptr = nullptr;
3001+
},
3002+
::rust::detail::Fail (throw$));
3003+
return throw$;
3004+
}
3005+
29733006
::rust::repr::PtrLen
29743007
rpmostreecxx$cxxbridge1$client_require_root () noexcept
29753008
{
@@ -4038,6 +4071,22 @@ deployment_get_base (::rpmostreecxx::OstreeSysroot &sysroot, ::rust::Str opt_dep
40384071
return ::std::move (return$.value);
40394072
}
40404073

4074+
void
4075+
daemon_main (bool debug)
4076+
{
4077+
::rust::repr::PtrLen error$ = rpmostreecxx$cxxbridge1$daemon_main (debug);
4078+
if (error$.ptr)
4079+
{
4080+
throw ::rust::impl< ::rust::Error>::error (error$);
4081+
}
4082+
}
4083+
4084+
void
4085+
daemon_terminate () noexcept
4086+
{
4087+
rpmostreecxx$cxxbridge1$daemon_terminate ();
4088+
}
4089+
40414090
void
40424091
daemon_sanitycheck_environment (const ::rpmostreecxx::OstreeSysroot &sysroot)
40434092
{

rpmostree-cxxrs.h

+5
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "rpmostree-package-variants.h"
99
#include "rpmostree-rpm-util.h"
1010
#include "rpmostree-util.h"
11+
#include "rpmostreed-daemon.hpp"
1112
#include "rpmostreemain.h"
1213
#include "src/libpriv/rpmostree-cxxrs-prelude.h"
1314
#include <algorithm>
@@ -1843,6 +1844,10 @@ ::rpmostreecxx::OstreeDeployment *deployment_get_base (::rpmostreecxx::OstreeSys
18431844
::rust::Str opt_deploy_id,
18441845
::rust::Str opt_os_name);
18451846

1847+
void daemon_main (bool debug);
1848+
1849+
void daemon_terminate () noexcept;
1850+
18461851
void daemon_sanitycheck_environment (const ::rpmostreecxx::OstreeSysroot &sysroot);
18471852

18481853
::rust::String deployment_generate_id (const ::rpmostreecxx::OstreeDeployment &deployment) noexcept;

rust/src/client.rs

+58-37
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@ use crate::core::OSTREE_BOOTED;
66
use crate::cxxrsutil::*;
77
use crate::ffi::SystemHostType;
88
use crate::utils;
9-
use anyhow::{anyhow, Result};
9+
use anyhow::{anyhow, Context, Result};
10+
use camino::Utf8Path;
1011
use fn_error_context::context;
1112
use gio::prelude::*;
1213
use ostree_ext::{gio, glib};
1314
use std::io::{BufRead, Write};
1415
use std::os::unix::io::IntoRawFd;
16+
use std::os::unix::net::UnixStream;
1517
use std::process::Command;
1618

1719
/// The well-known bus name.
@@ -20,6 +22,7 @@ const BUS_NAME: &str = "org.projectatomic.rpmostree1";
2022
const SYSROOT_PATH: &str = "/org/projectatomic/rpmostree1/Sysroot";
2123
const OS_INTERFACE: &str = "org.projectatomic.rpmostree1.OS";
2224
const OS_EX_INTERFACE: &str = "org.projectatomic.rpmostree1.OSExperimental";
25+
const CLIENT_SOCKET_PATH: &str = "/run/rpm-ostree/client.sock";
2326

2427
/// A unique DBus connection to the rpm-ostree daemon.
2528
/// This currently wraps a C++ client connection.
@@ -49,7 +52,8 @@ impl ClientConnection {
4952
SYSROOT_PATH,
5053
"org.projectatomic.rpmostree1.Sysroot",
5154
gio::Cancellable::NONE,
52-
)?;
55+
)
56+
.context("Initializing sysroot proxy")?;
5357
// Today the daemon mode requires running inside a booted deployment.
5458
let booted = sysroot_proxy
5559
.cached_property("Booted")
@@ -156,46 +160,63 @@ pub(crate) fn client_handle_fd_argument(
156160
}
157161
}
158162

159-
/// Explicitly ensure the daemon is started via systemd, if possible.
160-
///
161-
/// This works around bugs from DBus activation, see
162-
/// https://github.com/coreos/rpm-ostree/pull/2932
163-
///
164-
/// Basically we load too much data before claiming the bus name,
165-
/// and dbus doesn't give us a useful error. Instead, let's talk
166-
/// to systemd directly and use its client tools to scrape errors.
167-
///
168-
/// What we really should do probably is use native socket activation.
169-
pub(crate) fn client_start_daemon() -> CxxResult<()> {
170-
let service = "rpm-ostreed.service";
171-
// Assume non-root can't use systemd right now.
172-
if rustix::process::getuid().as_raw() != 0 {
173-
return Ok(());
163+
// If the client socket doesn't exist, try to ask systemd to start it.
164+
// This can happen even when the socket unit is installed because
165+
// presets may not enable it.
166+
fn check_and_start_daemon_socket() -> Result<bool> {
167+
if Utf8Path::new(CLIENT_SOCKET_PATH).exists() {
168+
return Ok(true);
174169
}
175-
// Unfortunately, RHEL8 systemd will count "systemctl start"
176-
// invocations against the restart limit, so query the status
177-
// first.
178-
let activeres = Command::new("systemctl")
179-
.args(&["is-active", "rpm-ostreed"])
170+
let socket_unit = "rpm-ostreed.socket";
171+
tracing::debug!("{CLIENT_SOCKET_PATH} does not exist, explicitly starting socket unit");
172+
let start_result = Command::new("systemctl")
173+
.args(&["--no-ask-password", "start", socket_unit])
180174
.output()?;
181-
// Explicitly don't check the error return value, we don't want to
182-
// hard fail on it.
183-
if String::from_utf8_lossy(&activeres.stdout).starts_with("active") {
184-
// It's active, we're done. Note that while this is a race
185-
// condition, that's fine because it will be handled by DBus
186-
// activation.
175+
if !start_result.status.success() {
176+
let err = String::from_utf8_lossy(&start_result.stderr);
177+
tracing::warn!("Failed to start {socket_unit}: {err}");
178+
return Ok(false);
179+
}
180+
Ok(true)
181+
}
182+
183+
/// Connect to the client socket and ensure the daemon is initialized;
184+
/// this avoids DBus and ensures that we get any early startup errors
185+
/// returned cleanly.
186+
#[context("Starting daemon via socket")]
187+
fn start_daemon_via_socket() -> Result<()> {
188+
let capable = check_and_start_daemon_socket()?;
189+
if !capable {
190+
tracing::debug!("Falling back to DBus activation");
187191
return Ok(());
188192
}
189-
let res = Command::new("systemctl")
190-
.args(&["--no-ask-password", "start", service])
191-
.status()?;
192-
if !res.success() {
193-
let _ = Command::new("systemctl")
194-
.args(&["--no-pager", "status", service])
195-
.status();
196-
return Err(anyhow!("{}", res).into());
193+
194+
tracing::debug!("Starting daemon via {CLIENT_SOCKET_PATH}");
195+
let mut socket = UnixStream::connect(CLIENT_SOCKET_PATH)?;
196+
crate::daemon::write_message(
197+
&mut socket,
198+
crate::daemon::SocketMessage::ClientHello {
199+
selfid: crate::core::self_id()?,
200+
},
201+
)
202+
.context("Writing ClientHello")?;
203+
let resp = crate::daemon::recv_message(&mut socket)?;
204+
match resp {
205+
crate::daemon::SocketMessage::ServerOk => Ok(()),
206+
crate::daemon::SocketMessage::ServerError { msg } => {
207+
Err(anyhow!("server error: {msg}").into())
208+
}
209+
o => Err(anyhow!("unexpected message: {o:?}").into()),
197210
}
198-
Ok(())
211+
}
212+
213+
pub(crate) fn client_start_daemon() -> CxxResult<()> {
214+
// systemctl and socket paths only work for root right now; in the future
215+
// the socket may be opened up.
216+
if rustix::process::getuid().as_raw() != 0 {
217+
return Ok(());
218+
}
219+
return start_daemon_via_socket().map_err(Into::into);
199220
}
200221

201222
/// Convert the GVariant parameters from the DownloadProgress DBus API to a human-readable English string.

rust/src/core.rs

+8
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,14 @@ fn stage_container_rpm_files(rpms: Vec<File>) -> CxxResult<Vec<String>> {
350350
Ok(r)
351351
}
352352

353+
/// Return an opaque identifier for the current executing binary. This can
354+
/// be passed via IPC to verify that client and server are running the same code.
355+
pub(crate) fn self_id() -> Result<String> {
356+
use std::os::unix::fs::MetadataExt;
357+
let metadata = std::fs::metadata("/proc/self/exe").context("Failed to read /proc/self/exe")?;
358+
Ok(format!("dev={};inode={}", metadata.dev(), metadata.ino()))
359+
}
360+
353361
#[cfg(test)]
354362
mod test {
355363
use super::*;

0 commit comments

Comments
 (0)