Skip to content

Commit d8f96dd

Browse files
committed
daemon: Add socket activation via /run/rpm-ostreed.socket
For historical reasons, the daemon ends up doing a lot of initialization before even claiming the DBus name. For example, it calls `ostree_sysroot_load()`. We also end up scanning the RPM database, and actually parse all the GPG keys in `/etc/pki/rpm-gpg` etc. This causes two related problems: - By doing all this work before claiming the bus name, we race against the (pretty low) DBus service timeout of 25s. - If something hard fails at initialization, the client can't easily see the error because it just appears in the systemd journal. The client will just see a service timeout. The solution to this is to adopt systemd socket activation, which drops out DBus as an intermediary. On daemon startup, we now do the process-global initialization (like ostree sysroot) and if that fails, the daemon just sticks around (but without claiming the bus name), ready to return the error message to each client. After this patch: ``` $ systemctl stop rpm-ostreed $ umount /boot $ rpm-ostree status error: Couldn't start daemon: Error setting up sysroot: loading sysroot: Unexpected state: /run/ostree-booted found, but no /boot/loader directory ```
1 parent 6698a8c commit d8f96dd

11 files changed

+261
-24
lines changed

Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ lto = "thin"
123123
# Note: If you add a feature here, you also probably want to update utils.rs:get_features()
124124
fedora-integration = []
125125
rhsm = ["libdnf-sys/rhsm"]
126+
# Enable hard requirement on `rpm-ostreed.socket`; requires https://bugzilla.redhat.com/show_bug.cgi?id=2110012
127+
client-socket = []
126128
bin-unit-tests = []
127129
# ASAN+UBSAN
128130
sanitizers = []

Makefile-daemon.am

+7-3
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,18 @@ systemdunit_service_in_files = \
6363
$(NULL)
6464

6565
systemdunit_service_files = $(systemdunit_service_in_files:.service.in=.service)
66-
systemdunit_timer_files = \
66+
systemdunit_other_files = \
6767
$(srcdir)/src/daemon/rpm-ostreed-automatic.timer \
6868
$(srcdir)/src/daemon/rpm-ostree-countme.timer \
6969
$(NULL)
7070

71+
if CLIENT_SOCKET
72+
systemdunit_other_files += $(srcdir)/src/daemon/rpm-ostreed.socket
73+
endif
74+
7175
systemdunit_DATA = \
7276
$(systemdunit_service_files) \
73-
$(systemdunit_timer_files) \
77+
$(systemdunit_other_files) \
7478
$(NULL)
7579

7680
systemdunitdir = $(prefix)/lib/systemd/system/
@@ -110,7 +114,7 @@ EXTRA_DIST += \
110114
$(sysconf_DATA) \
111115
$(service_in_files) \
112116
$(systemdunit_service_in_files) \
113-
$(systemdunit_timer_files) \
117+
$(systemdunit_other_files) \
114118
$(NULL)
115119

116120
CLEANFILES += \

configure.ac

+2
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ AC_ARG_ENABLE(featuresrs,
7171
[enable_featuresrs=])
7272
AC_SUBST([RUST_FEATURES], $enable_featuresrs)
7373

74+
AM_CONDITIONAL(CLIENT_SOCKET, [echo $enable_featuresrs | grep -q 'client-socket'])
75+
7476
# Initialize libtool
7577
LT_PREREQ([2.2.4])
7678
LT_INIT([disable-static])

rust/src/client.rs

+35-1
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,26 @@ pub(crate) fn client_handle_fd_argument(
154154
}
155155
}
156156

157+
/// Connect to the client socket and ensure the daemon is initialized;
158+
/// this avoids DBus and ensures that we get any early startup errors
159+
/// returned cleanly.
160+
#[cfg(feature = "client-socket")]
161+
fn start_daemon_via_socket() -> Result<()> {
162+
let address = "/run/rpm-ostree/client.sock";
163+
tracing::debug!("Starting daemon via {address}");
164+
let s = std::os::unix::net::UnixStream::connect(address)
165+
.with_context(|| anyhow!("Failed to connect to {}", address))?;
166+
let mut s = std::io::BufReader::new(s);
167+
let mut r = String::new();
168+
s.read_to_string(&mut r)
169+
.context("Reading from client socket")?;
170+
if r.is_empty() {
171+
Ok(())
172+
} else {
173+
Err(anyhow!("{r}").into())
174+
}
175+
}
176+
157177
/// Explicitly ensure the daemon is started via systemd, if possible.
158178
///
159179
/// This works around bugs from DBus activation, see
@@ -164,12 +184,14 @@ pub(crate) fn client_handle_fd_argument(
164184
/// to systemd directly and use its client tools to scrape errors.
165185
///
166186
/// What we really should do probably is use native socket activation.
167-
pub(crate) fn client_start_daemon() -> CxxResult<()> {
187+
#[cfg(not(feature = "client-socket"))]
188+
fn start_daemon_via_systemctl() -> Result<()> {
168189
let service = "rpm-ostreed.service";
169190
// Assume non-root can't use systemd right now.
170191
if rustix::process::getuid().as_raw() != 0 {
171192
return Ok(());
172193
}
194+
173195
// Unfortunately, RHEL8 systemd will count "systemctl start"
174196
// invocations against the restart limit, so query the status
175197
// first.
@@ -196,6 +218,18 @@ pub(crate) fn client_start_daemon() -> CxxResult<()> {
196218
Ok(())
197219
}
198220

221+
pub(crate) fn client_start_daemon() -> CxxResult<()> {
222+
// systemctl and socket paths only work for root right now; in the future
223+
// the socket may be opened up.
224+
if rustix::process::getuid().as_raw() != 0 {
225+
return Ok(());
226+
}
227+
#[cfg(feature = "client-socket")]
228+
return start_daemon_via_socket().map_err(Into::into);
229+
#[cfg(not(feature = "client-socket"))]
230+
return start_daemon_via_systemctl().map_err(Into::into);
231+
}
232+
199233
/// Convert the GVariant parameters from the DownloadProgress DBus API to a human-readable English string.
200234
pub(crate) fn client_render_download_progress(progress: &crate::ffi::GVariant) -> String {
201235
let progress = progress

rust/src/daemon.rs

+131-3
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,23 @@ use crate::cxxrsutil::*;
88
use crate::ffi::{
99
OverrideReplacementSource, OverrideReplacementType, ParsedRevision, ParsedRevisionKind,
1010
};
11-
use anyhow::{anyhow, format_err, Result};
11+
use anyhow::{anyhow, format_err, Context, Result};
1212
use cap_std::fs::Dir;
1313
use cap_std_ext::dirext::CapStdExtDirExt;
1414
use cap_std_ext::{cap_std, rustix};
1515
use fn_error_context::context;
1616
use glib::prelude::*;
17+
use once_cell::sync::Lazy;
1718
use ostree_ext::{gio, glib, ostree};
18-
use rustix::fd::BorrowedFd;
19+
use rustix::fd::{BorrowedFd, FromRawFd};
1920
use rustix::fs::MetadataExt;
2021
use std::collections::{BTreeMap, BTreeSet};
21-
use std::io::Read;
22+
use std::io::{Read, Write};
2223
use std::os::unix::fs::PermissionsExt;
2324
use std::path::Path;
25+
use std::sync::Mutex;
26+
use tokio::net::{UnixListener, UnixStream};
27+
use tokio::sync::oneshot::{Receiver, Sender};
2428

2529
const RPM_OSTREED_COMMIT_VERIFICATION_CACHE: &str = "rpm-ostree/gpgcheck-cache";
2630

@@ -131,6 +135,130 @@ fn deployment_populate_variant_origin(
131135
Ok(())
132136
}
133137

138+
async fn send_ok_result_to_client(_client: UnixStream) {
139+
// On success we close the stream without writing anything,
140+
// which acknowledges successful startup to the client.
141+
// In the future we may actually implement a protocol here, so this
142+
// is stubbed out as a full async fn in preparation for that.
143+
tracing::debug!("Acknowleged client");
144+
}
145+
146+
static SHUTDOWN_SIGNAL: Lazy<Mutex<Option<Sender<()>>>> = Lazy::new(|| Mutex::new(None));
147+
148+
async fn process_clients_with_ok(listener: UnixListener, mut cancel: Receiver<()>) {
149+
tracing::debug!("Processing clients...");
150+
loop {
151+
tokio::select! {
152+
_ = &mut cancel => {
153+
tracing::debug!("Got cancellation event");
154+
return
155+
}
156+
r = listener.accept() => {
157+
match r {
158+
Ok((stream, _addr)) => {
159+
send_ok_result_to_client(stream).await;
160+
},
161+
Err(e) => {
162+
tracing::debug!("failed to accept client: {e}")
163+
}
164+
}
165+
}
166+
}
167+
}
168+
}
169+
170+
/// Ensure all asynchronous tasks in this Rust half of the daemon code are stopped.
171+
/// Called from C++.
172+
pub(crate) fn daemon_terminate() {
173+
let chan = (*SHUTDOWN_SIGNAL).lock().unwrap().take().unwrap();
174+
let _ = chan.send(());
175+
}
176+
177+
fn process_one_client(listener: std::os::unix::net::UnixListener, e: anyhow::Error) -> Result<()> {
178+
let mut incoming = match listener.incoming().next() {
179+
Some(r) => r?,
180+
None => {
181+
anyhow::bail!("Expected to find client socket from activation");
182+
}
183+
};
184+
185+
let buf = format!("{e}");
186+
incoming.write_all(buf.as_bytes())?;
187+
188+
todo!()
189+
}
190+
191+
/// Perform initialization steps required by systemd service activation.
192+
///
193+
/// This ensures that the system is running under systemd, then receives the
194+
/// socket-FD for main IPC logic, and notifies systemd about ready-state.
195+
pub(crate) fn daemon_main(debug: bool) -> Result<()> {
196+
let handle = tokio::runtime::Handle::current();
197+
let _tokio_guard = handle.enter();
198+
use std::os::unix::net::UnixListener as StdUnixListener;
199+
if !systemd::daemon::booted()? {
200+
return Err(anyhow!("not running as a systemd service"));
201+
}
202+
203+
let init_res: Result<()> = crate::ffi::daemon_init_inner(debug).map_err(|e| e.into());
204+
tracing::debug!("Initialization result: {init_res:?}");
205+
206+
let mut fds = systemd::daemon::listen_fds(false)?.iter();
207+
let listener = match fds.next() {
208+
None => {
209+
// If started directly via `systemctl start` or DBus activation, we
210+
// directly propagate the error back to our exit code.
211+
init_res?;
212+
tracing::debug!("Initializing directly (not socket activated)");
213+
cfg!(feature = "client-socket")
214+
.then(|| StdUnixListener::bind("/run/rpm-ostree/client.sock"))
215+
.transpose()
216+
.context("Binding to socket")?
217+
}
218+
Some(fd) => {
219+
if fds.next().is_some() {
220+
return Err(anyhow!("Expected exactly 1 fd from systemd activation"));
221+
}
222+
tracing::debug!("Initializing from socket activation; fd={fd}");
223+
let listener = unsafe { StdUnixListener::from_raw_fd(fd) };
224+
225+
match init_res {
226+
Ok(_) => Some(listener),
227+
Err(e) => {
228+
let err_copy = anyhow!("{e}");
229+
tracing::debug!("Reporting initialization error: {e}");
230+
match process_one_client(listener, err_copy) {
231+
Ok(()) => {
232+
tracing::debug!("Acknowleged initial client");
233+
}
234+
Err(e) => {
235+
tracing::debug!("Caught error while processing client {e}");
236+
}
237+
}
238+
return Err(e);
239+
}
240+
}
241+
}
242+
};
243+
244+
if let Some(listener) = listener {
245+
let (shutdown_send, shutdown_recv) = tokio::sync::oneshot::channel();
246+
(*SHUTDOWN_SIGNAL).lock().unwrap().replace(shutdown_send);
247+
248+
let listener = UnixListener::from_std(listener)?;
249+
250+
// On success, we spawn a helper task that just responds with
251+
// sucess to clients that connect via the socket. In the future,
252+
// perhaps we'll expose an API here.
253+
tracing::debug!("Spawning acknowledgement task");
254+
tokio::task::spawn(async { process_clients_with_ok(listener, shutdown_recv).await });
255+
}
256+
257+
tracing::debug!("Entering daemon mainloop");
258+
// And now, enter the main loop.
259+
Ok(crate::ffi::daemon_main_inner()?)
260+
}
261+
134262
/// Serialize information about the given deployment into the `dict`;
135263
/// this will be exposed via DBus and is hence public API.
136264
pub(crate) fn deployment_populate_variant(

rust/src/lib.rs

+8
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,8 @@ pub mod ffi {
305305

306306
// daemon.rs
307307
extern "Rust" {
308+
fn daemon_main(debug: bool) -> Result<()>;
309+
fn daemon_terminate();
308310
fn daemon_sanitycheck_environment(sysroot: &OstreeSysroot) -> Result<()>;
309311
fn deployment_generate_id(deployment: &OstreeDeployment) -> String;
310312
fn deployment_populate_variant(
@@ -768,6 +770,12 @@ pub mod ffi {
768770
fn c_unit_tests() -> Result<()>;
769771
}
770772

773+
unsafe extern "C++" {
774+
include!("rpmostreed-daemon.hpp");
775+
fn daemon_init_inner(debug: bool) -> Result<()>;
776+
fn daemon_main_inner() -> Result<()>;
777+
}
778+
771779
unsafe extern "C++" {
772780
include!("rpmostree-clientlib.h");
773781
fn client_require_root() -> Result<()>;

rust/src/main.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,9 @@ fn inner_main() -> Result<i32> {
135135
.enable_all()
136136
.build()
137137
.context("Failed to build tokio runtime")?;
138-
runtime.block_on(dispatch_multicall(callname, args))
138+
let r = runtime.block_on(dispatch_multicall(callname, args));
139+
tracing::debug!("Exiting inner main with result: {r:?}");
140+
r
139141
}
140142

141143
fn print_error(e: anyhow::Error) {

src/app/rpmostree-builtin-start-daemon.cxx

+35-16
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "rpmostree-libbuiltin.h"
3636
#include "rpmostree-util.h"
3737
#include "rpmostreed-daemon.h"
38+
#include "rpmostreed-utils.h"
3839

3940
typedef enum
4041
{
@@ -53,6 +54,7 @@ static GOptionEntry opt_entries[] = { { "debug", 'd', 0, G_OPTION_ARG_NONE, &opt
5354
"Use system root SYSROOT (default: /)", "SYSROOT" },
5455
{ NULL } };
5556

57+
static GDBusConnection *bus = NULL;
5658
static RpmostreedDaemon *rpm_ostree_daemon = NULL;
5759

5860
static void
@@ -211,17 +213,15 @@ on_log_handler (const gchar *log_domain, GLogLevelFlags log_level, const gchar *
211213
sd_journal_print (priority, "%s", message);
212214
}
213215

214-
gboolean
215-
rpmostree_builtin_start_daemon (int argc, char **argv, RpmOstreeCommandInvocation *invocation,
216-
GCancellable *cancellable, GError **error)
216+
namespace rpmostreecxx
217217
{
218-
g_autoptr (GOptionContext) opt_context = g_option_context_new (" - start the daemon process");
219-
g_option_context_add_main_entries (opt_context, opt_entries, NULL);
220-
221-
if (!g_option_context_parse (opt_context, &argc, &argv, error))
222-
return FALSE;
223-
224-
if (opt_debug)
218+
// This function is always called from the Rust side. Hopefully
219+
// soon we'll move more of this code into daemon.rs.
220+
void
221+
daemon_init_inner (bool debug)
222+
{
223+
g_autoptr (GError) local_error = NULL;
224+
if (debug)
225225
{
226226
g_autoptr (GIOChannel) channel = NULL;
227227
g_log_set_handler (G_LOG_DOMAIN, (GLogLevelFlags)(G_LOG_LEVEL_DEBUG | G_LOG_LEVEL_INFO),
@@ -243,19 +243,24 @@ rpmostree_builtin_start_daemon (int argc, char **argv, RpmOstreeCommandInvocatio
243243
g_unix_signal_add (SIGTERM, on_sigint, NULL);
244244

245245
/* Get an explicit ref to the bus so we can use it later */
246-
g_autoptr (GDBusConnection) bus = g_bus_get_sync (G_BUS_TYPE_SYSTEM, NULL, error);
246+
g_autoptr (GDBusConnection) bus = g_bus_get_sync (G_BUS_TYPE_SYSTEM, NULL, &local_error);
247247
if (!bus)
248-
return FALSE;
249-
if (!start_daemon (bus, error))
248+
util::throw_gerror (local_error);
249+
if (!start_daemon (bus, &local_error))
250250
{
251-
if (*error)
252-
sd_notifyf (0, "STATUS=error: %s", (*error)->message);
253-
return FALSE;
251+
sd_notifyf (0, "STATUS=error: %s", local_error->message);
252+
util::throw_gerror (local_error);
254253
}
254+
}
255255

256+
// Called from rust side to enter mainloop.
257+
void
258+
daemon_main_inner ()
259+
{
256260
state_transition (APPSTATE_RUNNING);
257261

258262
g_debug ("Entering main event loop");
263+
g_assert (rpm_ostree_daemon);
259264
rpmostreed_daemon_run_until_idle_exit (rpm_ostree_daemon);
260265

261266
if (bus)
@@ -285,6 +290,20 @@ rpmostree_builtin_start_daemon (int argc, char **argv, RpmOstreeCommandInvocatio
285290
g_autoptr (GMainContext) mainctx = g_main_context_default ();
286291
while (appstate == APPSTATE_FLUSHING)
287292
g_main_context_iteration (mainctx, TRUE);
293+
}
294+
} /* namespace */
295+
296+
gboolean
297+
rpmostree_builtin_start_daemon (int argc, char **argv, RpmOstreeCommandInvocation *invocation,
298+
GCancellable *cancellable, GError **error)
299+
{
300+
g_autoptr (GOptionContext) opt_context = g_option_context_new (" - start the daemon process");
301+
g_option_context_add_main_entries (opt_context, opt_entries, NULL);
302+
303+
if (!g_option_context_parse (opt_context, &argc, &argv, error))
304+
return FALSE;
305+
306+
rpmostreecxx::daemon_main (opt_debug);
288307

289308
return TRUE;
290309
}

0 commit comments

Comments
 (0)