Skip to content

Commit f6db24d

Browse files
supercomputer7spholz
authored andcommitted
Kernel+runc: Remove the pivot_root functionality in copy_mount syscall
That functionality seems to be too much complicated. We shouldn't overengineer how the copy_mount syscall works, so instead of allowing replacement of the root filesystem, let's make the unshare file descriptor to be configured via a special ioctl call before we initialize a new VFSRootContext object. The special ioctl can either set a new root filesystem for the upcoming VFSRootContext object, or remove it (by passing fd of -1). If there's no specified root filesystem, a new RAMFS instance will be created automatically when invoking the unshare_create syscall. This also simplifies the code in the boot process, hence making it much more readable. It should be noted, that we assumed during pivot_root that the first mountpoint in a context is the root mountpoint, which is probably a fair assumption, but we don't assume this anywhere else in the VFSRootContext code. If this functionality ever comes back, we should ensure that we make some effort to not assume this again.
1 parent 2a4a096 commit f6db24d

File tree

14 files changed

+143
-119
lines changed

14 files changed

+143
-119
lines changed

Base/usr/share/runc/full-buggiebox-container.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55
"hostname": null,
66
"layout": [
77
{
8-
"type": "mount",
8+
"type": "root_mount",
99
"source": null,
10-
"target": "/",
1110
"fs_type": "RAMFS"
1211
},
1312
{

Kernel/API/Ioctl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ enum IOCtlNumber {
110110
KEYBOARD_IOCTL_SET_CAPS_LOCK,
111111
MOUNT_IOCTL_SET_MOUNT_SPECIFIC_FLAG,
112112
MOUNT_IOCTL_DELETE_MOUNT_SPECIFIC_FLAG,
113+
UNSHARE_IOCTL_ATTACH_ROOT_FILESYSTEM_AT_FD,
113114
SIOCATMARK,
114115
SIOCSIFADDR,
115116
SIOCGIFADDR,
@@ -196,6 +197,7 @@ enum IOCtlNumber {
196197
#define FIONREAD FIONREAD
197198
#define MOUNT_IOCTL_SET_MOUNT_SPECIFIC_FLAG MOUNT_IOCTL_SET_MOUNT_SPECIFIC_FLAG
198199
#define MOUNT_IOCTL_DELETE_MOUNT_SPECIFIC_FLAG MOUNT_IOCTL_DELETE_MOUNT_SPECIFIC_FLAG
200+
#define UNSHARE_IOCTL_ATTACH_ROOT_FILESYSTEM_AT_FD UNSHARE_IOCTL_ATTACH_ROOT_FILESYSTEM_AT_FD
199201
#define SOUNDCARD_IOCTL_SET_SAMPLE_RATE SOUNDCARD_IOCTL_SET_SAMPLE_RATE
200202
#define SOUNDCARD_IOCTL_GET_SAMPLE_RATE SOUNDCARD_IOCTL_GET_SAMPLE_RATE
201203
#define STORAGE_DEVICE_GET_SIZE STORAGE_DEVICE_GET_SIZE

Kernel/Devices/Storage/StorageManagement.cpp

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -453,8 +453,6 @@ u32 StorageManagement::generate_controller_id()
453453

454454
ErrorOr<NonnullRefPtr<VFSRootContext>> StorageManagement::create_first_vfs_root_context() const
455455
{
456-
auto vfs_root_context = TRY(VFSRootContext::create_with_empty_ramfs(VFSRootContext::AddToGlobalContextList::Yes));
457-
458456
auto const* fs_type_initializer = TRY(VirtualFileSystem::find_filesystem_type_initializer("ext2"sv));
459457
VERIFY(fs_type_initializer);
460458
auto mount_file = TRY(MountFile::create(*fs_type_initializer, root_mount_flags));
@@ -467,25 +465,7 @@ ErrorOr<NonnullRefPtr<VFSRootContext>> StorageManagement::create_first_vfs_root_
467465
auto description = TRY(OpenFileDescription::try_create(boot_device_description.release_nonnull()));
468466

469467
auto fs = TRY(FileBackedFileSystem::create_and_append_filesystems_list_from_mount_file_and_description(mount_file, description));
470-
471-
// NOTE: Fake a mounted count of 1 so the called VirtualFileSystem function in the
472-
// next pivot_root logic block thinks everything is OK.
473-
fs->mounted_count().with([](auto& mounted_count) {
474-
mounted_count++;
475-
});
476-
477-
// Adding filesystems to the all_file_systems_list usually happens in add_to_mounts_list_and_increment_fs_mounted_count(),
478-
// but that function isn't called for the root filesystem of the first VFSRootContext.
479-
FileSystem::all_file_systems_list().with([&fs](auto& fs_list) {
480-
fs_list.append(*fs);
481-
});
482-
483-
TRY(VirtualFileSystem::pivot_root_by_copying_mounted_fs_instance(*vfs_root_context, *fs, root_mount_flags));
484-
// NOTE: Return the mounted count to normal now we have it really mounted.
485-
fs->mounted_count().with([](auto& mounted_count) {
486-
mounted_count--;
487-
});
488-
return vfs_root_context;
468+
return TRY(VFSRootContext::create_with_filesystem(VFSRootContext::AddToGlobalContextList::Yes, fs));
489469
}
490470

491471
UNMAP_AFTER_INIT void StorageManagement::initialize(bool poll)

Kernel/FileSystem/UnsharedResourceFile.cpp

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,14 @@ ErrorOr<unsigned> UnsharedResourceFile::initialize_resource()
3939
return scoped_process_list->id().value();
4040
}
4141
case UnshareType::VFSRootContext: {
42-
auto vfs_root_context = TRY(VFSRootContext::create_with_empty_ramfs(VFSRootContext::AddToGlobalContextList::Yes));
43-
return vfs_root_context->id().value();
42+
return m_root_filesystem.with_exclusive([&](auto& root_filesystem) -> ErrorOr<unsigned> {
43+
if (root_filesystem) {
44+
auto vfs_root_context = TRY(VFSRootContext::create_with_filesystem(VFSRootContext::AddToGlobalContextList::Yes, *root_filesystem));
45+
return vfs_root_context->id().value();
46+
}
47+
auto vfs_root_context = TRY(VFSRootContext::create_with_empty_ramfs(VFSRootContext::AddToGlobalContextList::Yes));
48+
return vfs_root_context->id().value();
49+
});
4450
}
4551
case UnshareType::HostnameContext: {
4652
// NOTE: Technically, we create a new context, based on the
@@ -64,9 +70,33 @@ ErrorOr<unsigned> UnsharedResourceFile::initialize_resource()
6470

6571
UnsharedResourceFile::~UnsharedResourceFile() = default;
6672

67-
ErrorOr<void> UnsharedResourceFile::ioctl(OpenFileDescription&, unsigned, Userspace<void*>)
73+
ErrorOr<void> UnsharedResourceFile::ioctl(OpenFileDescription&, unsigned request, Userspace<void*> arg)
6874
{
69-
return ENOTSUP;
75+
switch (request) {
76+
case UNSHARE_IOCTL_ATTACH_ROOT_FILESYSTEM_AT_FD: {
77+
auto current_vfs_root_context = Process::current().vfs_root_context();
78+
auto credentials = Process::current().credentials();
79+
auto fd = static_cast<int>(arg.ptr());
80+
auto description = TRY(Process::current().open_file_description_ignoring_negative(fd));
81+
return m_root_filesystem.with_exclusive([&credentials, &current_vfs_root_context, &description](auto& root_filesystem) -> ErrorOr<void> {
82+
if (!description) {
83+
root_filesystem = nullptr;
84+
return {};
85+
}
86+
87+
if (!description->is_directory())
88+
return ENOTDIR;
89+
if (!description->metadata().may_execute(credentials))
90+
return EACCES;
91+
VERIFY(description->custody());
92+
root_filesystem = TRY(current_vfs_root_context->mount_point_to_guest_filesystem(*description->custody()));
93+
return {};
94+
});
95+
}
96+
97+
default:
98+
return EINVAL;
99+
}
70100
}
71101

72102
ErrorOr<NonnullOwnPtr<KString>> UnsharedResourceFile::pseudo_path(OpenFileDescription const&) const

Kernel/FileSystem/UnsharedResourceFile.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ class UnsharedResourceFile final : public File {
3232

3333
explicit UnsharedResourceFile(UnshareType);
3434
UnshareType const m_type;
35+
36+
MutexProtected<RefPtr<FileSystem>> m_root_filesystem;
3537
};
3638

3739
}

Kernel/FileSystem/VFSRootContext.cpp

Lines changed: 30 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,20 @@ ErrorOr<NonnullRefPtr<VFSRootContext>> VFSRootContext::create_with_empty_ramfs(A
2626

2727
auto fs = TRY(RAMFS::try_create({}));
2828
TRY(fs->initialize());
29-
auto root_custody = TRY(Custody::try_create(nullptr, ""sv, fs->root_inode(), 0));
29+
return create_with_filesystem(add_to_global_context_list, fs);
30+
}
31+
32+
ErrorOr<NonnullRefPtr<VFSRootContext>> VFSRootContext::create_with_filesystem(AddToGlobalContextList add_to_global_context_list, FileSystem& fs)
33+
{
34+
auto root_custody = TRY(Custody::try_create(nullptr, ""sv, fs.root_inode(), 0));
3035
auto context = TRY(adopt_nonnull_ref_or_enomem(new (nothrow) VFSRootContext(root_custody)));
31-
auto new_mount = TRY(adopt_nonnull_own_or_enomem(new (nothrow) Mount(fs->root_inode(), 0)));
36+
auto new_mount = TRY(adopt_nonnull_own_or_enomem(new (nothrow) Mount(fs.root_inode(), 0)));
3237
TRY(context->m_details.with([&](auto& details) -> ErrorOr<void> {
3338
if (add_to_global_context_list == AddToGlobalContextList::Yes) {
3439
dbgln("VFSRootContext({}): Root (\"/\") FileSystemID {}, Mounting {} at inode {} with flags {}",
3540
context->id(),
36-
fs->fsid(),
37-
fs->class_name(),
41+
fs.fsid(),
42+
fs.class_name(),
3843
root_custody->inode().identifier(),
3944
0);
4045
}
@@ -123,52 +128,6 @@ void VFSRootContext::add_to_mounts_list_and_increment_fs_mounted_count(DoBindMou
123128
mounts_list.append(*new_mount.leak_ptr());
124129
}
125130

126-
ErrorOr<void> VFSRootContext::pivot_root(FileBackedFileSystem::List& file_backed_file_systems_list, FileSystem& fs, NonnullOwnPtr<Mount> new_mount, NonnullRefPtr<Custody> root_mount_point, int root_mount_flags)
127-
{
128-
return m_details.with([&](auto& details) -> ErrorOr<void> {
129-
return fs.mounted_count().with([&](auto& mounted_count) -> ErrorOr<void> {
130-
// NOTE: If the mounted count is 0, then this filesystem is about to be
131-
// deleted, so this must be a kernel bug as we don't include such filesystem
132-
// in the VirtualFileSystem s_details->file_backed_file_systems_list list anymore.
133-
VERIFY(mounted_count > 0);
134-
135-
// NOTE: The mounts table should not be empty as it always need
136-
// to have at least one mount!
137-
VERIFY(!details.mounts.is_empty());
138-
139-
// NOTE: If we have many mounts in the table, then simply don't allow
140-
// userspace to override them but instead require to unmount everything except
141-
// the root mount first.
142-
if (details.mounts.size_slow() != 1)
143-
return EPERM;
144-
145-
auto& mount = *details.mounts.first();
146-
TRY(VirtualFileSystem::remove_mount(mount, file_backed_file_systems_list));
147-
VERIFY(details.mounts.is_empty());
148-
149-
dbgln("VFSRootContext({}): Root mount set to FileSystemID {}, Mounting {} at inode {} with flags {}",
150-
id(),
151-
new_mount->guest_fs().fsid(),
152-
new_mount->guest_fs().class_name(),
153-
root_mount_point->inode().identifier(),
154-
root_mount_flags);
155-
156-
// NOTE: Leak the mount pointer so it can be added to the mount list, but it won't be
157-
// deleted after being added.
158-
details.mounts.append(*new_mount.leak_ptr());
159-
160-
// NOTE: We essentially do the same thing like VFSRootContext::add_to_mounts_list_and_increment_fs_mounted_count function
161-
// but because we already locked the spinlock of the attach count, we can't call that function here.
162-
mounted_count++;
163-
// NOTE: Now fill the root custody with a valid custody for the new root mount.
164-
m_root_custody.with([&root_mount_point](auto& custody) {
165-
custody = move(root_mount_point);
166-
});
167-
return {};
168-
});
169-
});
170-
}
171-
172131
ErrorOr<void> VFSRootContext::do_full_teardown(Badge<PowerStateSwitchTask>)
173132
{
174133
// NOTE: We are going to tear down the entire VFS root context from its mounts.
@@ -313,6 +272,27 @@ ErrorOr<void> VFSRootContext::apply_to_mount_for_host_custody(Custody const& cur
313272
return do_on_mount_for_host_custody(ValidateImmutableFlag::Yes, current_custody, move(callback));
314273
}
315274

275+
ErrorOr<NonnullRefPtr<FileSystem>> VFSRootContext::mount_point_to_guest_filesystem(Custody const& custody)
276+
{
277+
return m_details.with([&](auto& details) -> ErrorOr<NonnullRefPtr<FileSystem>> {
278+
// NOTE: We either search for the root mount or for a mount that has a parent custody!
279+
if (!custody.parent()) {
280+
for (auto& mount : details.mounts) {
281+
if (!mount.host_custody())
282+
return mount.guest_fs();
283+
}
284+
// There must be a root mount entry, so fail if we don't find it.
285+
VERIFY_NOT_REACHED();
286+
} else {
287+
for (auto& mount : details.mounts) {
288+
if (mount.host_custody() && VirtualFileSystem::check_matching_absolute_path_hierarchy(*mount.host_custody(), custody))
289+
return mount.guest_fs();
290+
}
291+
}
292+
return ENODEV;
293+
});
294+
}
295+
316296
ErrorOr<VFSRootContext::CurrentMountState> VFSRootContext::current_mount_state_for_host_custody(Custody const& current_custody) const
317297
{
318298
RefPtr<FileSystem> guest_fs;

Kernel/FileSystem/VFSRootContext.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class VFSRootContext : public AtomicRefCounted<VFSRootContext> {
3535
No,
3636
};
3737
static ErrorOr<NonnullRefPtr<VFSRootContext>> create_with_empty_ramfs(AddToGlobalContextList);
38+
static ErrorOr<NonnullRefPtr<VFSRootContext>> create_with_filesystem(AddToGlobalContextList, FileSystem&);
3839
static ErrorOr<NonnullRefPtr<VFSRootContext>> create_empty();
3940

4041
SpinlockProtected<NonnullRefPtr<Custody>, LockRank::None>& root_custody() { return m_root_custody; }
@@ -49,10 +50,11 @@ class VFSRootContext : public AtomicRefCounted<VFSRootContext> {
4950
ErrorOr<void> do_full_teardown(Badge<PowerStateSwitchTask>);
5051

5152
ErrorOr<void> unmount(FileBackedFileSystem::List& file_backed_file_systems_list, Inode& guest_inode, StringView custody_path);
52-
ErrorOr<void> pivot_root(FileBackedFileSystem::List& file_backed_file_systems_list, FileSystem& fs, NonnullOwnPtr<Mount> new_mount, NonnullRefPtr<Custody> root_mount_point, int root_mount_flags);
5353

5454
ErrorOr<void> apply_to_mount_for_host_custody(Custody const& current_custody, Function<void(Mount&)>);
5555

56+
ErrorOr<NonnullRefPtr<FileSystem>> mount_point_to_guest_filesystem(Custody const& custody);
57+
5658
struct CurrentMountState {
5759
Mount::Details details;
5860
int flags { 0 };

Kernel/FileSystem/VirtualFileSystem.cpp

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -242,13 +242,6 @@ ErrorOr<void> VirtualFileSystem::copy_mount(Custody& original_custody, VFSRootCo
242242
if (&original_custody.inode() != &original_custody.inode().fs().root_inode())
243243
return EINVAL;
244244

245-
// NOTE: If the user specified the root custody ("/") on the destination context
246-
// then try to `pivot_root` the destination context root mount with the desired
247-
// custody.
248-
auto destination_context_root_custody = destination_context.root_custody().with([](auto& custody) -> NonnullRefPtr<Custody> { return custody; });
249-
if (&new_mount_point == destination_context_root_custody.ptr())
250-
return pivot_root_by_copying_mounted_fs_instance(destination_context, original_custody.inode().fs(), flags);
251-
252245
TRY(destination_context.add_new_mount(VFSRootContext::DoBindMount::No, original_custody.inode(), new_mount_point, flags));
253246
return {};
254247
}
@@ -315,16 +308,6 @@ ErrorOr<void> VirtualFileSystem::remove_mount(Mount& mount, FileBackedFileSystem
315308
return {};
316309
}
317310

318-
ErrorOr<void> VirtualFileSystem::pivot_root_by_copying_mounted_fs_instance(VFSRootContext& context, FileSystem& fs, int root_mount_flags)
319-
{
320-
auto root_mount_point = TRY(Custody::try_create(nullptr, ""sv, fs.root_inode(), root_mount_flags));
321-
auto new_mount = TRY(adopt_nonnull_own_or_enomem(new (nothrow) Mount(fs.root_inode(), root_mount_flags)));
322-
323-
return s_details->file_backed_file_systems_list.with_exclusive([&](auto& file_backed_file_systems_list) -> ErrorOr<void> {
324-
return context.pivot_root(file_backed_file_systems_list, fs, move(new_mount), move(root_mount_point), root_mount_flags);
325-
});
326-
}
327-
328311
ErrorOr<void> VirtualFileSystem::unmount(VFSRootContext& context, Inode& guest_inode, StringView custody_path)
329312
{
330313
return s_details->file_backed_file_systems_list.with_exclusive([&](auto& file_backed_file_systems_list) -> ErrorOr<void> {

Kernel/FileSystem/VirtualFileSystem.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ ErrorOr<FileSystemInitializer const*> find_filesystem_type_initializer(StringVie
5959
ErrorOr<void> remove_mount(Mount& mount, FileBackedFileSystem::List& file_backed_fs_list);
6060

6161
ErrorOr<void> mount(VFSRootContext&, MountFile&, OpenFileDescription*, Custody& mount_point, int flags);
62-
ErrorOr<void> pivot_root_by_copying_mounted_fs_instance(VFSRootContext&, FileSystem& fs, int root_mount_flags);
6362

6463
ErrorOr<void> bind_mount(VFSRootContext&, Custody& source, Custody& mount_point, int flags);
6564
ErrorOr<void> copy_mount(Custody& source, VFSRootContext& destination, Custody& mount_point, int flags);

Userland/Utilities/runc/LayoutParsing.cpp

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,36 @@ static ErrorOr<void> handle_property(VFSRootContextLayout& layout, JsonObject co
161161
}
162162

163163
auto type = object.get_byte_string("type"sv).value();
164-
dbgln("WARNING: Unknown object type - {}, it might affect layout creation severely", type);
164+
if (type != "root_mount"sv)
165+
dbgln("WARNING: Unknown object type - {}, it might affect layout creation severely", type);
166+
return {};
167+
}
168+
169+
ErrorOr<void> create_root_mount_point(StringView preparation_environment_path, JsonArray const& layout_creation_sequence)
170+
{
171+
for (size_t index = 0; index < layout_creation_sequence.size(); index++) {
172+
auto& maybe_object = layout_creation_sequence[index];
173+
if (!maybe_object.is_object())
174+
return Error::from_string_view("Invalid layout JSON object"sv);
175+
if (!maybe_object.as_object().has_string("type"sv))
176+
return Error::from_string_view("Invalid layout JSON object - no type being specified"sv);
177+
178+
auto& object = maybe_object.as_object();
179+
180+
auto type = object.get_byte_string("type"sv).value();
181+
if (type != "root_mount"sv)
182+
continue;
183+
184+
if (!object.has_null("source"sv) && !object.has_string("source"sv))
185+
return Error::from_string_view("Object root_mount source property not found"sv);
186+
if (!object.has_string("fs_type"sv))
187+
return Error::from_string_view("Object root_mount fs_type property not found"sv);
188+
189+
auto fs_type = object.get_byte_string("fs_type"sv).value();
190+
auto source = object.get_byte_string("source"sv).value_or("none");
191+
auto source_fd = TRY(VFSRootContextLayout::get_source_fd(source));
192+
TRY(Core::System::mount({}, source_fd, preparation_environment_path, fs_type, 0));
193+
}
165194
return {};
166195
}
167196

0 commit comments

Comments
 (0)