Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions libcontainer/mount_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ import (
"strconv"
"strings"

securejoin "github.com/cyphar/filepath-securejoin"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"

devices "github.com/opencontainers/cgroups/devices/config"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/internal/userns"
"github.com/opencontainers/runc/libcontainer/utils"
Expand Down Expand Up @@ -342,3 +344,29 @@ func mountFd(nsHandles *userns.Handles, m *configs.Mount) (_ *mountSource, retEr
file: mountFile,
}, nil
}

// Use mknod to create devices in the initial user namespace to pass devices with 0660 permissions
func usernsMknod(rootfs string, node *devices.Device) (*mountSource, error) {
var err error

destPath, err := securejoin.SecureJoin(rootfs, node.Path)
if err != nil {
return nil, err
}

err = createDeviceNode(rootfs, node, false)
if err != nil {
return nil, err
}

mountFile, err := os.OpenFile(destPath, unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
return nil, err
}
Comment on lines +352 to +365
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is all insecure. You need to use mknodat with OpenInRoot and then after doing the re-open (again with OpenInRoot) you need to do VerifyInode. libpathrs has helpers for tne first bit.


sourceType := mountSourcePlain
return &mountSource{
Type: sourceType,
file: mountFile,
}, nil
}
42 changes: 41 additions & 1 deletion libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@
"syscall"
"time"

securejoin "github.com/cyphar/filepath-securejoin"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"

"github.com/opencontainers/runtime-spec/specs-go"

"github.com/opencontainers/cgroups"
"github.com/opencontainers/cgroups/fs2"
"github.com/opencontainers/runc/internal/sys"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/internal/userns"
Expand Down Expand Up @@ -729,7 +731,45 @@
if !ok {
break loop
}
src, err := mountFd(nsHandles, m)
var src *mountSource
var err error
if m.Device == "usernsMknod" {
// Create device in initial user ns
for _, device := range p.config.Config.Devices {
if device.Path == m.Source {
src, err = usernsMknod(p.config.Config.Rootfs, device)
break
}
}
if src == nil && err == nil {
err = fmt.Errorf("can not find device node")
}
Comment on lines +736 to +746
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A magical mount type really feels like the wrong way of doing this. It would also need a runtime-spec change.

} else if m.Device != "tmpfs" {
src, err = mountFd(nsHandles, m)
} else {
// Mount tmpfs in initial user ns and chown it
entry := mountEntry{Mount: m}
mountConfig := &mountConfig{
root: p.config.Config.Rootfs,
label: p.config.Config.MountLabel,
rootlessCgroups: p.config.Config.RootlessCgroups,
cgroupns: p.config.Config.Namespaces.Contains(configs.NEWCGROUP),
}
err := mountToRootfs(mountConfig, entry)
if err == nil {
destPath, _ := securejoin.SecureJoin(mountConfig.root, m.Destination)
mountFile, err := os.OpenFile(destPath, unix.O_PATH|unix.O_CLOEXEC, 0)

Check failure on line 761 in libcontainer/process_linux.go

View workflow job for this annotation

GitHub Actions / lint

ineffectual assignment to err (ineffassign)
Comment on lines +760 to +761
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also insecure for the same reason as above, this should've been OpenInRoot. (But I think this code shouldn't exist.)

uid, _ := p.config.Config.HostRootUID()
gid, _ := p.config.Config.HostRootGID()
err = sys.FchownFile(mountFile, uid, gid)
if err == nil {
src = &mountSource{
file: mountFile,
Type: mountSourcePlain,
}
}
}
Comment on lines +750 to +771
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be done with fsopen, this is all very wrong and pollutes the host mount table AFAICS?

}
logrus.Debugf("mount source thread: handling request for %q: %v %v", m.Source, src, err)
responseCh <- response{
src: src,
Expand Down
56 changes: 49 additions & 7 deletions libcontainer/rootfs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ import (

"github.com/cyphar/filepath-securejoin/pathrs-lite/procfs"
"github.com/moby/sys/mountinfo"
"github.com/moby/sys/userns"
"github.com/mrunalp/fileutils"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"

"github.com/moby/sys/userns"
"github.com/opencontainers/cgroups"
devices "github.com/opencontainers/cgroups/devices/config"
"github.com/opencontainers/cgroups/fs2"
Expand Down Expand Up @@ -113,14 +113,17 @@ func prepareRootfs(pipe *syncSocket, iConfig *initConfig) (err error) {
rootlessCgroups: config.RootlessCgroups,
cgroupns: config.Namespaces.Contains(configs.NEWCGROUP),
}

useMknodInInitialUserNS := (userns.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER)) && !config.RootlessEUID

for _, m := range config.Mounts {
entry := mountEntry{Mount: m}
// Figure out whether we need to request runc to give us an
// open_tree(2)-style mountfd. For idmapped mounts, this is always
// necessary. For bind-mounts, this is only necessary if we cannot
// resolve the parent mount (this is only hit if you are running in a
// userns -- but for rootless the host-side thread can't help).
wantSourceFile := m.IsIDMapped()
wantSourceFile := m.IsIDMapped() || useMknodInInitialUserNS && pathrs.LexicallyCleanPath(m.Destination) == "/dev"
if m.IsBind() && !config.RootlessEUID {
if _, err := os.Stat(m.Source); err != nil {
wantSourceFile = true
Expand All @@ -143,7 +146,7 @@ func prepareRootfs(pipe *syncSocket, iConfig *initConfig) (err error) {
// that while m.Source might contain symlinks, the (*os.File).Name
// is based on the path provided to os.OpenFile, not what it
// resolves to. So this should never happen.
if sync.File.Name() != m.Source {
if sync.File.Name() != m.Source && (!useMknodInInitialUserNS || m.Destination != "/dev") {
return fmt.Errorf("returned mountfd for %q doesn't match requested mount configuration: mountfd path is %q", m.Source, sync.File.Name())
}
// Unmarshal the procMountFd argument (the file is sync.File).
Expand All @@ -160,16 +163,56 @@ func prepareRootfs(pipe *syncSocket, iConfig *initConfig) (err error) {
src.file = sync.File
entry.srcFile = src
}
if useMknodInInitialUserNS && pathrs.LexicallyCleanPath(m.Destination) == "/dev" {
continue
}
if err := mountToRootfs(mountConfig, entry); err != nil {
return fmt.Errorf("error mounting %q to rootfs at %q: %w", m.Source, m.Destination, err)
}
}

setupDev := needsSetupDev(config)
if setupDev {
if err := createDevices(config); err != nil {
return fmt.Errorf("error creating device nodes: %w", err)
if !useMknodInInitialUserNS {
if err := createDevices(config, config.RootlessEUID); err != nil {
return fmt.Errorf("error creating device nodes: %w", err)
}
} else {
for _, node := range config.Devices {
if pathrs.LexicallyCleanPath(node.Path) == "/dev/ptmx" {
continue
}

// We need to bind /dev/null to restore masked pathes.
if pathrs.LexicallyCleanPath(node.Path) == "/dev/null" {
if err := createDeviceNode(config.Rootfs, node, true); err != nil {
return fmt.Errorf("error creating device nodes: %w", err)
}
}

m := &configs.Mount{
Source: node.Path,
Destination: node.Path,
Device: "usernsMknod",
}

// Request a source file from the host.
if err := writeSyncArg(pipe, procMountPlease, m); err != nil {
return fmt.Errorf("failed to request mountfd for %q: %w", m.Source, err)
}

sync, err := readSyncFull(pipe, procMountFd)
if err != nil {
return fmt.Errorf("mountfd request for %q failed: %w", m.Source, err)
}

if sync.File == nil {
return fmt.Errorf("mountfd request for %q: response missing attached fd", m.Source)
}
defer sync.File.Close()
}
}

if err := setupPtmx(config); err != nil {
return fmt.Errorf("error setting up ptmx: %w", err)
}
Expand Down Expand Up @@ -935,8 +978,7 @@ func reOpenDevNull() error {
}

// Create the device nodes in the container.
func createDevices(config *configs.Config) error {
useBindMount := userns.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER)
func createDevices(config *configs.Config, useBindMount bool) error {
for _, node := range config.Devices {

// The /dev/ptmx device is setup by setupPtmx()
Expand Down
18 changes: 8 additions & 10 deletions libcontainer/specconv/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -1116,17 +1116,15 @@ func setupUserNamespace(spec *specs.Spec, config *configs.Config) error {
"gid_map": gidMap,
}).Debugf("config uses path-based userns configuration -- current uid and gid mappings cached")
}
rootUID, err := config.HostRootUID()
if err != nil {
return err
}
rootGID, err := config.HostRootGID()
if err != nil {
return err
}
for _, node := range config.Devices {
node.Uid = uint32(rootUID)
node.Gid = uint32(rootGID)
hostUID, err := config.HostUID(int(node.Uid))
if err == nil {
node.Uid = uint32(hostUID)
}
hostGID, err := config.HostGID(int(node.Gid))
if err == nil {
node.Gid = uint32(hostGID)
}
}
Comment on lines 1119 to 1128
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure about this in general, but in the err != nil case you need to use rootUID/rootGID otherwise the devices will be unmapped.

return nil
}
Expand Down
66 changes: 66 additions & 0 deletions tests/integration/userns.bats
Original file line number Diff line number Diff line change
Expand Up @@ -280,3 +280,69 @@ function teardown() {
# is deleted during the namespace cleanup.
run ! ip link del dummy0
}

@test "checkpoint userns with the new device" {
requires criu root

update_config ' .linux.devices += [{"path": "/dev/another", "type": "c", "major": 1, "minor": 9, "fileMode": 432}]'

runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
[ "$status" -eq 0 ]

testcontainer test_busybox running

runc exec test_busybox head -n 1 /dev/another
[ "$status" -eq 0 ]

for _ in $(seq 2); do
runc checkpoint --work-path ./work-dir test_busybox
[ "$status" -eq 0 ]

testcontainer test_busybox checkpointed

# we need to chown images because child process can try to read them.
chown -R 100000:200000 ./checkpoint

runc restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox
[ "$status" -eq 0 ]

testcontainer test_busybox running

runc exec test_busybox head -n 1 /dev/another
[ "$status" -eq 0 ]
done
}

@test "checkpoint userns with the new device and non-root user" {
requires criu root

update_config ' .process.user.uid = 42
| .process.user.gid = 42
| .linux.devices += [{"path": "/dev/another", "type": "c", "major": 1, "minor": 9, "fileMode": 432, "uid": 42, "gid": 42}]'

runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
[ "$status" -eq 0 ]

testcontainer test_busybox running

runc exec test_busybox head -n 1 /dev/another
[ "$status" -eq 0 ]

for _ in $(seq 2); do
runc checkpoint --work-path ./work-dir test_busybox
[ "$status" -eq 0 ]

testcontainer test_busybox checkpointed

# we need to chown images because child process can try to read them.
chown -R 100000:200000 ./checkpoint

runc restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox
[ "$status" -eq 0 ]

testcontainer test_busybox running

runc exec test_busybox head -n 1 /dev/another
[ "$status" -eq 0 ]
done
}
Loading