Merge branch 'main' into add-kernelversion-to-traces
Anirudh2112 authored Dec 5, 2024
2 parents d03cdc6 + 499e3cf commit 503c6d6
Showing 54 changed files with 1,192 additions and 405 deletions.
4 changes: 2 additions & 2 deletions .goreleaser.yaml
@@ -1,7 +1,7 @@
# yaml-language-server: $schema=https://goreleaser.com/static/schema.json
# vim: set ts=2 sw=2 tw=0 fo=cnqoj

-version: 1
+version: 2

before:
hooks:
@@ -22,7 +22,7 @@ builds:
- windows
- darwin
ldflags:
-    - -X github.com/microsoft/retina/cli/cmd.Version=v{{.Version}}
+    - -X github.com/microsoft/retina/internal/buildinfo.Version=v{{.Version}}
main: cli/main.go

archives:
173 changes: 173 additions & 0 deletions cli/cmd/shell.go
@@ -0,0 +1,173 @@
package cmd

import (
"errors"
"fmt"
"os"
"time"

"github.com/microsoft/retina/internal/buildinfo"
"github.com/microsoft/retina/shell"
"github.com/spf13/cobra"
v1 "k8s.io/api/core/v1"
"k8s.io/cli-runtime/pkg/genericclioptions"
"k8s.io/cli-runtime/pkg/resource"
cmdutil "k8s.io/kubectl/pkg/cmd/util"
"k8s.io/kubectl/pkg/scheme"
"k8s.io/kubectl/pkg/util/templates"
)

var (
configFlags *genericclioptions.ConfigFlags
matchVersionFlags *cmdutil.MatchVersionFlags
retinaShellImageRepo string
retinaShellImageVersion string
mountHostFilesystem bool
allowHostFilesystemWrite bool
hostPID bool
capabilities []string
timeout time.Duration
)

var (
// AKS requires clusters to allow access to MCR, so use this repository by default.
defaultRetinaShellImageRepo = "mcr.microsoft.com/containernetworking/retina-shell"

// Default version is the same as CLI version, set at link time.
defaultRetinaShellImageVersion = buildinfo.Version

defaultTimeout = 30 * time.Second

errMissingRequiredRetinaShellImageVersionArg = errors.New("missing required --retina-shell-image-version")
errUnsupportedResourceType = errors.New("unsupported resource type")
)

var shellCmd = &cobra.Command{
Use: "shell (NODE | TYPE[[.VERSION].GROUP]/NAME)",
Short: "[EXPERIMENTAL] Interactively debug a node or pod",
Long: templates.LongDesc(`
[EXPERIMENTAL] This is an experimental command. The flags and behavior may change in the future.
Start a shell with networking tools in a node or pod for ad hoc debugging.
* For nodes, this creates a pod on the node in the root network namespace.
* For pods, this creates an ephemeral container inside the pod's network namespace.
You can override the default image used for the shell container with either
CLI flags (--retina-shell-image-repo and --retina-shell-image-version) or
environment variables (RETINA_SHELL_IMAGE_REPO and RETINA_SHELL_IMAGE_VERSION).
CLI flags take precedence over env vars.
`),

Example: templates.Examples(`
# start a shell in a node
kubectl retina shell node0001
# start a shell in a node, with debug pod in kube-system namespace
kubectl retina shell -n kube-system node0001
# start a shell as an ephemeral container inside an existing pod
kubectl retina shell -n kube-system pod/coredns-d459997b4-7cpzx
# start a shell in a node, mounting the host filesystem to /host with ability to chroot
kubectl retina shell node001 --mount-host-filesystem --capabilities SYS_CHROOT
# start a shell in a node, with NET_RAW and NET_ADMIN capabilities
# (required for iptables and tcpdump)
kubectl retina shell node001 --capabilities NET_RAW,NET_ADMIN
`),
Args: cobra.ExactArgs(1),
RunE: func(_ *cobra.Command, args []string) error {
// retinaShellImageVersion defaults to the CLI version, but that might not be set if the CLI is built without -ldflags.
if retinaShellImageVersion == "" {
return errMissingRequiredRetinaShellImageVersionArg
}

namespace, explicitNamespace, err := matchVersionFlags.ToRawKubeConfigLoader().Namespace()
if err != nil {
return fmt.Errorf("error retrieving namespace arg: %w", err)
}

// This interprets the first arg as either a node or pod (same as kubectl):
// "node001" -> node
// "node/node001" -> node
// "pod/example-7cpzx" -> pod
r := resource.NewBuilder(configFlags).
WithScheme(scheme.Scheme, scheme.Scheme.PrioritizedVersionsAllGroups()...).
FilenameParam(explicitNamespace, &resource.FilenameOptions{}).
NamespaceParam(namespace).DefaultNamespace().ResourceNames("nodes", args[0]).
Do()
if rerr := r.Err(); rerr != nil {
return fmt.Errorf("error constructing resource builder: %w", rerr)
}

restConfig, err := matchVersionFlags.ToRESTConfig()
if err != nil {
return fmt.Errorf("error constructing REST config: %w", err)
}

config := shell.Config{
RestConfig: restConfig,
RetinaShellImage: fmt.Sprintf("%s:%s", retinaShellImageRepo, retinaShellImageVersion),
MountHostFilesystem: mountHostFilesystem,
AllowHostFilesystemWrite: allowHostFilesystemWrite,
HostPID: hostPID,
Capabilities: capabilities,
Timeout: timeout,
}

return r.Visit(func(info *resource.Info, err error) error {
if err != nil {
return err
}

switch obj := info.Object.(type) {
case *v1.Node:
podDebugNamespace := namespace
nodeName := obj.Name
return shell.RunInNode(config, nodeName, podDebugNamespace)
case *v1.Pod:
return shell.RunInPod(config, obj.Namespace, obj.Name)
default:
gvk := obj.GetObjectKind().GroupVersionKind()
return fmt.Errorf("unsupported resource %s/%s: %w", gvk.GroupVersion(), gvk.Kind, errUnsupportedResourceType)
}
})
},
}

func init() {
Retina.AddCommand(shellCmd)
shellCmd.PersistentPreRun = func(cmd *cobra.Command, _ []string) {
// Avoid printing full usage message if the command exits with an error.
cmd.SilenceUsage = true
cmd.SilenceErrors = true

// Allow setting image repo and version via environment variables (CLI flags still take precedence).
if !cmd.Flags().Changed("retina-shell-image-repo") {
if envRepo := os.Getenv("RETINA_SHELL_IMAGE_REPO"); envRepo != "" {
retinaShellImageRepo = envRepo
}
}
if !cmd.Flags().Changed("retina-shell-image-version") {
if envVersion := os.Getenv("RETINA_SHELL_IMAGE_VERSION"); envVersion != "" {
retinaShellImageVersion = envVersion
}
}
}
shellCmd.Flags().StringVar(&retinaShellImageRepo, "retina-shell-image-repo", defaultRetinaShellImageRepo, "The container registry repository for the image to use for the shell container")
shellCmd.Flags().StringVar(&retinaShellImageVersion, "retina-shell-image-version", defaultRetinaShellImageVersion, "The version (tag) of the image to use for the shell container")
shellCmd.Flags().BoolVarP(&mountHostFilesystem, "mount-host-filesystem", "m", false, "Mount the host filesystem to /host. Applies only to nodes, not pods.")
shellCmd.Flags().BoolVarP(&allowHostFilesystemWrite, "allow-host-filesystem-write", "w", false,
"Allow write access to the host filesystem. Implies --mount-host-filesystem. Applies only to nodes, not pods.")
shellCmd.Flags().BoolVar(&hostPID, "host-pid", false, "Set HostPID on the shell container. Applies only to nodes, not pods.")
shellCmd.Flags().StringSliceVarP(&capabilities, "capabilities", "c", []string{}, "Add capabilities to the shell container")
shellCmd.Flags().DurationVar(&timeout, "timeout", defaultTimeout, "The maximum time to wait for the shell container to start")

// configFlags and matchVersionFlags are used to load kubeconfig.
// This uses the same mechanism as `kubectl debug` to connect to apiserver and attach to containers.
configFlags = genericclioptions.NewConfigFlags(true)
configFlags.AddFlags(shellCmd.PersistentFlags())
matchVersionFlags = cmdutil.NewMatchVersionFlags(configFlags)
matchVersionFlags.AddFlags(shellCmd.PersistentFlags())
}
185 changes: 185 additions & 0 deletions docs/06-Troubleshooting/shell.md
@@ -0,0 +1,185 @@
# Shell TSG

**EXPERIMENTAL: `retina shell` is an experimental feature, so the flags and behavior may change in future versions.**

The `retina shell` command allows you to start an interactive shell on a Kubernetes node or pod. This runs a container image with many common networking tools installed (`ping`, `curl`, etc.).

## Testing connectivity

Start a shell on a node or inside a pod:

```bash
# To start a shell in a node (root network namespace):
kubectl retina shell aks-nodepool1-15232018-vmss000001

# To start a shell inside a pod (pod network namespace):
kubectl retina shell -n kube-system pods/coredns-d459997b4-7cpzx
```

Check connectivity using `ping`:

```text
root [ / ]# ping 10.224.0.4
PING 10.224.0.4 (10.224.0.4) 56(84) bytes of data.
64 bytes from 10.224.0.4: icmp_seq=1 ttl=64 time=0.964 ms
64 bytes from 10.224.0.4: icmp_seq=2 ttl=64 time=1.13 ms
64 bytes from 10.224.0.4: icmp_seq=3 ttl=64 time=0.908 ms
64 bytes from 10.224.0.4: icmp_seq=4 ttl=64 time=1.07 ms
64 bytes from 10.224.0.4: icmp_seq=5 ttl=64 time=1.01 ms
--- 10.224.0.4 ping statistics ---
5 packets transmitted, 5 received, 0% packet loss, time 4022ms
rtt min/avg/max/mdev = 0.908/1.015/1.128/0.077 ms
```

Check DNS resolution using `dig`:

```text
root [ / ]# dig example.com +short
93.184.215.14
```

The tools `nslookup` and `drill` are also available if you prefer those.
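
For example, the same lookup with those tools (the output format differs slightly between the three):

```bash
nslookup example.com   # resolver address plus the answer
drill example.com      # dig-style answer section
```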

Check connectivity to apiserver using `nc` and `curl`:

```text
root [ / ]# nc -zv 10.0.0.1 443
Ncat: Version 7.95 ( https://nmap.org/ncat )
Ncat: Connected to 10.0.0.1:443.
Ncat: 0 bytes sent, 0 bytes received in 0.06 seconds.
root [ / ]# curl -k https://10.0.0.1
{
"kind": "Status",
"apiVersion": "v1",
"metadata": {},
"status": "Failure",
"message": "Unauthorized",
"reason": "Unauthorized",
"code": 401
}
```

### nftables and iptables

Accessing nftables and iptables rules requires `NET_RAW` and `NET_ADMIN` capabilities.

```bash
kubectl retina shell aks-nodepool1-15232018-vmss000002 --capabilities NET_ADMIN,NET_RAW
```

Then you can run `iptables` and `nft`:

```text
root [ / ]# iptables -nvL | head -n 2
Chain INPUT (policy ACCEPT 1191K packets, 346M bytes)
pkts bytes target prot opt in out source destination
root [ / ]# nft list ruleset | head -n 2
# Warning: table ip filter is managed by iptables-nft, do not touch!
table ip filter {
```

**If you see the error "Operation not permitted (you must be root)", check that your `kubectl retina shell` command sets `--capabilities NET_RAW,NET_ADMIN`.**

`iptables` in the shell image uses `iptables-legacy`, which may or may not match the configuration on the node. For example, Ubuntu maps `iptables` to `iptables-nft`. To use the exact same `iptables` binary as installed on the node, you will need to `chroot` into the host filesystem (see below).
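
To see which backend the node's own `iptables` uses, compare versions from the shell image and from the host (a quick sketch; the `chroot` form needs the `--mount-host-filesystem` and `--capabilities SYS_CHROOT` flags described below):

```bash
# iptables 1.8+ prints its backend in parentheses: (legacy) or (nf_tables).
iptables --version                # backend bundled in the shell image
chroot /host iptables --version   # backend installed on the node
```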

## Accessing the host filesystem

On nodes, you can mount the host filesystem to `/host`:

```bash
kubectl retina shell aks-nodepool1-15232018-vmss000002 --mount-host-filesystem
```

This mounts the host filesystem (`/`) to `/host` in the debug pod:

```text
root [ / ]# ls /host
NOTICE.txt bin boot dev etc home lib lib64 libx32 lost+found media mnt opt proc root run sbin srv sys tmp usr var
```

The host filesystem is mounted read-only by default. If you need write access, use the `--allow-host-filesystem-write` flag.
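
For example, to start a node shell with a writable host filesystem (per the flag's help text, `--allow-host-filesystem-write` implies `--mount-host-filesystem`):

```bash
kubectl retina shell aks-nodepool1-15232018-vmss000002 --allow-host-filesystem-write
```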

Absolute symlinks on the host filesystem may not resolve correctly, because they point to paths in the container's root rather than under `/host`. If you see "No such file or directory" errors for symlinks, follow the instructions below to `chroot` to the host filesystem.

## Chroot to the host filesystem

`chroot` requires the `SYS_CHROOT` capability:

```bash
kubectl retina shell aks-nodepool1-15232018-vmss000002 --mount-host-filesystem --capabilities SYS_CHROOT
```

Then you can use `chroot` to start a shell inside the host filesystem:

```text
root [ / ]# chroot /host bash
root@aks-nodepool1-15232018-vmss000002:/# cat /etc/resolv.conf | tail -n 2
nameserver 168.63.129.16
search shncgv2kgepuhm1ls1dwgholsd.cx.internal.cloudapp.net
```

`chroot` allows you to:

* Execute binaries installed on the node.
* Resolve symlinks that point to files on the host filesystem (such as `/etc/resolv.conf` -> `/run/systemd/resolve/resolv.conf`).
* Use `sysctl` to view or modify kernel parameters (see the examples below).
* Use `journalctl` to view systemd unit and kernel logs.
* Use `ip netns` to view network namespaces. (However, `ip netns exec` does not work.)
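
For example (assuming the node runs `kubelet` as a systemd unit, as AKS nodes do):

```bash
chroot /host sysctl net.ipv4.ip_forward              # view a kernel parameter
chroot /host journalctl -u kubelet --no-pager -n 5   # last 5 lines of kubelet logs
chroot /host ip netns list                           # list network namespaces
```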

## Systemctl

`systemctl` commands require both a `chroot` to the host filesystem and the host PID namespace:

```bash
kubectl retina shell aks-nodepool1-15232018-vmss000002 --mount-host-filesystem --capabilities SYS_CHROOT --host-pid
```

Then `chroot` to the host filesystem and run `systemctl status`:

```text
root [ / ]# chroot /host systemctl status | head -n 2
● aks-nodepool1-15232018-vmss000002
State: running
```

**If `systemctl` shows the error "Failed to connect to bus: No data available", check that the `retina shell` command has `--host-pid` set and that you have `chroot`'d to `/host`.**

## Troubleshooting

### Timeouts

If `kubectl retina shell` fails with a timeout error:

1. Increase the timeout with the `--timeout` flag.
2. Check the pod with `kubectl describe pod` to determine why the shell pod is failing to start.

Example:

```bash
kubectl retina shell --timeout 10m node001 # increase timeout to 10 minutes
```

### Firewalls and ImagePullBackoff

Some clusters are behind a firewall that blocks pulling the retina-shell image. To work around this:

1. Replicate the retina-shell images to a container registry accessible from within the cluster.
2. Override the image used by Retina CLI with the environment variable `RETINA_SHELL_IMAGE_REPO`.

Example:

```bash
export RETINA_SHELL_IMAGE_REPO="example.azurecr.io/retina/retina-shell"
export RETINA_SHELL_IMAGE_VERSION=v0.0.1 # optional; if not set, defaults to the Retina CLI version.
kubectl retina shell node0001 # this will use the image "example.azurecr.io/retina/retina-shell:v0.0.1"
```

## Limitations

* Windows nodes and pods are not yet supported.
* `bpftool` and `bpftrace` are not supported.
* The shell image links `iptables` commands to `iptables-legacy`, even if the node itself uses `iptables-nft`.
* `nsenter` is not supported.
* `ip netns` will not work without `chroot` to the host filesystem.