-
Notifications
You must be signed in to change notification settings - Fork 216
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into add-kernelversion-to-traces
- Loading branch information
Showing
54 changed files
with
1,192 additions
and
405 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
package cmd | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"os" | ||
"time" | ||
|
||
"github.com/microsoft/retina/internal/buildinfo" | ||
"github.com/microsoft/retina/shell" | ||
"github.com/spf13/cobra" | ||
v1 "k8s.io/api/core/v1" | ||
"k8s.io/cli-runtime/pkg/genericclioptions" | ||
"k8s.io/cli-runtime/pkg/resource" | ||
cmdutil "k8s.io/kubectl/pkg/cmd/util" | ||
"k8s.io/kubectl/pkg/scheme" | ||
"k8s.io/kubectl/pkg/util/templates" | ||
) | ||
|
||
var ( | ||
configFlags *genericclioptions.ConfigFlags | ||
matchVersionFlags *cmdutil.MatchVersionFlags | ||
retinaShellImageRepo string | ||
retinaShellImageVersion string | ||
mountHostFilesystem bool | ||
allowHostFilesystemWrite bool | ||
hostPID bool | ||
capabilities []string | ||
timeout time.Duration | ||
) | ||
|
||
var ( | ||
// AKS requires clusters to allow access to MCR, so use this repository by default. | ||
defaultRetinaShellImageRepo = "mcr.microsoft.com/containernetworking/retina-shell" | ||
|
||
// Default version is the same as CLI version, set at link time. | ||
defaultRetinaShellImageVersion = buildinfo.Version | ||
|
||
defaultTimeout = 30 * time.Second | ||
|
||
errMissingRequiredRetinaShellImageVersionArg = errors.New("missing required --retina-shell-image-version") | ||
errUnsupportedResourceType = errors.New("unsupported resource type") | ||
) | ||
|
||
var shellCmd = &cobra.Command{ | ||
Use: "shell (NODE | TYPE[[.VERSION].GROUP]/NAME)", | ||
Short: "[EXPERIMENTAL] Interactively debug a node or pod", | ||
Long: templates.LongDesc(` | ||
[EXPERIMENTAL] This is an experimental command. The flags and behavior may change in the future. | ||
Start a shell with networking tools in a node or pod for adhoc debugging. | ||
* For nodes, this creates a pod on the node in the root network namespace. | ||
* For pods, this creates an ephemeral container inside the pod's network namespace. | ||
You can override the default image used for the shell container with either | ||
CLI flags (--retina-shell-image-repo and --retina-shell-image-version) or | ||
environment variables (RETINA_SHELL_IMAGE_REPO and RETINA_SHELL_IMAGE_VERSION). | ||
CLI flags take precedence over env vars. | ||
`), | ||
|
||
Example: templates.Examples(` | ||
# start a shell in a node | ||
kubectl retina shell node0001 | ||
# start a shell in a node, with debug pod in kube-system namespace | ||
kubectl retina shell -n kube-system node0001 | ||
# start a shell as an ephemeral container inside an existing pod | ||
kubectl retina shell -n kube-system pod/coredns-d459997b4-7cpzx | ||
# start a shell in a node, mounting the host filesystem to /host with ability to chroot | ||
kubectl retina shell node001 --mount-host-filesystem --capabilities SYS_CHROOT | ||
# start a shell in a node, with NET_RAW and NET_ADMIN capabilities | ||
# (required for iptables and tcpdump) | ||
kubectl retina shell node001 --capabilities NET_RAW,NET_ADMIN | ||
`), | ||
Args: cobra.ExactArgs(1), | ||
RunE: func(_ *cobra.Command, args []string) error { | ||
// retinaShellImageVersion defaults to the CLI version, but that might not be set if the CLI is built without -ldflags. | ||
if retinaShellImageVersion == "" { | ||
return errMissingRequiredRetinaShellImageVersionArg | ||
} | ||
|
||
namespace, explicitNamespace, err := matchVersionFlags.ToRawKubeConfigLoader().Namespace() | ||
if err != nil { | ||
return fmt.Errorf("error retrieving namespace arg: %w", err) | ||
} | ||
|
||
// This interprets the first arg as either a node or pod (same as kubectl): | ||
// "node001" -> node | ||
// "node/node001" -> node | ||
// "pod/example-7cpzx" -> pod | ||
r := resource.NewBuilder(configFlags). | ||
WithScheme(scheme.Scheme, scheme.Scheme.PrioritizedVersionsAllGroups()...). | ||
FilenameParam(explicitNamespace, &resource.FilenameOptions{}). | ||
NamespaceParam(namespace).DefaultNamespace().ResourceNames("nodes", args[0]). | ||
Do() | ||
if rerr := r.Err(); rerr != nil { | ||
return fmt.Errorf("error constructing resource builder: %w", rerr) | ||
} | ||
|
||
restConfig, err := matchVersionFlags.ToRESTConfig() | ||
if err != nil { | ||
return fmt.Errorf("error constructing REST config: %w", err) | ||
} | ||
|
||
config := shell.Config{ | ||
RestConfig: restConfig, | ||
RetinaShellImage: fmt.Sprintf("%s:%s", retinaShellImageRepo, retinaShellImageVersion), | ||
MountHostFilesystem: mountHostFilesystem, | ||
AllowHostFilesystemWrite: allowHostFilesystemWrite, | ||
HostPID: hostPID, | ||
Capabilities: capabilities, | ||
Timeout: timeout, | ||
} | ||
|
||
return r.Visit(func(info *resource.Info, err error) error { | ||
if err != nil { | ||
return err | ||
} | ||
|
||
switch obj := info.Object.(type) { | ||
case *v1.Node: | ||
podDebugNamespace := namespace | ||
nodeName := obj.Name | ||
return shell.RunInNode(config, nodeName, podDebugNamespace) | ||
case *v1.Pod: | ||
return shell.RunInPod(config, obj.Namespace, obj.Name) | ||
default: | ||
gvk := obj.GetObjectKind().GroupVersionKind() | ||
return fmt.Errorf("unsupported resource %s/%s: %w", gvk.GroupVersion(), gvk.Kind, errUnsupportedResourceType) | ||
} | ||
}) | ||
}, | ||
} | ||
|
||
func init() { | ||
Retina.AddCommand(shellCmd) | ||
shellCmd.PersistentPreRun = func(cmd *cobra.Command, _ []string) { | ||
// Avoid printing full usage message if the command exits with an error. | ||
cmd.SilenceUsage = true | ||
cmd.SilenceErrors = true | ||
|
||
// Allow setting image repo and version via environment variables (CLI flags still take precedence). | ||
if !cmd.Flags().Changed("retina-shell-image-repo") { | ||
if envRepo := os.Getenv("RETINA_SHELL_IMAGE_REPO"); envRepo != "" { | ||
retinaShellImageRepo = envRepo | ||
} | ||
} | ||
if !cmd.Flags().Changed("retina-shell-image-version") { | ||
if envVersion := os.Getenv("RETINA_SHELL_IMAGE_VERSION"); envVersion != "" { | ||
retinaShellImageVersion = envVersion | ||
} | ||
} | ||
} | ||
shellCmd.Flags().StringVar(&retinaShellImageRepo, "retina-shell-image-repo", defaultRetinaShellImageRepo, "The container registry repository for the image to use for the shell container") | ||
shellCmd.Flags().StringVar(&retinaShellImageVersion, "retina-shell-image-version", defaultRetinaShellImageVersion, "The version (tag) of the image to use for the shell container") | ||
shellCmd.Flags().BoolVarP(&mountHostFilesystem, "mount-host-filesystem", "m", false, "Mount the host filesystem to /host. Applies only to nodes, not pods.") | ||
shellCmd.Flags().BoolVarP(&allowHostFilesystemWrite, "allow-host-filesystem-write", "w", false, | ||
"Allow write access to the host filesystem. Implies --mount-host-filesystem. Applies only to nodes, not pods.") | ||
shellCmd.Flags().BoolVar(&hostPID, "host-pid", false, "Set HostPID on the shell container. Applies only to nodes, not pods.") | ||
shellCmd.Flags().StringSliceVarP(&capabilities, "capabilities", "c", []string{}, "Add capabilities to the shell container") | ||
shellCmd.Flags().DurationVar(&timeout, "timeout", defaultTimeout, "The maximum time to wait for the shell container to start") | ||
|
||
// configFlags and matchVersion flags are used to load kubeconfig. | ||
// This uses the same mechanism as `kubectl debug` to connect to apiserver and attach to containers. | ||
configFlags = genericclioptions.NewConfigFlags(true) | ||
configFlags.AddFlags(shellCmd.PersistentFlags()) | ||
matchVersionFlags = cmdutil.NewMatchVersionFlags(configFlags) | ||
matchVersionFlags.AddFlags(shellCmd.PersistentFlags()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
# Shell TSG | ||
|
||
**EXPERIMENTAL: `retina shell` is an experimental feature, so the flags and behavior may change in future versions.** | ||
|
||
The `retina shell` command allows you to start an interactive shell on a Kubernetes node or pod. This runs a container image with many common networking tools installed (`ping`, `curl`, etc.). | ||
|
||
## Testing connectivity | ||
|
||
Start a shell on a node or inside a pod: | ||
|
||
```bash | ||
# To start a shell in a node (root network namespace): | ||
kubectl retina shell aks-nodepool1-15232018-vmss000001 | ||
|
||
# To start a shell inside a pod (pod network namespace): | ||
kubectl retina shell -n kube-system pods/coredns-d459997b4-7cpzx | ||
``` | ||
|
||
Check connectivity using `ping`: | ||
|
||
```text | ||
root [ / ]# ping 10.224.0.4 | ||
PING 10.224.0.4 (10.224.0.4) 56(84) bytes of data. | ||
64 bytes from 10.224.0.4: icmp_seq=1 ttl=64 time=0.964 ms | ||
64 bytes from 10.224.0.4: icmp_seq=2 ttl=64 time=1.13 ms | ||
64 bytes from 10.224.0.4: icmp_seq=3 ttl=64 time=0.908 ms | ||
64 bytes from 10.224.0.4: icmp_seq=4 ttl=64 time=1.07 ms | ||
64 bytes from 10.224.0.4: icmp_seq=5 ttl=64 time=1.01 ms | ||
--- 10.224.0.4 ping statistics --- | ||
5 packets transmitted, 5 received, 0% packet loss, time 4022ms | ||
rtt min/avg/max/mdev = 0.908/1.015/1.128/0.077 ms | ||
``` | ||
|
||
Check DNS resolution using `dig`: | ||
|
||
```text | ||
root [ / ]# dig example.com +short | ||
93.184.215.14 | ||
``` | ||
|
||
The tools `nslookup` and `drill` are also available if you prefer those. | ||
|
||
Check connectivity to apiserver using `nc` and `curl`: | ||
|
||
```text | ||
root [ / ]# nc -zv 10.0.0.1 443 | ||
Ncat: Version 7.95 ( https://nmap.org/ncat ) | ||
Ncat: Connected to 10.0.0.1:443. | ||
Ncat: 0 bytes sent, 0 bytes received in 0.06 seconds. | ||
root [ / ]# curl -k https://10.0.0.1 | ||
{ | ||
"kind": "Status", | ||
"apiVersion": "v1", | ||
"metadata": {}, | ||
"status": "Failure", | ||
"message": "Unauthorized", | ||
"reason": "Unauthorized", | ||
"code": 401 | ||
} | ||
``` | ||
|
||
### nftables and iptables | ||
|
||
Accessing nftables and iptables rules requires `NET_RAW` and `NET_ADMIN` capabilities. | ||
|
||
```bash | ||
kubectl retina shell aks-nodepool1-15232018-vmss000002 --capabilities NET_ADMIN,NET_RAW | ||
``` | ||
|
||
Then you can run `iptables` and `nft`: | ||
|
||
```text | ||
root [ / ]# iptables -nvL | head -n 2 | ||
Chain INPUT (policy ACCEPT 1191K packets, 346M bytes) | ||
pkts bytes target prot opt in out source destination | ||
root [ / ]# nft list ruleset | head -n 2 | ||
# Warning: table ip filter is managed by iptables-nft, do not touch! | ||
table ip filter { | ||
``` | ||
|
||
**If you see the error "Operation not permitted (you must be root)", check that your `kubectl retina shell` command sets `--capabilities NET_RAW,NET_ADMIN`.** | ||
|
||
`iptables` in the shell image uses `iptables-legacy`, which may or may not match the configuration on the node. For example, Ubuntu maps `iptables` to `iptables-nft`. To use the exact same `iptables` binary as installed on the node, you will need to `chroot` into the host filesystem (see below). | ||
|
||
## Accessing the host filesystem | ||
|
||
On nodes, you can mount the host filesystem to `/host`: | ||
|
||
```bash | ||
kubectl retina shell aks-nodepool1-15232018-vmss000002 --mount-host-filesystem | ||
``` | ||
|
||
This mounts the host filesystem (`/`) to `/host` in the debug pod: | ||
|
||
```text | ||
root [ / ]# ls /host | ||
NOTICE.txt bin boot dev etc home lib lib64 libx32 lost+found media mnt opt proc root run sbin srv sys tmp usr var | ||
``` | ||
|
||
The host filesystem is mounted read-only by default. If you need write access, use the `--allow-host-filesystem-write` flag. | ||
|
||
Symlinks between files on the host filesystem may not resolve correctly. If you see "No such file or directory" errors for symlinks, try following the instructions below to `chroot` to the host filesystem. | ||
|
||
## Chroot to the host filesystem | ||
|
||
`chroot` requires the `SYS_CHROOT` capability: | ||
|
||
```bash | ||
kubectl retina shell aks-nodepool1-15232018-vmss000002 --mount-host-filesystem --capabilities SYS_CHROOT | ||
``` | ||
|
||
Then you can use `chroot` to start a shell inside the host filesystem: | ||
|
||
```text | ||
root [ / ]# chroot /host bash | ||
root@aks-nodepool1-15232018-vmss000002:/# cat /etc/resolv.conf | tail -n 2 | ||
nameserver 168.63.129.16 | ||
search shncgv2kgepuhm1ls1dwgholsd.cx.internal.cloudapp.net | ||
``` | ||
|
||
`chroot` allows you to: | ||
|
||
* Execute binaries installed on the node. | ||
* Resolve symlinks that point to files in the host filesystem (such as /etc/resolv.conf -> /run/systemd/resolve/resolv.conf). | ||
* Use `sysctl` to view or modify kernel parameters. | ||
* Use `journalctl` to view systemd unit and kernel logs. | ||
* Use `ip netns` to view network namespaces. (However, `ip netns exec` does not work.) | ||
|
||
## Systemctl | ||
|
||
`systemctl` commands require both `chroot` to the host filesystem and host PID: | ||
|
||
```bash | ||
kubectl retina shell aks-nodepool1-15232018-vmss000002 --mount-host-filesystem --capabilities SYS_CHROOT --host-pid | ||
``` | ||
|
||
Then `chroot` to the host filesystem and run `systemctl status`: | ||
|
||
```text | ||
root [ / ]# chroot /host systemctl status | head -n 2 | ||
● aks-nodepool1-15232018-vmss000002 | ||
State: running | ||
``` | ||
|
||
**If `systemctl` shows an error "Failed to connect to bus: No data available", check that the `retina shell` command has `--host-pid` set and that you have chroot'd to /host.** | ||
|
||
## Troubleshooting | ||
|
||
### Timeouts | ||
|
||
If `kubectl retina shell` fails with a timeout error, then: | ||
|
||
1. Increase the timeout by setting `--timeout` flag. | ||
2. Check the pod using `kubectl describe pod` to determine why retina shell is failing to start. | ||
|
||
Example: | ||
|
||
```bash | ||
kubectl retina shell --timeout 10m node001 # increase timeout to 10 minutes | ||
``` | ||
|
||
### Firewalls and ImagePullBackoff | ||
|
||
Some clusters are behind a firewall that blocks pulling the retina-shell image. To work around this: | ||
|
||
1. Replicate the retina-shell images to a container registry accessible from within the cluster. | ||
2. Override the image used by Retina CLI with the environment variable `RETINA_SHELL_IMAGE_REPO`. | ||
|
||
Example: | ||
|
||
```bash | ||
export RETINA_SHELL_IMAGE_REPO="example.azurecr.io/retina/retina-shell" | ||
export RETINA_SHELL_IMAGE_VERSION=v0.0.1 # optional, if not set defaults to the Retina CLI version. | ||
kubectl retina shell node0001 # this will use the image "example.azurecr.io/retina/retina-shell:v0.0.1" | ||
``` | ||
|
||
## Limitations | ||
|
||
* Windows nodes and pods are not yet supported. | ||
* `bpftool` and `bpftrace` are not supported. | ||
* The shell image links `iptables` commands to `iptables-legacy`, even if the node itself links to `iptables-nft`. | ||
* `nsenter` is not supported. | ||
* `ip netns` will not work without `chroot` to the host filesystem. |
Oops, something went wrong.