Skip to content

Commit

Permalink
feat: implement retina shell CLI command
Browse files Browse the repository at this point in the history
Signed-off-by: Will Daly <[email protected]>
  • Loading branch information
wedaly committed Nov 6, 2024
1 parent ed639b0 commit 381f4eb
Show file tree
Hide file tree
Showing 9 changed files with 550 additions and 0 deletions.
150 changes: 150 additions & 0 deletions cli/cmd/shell.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
package cmd

import (
"fmt"
"os"
"time"

"github.com/microsoft/retina/shell"
"github.com/spf13/cobra"
v1 "k8s.io/api/core/v1"
"k8s.io/cli-runtime/pkg/genericclioptions"
"k8s.io/cli-runtime/pkg/resource"
cmdutil "k8s.io/kubectl/pkg/cmd/util"
"k8s.io/kubectl/pkg/scheme"
"k8s.io/kubectl/pkg/util/templates"
)

// Package-level state for the `shell` command. Each value below is bound to a
// command-line flag in init() and read when the command runs.
var (
// configFlags and matchVersionFlags are used to load kubeconfig and build
// the REST config / resource builder for the target cluster.
configFlags *genericclioptions.ConfigFlags
matchVersionFlags *cmdutil.MatchVersionFlags
// retinaShellImageRepo and retinaShellImageVersion are joined as
// "<repo>:<version>" to form the shell container image reference.
retinaShellImageRepo string
retinaShellImageVersion string
// mountHostFilesystem is set by --mount-host-filesystem.
mountHostFilesystem bool
// allowHostFilesystemWrite is set by --allow-host-filesystem-write.
allowHostFilesystemWrite bool
// hostPID is set by --host-pid.
hostPID bool
// capabilities is set by --capabilities and lists capabilities to add to
// the shell container.
capabilities []string
// timeout is set by --timeout: the maximum time to wait for the shell
// container to start.
timeout time.Duration
)

// defaultRetinaShellImageRepo is the default value of --retina-shell-image-repo.
const defaultRetinaShellImageRepo = "ghcr.io/microsoft/retina/retina-shell"

// shellCmd implements `kubectl retina shell`. It resolves its single argument
// to either a node or a pod (using the same resource-name syntax as kubectl)
// and then starts an interactive shell container:
//   - for a node, via shell.RunInNode, in the namespace selected by -n / kubeconfig;
//   - for a pod, via shell.RunInPod, in the pod's own namespace.
//
// Any other resource kind is rejected with an "unsupported resource" error.
var shellCmd = &cobra.Command{
	Use:   "shell (NODE | TYPE[[.VERSION].GROUP]/NAME)",
	Short: "[EXPERIMENTAL] Interactively debug a node or pod",
	Long: templates.LongDesc(`
[EXPERIMENTAL] This is an experimental command. The flags and behavior may change in the future.
Start a shell with networking tools in a node or pod for adhoc debugging.
* For nodes, this creates a pod on the node in the root network namespace.
* For pods, this creates an ephemeral container inside the pod's network namespace.
You can override the default image used for the shell container with either
CLI flags (--retina-shell-image-repo and --retina-shell-image-version) or
environment variables (RETINA_SHELL_IMAGE_REPO and RETINA_SHELL_IMAGE_VERSION).
CLI flags take precedence over env vars.
`),

	Example: templates.Examples(`
# start a shell in a node
kubectl retina shell node0001
# start a shell in a node, with debug pod in kube-system namespace
kubectl retina shell -n kube-system node0001
# start a shell as an ephemeral container inside an existing pod
kubectl retina shell -n kube-system pod/coredns-d459997b4-7cpzx
# start a shell in a node, mounting the host filesystem to /host with ability to chroot
kubectl retina shell node001 --mount-host-filesystem --capabilities SYS_CHROOT
# start a shell in a node, with NET_RAW and NET_ADMIN capabilities
# (required for iptables and tcpdump)
kubectl retina shell node001 --capabilities NET_RAW,NET_ADMIN
`),
	Args: cobra.ExactArgs(1),
	RunE: func(cmd *cobra.Command, args []string) error {
		// The image reference is built as "<repo>:<version>". If either half is
		// empty the reference is malformed, so fail here with an actionable
		// message instead of creating a container that can never start.
		if retinaShellImageRepo == "" {
			return fmt.Errorf("shell image repo is empty: set --retina-shell-image-repo or RETINA_SHELL_IMAGE_REPO")
		}
		if retinaShellImageVersion == "" {
			return fmt.Errorf("shell image version is empty: set --retina-shell-image-version or RETINA_SHELL_IMAGE_VERSION")
		}

		namespace, explicitNamespace, err := matchVersionFlags.ToRawKubeConfigLoader().Namespace()
		if err != nil {
			return fmt.Errorf("resolving namespace from kubeconfig: %w", err)
		}

		// This interprets the first arg as either a node or pod (same as kubectl):
		// "node001" -> node
		// "node/node001" -> node
		// "pod/example-7cpzx" -> pod
		r := resource.NewBuilder(configFlags).
			WithScheme(scheme.Scheme, scheme.Scheme.PrioritizedVersionsAllGroups()...).
			FilenameParam(explicitNamespace, &resource.FilenameOptions{}).
			NamespaceParam(namespace).DefaultNamespace().ResourceNames("nodes", args[0]).
			Do()
		if err := r.Err(); err != nil {
			return fmt.Errorf("resolving resource %q: %w", args[0], err)
		}

		restConfig, err := matchVersionFlags.ToRESTConfig()
		if err != nil {
			return fmt.Errorf("loading REST config: %w", err)
		}

		config := shell.Config{
			RestConfig:               restConfig,
			RetinaShellImage:         fmt.Sprintf("%s:%s", retinaShellImageRepo, retinaShellImageVersion),
			MountHostFilesystem:      mountHostFilesystem,
			AllowHostFilesystemWrite: allowHostFilesystemWrite,
			HostPID:                  hostPID,
			Capabilities:             capabilities,
			Timeout:                  timeout,
		}

		return r.Visit(func(info *resource.Info, err error) error {
			// The visitor can be invoked with an error from resource resolution;
			// propagate it rather than dereferencing info.
			if err != nil {
				return err
			}

			switch obj := info.Object.(type) {
			case *v1.Node:
				// The debug pod for a node is created in the namespace the user selected.
				return shell.RunInNode(config, obj.Name, namespace)
			case *v1.Pod:
				return shell.RunInPod(config, obj.Namespace, obj.Name)
			default:
				gvk := obj.GetObjectKind().GroupVersionKind()
				return fmt.Errorf("unsupported resource %s/%s", gvk.GroupVersion(), gvk.Kind)
			}
		})
	},
}

// init registers the shell command under the root Retina command, declares its
// flags, and installs a pre-run hook that applies environment-variable
// overrides for the shell image.
func init() {
	Retina.AddCommand(shellCmd)

	// Command-specific flags.
	flags := shellCmd.Flags()
	flags.StringVar(&retinaShellImageRepo, "retina-shell-image-repo", defaultRetinaShellImageRepo, "The container registry repository for the image to use for the shell container")
	flags.StringVar(&retinaShellImageVersion, "retina-shell-image-version", Version, "The version (tag) of the image to use for the shell container")
	flags.BoolVarP(&mountHostFilesystem, "mount-host-filesystem", "m", false, "Mount the host filesystem to /host. Applies only to nodes, not pods.")
	flags.BoolVarP(&allowHostFilesystemWrite, "allow-host-filesystem-write", "w", false, "Allow write access to the host filesystem. Implies --mount-host-filesystem. Applies only to nodes, not pods.")
	flags.BoolVar(&hostPID, "host-pid", false, "Set HostPID on the shell container. Applies only to nodes, not pods.")
	flags.StringSliceVarP(&capabilities, "capabilities", "c", []string{}, "Add capabilities to the shell container")
	flags.DurationVar(&timeout, "timeout", 30*time.Second, "The maximum time to wait for the shell container to start")

	// configFlags and matchVersionFlags are used to load kubeconfig.
	// This uses the same mechanism as `kubectl debug` to connect to apiserver and attach to containers.
	persistent := shellCmd.PersistentFlags()
	configFlags = genericclioptions.NewConfigFlags(true)
	configFlags.AddFlags(persistent)
	matchVersionFlags = cmdutil.NewMatchVersionFlags(configFlags)
	matchVersionFlags.AddFlags(persistent)

	shellCmd.PersistentPreRun = func(cmd *cobra.Command, args []string) {
		// Avoid printing full usage message if the command exits with an error.
		cmd.SilenceUsage = true
		cmd.SilenceErrors = true

		// fromEnv copies a non-empty environment variable into target, but only
		// when the corresponding flag was not given on the command line, so
		// that CLI flags still take precedence.
		fromEnv := func(flagName, envName string, target *string) {
			if cmd.Flags().Changed(flagName) {
				return
			}
			if value := os.Getenv(envName); value != "" {
				*target = value
			}
		}
		fromEnv("retina-shell-image-repo", "RETINA_SHELL_IMAGE_REPO", &retinaShellImageRepo)
		fromEnv("retina-shell-image-version", "RETINA_SHELL_IMAGE_VERSION", &retinaShellImageVersion)
	}
}
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,11 @@ require (
github.com/evanphx/json-patch v5.9.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.9.0 // indirect
github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d // indirect
github.com/fatih/camelcase v1.0.0 // indirect
github.com/fatih/color v1.16.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/fvbommel/sortorder v1.1.0 // indirect
github.com/go-errors/errors v1.4.2 // indirect
github.com/go-gorp/gorp/v3 v3.1.0 // indirect
github.com/go-jose/go-jose/v3 v3.0.3 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,8 @@ github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0
github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ=
github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d h1:105gxyaGwCFad8crR9dcMQWvV9Hvulu6hwUh4tWPJnM=
github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d/go.mod h1:ZZMPRZwes7CROmyNKgQzC3XPs6L/G2EJLHddWejkmf4=
github.com/fatih/camelcase v1.0.0 h1:hxNvNX/xYBp0ovncs8WyWZrOrpBNub/JfaMvbURyft8=
github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=
Expand All @@ -312,6 +314,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/fvbommel/sortorder v1.1.0 h1:fUmoe+HLsBTctBDoaBwpQo5N+nrCp8g/BjKb/6ZQmYw=
github.com/fvbommel/sortorder v1.1.0/go.mod h1:uk88iVf1ovNn1iLfgUVU2F9o5eO30ui720w+kxuqRs0=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-chi/chi v4.1.2+incompatible h1:fGFk2Gmi/YKXk0OmGfBh0WgmN3XB8lVnEyNz34tQRec=
github.com/go-chi/chi v4.1.2+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ=
Expand Down
14 changes: 14 additions & 0 deletions shell/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# retina-shell

Retina CLI provides a command to launch an interactive shell in a node or pod for ad hoc debugging.

* The CLI command `kubectl retina shell` creates a pod with `HostNetwork=true` (for node debugging) or an ephemeral container in an existing pod (for pod debugging).
* The container runs an image built from the Dockerfile in this directory. The image is based on Azure Linux and includes commonly-used networking tools.

For testing, you can override the image used by `retina shell` either with CLI arguments
(`--retina-shell-image-repo` and `--retina-shell-image-version`) or environment variables
(`RETINA_SHELL_IMAGE_REPO` and `RETINA_SHELL_IMAGE_VERSION`).

Run `kubectl retina shell -h` for full documentation and examples.

Currently only Linux is supported; Windows support will be added in the future.
78 changes: 78 additions & 0 deletions shell/attach.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package shell

import (
"context"
"errors"
"fmt"
"os"
"time"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/cli-runtime/pkg/genericiooptions"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/kubectl/pkg/cmd/attach"
"k8s.io/kubectl/pkg/cmd/exec"
)

// attachToShell attaches the current process's stdin, stdout and stderr to the
// given container using kubectl's attach implementation, with stdin and a TTY
// enabled. It returns whatever error the attach run reports.
func attachToShell(restConfig *rest.Config, namespace string, podName string, containerName string, pod *v1.Pod) error {
	// Wire the local terminal through to the remote container.
	terminal := genericiooptions.IOStreams{
		In:     os.Stdin,
		Out:    os.Stdout,
		ErrOut: os.Stderr,
	}

	streamOpts := exec.StreamOptions{
		Namespace:     namespace,
		PodName:       podName,
		ContainerName: containerName,
		IOStreams:     terminal,
		Stdin:         true,
		TTY:           true,
		Quiet:         true,
	}

	opts := &attach.AttachOptions{
		Config:        restConfig,
		StreamOptions: streamOpts,
		Attach:        &attach.DefaultRemoteAttach{},
		AttachFunc:    attach.DefaultAttachFunc,
		Pod:           pod,
	}
	return opts.Run()
}

// waitForContainerRunning polls the pod roughly once per second until the
// container named containerName (regular or ephemeral) reports a Running state.
//
// It returns:
//   - nil once the container is running;
//   - the error from waitTimeoutError if timeout elapses first;
//   - the context's error if the caller's ctx is cancelled before the timeout;
//   - any other error returned by the pod lookup, unchanged.
func waitForContainerRunning(ctx context.Context, timeout time.Duration, clientset *kubernetes.Clientset, namespace, podName, containerName string) error {
	const pollInterval = 1 * time.Second

	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	// running reports whether the target container appears as Running in statuses.
	running := func(statuses []v1.ContainerStatus) bool {
		for i := range statuses {
			if statuses[i].Name == containerName && statuses[i].State.Running != nil {
				return true
			}
		}
		return false
	}

	ticker := time.NewTicker(pollInterval)
	defer ticker.Stop()

	for {
		pod, err := clientset.CoreV1().
			Pods(namespace).
			Get(ctx, podName, metav1.GetOptions{})
		if err != nil {
			if errors.Is(err, context.DeadlineExceeded) {
				return waitTimeoutError(timeout, containerName)
			}
			return err
		}

		if running(pod.Status.ContainerStatuses) || running(pod.Status.EphemeralContainerStatuses) {
			return nil
		}

		select {
		case <-ctx.Done():
			// Only report a timeout when the deadline actually expired; a
			// cancellation from the caller is surfaced as-is.
			if errors.Is(ctx.Err(), context.DeadlineExceeded) {
				return waitTimeoutError(timeout, containerName)
			}
			return ctx.Err()
		case <-ticker.C:
		}
	}
}

// waitTimeoutError builds the error returned when the shell container did not
// start within timeout. The message names the container and points the user at
// the --timeout flag.
func waitTimeoutError(timeout time.Duration, containerName string) error {
	const format = "timed out after %s waiting for container %s to start. The timeout can be increased by setting --timeout"
	return fmt.Errorf(format, timeout, containerName)
}
83 changes: 83 additions & 0 deletions shell/manifests.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package shell

import (
"fmt"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilrand "k8s.io/apimachinery/pkg/util/rand"
)

// ephemeralContainerForPodDebug builds the ephemeral container used to debug an
// existing pod. The container gets a randomly suffixed "retina-shell-" name,
// runs the configured shell image with stdin and a TTY, drops all capabilities
// and adds back only those listed in config.Capabilities.
func ephemeralContainerForPodDebug(config Config) v1.EphemeralContainer {
	securityContext := &v1.SecurityContext{
		Capabilities: &v1.Capabilities{
			Drop: []v1.Capability{"ALL"},
			Add:  stringSliceToCapabilities(config.Capabilities),
		},
	}

	var container v1.EphemeralContainer
	container.Name = fmt.Sprintf("retina-shell-%s", utilrand.String(5))
	container.Image = config.RetinaShellImage
	container.Stdin = true
	container.TTY = true
	container.SecurityContext = securityContext
	return container
}

// hostNetworkPodForNodeDebug builds the pod used to debug a node. The pod is
// pinned to nodeName, uses the host network, never restarts, and carries a
// toleration with operator Exists. Its single "retina-shell" container drops
// all capabilities and adds back only those in config.Capabilities.
//
// When config.MountHostFilesystem or config.AllowHostFilesystemWrite is set,
// the host's "/" is mounted at /host; the mount is read-only unless
// config.AllowHostFilesystemWrite is true.
func hostNetworkPodForNodeDebug(config Config, debugPodNamespace string, nodeName string) *v1.Pod {
	shellContainer := v1.Container{
		Name:  "retina-shell",
		Image: config.RetinaShellImage,
		Stdin: true,
		TTY:   true,
		SecurityContext: &v1.SecurityContext{
			Capabilities: &v1.Capabilities{
				Drop: []v1.Capability{"ALL"},
				Add:  stringSliceToCapabilities(config.Capabilities),
			},
		},
	}

	spec := v1.PodSpec{
		NodeName:      nodeName,
		RestartPolicy: v1.RestartPolicyNever,
		Tolerations:   []v1.Toleration{{Operator: v1.TolerationOpExists}},
		HostNetwork:   true,
		HostPID:       config.HostPID,
	}

	// Write access implies mounting, so either option triggers the host mount.
	wantHostFilesystem := config.MountHostFilesystem || config.AllowHostFilesystemWrite
	if wantHostFilesystem {
		const volumeName = "host-filesystem"
		spec.Volumes = []v1.Volume{{
			Name: volumeName,
			VolumeSource: v1.VolumeSource{
				HostPath: &v1.HostPathVolumeSource{Path: "/"},
			},
		}}
		shellContainer.VolumeMounts = []v1.VolumeMount{{
			Name:      volumeName,
			MountPath: "/host",
			ReadOnly:  !config.AllowHostFilesystemWrite,
		}}
	}
	spec.Containers = []v1.Container{shellContainer}

	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      fmt.Sprintf("retina-shell-%s", utilrand.String(5)),
			Namespace: debugPodNamespace,
		},
		Spec: spec,
	}
}

// stringSliceToCapabilities converts each string in ss to a v1.Capability,
// preserving order. It returns nil when ss is empty.
func stringSliceToCapabilities(ss []string) []v1.Capability {
	if len(ss) == 0 {
		return nil
	}
	converted := make([]v1.Capability, len(ss))
	for i := range ss {
		converted[i] = v1.Capability(ss[i])
	}
	return converted
}
Loading

0 comments on commit 381f4eb

Please sign in to comment.