11package plugin
22
33import (
4+ "context"
45 "encoding/json"
56 "fmt"
67 "github.com/aylei/kubectl-debug/pkg/util"
@@ -9,14 +10,19 @@ import (
910 "io"
1011 corev1 "k8s.io/api/core/v1"
1112 "k8s.io/apimachinery/pkg/apis/meta/v1"
13+ "k8s.io/apimachinery/pkg/util/uuid"
1214 "k8s.io/cli-runtime/pkg/genericclioptions"
1315 "k8s.io/client-go/kubernetes"
1416 coreclient "k8s.io/client-go/kubernetes/typed/core/v1"
1517 restclient "k8s.io/client-go/rest"
1618 "k8s.io/client-go/tools/remotecommand"
19+ "k8s.io/client-go/tools/watch"
20+ "k8s.io/kubernetes/pkg/client/conditions"
21+ "k8s.io/kubernetes/pkg/util/interrupt"
1722 "log"
1823 "net/url"
1924 "os/user"
25+ "time"
2026)
2127
2228const (
@@ -54,17 +60,17 @@ type DebugOptions struct {
5460 PodName string
5561
5662 // Debug options
57- RetainContainer bool
58- Image string
59- ContainerName string
60- Command [] string
61- AgentPort int
62- ConfigLocation string
63-
64- Flags * genericclioptions.ConfigFlags
65- PodClient coreclient.PodsGetter
66- Args []string
67- Config * restclient.Config
63+ Image string
64+ ContainerName string
65+ Command [] string
66+ AgentPort int
67+ ConfigLocation string
68+ Fork bool
69+
70+ Flags * genericclioptions.ConfigFlags
71+ CoreClient coreclient.CoreV1Interface
72+ Args []string
73+ Config * restclient.Config
6874
6975 genericclioptions.IOStreams
7076}
@@ -106,6 +112,8 @@ func NewDebugCmd(streams genericclioptions.IOStreams) *cobra.Command {
106112 fmt .Sprintf ("Agent port for debug cli to connect, default to %d" , defaultAgentPort ))
107113 cmd .Flags ().StringVar (& opts .ConfigLocation , "debug-config" , "" ,
108114 fmt .Sprintf ("Debug config file, default to ~%s" , defaultConfigLocation ))
115+ cmd .Flags ().BoolVar (& opts .Fork , "fork" , false ,
116+ "Fork a new pod for debugging (useful if the pod status is CrashLoopBackoff)" )
109117 opts .Flags .AddFlags (cmd .Flags ())
110118
111119 return cmd
@@ -173,7 +181,7 @@ func (o *DebugOptions) Complete(cmd *cobra.Command, args []string, argsLenAtDash
173181 if err != nil {
174182 return err
175183 }
176- o .PodClient = clientset .CoreV1 ()
184+ o .CoreClient = clientset .CoreV1 ()
177185
178186 return nil
179187}
@@ -190,14 +198,10 @@ func (o *DebugOptions) Validate() error {
190198
191199func (o * DebugOptions ) Run () error {
192200
193- pod , err := o .PodClient .Pods (o .Namespace ).Get (o .PodName , v1.GetOptions {})
201+ pod , err := o .CoreClient .Pods (o .Namespace ).Get (o .PodName , v1.GetOptions {})
194202 if err != nil {
195203 return err
196204 }
197- if pod .Status .Phase == corev1 .PodSucceeded || pod .Status .Phase == corev1 .PodFailed {
198- return fmt .Errorf ("cannot debug in a completed pod; current phase is %s" , pod .Status .Phase )
199- }
200- hostIP := pod .Status .HostIP
201205
202206 containerName := o .ContainerName
203207 if len (containerName ) == 0 {
@@ -208,6 +212,35 @@ func (o *DebugOptions) Run() error {
208212 containerName = pod .Spec .Containers [0 ].Name
209213 }
210214
215+ // In fork mode, we launch a new pod as a copy of the target pod
216+ // and replace the entry point of the target container with a sleep command,
217+ // which keeps the container running.
218+ if o .Fork {
219+ pod = copyAndStripPod (pod , containerName )
220+ pod , err = o .CoreClient .Pods (pod .Namespace ).Create (pod )
221+ if err != nil {
222+ return err
223+ }
224+ watcher , err := o .CoreClient .Pods (pod .Namespace ).Watch (v1 .SingleObject (pod .ObjectMeta ))
225+ if err != nil {
226+ return err
227+ }
228+ // FIXME: hard code -> config
229+ ctx , cancel := context .WithTimeout (context .Background (), 5 * time .Minute )
230+ defer cancel ()
231+ log .Println ("waiting for forked container running..." )
232+ event , err := watch .UntilWithoutRetry (ctx , watcher , conditions .PodRunning )
233+ if err != nil {
234+ return err
235+ }
236+ pod = event .Object .(* corev1.Pod )
237+ }
238+
239+ if pod .Status .Phase == corev1 .PodSucceeded || pod .Status .Phase == corev1 .PodFailed {
240+ return fmt .Errorf ("cannot debug in a completed pod; current phase is %s" , pod .Status .Phase )
241+ }
242+ hostIP := pod .Status .HostIP
243+
211244 containerId , err := o .getContainerIdByName (pod , containerName )
212245 if err != nil {
213246 return err
@@ -244,7 +277,20 @@ func (o *DebugOptions) Run() error {
244277 return o .remoteExecute ("POST" , uri , o .Config , o .In , o .Out , o .ErrOut , t .Raw , sizeQueue )
245278 }
246279
247- if err := t .Safe (fn ); err != nil {
280+ // ensure forked pod is deleted on cancelation
281+ withCleanUp := func () error {
282+ return interrupt .Chain (nil , func () {
283+ if o .Fork {
284+ err := o .CoreClient .Pods (pod .Namespace ).Delete (pod .Name , v1 .NewDeleteOptions (0 ))
285+ if err != nil {
286+ // we may leak the pod here, but there is nothing we can do except notify the user
287+ log .Printf ("failed to delete pod %s, consider manual deletion." , pod .Name )
288+ }
289+ }
290+ }).Run (fn )
291+ }
292+
293+ if err := t .Safe (withCleanUp ); err != nil {
248294 fmt .Printf ("error execute remote, %v\n " , err )
249295 return err
250296 }
@@ -308,3 +354,31 @@ func (o *DebugOptions) setupTTY() term.TTY {
308354 }
309355 return t
310356}
357+
358+ // copyAndStripPod copy the given pod template, strip the probes and labels,
359+ // and replace the entry point
360+ func copyAndStripPod (pod * corev1.Pod , targetContainer string ) * corev1.Pod {
361+ copied := & corev1.Pod {
362+ ObjectMeta : * pod .ObjectMeta .DeepCopy (),
363+ Spec : * pod .Spec .DeepCopy (),
364+ }
365+ copied .Name = fmt .Sprintf ("%s-%s-debug" , pod .Name , uuid .NewUUID ())
366+ copied .Labels = nil
367+ copied .Spec .RestartPolicy = corev1 .RestartPolicyNever
368+ for i , c := range copied .Spec .Containers {
369+ copied .Spec .Containers [i ].LivenessProbe = nil
370+ copied .Spec .Containers [i ].ReadinessProbe = nil
371+ if c .Name == targetContainer {
372+ // Hack, infinite sleep command to keep the container running
373+ copied .Spec .Containers [i ].Command = []string {"sh" , "-c" , "--" }
374+ copied .Spec .Containers [i ].Args = []string {"while true; do sleep 30; done;" }
375+ }
376+ }
377+ copied .ResourceVersion = ""
378+ copied .UID = ""
379+ copied .SelfLink = ""
380+ copied .CreationTimestamp = v1.Time {}
381+ copied .OwnerReferences = []v1.OwnerReference {}
382+
383+ return copied
384+ }
0 commit comments