From c2d08aae2afb2b07d87b87c8ce8f3ca25c495886 Mon Sep 17 00:00:00 2001 From: Clemens Kolbitsch Date: Tue, 20 Apr 2021 00:11:40 -0700 Subject: [PATCH] More flexible signal propagation Add a few more configuration options around signal handling: - allow specifying the "stop signal" to pass to the child process, in case the child does not happen to handle SIGTERM. - allow suppressing the exit code of the child process to 0 if the child does not gracefully exit when being stopped, but the caller needs that to happen (typically the case for k8s jobs). Since we happen to extend configurations, also allow enabling logging with timestamps, which had been set to off. All configurations are optional and backwards compatible. --- README.md | 10 +++++++ cmd/kubexit/main.go | 54 ++++++++++++++++++++++++++++++++++-- pkg/supervisor/supervisor.go | 10 +++++-- 3 files changed, 70 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7cd1910..a63e7b6 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,16 @@ Birth Dependency: - `KUBEXIT_POD_NAME` - The name of the Kubernetes pod that this process and all its siblings are in. - `KUBEXIT_NAMESPACE` - The name of the Kubernetes namespace that this pod is in. +Signal Handling: +- `KUBEXIT_STOPSIGNAL` - Optional signal to use to stop the child processes. One of `SIGINT` (or just `INT`) or + `SIGTERM` (or just `TERM`). +- `KUBEXIT_SUPPRESS_STOPPED_EXITCODE` - If `true`, suppress any exit code from the child process that is returned + in response to `kubexit` stopping it (either via the stop-signal or by killing it). Ensures that `kubexit` returns + exit code 0 whenever a stop is triggered due to one of the dependencies. + +Misc: +- `KUBEXIT_LOG_DATETIME` - Include timestamp in logs + ## Install While kubexit can easily be installed on your local machine, the primary use cases require execution within Kubernetes pod containers. 
So the recommended method of installation is to either side-load kubexit using a shared volume and an init container, or build kubexit into your own container images. diff --git a/cmd/kubexit/main.go b/cmd/kubexit/main.go index 827cd95..4d86949 100644 --- a/cmd/kubexit/main.go +++ b/cmd/kubexit/main.go @@ -8,6 +8,7 @@ import ( "os/exec" "os/signal" "path/filepath" + "strconv" "strings" "syscall" "time" @@ -24,8 +25,15 @@ import ( func main() { var err error - // remove log timestamp - log.SetFlags(log.Flags() &^ (log.Ldate | log.Ltime)) + // remove log timestamp (by default, unless configured to be kept) + logDateTime, err := parseBoolEnv("KUBEXIT_LOG_DATETIME", false) + if err != nil { + log.Printf("Error: Invalid KUBEXIT_LOG_DATETIME (%s)\n", err.Error()) + os.Exit(2) + } + if !logDateTime { + log.SetFlags(log.Flags() &^ (log.Ldate | log.Ltime)) + } args := os.Args[1:] if len(args) == 0 { @@ -119,6 +127,31 @@ func main() { child := supervisor.New(args[0], args[1:]...) + stopSignal := os.Getenv("KUBEXIT_STOPSIGNAL") + switch stopSignal { + case "INT", "SIGINT": + log.Println("Using SIGINT as stop child signal") + child.WithStopSignal(syscall.SIGINT) + case "TERM", "SIGTERM": + log.Println("Using SIGTERM as stop child signal") + child.WithStopSignal(syscall.SIGTERM) + case "": + log.Println("Using default stop child signal") + default: + log.Println("Error: Invalid stop child signal") + } + + // In many situations, it's important to have all containers in a k8s "job" with an + // exit code indicating success. But, if we stop the process, tools may reflect that + // in their exit code that they were terminated unexpectedly. 
+ // Allow suppressing the exit code and return code 0 to the caller if kubexit is the + // reason for the process termination + suppressStoppedExitcode, err := parseBoolEnv("KUBEXIT_SUPPRESS_STOPPED_EXITCODE", false) + if err != nil { + log.Printf("Error: Invalid KUBEXIT_SUPPRESS_STOPPED_EXITCODE (%s)\n", err.Error()) + os.Exit(2) + } + // watch for death deps early, so they can interrupt waiting for birth deps if len(deathDeps) > 0 { ctx, stopGraveyardWatcher := context.WithCancel(context.Background()) @@ -166,9 +199,26 @@ func main() { os.Exit(1) } + if suppressStoppedExitcode { + log.Printf("Suppressing child exit code (%d): it was stopped by kubexit\n", code) + code = 0 + } os.Exit(code) } +func parseBoolEnv(key string, defaultValue bool) (bool, error) { + value := defaultValue + envValue := os.Getenv(key) + if envValue != "" { + var err error + value, err = strconv.ParseBool(envValue) + if err != nil { + return false, err + } + } + return value, nil +} + func waitForBirthDeps(birthDeps []string, namespace, podName string, timeout time.Duration) error { // Cancel context on SIGTERM to trigger graceful exit ctx := withCancelOnSignal(context.Background(), syscall.SIGTERM) diff --git a/pkg/supervisor/supervisor.go b/pkg/supervisor/supervisor.go index bc8eb81..b742621 100644 --- a/pkg/supervisor/supervisor.go +++ b/pkg/supervisor/supervisor.go @@ -19,6 +19,7 @@ type Supervisor struct { sigCh chan os.Signal startStopLock sync.Mutex shutdownTimer *time.Timer + stopSignal syscall.Signal } func New(name string, args ...string) *Supervisor { @@ -31,10 +32,15 @@ func New(name string, args ...string) *Supervisor { cmd.Stderr = os.Stderr cmd.Env = os.Environ() return &Supervisor{ - cmd: cmd, + cmd: cmd, + stopSignal: syscall.SIGTERM, } } +func (s *Supervisor) WithStopSignal(stopSignal syscall.Signal) { + s.stopSignal = stopSignal +} + func (s *Supervisor) Start() error { s.startStopLock.Lock() defer s.startStopLock.Unlock() @@ -119,7 +125,7 @@ func (s *Supervisor) 
ShutdownWithTimeout(timeout time.Duration) error { } log.Println("Terminating child process...") - err := s.cmd.Process.Signal(syscall.SIGTERM) + err := s.cmd.Process.Signal(s.stopSignal) if err != nil { return fmt.Errorf("failed to terminate child process: %v", err) }