From c84c2bb0e874cc7a71ee64f00c7835f01c22990e Mon Sep 17 00:00:00 2001 From: Karl Isenberg Date: Wed, 12 Aug 2020 14:07:49 -0700 Subject: [PATCH 01/13] Handle tombstones that already exit when kubexit starts. This avoids a race condition where already dead death dependencies weren't causing kubexit to exit. --- cmd/kubexit/main.go | 23 ++++++++++++----------- pkg/supervisor/supervisor.go | 6 ++---- pkg/tombstone/tombstone.go | 29 +++++++++++++++++++++++++---- 3 files changed, 39 insertions(+), 19 deletions(-) diff --git a/cmd/kubexit/main.go b/cmd/kubexit/main.go index 827cd95..8f2aeff 100644 --- a/cmd/kubexit/main.go +++ b/cmd/kubexit/main.go @@ -126,15 +126,16 @@ func main() { defer stopGraveyardWatcher() log.Println("Watching graveyard...") - err = tombstone.Watch(ctx, graveyard, onDeathOfAny(deathDeps, func() { + err = tombstone.Watch(ctx, graveyard, onDeathOfAny(deathDeps, func() error { stopGraveyardWatcher() // trigger graceful shutdown - // Skipped if not started. - err := child.ShutdownWithTimeout(gracePeriod) + // Error & exit if not started. // ShutdownWithTimeout doesn't block until timeout + err := child.ShutdownWithTimeout(gracePeriod) if err != nil { - log.Printf("Error: failed to shutdown: %v\n", err) + return fmt.Errorf("failed to shutdown: %v", err) } + return nil })) if err != nil { fatalf(child, ts, "Error: failed to watch graveyard: %v\n", err) @@ -314,16 +315,16 @@ func onReadyOfAll(birthDeps []string, callback func()) kubernetes.EventHandler { // onDeathOfAny returns an EventHandler that executes the callback when any of // the deathDeps processes have died. -func onDeathOfAny(deathDeps []string, callback func()) tombstone.EventHandler { +func onDeathOfAny(deathDeps []string, callback func() error) tombstone.EventHandler { deathDepSet := map[string]struct{}{} for _, depName := range deathDeps { deathDepSet[depName] = struct{}{} } - return func(event fsnotify.Event) { + return func(event fsnotify.Event) error { if event.Op&fsnotify.Create != fsnotify.Create && event.Op&fsnotify.Write != fsnotify.Write { // ignore other events - return + return nil } graveyard := filepath.Dir(event.Name) name := filepath.Base(event.Name) @@ -331,23 +332,23 @@ func onDeathOfAny(deathDeps []string, callback func()) tombstone.EventHandler { log.Printf("Tombstone modified: %s\n", name) if _, ok := deathDepSet[name]; !ok { // ignore other tombstones - return + return nil } log.Printf("Reading tombstone: %s\n", name) ts, err := tombstone.Read(graveyard, name) if err != nil { log.Printf("Error: failed to read tombstone: %v\n", err) - return + return nil } if ts.Died == nil { // still alive - return + return nil } log.Printf("New death: %s\n", name) log.Printf("Tombstone(%s): %s\n", name, ts) - callback() + return callback() } } diff --git a/pkg/supervisor/supervisor.go b/pkg/supervisor/supervisor.go index bc8eb81..ce5e97b 100644 --- a/pkg/supervisor/supervisor.go +++ b/pkg/supervisor/supervisor.go @@ -91,8 +91,7 @@ func (s *Supervisor) ShutdownNow() error { defer s.startStopLock.Unlock() if !s.isRunning() { - log.Println("Skipping ShutdownNow: child process not running") - return nil + return errors.New("child process not running") } log.Println("Killing child process...") @@ -110,8 +109,7 @@ func (s *Supervisor) ShutdownWithTimeout(timeout time.Duration) error { defer s.startStopLock.Unlock() if !s.isRunning() { - log.Println("Skipping ShutdownWithTimeout: child process not running") - return nil + return errors.New("child process not running") } if s.shutdownTimer != nil { diff --git a/pkg/tombstone/tombstone.go b/pkg/tombstone/tombstone.go index fd011b6..3bcd2dd 100644 --- a/pkg/tombstone/tombstone.go +++ b/pkg/tombstone/tombstone.go @@ -112,10 +112,10 @@ func Read(graveyard, name string) (*Tombstone, error) { return &t, nil } -type EventHandler func(fsnotify.Event) +type EventHandler func(fsnotify.Event) error // LoggingEventHandler is an example EventHandler that logs fsnotify events -func LoggingEventHandler(event fsnotify.Event) { +func LoggingEventHandler(event fsnotify.Event) error { if event.Op&fsnotify.Create == fsnotify.Create { log.Printf("Tombstone Watch: file created: %s\n", event.Name) } @@ -131,6 +131,7 @@ func LoggingEventHandler(event fsnotify.Event) { if event.Op&fsnotify.Chmod == fsnotify.Chmod { log.Printf("Tombstone Watch: file chmoded: %s\n", event.Name) } + return nil } // Watch a graveyard and call the eventHandler (asyncronously) when an @@ -152,12 +153,15 @@ func Watch(ctx context.Context, graveyard string, eventHandler EventHandler) err if !ok { return } - eventHandler(event) + err := eventHandler(event) + if err != nil { + log.Printf("Tombstone Watch(%s): error handling file system event: %v\n", graveyard, err) + } case err, ok := <-watcher.Errors: if !ok { return } - log.Printf("Tombstone Watch(%s): error: %v\n", graveyard, err) + log.Printf("Tombstone Watch(%s): error from fsnotify: %v\n", graveyard, err) // TODO: wrap ctx with WithCancel and cancel on terminal errors, if any } } @@ -167,5 +171,22 @@ func Watch(ctx context.Context, graveyard string, eventHandler EventHandler) err if err != nil { return fmt.Errorf("failed to add watcher: %v", err) } + + files, err := ioutil.ReadDir(graveyard) + if err != nil { + return fmt.Errorf("failed to read graveyard dir: %v", err) + } + + for _, f := range files { + event := fsnotify.Event{ + Name: filepath.Join(graveyard, f.Name()), + Op: fsnotify.Create, + } + err = eventHandler(event) + if err != nil { + return fmt.Errorf("failed handling existing tombstone: %v", err) + } + } + return nil } From 410339a352dd4fe50e16ddceec2edbe562d38d23 Mon Sep 17 00:00:00 2001 From: Karl Isenberg Date: Tue, 18 Aug 2020 12:14:31 -0700 Subject: [PATCH 02/13] Add slow start test --- .github/workflows/e2e-test.yaml | 18 ++-- .../client-server-slow-start/apply-job.sh | 8 ++ .../client-server-slow-start/await-job.sh | 36 +++++++ .../client-server-slow-start/delete-job.sh | 12 +++ ci/e2e-test/client-server-slow-start/job.yaml | 95 +++++++++++++++++++ .../kustomization.yaml | 11 +++ .../role-binding.yaml | 13 +++ .../client-server-slow-start/role.yaml | 9 ++ .../service-account.yaml | 5 + 9 files changed, 201 insertions(+), 6 deletions(-) create mode 100755 ci/e2e-test/client-server-slow-start/apply-job.sh create mode 100755 ci/e2e-test/client-server-slow-start/await-job.sh create mode 100755 ci/e2e-test/client-server-slow-start/delete-job.sh create mode 100644 ci/e2e-test/client-server-slow-start/job.yaml create mode 100644 ci/e2e-test/client-server-slow-start/kustomization.yaml create mode 100644 ci/e2e-test/client-server-slow-start/role-binding.yaml create mode 100644 ci/e2e-test/client-server-slow-start/role.yaml create mode 100644 ci/e2e-test/client-server-slow-start/service-account.yaml diff --git a/.github/workflows/e2e-test.yaml b/.github/workflows/e2e-test.yaml index 0bf870d..d00b307 100644 --- a/.github/workflows/e2e-test.yaml +++ b/.github/workflows/e2e-test.yaml @@ -53,12 +53,18 @@ jobs: kubectl get pods -n kube-system echo "current-context:" "$(kubectl config current-context)" echo "environment-kubeconfig:" "${KUBECONFIG}" - - name: Apply job - run: ci/e2e-test/client-server/apply-job.sh - - name: Await job - run: ci/e2e-test/client-server/await-job.sh - - name: Delete job - run: ci/e2e-test/client-server/delete-job.sh + - name: Test client-server + run: | + set -o errexit -o nounset -o pipefail + ci/e2e-test/client-server/apply-job.sh + ci/e2e-test/client-server/await-job.sh + ci/e2e-test/client-server/delete-job.sh + - name: Test client-server-slow-start + run: | + set -o errexit -o nounset -o pipefail + ci/e2e-test/client-server-slow-start/apply-job.sh + ci/e2e-test/client-server-slow-start/await-job.sh + ci/e2e-test/client-server-slow-start/delete-job.sh - name: Delete kind cluster if: always() run: kind delete cluster --name e2e-test diff --git a/ci/e2e-test/client-server-slow-start/apply-job.sh b/ci/e2e-test/client-server-slow-start/apply-job.sh new file mode 100755 index 0000000..79f8f81 --- /dev/null +++ b/ci/e2e-test/client-server-slow-start/apply-job.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -o errexit -o nounset -o pipefail -o posix + +cd "$(dirname "${BASH_SOURCE[0]}")" + +kustomize edit set image karlkfi/kubexit=karlkfi/kubexit:latest +kustomize build . | kubectl apply -f - diff --git a/ci/e2e-test/client-server-slow-start/await-job.sh b/ci/e2e-test/client-server-slow-start/await-job.sh new file mode 100755 index 0000000..7cce52e --- /dev/null +++ b/ci/e2e-test/client-server-slow-start/await-job.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +set -o errexit -o nounset -o pipefail -o posix + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd -P)" +cd "${REPO_ROOT}" + +echo "Awaiting Job Init..." +kubectl wait pod --selector=job-name=client-server-job --for=condition=Initialized --timeout=2m + +echo +echo "Awaiting Job Completed or Error..." +timeout=120 +SECONDS=0 +while (( SECONDS < timeout )); do + job_status="$(kubectl get pods --selector=job-name=client-server-job -o jsonpath="{.items[*].status.containerStatuses[*].state.terminated.reason}")" + if [[ "${job_status}" == "Completed Completed" || "${job_status}" == *"Error" ]]; then + echo "Status: ${job_status}" + break + fi + echo "Sleeping 2s..." + sleep 2 +done + +echo +echo "Container Logs:" +kubectl logs --selector=job-name=client-server-job --all-containers --tail=-1 + +echo +echo "Pod Respurces:" +kubectl get pods --selector=job-name=client-server-job -o json | jq '.items[].status' + +echo "Status: ${job_status}" +if [[ "${job_status}" == *"Error" ]]; then + exit 1 +fi diff --git a/ci/e2e-test/client-server-slow-start/delete-job.sh b/ci/e2e-test/client-server-slow-start/delete-job.sh new file mode 100755 index 0000000..cf3f9ec --- /dev/null +++ b/ci/e2e-test/client-server-slow-start/delete-job.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +set -o errexit -o nounset -o pipefail -o posix + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd -P)" +cd "${REPO_ROOT}" + +kubectl delete job client-server-job + +echo +echo "Awaiting Pod Deletion..." +kubectl wait pods --for=delete --selector=job-name=client-server-job --timeout=2m \ No newline at end of file diff --git a/ci/e2e-test/client-server-slow-start/job.yaml b/ci/e2e-test/client-server-slow-start/job.yaml new file mode 100644 index 0000000..6818d6d --- /dev/null +++ b/ci/e2e-test/client-server-slow-start/job.yaml @@ -0,0 +1,95 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: client-server-job +spec: + backoffLimit: 1 + template: + metadata: + labels: + app: client-server-job + spec: + serviceAccountName: client-server-job + restartPolicy: Never + volumes: + - name: graveyard + emptyDir: + medium: Memory + - name: kubexit + emptyDir: {} + + initContainers: + - name: kubexit + image: karlkfi/kubexit:latest + command: ['cp', '/bin/kubexit', '/kubexit/kubexit'] + volumeMounts: + - mountPath: /kubexit + name: kubexit + + containers: + - name: client + image: alpine:3.11 + command: ['sh', '-c'] + args: + - | + set -o errexit -o nounset -o pipefail + apk --no-cache add ca-certificates tzdata curl bash + /kubexit/kubexit curl -v --fail http://localhost:80/ + env: + - name: KUBEXIT_NAME + value: client + - name: KUBEXIT_GRAVEYARD + value: /graveyard + - name: KUBEXIT_BIRTH_DEPS + value: server + - name: KUBEXIT_BIRTH_TIMEOUT + value: "15s" + - name: KUBEXIT_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: KUBEXIT_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + volumeMounts: + - mountPath: /graveyard + name: graveyard + - mountPath: /kubexit + name: kubexit + - name: server + image: nginx:1.17-alpine + command: ['sh', '-c'] + args: + - | + set -o errexit -o nounset -o pipefail + sleep 30 + /kubexit/kubexit nginx -g 'daemon off;' + env: + - name: KUBEXIT_NAME + value: server + - name: KUBEXIT_GRAVEYARD + value: /graveyard + - name: KUBEXIT_DEATH_DEPS + value: client + volumeMounts: + - mountPath: /graveyard + name: graveyard + - mountPath: /kubexit + name: kubexit + livenessProbe: + tcpSocket: + port: 80 + initialDelaySeconds: 30 + periodSeconds: 5 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 6 + readinessProbe: + tcpSocket: + port: 80 + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 6 diff --git a/ci/e2e-test/client-server-slow-start/kustomization.yaml b/ci/e2e-test/client-server-slow-start/kustomization.yaml new file mode 100644 index 0000000..e335f8b --- /dev/null +++ b/ci/e2e-test/client-server-slow-start/kustomization.yaml @@ -0,0 +1,11 @@ +resources: +- service-account.yaml +- role.yaml +- role-binding.yaml +- job.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +images: +- name: karlkfi/kubexit + newName: karlkfi/kubexit + newTag: latest diff --git a/ci/e2e-test/client-server-slow-start/role-binding.yaml b/ci/e2e-test/client-server-slow-start/role-binding.yaml new file mode 100644 index 0000000..57c6998 --- /dev/null +++ b/ci/e2e-test/client-server-slow-start/role-binding.yaml @@ -0,0 +1,13 @@ +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: namespace-viewer + namespace: default +subjects: +- kind: ServiceAccount + name: client-server-job + apiGroup: "" +roleRef: + kind: Role + name: namespace-viewer + apiGroup: rbac.authorization.k8s.io diff --git a/ci/e2e-test/client-server-slow-start/role.yaml b/ci/e2e-test/client-server-slow-start/role.yaml new file mode 100644 index 0000000..ca16153 --- /dev/null +++ b/ci/e2e-test/client-server-slow-start/role.yaml @@ -0,0 +1,9 @@ +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: namespace-viewer + namespace: default +rules: +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "watch", "list"] diff --git a/ci/e2e-test/client-server-slow-start/service-account.yaml b/ci/e2e-test/client-server-slow-start/service-account.yaml new file mode 100644 index 0000000..5ced1d1 --- /dev/null +++ b/ci/e2e-test/client-server-slow-start/service-account.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: client-server-job + namespace: default From 06ed18f7e9af6075c2459e18107b2b0386f9f794 Mon Sep 17 00:00:00 2001 From: Karl Isenberg Date: Thu, 20 Aug 2020 16:33:37 -0700 Subject: [PATCH 03/13] Switch logging to Logrus - Switch to json log formating - Add log timestamps - Add container_name to all log messsages - Add support for Stackdriver severity field - Pass the logger (with "global" fields) through the context --- cmd/kubexit/main.go | 126 +++++++++++++++++------------------ go.mod | 1 + go.sum | 6 ++ pkg/kubernetes/watch.go | 20 ++++-- pkg/log/context.go | 67 +++++++++++++++++++ pkg/supervisor/supervisor.go | 29 +++++--- pkg/tombstone/tombstone.go | 34 +++++----- 7 files changed, 187 insertions(+), 96 deletions(-) create mode 100644 pkg/log/context.go diff --git a/cmd/kubexit/main.go b/cmd/kubexit/main.go index 8f2aeff..3ff7e07 100644 --- a/cmd/kubexit/main.go +++ b/cmd/kubexit/main.go @@ -3,7 +3,6 @@ package main import ( "context" "fmt" - "log" "os" "os/exec" "os/signal" @@ -14,6 +13,7 @@ import ( "github.com/fsnotify/fsnotify" "github.com/karlkfi/kubexit/pkg/kubernetes" + "github.com/karlkfi/kubexit/pkg/log" "github.com/karlkfi/kubexit/pkg/supervisor" "github.com/karlkfi/kubexit/pkg/tombstone" @@ -24,21 +24,23 @@ import ( func main() { var err error - // remove log timestamp - log.SetFlags(log.Flags() &^ (log.Ldate | log.Ltime)) + ctx := log.WithLogger(context.Background(), log.L) args := os.Args[1:] if len(args) == 0 { - log.Println("Error: no arguments found") + log.G(ctx).Error("no arguments found") os.Exit(2) } name := os.Getenv("KUBEXIT_NAME") if name == "" { - log.Println("Error: missing env var: KUBEXIT_NAME") + log.G(ctx).Error("missing env var: KUBEXIT_NAME") os.Exit(2) } - log.Printf("Name: %s\n", name) + + // add field to the context logger to differentiate when pod container logs are intermingled + ctx = log.WithLogger(ctx, log.G(ctx).WithField("container_name", name)) + log.G(ctx).Info("KUBEXIT_NAME parsed") graveyard := os.Getenv("KUBEXIT_GRAVEYARD") if graveyard == "" { @@ -47,85 +49,78 @@ func main() { graveyard = strings.TrimRight(graveyard, "/") graveyard = filepath.Clean(graveyard) } - log.Printf("Graveyard: %s\n", graveyard) - ts := &tombstone.Tombstone{ Graveyard: graveyard, Name: name, } - log.Printf("Tombstone: %s\n", ts.Path()) + log.G(ctx). + WithField("graveyard", graveyard). + WithField("tombstone", ts.Path()). + Info("KUBEXIT_GRAVEYARD parsed") birthDepsStr := os.Getenv("KUBEXIT_BIRTH_DEPS") var birthDeps []string - if birthDepsStr == "" { - log.Println("Birth Deps: N/A") - } else { + if birthDepsStr != "" { birthDeps = strings.Split(birthDepsStr, ",") - log.Printf("Birth Deps: %s\n", strings.Join(birthDeps, ",")) } + log.G(ctx).WithField("birth_deps", birthDeps).Info("KUBEXIT_BIRTH_DEPS parsed") deathDepsStr := os.Getenv("KUBEXIT_DEATH_DEPS") var deathDeps []string - if deathDepsStr == "" { - log.Println("Death Deps: N/A") - } else { + if deathDepsStr != "" { deathDeps = strings.Split(deathDepsStr, ",") - log.Printf("Death Deps: %s\n", strings.Join(deathDeps, ",")) } + log.G(ctx).WithField("death_deps", deathDeps).Info("KUBEXIT_DEATH_DEPS parsed") birthTimeout := 30 * time.Second birthTimeoutStr := os.Getenv("KUBEXIT_BIRTH_TIMEOUT") if birthTimeoutStr != "" { birthTimeout, err = time.ParseDuration(birthTimeoutStr) if err != nil { - log.Printf("Error: failed to parse birth timeout: %v\n", err) + log.G(ctx).Errorf("failed to parse birth timeout: %v\n", err) os.Exit(2) } } - log.Printf("Birth Timeout: %s\n", birthTimeout) + log.G(ctx).WithField("birth_timeout", birthTimeout).Info("KUBEXIT_BIRTH_TIMEOUT parsed") gracePeriod := 30 * time.Second gracePeriodStr := os.Getenv("KUBEXIT_GRACE_PERIOD") if gracePeriodStr != "" { gracePeriod, err = time.ParseDuration(gracePeriodStr) if err != nil { - log.Printf("Error: failed to parse grace period: %v\n", err) + log.G(ctx).Printf("Error: failed to parse grace period: %v\n", err) os.Exit(2) } } - log.Printf("Grace Period: %s\n", gracePeriod) + log.G(ctx).WithField("grace_period", gracePeriod).Info("KUBEXIT_GRACE_PERIOD parsed") podName := os.Getenv("KUBEXIT_POD_NAME") if podName == "" { if len(birthDeps) > 0 { - log.Println("Error: missing env var: KUBEXIT_POD_NAME") + log.G(ctx).Error("missing env var: KUBEXIT_POD_NAME") os.Exit(2) } - log.Println("Pod Name: N/A") - } else { - log.Printf("Pod Name: %s\n", podName) } + log.G(ctx).WithField("pod_name", podName).Info("KUBEXIT_POD_NAME parsed") namespace := os.Getenv("KUBEXIT_NAMESPACE") if namespace == "" { if len(birthDeps) > 0 { - log.Println("Error: missing env var: KUBEXIT_NAMESPACE") + log.G(ctx).Error("missing env var: KUBEXIT_NAMESPACE") os.Exit(2) } - log.Println("Namespace: N/A") - } else { - log.Printf("Namespace: %s\n", namespace) } + log.G(ctx).WithField("namespace", namespace).Info("KUBEXIT_POD_NAME parsed") - child := supervisor.New(args[0], args[1:]...) + child := supervisor.New(ctx, args[0], args[1:]...) // watch for death deps early, so they can interrupt waiting for birth deps if len(deathDeps) > 0 { - ctx, stopGraveyardWatcher := context.WithCancel(context.Background()) + ctx, stopGraveyardWatcher := context.WithCancel(ctx) // stop graveyard watchers on exit, if not sooner defer stopGraveyardWatcher() - log.Println("Watching graveyard...") + log.G(ctx).Info("Watching graveyard...") err = tombstone.Watch(ctx, graveyard, onDeathOfAny(deathDeps, func() error { stopGraveyardWatcher() // trigger graceful shutdown @@ -138,32 +133,32 @@ func main() { return nil })) if err != nil { - fatalf(child, ts, "Error: failed to watch graveyard: %v\n", err) + fatal(ctx, child, ts, fmt.Errorf("failed to watch graveyard: %v", err)) } } if len(birthDeps) > 0 { err = waitForBirthDeps(birthDeps, namespace, podName, birthTimeout) if err != nil { - fatalf(child, ts, "Error: %v\n", err) + fatal(ctx, child, ts, err) } } err = child.Start() if err != nil { - fatalf(child, ts, "Error: %v\n", err) + fatal(ctx, child, ts, err) } - err = ts.RecordBirth() + err = ts.RecordBirth(ctx) if err != nil { - fatalf(child, ts, "Error: %v\n", err) + fatal(ctx, child, ts, err) } - code := waitForChildExit(child) + code := waitForChildExit(ctx, child) - err = ts.RecordDeath(code) + err = ts.RecordDeath(ctx, code) if err != nil { - log.Printf("Error: %v\n", err) + log.G(ctx).Printf("Error: %v\n", err) os.Exit(1) } @@ -178,9 +173,9 @@ func waitForBirthDeps(birthDeps []string, namespace, podName string, timeout tim // Stop pod watcher on exit, if not sooner defer stopPodWatcher() - log.Println("Watching pod updates...") + log.G(ctx).Info("Watching pod updates...") err := kubernetes.WatchPod(ctx, namespace, podName, - onReadyOfAll(birthDeps, stopPodWatcher), + onReadyOfAll(ctx, birthDeps, stopPodWatcher), ) if err != nil { return fmt.Errorf("failed to watch pod: %v", err) @@ -196,7 +191,7 @@ func waitForBirthDeps(birthDeps []string, namespace, podName string, timeout tim return fmt.Errorf("waiting for birth deps to be ready: %v", err) } - log.Printf("All birth deps ready: %v\n", strings.Join(birthDeps, ", ")) + log.G(ctx).Printf("All birth deps ready: %v\n", strings.Join(birthDeps, ", ")) return nil } @@ -215,7 +210,7 @@ func withCancelOnSignal(ctx context.Context, signals ...os.Signal) context.Conte if !ok { return } - log.Printf("Received shutdown signal: %v", s) + log.G(ctx).Printf("Received shutdown signal: %v", s) cancel() case <-ctx.Done(): signal.Reset() @@ -228,7 +223,7 @@ func withCancelOnSignal(ctx context.Context, signals ...os.Signal) context.Conte } // wait for the child to exit and return the exit code -func waitForChildExit(child *supervisor.Supervisor) int { +func waitForChildExit(ctx context.Context, child *supervisor.Supervisor) int { var code int err := child.Wait() if err != nil { @@ -237,35 +232,37 @@ func waitForChildExit(child *supervisor.Supervisor) int { } else { code = -1 } - log.Printf("Child Exited(%d): %v\n", code, err) } else { code = 0 - log.Println("Child Exited(0)") } + log.G(ctx). + WithField("exit_code", code). + WithField("error", err). + Info("child exited") return code } -// fatalf is for terminal errors. +// fatal logs a terminal error and exits. // The child process may or may not be running. -func fatalf(child *supervisor.Supervisor, ts *tombstone.Tombstone, msg string, args ...interface{}) { - log.Printf(msg, args...) +func fatal(ctx context.Context, child *supervisor.Supervisor, ts *tombstone.Tombstone, err error) { + log.G(ctx).Error(err) // Skipped if not started. - err := child.ShutdownNow() + err = child.ShutdownNow() if err != nil { - log.Printf("Error: failed to shutdown child process: %v", err) + log.G(ctx).Errorf("failed to shutdown child process: %v", err) os.Exit(1) } // Wait for shutdown... //TODO: timout in case the process is zombie? - code := waitForChildExit(child) + code := waitForChildExit(ctx, child) // Attempt to record death, if possible. // Another process may be waiting for it. - err = ts.RecordDeath(code) + err = ts.RecordDeath(ctx, code) if err != nil { - log.Printf("Error: %v\n", err) + log.G(ctx).Errorf("failed to record death of child process: %v", err) os.Exit(1) } @@ -274,14 +271,14 @@ func fatalf(child *supervisor.Supervisor, ts *tombstone.Tombstone, msg string, a // onReadyOfAll returns an EventHandler that executes the callback when all of // the birthDeps containers are ready. -func onReadyOfAll(birthDeps []string, callback func()) kubernetes.EventHandler { +func onReadyOfAll(ctx context.Context, birthDeps []string, callback func()) kubernetes.EventHandler { birthDepSet := map[string]struct{}{} for _, depName := range birthDeps { birthDepSet[depName] = struct{}{} } return func(event watch.Event) { - fmt.Printf("Event Type: %v\n", event.Type) + log.G(ctx).WithField("event_type", event.Type).Info("recieved pod watch event") // ignore Deleted (Watch will auto-stop on delete) if event.Type == watch.Deleted { return @@ -289,7 +286,7 @@ func onReadyOfAll(birthDeps []string, callback func()) kubernetes.EventHandler { pod, ok := event.Object.(*corev1.Pod) if !ok { - log.Printf("Error: unexpected non-pod object type: %+v\n", event.Object) + log.G(ctx).WithField("object", event.Object).Warn("recieved unexpected non-pod object type") return } @@ -321,7 +318,7 @@ func onDeathOfAny(deathDeps []string, callback func() error) tombstone.EventHand deathDepSet[depName] = struct{}{} } - return func(event fsnotify.Event) error { + return func(ctx context.Context, event fsnotify.Event) error { if event.Op&fsnotify.Create != fsnotify.Create && event.Op&fsnotify.Write != fsnotify.Write { // ignore other events return nil @@ -329,16 +326,15 @@ func onDeathOfAny(deathDeps []string, callback func() error) tombstone.EventHand graveyard := filepath.Dir(event.Name) name := filepath.Base(event.Name) - log.Printf("Tombstone modified: %s\n", name) + log.G(ctx).WithField("tombstone", name).Info("recieved tombstone watch event") if _, ok := deathDepSet[name]; !ok { // ignore other tombstones return nil } - log.Printf("Reading tombstone: %s\n", name) ts, err := tombstone.Read(graveyard, name) if err != nil { - log.Printf("Error: failed to read tombstone: %v\n", err) + log.G(ctx).WithField("tombstone", name).Errorf("failed to read tombstone: %v", err) return nil } @@ -346,8 +342,10 @@ func onDeathOfAny(deathDeps []string, callback func() error) tombstone.EventHand // still alive return nil } - log.Printf("New death: %s\n", name) - log.Printf("Tombstone(%s): %s\n", name, ts) + log.G(ctx). + WithField("tombstone", name). + WithField("tombstone_content", ts). + Errorf("recieved new death event") return callback() } diff --git a/go.mod b/go.mod index cde1781..67ec0fa 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.14 require ( github.com/fsnotify/fsnotify v1.4.9 + github.com/sirupsen/logrus v1.6.0 k8s.io/api v0.18.2 k8s.io/apimachinery v0.18.2 k8s.io/client-go v0.18.2 diff --git a/go.sum b/go.sum index f3fdec6..266f05d 100644 --- a/go.sum +++ b/go.sum @@ -73,6 +73,7 @@ github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/u github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= @@ -97,11 +98,14 @@ github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1Cpa github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I= +github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= @@ -139,6 +143,7 @@ golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190209173611-3b5209105503/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9 h1:L2auWcuQIvxz9xSEqzESnV/QN/gNRXNApHi3fYwl2w0= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191022100944-742c48ecaeb7 h1:HmbHVPwrPEKPGLAcHSrMe6+hqSUlvZU0rab6x5EXfGU= @@ -186,6 +191,7 @@ k8s.io/api v0.18.2 h1:wG5g5ZmSVgm5B+eHMIbI9EGATS2L8Z72rda19RIEgY8= k8s.io/api v0.18.2/go.mod h1:SJCWI7OLzhZSvbY7U8zwNl9UA4o1fizoug34OV/2r78= k8s.io/apimachinery v0.18.2 h1:44CmtbmkzVDAhCpRVSiP2R5PPrC2RtlIv/MoB8xpdRA= k8s.io/apimachinery v0.18.2/go.mod h1:9SnR/e11v5IbyPCGbvJViimtJ0SwHG4nfZFjU77ftcA= +k8s.io/apimachinery v0.18.8 h1:jimPrycCqgx2QPearX3to1JePz7wSbVLq+7PdBTTwQ0= k8s.io/client-go v0.18.2 h1:aLB0iaD4nmwh7arT2wIn+lMnAq7OswjaejkQ8p9bBYE= k8s.io/client-go v0.18.2/go.mod h1:Xcm5wVGXX9HAA2JJ2sSBUn3tCJ+4SVlCbl2MNNv+CIU= k8s.io/gengo v0.0.0-20190128074634-0689ccc1d7d6/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= diff --git a/pkg/kubernetes/watch.go b/pkg/kubernetes/watch.go index 6b9b2cb..d97daec 100644 --- a/pkg/kubernetes/watch.go +++ b/pkg/kubernetes/watch.go @@ -3,8 +3,8 @@ package kubernetes import ( "context" "fmt" - "log" + "github.com/karlkfi/kubexit/pkg/log" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" @@ -55,17 +55,24 @@ func WatchPod(ctx context.Context, namespace, podName string, eventHandler Event // cancel the provided context when done, so that caller can block on it defer cancel() + log.G(ctx).WithField("pod_name", podName).Info("pod watcher starting...") + // watch until deleted _, err := watchtools.UntilWithSync(ctx, lw, &corev1.Pod{}, nil, func(event watch.Event) (bool, error) { if event.Type == watch.Error { - log.Printf("Pod Watch(%s): recoverable error: %+v\n", podName, event.Object) + log.G(ctx). + WithField("pod_name", podName). + WithField("object", event.Object). + Warn("recieved recoverable error from pod watcher") return false, nil } eventHandler(event) if event.Type == watch.Deleted { - log.Printf("Pod Watch(%s): pod deleted\n", podName) + log.G(ctx). + WithField("pod_name", podName). + Info("pod deleted") return true, nil } return false, nil @@ -74,9 +81,12 @@ func WatchPod(ctx context.Context, namespace, podName string, eventHandler Event // Since cancellation is the only way we exit, just ignore it. if err != nil && err != wait.ErrWaitTimeout { // TODO: should we do something about this?? - log.Printf("Pod Watch(%s): terminal error: %v\n", podName, err) + log.G(ctx). + WithField("pod_name", podName). + WithField("error", err). + Warn("recieved terminal error from pod watcher") } - log.Printf("Pod Watch(%s): done\n", podName) + log.G(ctx).WithField("pod_name", podName).Info("pod watcher stopped") }() return nil diff --git a/pkg/log/context.go b/pkg/log/context.go new file mode 100644 index 0000000..88c8a25 --- /dev/null +++ b/pkg/log/context.go @@ -0,0 +1,67 @@ +package log + +import ( + "context" + "os" + + "github.com/sirupsen/logrus" +) + +var ( + // DefaultLogger is the standard logger with global config. + DefaultLogger *logrus.Logger + + // L is the DefaultLogger with global fields. + L *logrus.Entry +) + +func init() { + // TODO: make configurable + // use "severity" instead of "level" for Stackdriver + DefaultLogger.SetFormatter( + &logrus.JSONFormatter{ + FieldMap: logrus.FieldMap{ + logrus.FieldKeyLevel: "severity", + }, + }, + ) + + // use stdout instead of stderr for Kubernetes + DefaultLogger.SetOutput(os.Stdout) + + // TODO: make configurable + // ignore debug by default + DefaultLogger.SetLevel(logrus.InfoLevel) + + L = logrus.NewEntry(DefaultLogger) +} + +type ( + // loggerKey is globally unique key for storing a logger in a context value + loggerKey struct{} +) + +// RFC3339NanoFixed is time.RFC3339Nano with nanoseconds padded using zeros to +// ensure the formatted time is always the same number of characters. +const RFC3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00" + +// WithLogger returns a new context with the provided logger. Use in +// combination with logger.WithField(s) for great effect. +func WithLogger(ctx context.Context, logger *logrus.Entry) context.Context { + return context.WithValue(ctx, loggerKey{}, logger) +} + +// GetLogger retrieves the current logger from the context. If no logger is +// available, the default logger is returned. +func GetLogger(ctx context.Context) *logrus.Entry { + logger := ctx.Value(loggerKey{}) + + if logger == nil { + return L + } + + return logger.(*logrus.Entry) +} + +// G is a short alias to GetLogger +var G = GetLogger diff --git a/pkg/supervisor/supervisor.go b/pkg/supervisor/supervisor.go index ce5e97b..5d894f2 100644 --- a/pkg/supervisor/supervisor.go +++ b/pkg/supervisor/supervisor.go @@ -2,9 +2,9 @@ package supervisor import ( "bytes" + "context" "errors" "fmt" - "log" "os" "os/exec" "os/signal" @@ -12,6 +12,8 @@ import ( "sync" "syscall" "time" + + "github.com/karlkfi/kubexit/pkg/log" ) type Supervisor struct { @@ -19,9 +21,13 @@ type Supervisor struct { sigCh chan os.Signal startStopLock sync.Mutex shutdownTimer *time.Timer + + // TODO: Supervisor needs a rewrite to support Context cancellation and deadlines. + // For now, this context is only used to extract the logger. + ctx context.Context } -func New(name string, args ...string) *Supervisor { +func New(ctx context.Context, name string, args ...string) *Supervisor { // Don't use CommandContext. // We want the child process to exit on its own so we can return its exit code. // If the child doesn't exit on TERM, then neither should the supervisor. @@ -32,6 +38,7 @@ func New(name string, args ...string) *Supervisor { cmd.Env = os.Environ() return &Supervisor{ cmd: cmd, + ctx: ctx, } } @@ -39,7 +46,7 @@ func (s *Supervisor) Start() error { s.startStopLock.Lock() defer s.startStopLock.Unlock() - log.Printf("Starting: %s\n", s) + log.G(s.ctx).WithField("supervisor", s).Info("starting child process") if err := s.cmd.Start(); err != nil { return fmt.Errorf("failed to start child process: %v", err) } @@ -56,7 +63,7 @@ func (s *Supervisor) Start() error { } // log everything but "urgent I/O condition", which gets noisy if sig != syscall.SIGURG { - log.Printf("Received signal: %v\n", sig) + log.G(s.ctx).Printf("Received signal: %v\n", sig) } // ignore "child exited" signal if sig == syscall.SIGCHLD { @@ -64,7 +71,7 @@ func (s *Supervisor) Start() error { } err := s.cmd.Process.Signal(sig) if err != nil { - log.Printf("Signal propegation failed: %v\n", err) + log.G(s.ctx).Printf("Signal propegation failed: %v\n", err) } } }() @@ -82,7 +89,7 @@ func (s *Supervisor) Wait() error { s.shutdownTimer.Stop() } }() - log.Println("Waiting for child process to exit...") + log.G(s.ctx).Info("Waiting for child process to exit...") return s.cmd.Wait() } @@ -94,7 +101,7 @@ func (s *Supervisor) ShutdownNow() error { return errors.New("child process not running") } - log.Println("Killing child process...") + log.G(s.ctx).Println("Killing child process...") // TODO: Use Process.Kill() instead? // Sending Interrupt on Windows is not implemented. err := s.cmd.Process.Signal(syscall.SIGKILL) @@ -116,18 +123,20 @@ func (s *Supervisor) ShutdownWithTimeout(timeout time.Duration) error { return errors.New("shutdown already started") } - log.Println("Terminating child process...") + log.G(s.ctx).Info("Terminating child process...") err := s.cmd.Process.Signal(syscall.SIGTERM) if err != nil { return fmt.Errorf("failed to terminate child process: %v", err) } s.shutdownTimer = time.AfterFunc(timeout, func() { - log.Printf("Timeout elapsed: %s\n", timeout) + log.G(s.ctx).WithField("timeout", timeout).Info("shutdown timeout elapsed") err := s.ShutdownNow() if err != nil { // TODO: ignorable? - log.Printf("Failed after timeout: %v\n", err) + log.G(s.ctx). + WithField("timeout", timeout). + Error(fmt.Errorf("shutdown errored after timeout: %v", err)) } }) diff --git a/pkg/tombstone/tombstone.go b/pkg/tombstone/tombstone.go index 3bcd2dd..5e56a0f 100644 --- a/pkg/tombstone/tombstone.go +++ b/pkg/tombstone/tombstone.go @@ -5,13 +5,13 @@ import ( "encoding/json" "fmt" "io/ioutil" - "log" "os" "path/filepath" "sync" "time" "github.com/fsnotify/fsnotify" + "github.com/karlkfi/kubexit/pkg/log" "sigs.k8s.io/yaml" ) @@ -57,11 +57,11 @@ func (t *Tombstone) Write() error { return nil } -func (t *Tombstone) RecordBirth() error { +func (t *Tombstone) RecordBirth(ctx context.Context) error { born := time.Now() t.Born = &born - log.Printf("Creating tombstone: %s\n", t.Path()) + log.G(ctx).Printf("Creating tombstone: %s\n", t.Path()) err := t.Write() if err != nil { return fmt.Errorf("failed to create tombstone: %v", err) @@ -69,13 +69,13 @@ func (t *Tombstone) RecordBirth() error { return nil } -func (t *Tombstone) RecordDeath(exitCode int) error { +func (t *Tombstone) RecordDeath(ctx context.Context, exitCode int) error { code := exitCode died := time.Now() t.Died = &died t.ExitCode = &code - log.Printf("Updating tombstone: %s\n", t.Path()) + log.G(ctx).Printf("Updating tombstone: %s\n", t.Path()) err := t.Write() if err != nil { return fmt.Errorf("failed to update tombstone: %v", err) @@ -112,24 +112,24 @@ func Read(graveyard, name string) (*Tombstone, error) { return &t, nil } -type EventHandler func(fsnotify.Event) error +type EventHandler func(context.Context, fsnotify.Event) error // LoggingEventHandler is an example EventHandler that logs fsnotify events -func LoggingEventHandler(event fsnotify.Event) error { +func LoggingEventHandler(ctx context.Context, event fsnotify.Event) error { if event.Op&fsnotify.Create == fsnotify.Create { - log.Printf("Tombstone Watch: file created: %s\n", event.Name) + log.G(ctx).Printf("Tombstone Watch: file created: %s\n", event.Name) } if event.Op&fsnotify.Remove == fsnotify.Remove { - log.Printf("Tombstone Watch: file removed: %s\n", event.Name) + log.G(ctx).Printf("Tombstone Watch: file removed: %s\n", event.Name) } if event.Op&fsnotify.Write == fsnotify.Write { - log.Printf("Tombstone Watch: file modified: %s\n", event.Name) + log.G(ctx).Printf("Tombstone Watch: file modified: %s\n", event.Name) } if event.Op&fsnotify.Rename == fsnotify.Rename { - log.Printf("Tombstone Watch: file renamed: %s\n", event.Name) + log.G(ctx).Printf("Tombstone Watch: file renamed: %s\n", event.Name) } if event.Op&fsnotify.Chmod == fsnotify.Chmod { - log.Printf("Tombstone Watch: file chmoded: %s\n", event.Name) + log.G(ctx).Printf("Tombstone Watch: file chmoded: %s\n", event.Name) } return nil } @@ -147,21 +147,21 @@ func Watch(ctx context.Context, graveyard string, eventHandler EventHandler) err for { select { case <-ctx.Done(): - log.Printf("Tombstone Watch(%s): done\n", graveyard) + log.G(ctx).Printf("Tombstone Watch(%s): done\n", graveyard) return case event, ok := <-watcher.Events: if !ok { return } - err := eventHandler(event) + err := eventHandler(ctx, event) if err != nil { - log.Printf("Tombstone Watch(%s): error handling file system event: %v\n", graveyard, err) + log.G(ctx).Printf("Tombstone Watch(%s): error handling file system event: %v\n", graveyard, err) } case err, ok := <-watcher.Errors: if !ok { return } - log.Printf("Tombstone Watch(%s): error from fsnotify: %v\n", graveyard, err) + log.G(ctx).Printf("Tombstone Watch(%s): error from fsnotify: %v\n", graveyard, err) // TODO: wrap ctx with WithCancel and cancel on terminal errors, if any } } @@ -182,7 +182,7 @@ func Watch(ctx context.Context, graveyard string, eventHandler EventHandler) err Name: filepath.Join(graveyard, f.Name()), Op: fsnotify.Create, } - err = eventHandler(event) + err = eventHandler(ctx, event) if err != nil { return fmt.Errorf("failed handling existing tombstone: %v", err) } From e02f8259e2fffdb058bf8ba2f201809fd716d8d7 Mon Sep 17 00:00:00 2001 From: Karl Isenberg Date: Thu, 20 Aug 2020 16:52:11 -0700 Subject: [PATCH 04/13] Fix logs --- cmd/kubexit/main.go | 12 ++++++------ pkg/supervisor/supervisor.go | 4 ++-- pkg/tombstone/tombstone.go | 37 +++++++++++++++++------------------- 3 files changed, 25 insertions(+), 28 deletions(-) diff --git a/cmd/kubexit/main.go b/cmd/kubexit/main.go index 3ff7e07..1ed89d6 100644 --- a/cmd/kubexit/main.go +++ b/cmd/kubexit/main.go @@ -77,7 +77,7 @@ func main() { if birthTimeoutStr != "" { birthTimeout, err = time.ParseDuration(birthTimeoutStr) if err != nil { - log.G(ctx).Errorf("failed to parse birth timeout: %v\n", err) + log.G(ctx).Errorf("failed to parse birth timeout: %v", err) os.Exit(2) } } @@ -88,7 +88,7 @@ func main() { if gracePeriodStr != "" { gracePeriod, err = time.ParseDuration(gracePeriodStr) if err != nil { - log.G(ctx).Printf("Error: failed to parse grace period: %v\n", err) + log.G(ctx).Errorf("failed to parse grace period: %v", err) os.Exit(2) } } @@ -158,7 +158,7 @@ func main() { err = ts.RecordDeath(ctx, code) if err != nil { - log.G(ctx).Printf("Error: %v\n", err) + log.G(ctx).Error(err) os.Exit(1) } @@ -173,7 +173,7 @@ func waitForBirthDeps(birthDeps []string, namespace, podName string, timeout tim // Stop pod watcher on exit, if not sooner defer stopPodWatcher() - log.G(ctx).Info("Watching pod updates...") + log.G(ctx).Info("watching pod updates...") err := kubernetes.WatchPod(ctx, namespace, podName, onReadyOfAll(ctx, birthDeps, stopPodWatcher), ) @@ -191,7 +191,7 @@ func waitForBirthDeps(birthDeps []string, namespace, podName string, timeout tim return fmt.Errorf("waiting for birth deps to be ready: %v", err) } - log.G(ctx).Printf("All birth deps ready: %v\n", strings.Join(birthDeps, ", ")) + log.G(ctx).WithField("birth_deps", birthDeps).Info("all birth deps ready") return nil } @@ -210,7 +210,7 @@ func withCancelOnSignal(ctx context.Context, signals ...os.Signal) context.Conte if !ok { return } - log.G(ctx).Printf("Received shutdown signal: %v", s) + log.G(ctx).WithField("signal", s).Info("received shutdown signal") cancel() case <-ctx.Done(): signal.Reset() diff --git a/pkg/supervisor/supervisor.go b/pkg/supervisor/supervisor.go index 5d894f2..0180099 100644 --- a/pkg/supervisor/supervisor.go +++ b/pkg/supervisor/supervisor.go @@ -63,7 +63,7 @@ func (s *Supervisor) Start() error { } // log everything but "urgent I/O condition", which gets noisy if sig != syscall.SIGURG { - log.G(s.ctx).Printf("Received signal: %v\n", sig) + log.G(s.ctx).WithField("signal", sig).Info("received signal") } // ignore "child exited" signal if sig == syscall.SIGCHLD { @@ -71,7 +71,7 @@ func (s *Supervisor) Start() error { } err := s.cmd.Process.Signal(sig) if err != nil { - log.G(s.ctx).Printf("Signal propegation failed: %v\n", err) + log.G(s.ctx).WithField("signal", sig).Errorf("signal propegation failed: %v", err) } } }() diff --git a/pkg/tombstone/tombstone.go b/pkg/tombstone/tombstone.go index 5e56a0f..f894af7 100644 --- a/pkg/tombstone/tombstone.go +++ b/pkg/tombstone/tombstone.go @@ -61,7 +61,9 @@ func (t *Tombstone) RecordBirth(ctx context.Context) error { born := time.Now() t.Born = &born - log.G(ctx).Printf("Creating tombstone: %s\n", t.Path()) + log.G(ctx). + WithField("tombstone", t.Path()). + Info("creating tombstone...") err := t.Write() if err != nil { return fmt.Errorf("failed to create tombstone: %v", err) @@ -75,7 +77,9 @@ func (t *Tombstone) RecordDeath(ctx context.Context, exitCode int) error { t.Died = &died t.ExitCode = &code - log.G(ctx).Printf("Updating tombstone: %s\n", t.Path()) + log.G(ctx). + WithField("tombstone", t.Path()). + Info("updating tombstone...") err := t.Write() if err != nil { return fmt.Errorf("failed to update tombstone: %v", err) @@ -116,21 +120,7 @@ type EventHandler func(context.Context, fsnotify.Event) error // LoggingEventHandler is an example EventHandler that logs fsnotify events func LoggingEventHandler(ctx context.Context, event fsnotify.Event) error { - if event.Op&fsnotify.Create == fsnotify.Create { - log.G(ctx).Printf("Tombstone Watch: file created: %s\n", event.Name) - } - if event.Op&fsnotify.Remove == fsnotify.Remove { - log.G(ctx).Printf("Tombstone Watch: file removed: %s\n", event.Name) - } - if event.Op&fsnotify.Write == fsnotify.Write { - log.G(ctx).Printf("Tombstone Watch: file modified: %s\n", event.Name) - } - if event.Op&fsnotify.Rename == fsnotify.Rename { - log.G(ctx).Printf("Tombstone Watch: file renamed: %s\n", event.Name) - } - if event.Op&fsnotify.Chmod == fsnotify.Chmod { - log.G(ctx).Printf("Tombstone Watch: file chmoded: %s\n", event.Name) - } + log.G(ctx).WithField("event_name", event.Name).Info("recieved tombstone watch event") return nil } @@ -147,7 +137,9 @@ func Watch(ctx context.Context, graveyard string, eventHandler EventHandler) err for { select { case <-ctx.Done(): - log.G(ctx).Printf("Tombstone Watch(%s): done\n", graveyard) + log.G(ctx). + WithField("graveyard", graveyard). + Info("tombstone watcher stopped") return case event, ok := <-watcher.Events: if !ok { @@ -155,13 +147,18 @@ func Watch(ctx context.Context, graveyard string, eventHandler EventHandler) err } err := eventHandler(ctx, event) if err != nil { - log.G(ctx).Printf("Tombstone Watch(%s): error handling file system event: %v\n", graveyard, err) + log.G(ctx). + WithField("event_name", event.Name). + WithField("graveyard", graveyard). + Warn(fmt.Errorf("error handling file system event: %v", err)) } case err, ok := <-watcher.Errors: if !ok { return } - log.G(ctx).Printf("Tombstone Watch(%s): error from fsnotify: %v\n", graveyard, err) + log.G(ctx). + WithField("graveyard", graveyard). + Warn(fmt.Errorf("error handling file system event: %v", err)) // TODO: wrap ctx with WithCancel and cancel on terminal errors, if any } } From 65940bf9b5f01e8584bae7a11182d6735c9162d1 Mon Sep 17 00:00:00 2001 From: Karl Isenberg Date: Thu, 20 Aug 2020 16:57:24 -0700 Subject: [PATCH 05/13] Fix log from rebase --- pkg/tombstone/tombstone.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/tombstone/tombstone.go b/pkg/tombstone/tombstone.go index f894af7..ef89c78 100644 --- a/pkg/tombstone/tombstone.go +++ b/pkg/tombstone/tombstone.go @@ -90,7 +90,7 @@ func (t *Tombstone) RecordDeath(ctx context.Context, exitCode int) error { func (t *Tombstone) String() string { inline, err := json.Marshal(t) if err != nil { - log.Printf("Error: failed to marshal tombstone as json: %v\n", err) + log.L.Errorf("failed to marshal tombstone as json: %v", err) return "{}" } return string(inline) From cc32d9b8ebdd904687fe66cc9c3f6f664d63d3c0 Mon Sep 17 00:00:00 2001 From: Karl Isenberg Date: Thu, 20 Aug 2020 17:12:48 -0700 Subject: [PATCH 06/13] Add goimports to lint test --- .github/workflows/e2e-test.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/e2e-test.yaml b/.github/workflows/e2e-test.yaml index d00b307..1235106 100644 --- a/.github/workflows/e2e-test.yaml +++ b/.github/workflows/e2e-test.yaml @@ -10,6 +10,15 @@ jobs: lint: runs-on: ubuntu-latest steps: + - name: Install Go + # https://github.com/mvdan/github-actions-golang + uses: actions/setup-go@v2 + with: + go-version: "1.15.x" + - name: Install goimports + run: | + GO111MODULE=off go get golang.org/x/tools/cmd/goimports + echo "::add-path::$(go env GOPATH)/bin" - name: Checkout uses: actions/checkout@v2 - name: Lint Go Code From dee86b8e2965153e9204c7c8b3988eed080789da Mon Sep 17 00:00:00 2001 From: Karl Isenberg Date: Thu, 20 Aug 2020 17:35:50 -0700 Subject: [PATCH 07/13] Use explicit git short sha image tag --- .github/workflows/e2e-test.yaml | 1 + ci/e2e-test/client-server-slow-start/apply-job.sh | 8 +++++++- ci/e2e-test/client-server/apply-job.sh | 8 +++++++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.github/workflows/e2e-test.yaml b/.github/workflows/e2e-test.yaml index 1235106..c951a16 100644 --- a/.github/workflows/e2e-test.yaml +++ b/.github/workflows/e2e-test.yaml @@ -34,6 +34,7 @@ jobs: uses: docker/build-push-action@v1 with: repository: karlkfi/kubexit + tag_with_sha: true # Docker Automated Builds handle publishing push: false - name: Setup kube tools diff --git a/ci/e2e-test/client-server-slow-start/apply-job.sh b/ci/e2e-test/client-server-slow-start/apply-job.sh index 79f8f81..0fd6cb3 100755 --- a/ci/e2e-test/client-server-slow-start/apply-job.sh +++ b/ci/e2e-test/client-server-slow-start/apply-job.sh @@ -4,5 +4,11 @@ set -o errexit -o nounset -o pipefail -o posix cd "$(dirname "${BASH_SOURCE[0]}")" -kustomize edit set image karlkfi/kubexit=karlkfi/kubexit:latest +IMAGE_TAG=latest +if [[ -n "${GITHUB_SHA:-}" ]]; then + IMAGE_TAG="sha-${GITHUB_SHA:0:7}" +fi + +echo "Image: karlkfi/kubexit:${IMAGE_TAG}" +kustomize edit set image "karlkfi/kubexit=karlkfi/kubexit:${IMAGE_TAG}" kustomize build . | kubectl apply -f - diff --git a/ci/e2e-test/client-server/apply-job.sh b/ci/e2e-test/client-server/apply-job.sh index 79f8f81..0fd6cb3 100755 --- a/ci/e2e-test/client-server/apply-job.sh +++ b/ci/e2e-test/client-server/apply-job.sh @@ -4,5 +4,11 @@ set -o errexit -o nounset -o pipefail -o posix cd "$(dirname "${BASH_SOURCE[0]}")" -kustomize edit set image karlkfi/kubexit=karlkfi/kubexit:latest +IMAGE_TAG=latest +if [[ -n "${GITHUB_SHA:-}" ]]; then + IMAGE_TAG="sha-${GITHUB_SHA:0:7}" +fi + +echo "Image: karlkfi/kubexit:${IMAGE_TAG}" +kustomize edit set image "karlkfi/kubexit=karlkfi/kubexit:${IMAGE_TAG}" kustomize build . | kubectl apply -f - From b7e3f1b6354229a275f11aeb7531bfd48e5ba119 Mon Sep 17 00:00:00 2001 From: Karl Isenberg Date: Thu, 20 Aug 2020 17:48:06 -0700 Subject: [PATCH 08/13] Load docker image into KinD --- .github/workflows/e2e-test.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/e2e-test.yaml b/.github/workflows/e2e-test.yaml index c951a16..d5f4ad5 100644 --- a/.github/workflows/e2e-test.yaml +++ b/.github/workflows/e2e-test.yaml @@ -63,6 +63,8 @@ jobs: kubectl get pods -n kube-system echo "current-context:" "$(kubectl config current-context)" echo "environment-kubeconfig:" "${KUBECONFIG}" + - name: Load Image + run: kind load docker-image "karlkfi/kubexit:${GITHUB_SHA:0:7}" --name e2e-test - name: Test client-server run: | set -o errexit -o nounset -o pipefail From 1e76a641695c12fa8cf6214be4821d05858b7f65 Mon Sep 17 00:00:00 2001 From: Karl Isenberg Date: Thu, 20 Aug 2020 17:53:05 -0700 Subject: [PATCH 09/13] short sha in docker tag is prefixed with "sha-" --- .github/workflows/e2e-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/e2e-test.yaml b/.github/workflows/e2e-test.yaml index d5f4ad5..fe82b0c 100644 --- a/.github/workflows/e2e-test.yaml +++ b/.github/workflows/e2e-test.yaml @@ -64,7 +64,7 @@ jobs: echo "current-context:" "$(kubectl config current-context)" echo "environment-kubeconfig:" "${KUBECONFIG}" - name: Load Image - run: kind load docker-image "karlkfi/kubexit:${GITHUB_SHA:0:7}" --name e2e-test + run: kind load docker-image "karlkfi/kubexit:sha-${GITHUB_SHA:0:7}" --name e2e-test - name: Test client-server run: | set -o errexit -o nounset -o pipefail From 0524188aeb93caacd55fa5d4ca91397b097f6c79 Mon Sep 17 00:00:00 2001 From: Karl Isenberg Date: Thu, 20 Aug 2020 20:01:41 -0700 Subject: [PATCH 10/13] Initialize DefaultLogger --- pkg/log/context.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/log/context.go b/pkg/log/context.go index 88c8a25..db0d4a6 100644 --- a/pkg/log/context.go +++ b/pkg/log/context.go @@ -16,6 +16,8 @@ var ( ) func init() { + DefaultLogger = logrus.StandardLogger() + // TODO: make configurable // use "severity" instead of "level" for Stackdriver DefaultLogger.SetFormatter( From 810a2c1530468ebf8d7dc1caac60b849c6c90530 Mon Sep 17 00:00:00 2001 From: Karl Isenberg Date: Thu, 20 Aug 2020 20:13:53 -0700 Subject: [PATCH 11/13] Pass context through waitForBirthDeps --- cmd/kubexit/main.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/kubexit/main.go b/cmd/kubexit/main.go index 1ed89d6..a5a52f1 100644 --- a/cmd/kubexit/main.go +++ b/cmd/kubexit/main.go @@ -138,7 +138,7 @@ func main() { } if len(birthDeps) > 0 { - err = waitForBirthDeps(birthDeps, namespace, podName, birthTimeout) + err = waitForBirthDeps(ctx, birthDeps, namespace, podName, birthTimeout) if err != nil { fatal(ctx, child, ts, err) } @@ -165,9 +165,9 @@ func main() { os.Exit(code) } -func waitForBirthDeps(birthDeps []string, namespace, podName string, timeout time.Duration) error { +func waitForBirthDeps(ctx context.Context, birthDeps []string, namespace, podName string, timeout time.Duration) error { // Cancel context on SIGTERM to trigger graceful exit - ctx := withCancelOnSignal(context.Background(), syscall.SIGTERM) + ctx = withCancelOnSignal(ctx, syscall.SIGTERM) ctx, stopPodWatcher := context.WithTimeout(ctx, timeout) // Stop pod watcher on exit, if not sooner From c5e3f157e27f6644ee21576f340456ab49b1bfb2 Mon Sep 17 00:00:00 2001 From: Karl Isenberg Date: Thu, 3 Sep 2020 11:58:23 -0700 Subject: [PATCH 12/13] Extract test-client docker image --- .github/workflows/e2e-test.yaml | 8 +++++++- ci/e2e-test/assets/test-client/Dockerfile | 3 +++ ci/e2e-test/client-server-slow-start/apply-job.sh | 1 + ci/e2e-test/client-server-slow-start/job.yaml | 3 +-- ci/e2e-test/client-server-slow-start/kustomization.yaml | 3 +++ ci/e2e-test/client-server/apply-job.sh | 1 + ci/e2e-test/client-server/job.yaml | 3 +-- ci/e2e-test/client-server/kustomization.yaml | 3 +++ 8 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 ci/e2e-test/assets/test-client/Dockerfile diff --git a/.github/workflows/e2e-test.yaml b/.github/workflows/e2e-test.yaml index fe82b0c..8b42832 100644 --- a/.github/workflows/e2e-test.yaml +++ b/.github/workflows/e2e-test.yaml @@ -63,8 +63,14 @@ jobs: kubectl get pods -n kube-system echo "current-context:" "$(kubectl config current-context)" echo "environment-kubeconfig:" "${KUBECONFIG}" - - name: Load Image + - name: Load kubexit Image run: kind load docker-image "karlkfi/kubexit:sha-${GITHUB_SHA:0:7}" --name e2e-test + - name: Build & Load test-client Image + run: | + set -o errexit -o nounset -o pipefail + IMAGE_TAG="karlkfi/test-client:sha-${GITHUB_SHA:0:7}" + docker build --tag "${IMAGE_TAG}" ci/e2e-test/assets/test-client/ + kind load docker-image "${IMAGE_TAG}" --name e2e-test - name: Test client-server run: | set -o errexit -o nounset -o pipefail diff --git a/ci/e2e-test/assets/test-client/Dockerfile b/ci/e2e-test/assets/test-client/Dockerfile new file mode 100644 index 0000000..7153c71 --- /dev/null +++ b/ci/e2e-test/assets/test-client/Dockerfile @@ -0,0 +1,3 @@ +FROM alpine:3.11 +RUN apk --no-cache add ca-certificates tzdata curl bash +ENTRYPOINT ["curl"] \ No newline at end of file diff --git a/ci/e2e-test/client-server-slow-start/apply-job.sh b/ci/e2e-test/client-server-slow-start/apply-job.sh index 0fd6cb3..2e9a30d 100755 --- a/ci/e2e-test/client-server-slow-start/apply-job.sh +++ b/ci/e2e-test/client-server-slow-start/apply-job.sh @@ -11,4 +11,5 @@ fi echo "Image: karlkfi/kubexit:${IMAGE_TAG}" kustomize edit set image "karlkfi/kubexit=karlkfi/kubexit:${IMAGE_TAG}" +kustomize edit set image "karlkfi/test-client=karlkfi/test-client:${IMAGE_TAG}" kustomize build . | kubectl apply -f - diff --git a/ci/e2e-test/client-server-slow-start/job.yaml b/ci/e2e-test/client-server-slow-start/job.yaml index 6818d6d..fdfc3cd 100644 --- a/ci/e2e-test/client-server-slow-start/job.yaml +++ b/ci/e2e-test/client-server-slow-start/job.yaml @@ -28,12 +28,11 @@ spec: containers: - name: client - image: alpine:3.11 + image: karlkfi/test-client:latest command: ['sh', '-c'] args: - | set -o errexit -o nounset -o pipefail - apk --no-cache add ca-certificates tzdata curl bash /kubexit/kubexit curl -v --fail http://localhost:80/ env: - name: KUBEXIT_NAME diff --git a/ci/e2e-test/client-server-slow-start/kustomization.yaml b/ci/e2e-test/client-server-slow-start/kustomization.yaml index e335f8b..e257098 100644 --- a/ci/e2e-test/client-server-slow-start/kustomization.yaml +++ b/ci/e2e-test/client-server-slow-start/kustomization.yaml @@ -9,3 +9,6 @@ images: - name: karlkfi/kubexit newName: karlkfi/kubexit newTag: latest +- name: karlkfi/test-client + newName: karlkfi/test-client + newTag: latest diff --git a/ci/e2e-test/client-server/apply-job.sh b/ci/e2e-test/client-server/apply-job.sh index 0fd6cb3..2e9a30d 100755 --- a/ci/e2e-test/client-server/apply-job.sh +++ b/ci/e2e-test/client-server/apply-job.sh @@ -11,4 +11,5 @@ fi echo "Image: karlkfi/kubexit:${IMAGE_TAG}" kustomize edit set image "karlkfi/kubexit=karlkfi/kubexit:${IMAGE_TAG}" +kustomize edit set image "karlkfi/test-client=karlkfi/test-client:${IMAGE_TAG}" kustomize build . | kubectl apply -f - diff --git a/ci/e2e-test/client-server/job.yaml b/ci/e2e-test/client-server/job.yaml index 2e83197..c84a3d7 100644 --- a/ci/e2e-test/client-server/job.yaml +++ b/ci/e2e-test/client-server/job.yaml @@ -28,12 +28,11 @@ spec: containers: - name: client - image: alpine:3.11 + image: karlkfi/test-client:latest command: ['sh', '-c'] args: - | set -o errexit -o nounset -o pipefail - apk --no-cache add ca-certificates tzdata curl bash /kubexit/kubexit curl -v --fail http://localhost:80/ env: - name: KUBEXIT_NAME diff --git a/ci/e2e-test/client-server/kustomization.yaml b/ci/e2e-test/client-server/kustomization.yaml index e335f8b..e257098 100644 --- a/ci/e2e-test/client-server/kustomization.yaml +++ b/ci/e2e-test/client-server/kustomization.yaml @@ -9,3 +9,6 @@ images: - name: karlkfi/kubexit newName: karlkfi/kubexit newTag: latest +- name: karlkfi/test-client + newName: karlkfi/test-client + newTag: latest From a6d4001822b628d1d6cc6b755a99d752a075f4aa Mon Sep 17 00:00:00 2001 From: Karl Isenberg Date: Thu, 3 Sep 2020 12:10:45 -0700 Subject: [PATCH 13/13] Expect job to exit when kubexit errors on birth timeout --- ci/e2e-test/client-server-slow-start/await-job.sh | 3 ++- ci/e2e-test/client-server/await-job.sh | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/e2e-test/client-server-slow-start/await-job.sh b/ci/e2e-test/client-server-slow-start/await-job.sh index 7cce52e..274c53c 100755 --- a/ci/e2e-test/client-server-slow-start/await-job.sh +++ b/ci/e2e-test/client-server-slow-start/await-job.sh @@ -31,6 +31,7 @@ echo "Pod Respurces:" kubectl get pods --selector=job-name=client-server-job -o json | jq '.items[].status' echo "Status: ${job_status}" -if [[ "${job_status}" == *"Error" ]]; then +if [[ "${job_status}" != *"Error" ]]; then + echo "Expected: Error" exit 1 fi diff --git a/ci/e2e-test/client-server/await-job.sh b/ci/e2e-test/client-server/await-job.sh index 7cce52e..15d457f 100755 --- a/ci/e2e-test/client-server/await-job.sh +++ b/ci/e2e-test/client-server/await-job.sh @@ -32,5 +32,6 @@ kubectl get pods --selector=job-name=client-server-job -o json | jq '.items[].st echo "Status: ${job_status}" if [[ "${job_status}" == *"Error" ]]; then + echo "Expected: Completed Completed" exit 1 fi