Skip to content
This repository has been archived by the owner on May 6, 2021. It is now read-only.

[WIP] Fixes #258 Reset the pods if Jenkins fails to wake up #261

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 103 additions & 6 deletions internal/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@ import (
"github.com/fabric8-services/fabric8-jenkins-idler/metric"
"github.com/julienschmidt/httprouter"
log "github.com/sirupsen/logrus"
"k8s.io/api/core/v1"
)

const (
// OpenShiftAPIParam is the parameter name under which the OpenShift cluster API URL is passed using
// Idle, UnIdle and IsIdle.
OpenShiftAPIParam = "openshift_api_url"
// PodResetRetryLimit is the retry count at which resetServicePods gives up and reports failure.
PodResetRetryLimit = 5
// PodRetryInterval is the number of seconds resetServicePods sleeps before each pod state check.
PodRetryInterval = 60
)

var (
Expand Down Expand Up @@ -57,7 +60,7 @@ type IdlerAPI interface {
// ClusterDNSView writes a JSON representation of the current cluster state to the response writer.
ClusterDNSView(w http.ResponseWriter, r *http.Request, ps httprouter.Params)

// Reset deletes a pod and starts a new one
// ResetNSPods deletes a pod and starts a new one
Reset(w http.ResponseWriter, r *http.Request, ps httprouter.Params)
}

Expand Down Expand Up @@ -151,18 +154,17 @@ func (api *idler) UnIdle(w http.ResponseWriter, r *http.Request, ps httprouter.P
return
}

// unidle now
for _, service := range pidler.JenkinsServices {
startTime := time.Now()

err = api.openShiftClient.UnIdle(openshiftURL, openshiftToken, ns, service)
elapsedTime := time.Since(startTime).Seconds()
if err != nil {
Recorder.RecordReqDuration(service, "UnIdle", http.StatusInternalServerError, elapsedTime)
respondWithError(w, http.StatusInternalServerError, err)
return
}

// Best effort: in the background, reset the service's pods if they fail to start after un-idling.
go resetServicePods(api.openShiftClient, openshiftURL, openshiftToken, ns, service, 0);
Recorder.RecordReqDuration(service, "UnIdle", http.StatusOK, elapsedTime)
}

Expand Down Expand Up @@ -221,7 +223,7 @@ func (api *idler) Info(w http.ResponseWriter, r *http.Request, ps httprouter.Par

func (api *idler) Reset(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {

logger := log.WithFields(log.Fields{"component": "api", "function": "Reset"})
logger := log.WithFields(log.Fields{"component": "api", "function": "ResetNSPods"})

openShiftAPI, openShiftBearerToken, err := api.getURLAndToken(r)
if err != nil {
Expand All @@ -231,7 +233,7 @@ func (api *idler) Reset(w http.ResponseWriter, r *http.Request, ps httprouter.Pa
return
}

err = api.openShiftClient.Reset(openShiftAPI, openShiftBearerToken, ps.ByName("namespace"))
err = api.openShiftClient.ResetNSPods(openShiftAPI, openShiftBearerToken, ps.ByName("namespace"))
if err != nil {
logger.Error(err)
w.WriteHeader(http.StatusInternalServerError)
Expand Down Expand Up @@ -340,3 +342,98 @@ func writeResponse(w http.ResponseWriter, status int, response any) {
w.WriteHeader(status)
json.NewEncoder(w).Encode(response)
}

// resetServicePods watches the pods of the given service and deletes a pod
// that failed to start, so that it can be recreated.
//
// Every round sleeps for PodRetryInterval seconds and then fetches the pod
// states through client.PodState. A round in which a pod is still initiating
// does not consume a retry; a round in which a failed pod was found (and its
// deletion was attempted) consumes one. retry is the number of retries already
// used.
//
// It returns true once no inspected pod is initiating or failed, and false
// when the pod states cannot be fetched or PodResetRetryLimit retries have
// been used.
//
// NOTE(review): a pod that never leaves the initiating state keeps this
// function polling forever; consider bounding the wait.
// NOTE: this helper and the pod status predicates might be better placed in a
// file of their own.
func resetServicePods(client client.OpenShiftClient, url string, token string, ns string, service string, retry int) bool {
	// A loop instead of self-recursion: the stack no longer grows with every
	// polling round.
	for retry < PodResetRetryLimit {
		time.Sleep(PodRetryInterval * time.Second)

		states, err := client.PodState(url, token, ns, service)
		if err != nil {
			log.Error(err)
			return false
		}

		initiating, resetOccurred := false, false
		for name, state := range states {
			if podInitiating(state) {
				initiating = true
				break
			}

			if failedToInitiatePod(state) {
				resetOccurred = true
				// Deletion is best effort; the next round re-checks the pods.
				if err := client.ResetPod(url, token, ns, name); err != nil {
					log.Warningf("failed to delete pod %s: %v", name, err)
				}
				break
			}
		}

		switch {
		case initiating:
			// Still starting up: poll again without consuming a retry.
		case resetOccurred:
			retry++
		default:
			return true
		}
	}

	log.Errorf("failed to reset the %s service in namespace %s", service, ns)
	return false
}

// podInitiating reports whether the pod is in the "Pending" phase and
// initiatingContainers returns true for it.
func podInitiating(status v1.PodStatus) bool {
	if status.Phase != "Pending" {
		return false
	}
	return initiatingContainers(status)
}

// initiatingContainers reports whether any regular or init container of the
// pod has a Waiting state set or has no Running state set.
func initiatingContainers(status v1.PodStatus) bool {
	// Regular containers are inspected first, then init containers.
	groups := [][]v1.ContainerStatus{status.ContainerStatuses, status.InitContainerStatuses}
	for _, group := range groups {
		for i := range group {
			state := group[i].State
			if !(state.Waiting == nil && state.Running != nil) {
				return true
			}
		}
	}

	return false
}

// failedToInitiatePod reports whether the pod is in the "Failed" phase, or is
// in the "Pending" phase while containersFailed returns true for it.
func failedToInitiatePod(status v1.PodStatus) bool {
	switch status.Phase {
	case "Failed":
		return true
	case "Pending":
		return containersFailed(status)
	default:
		return false
	}
}

// containersFailed reports whether any regular or init container of the pod
// has been restarted at least once and has a current or last termination
// state whose reason is "Error".
func containersFailed(status v1.PodStatus) bool {
	// Regular containers are inspected first, then init containers.
	groups := [][]v1.ContainerStatus{status.ContainerStatuses, status.InitContainerStatuses}
	for _, group := range groups {
		for i := range group {
			if group[i].RestartCount < 1 {
				continue
			}

			current := group[i].State.Terminated
			previous := group[i].LastTerminationState.Terminated
			if (current != nil && current.Reason == "Error") || (previous != nil && previous.Reason == "Error") {
				return true
			}
		}
	}

	return false
}
85 changes: 76 additions & 9 deletions internal/openshift/client/openshift_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ type OpenShiftClient interface {
WhoAmI(apiURL string, bearerToken string) (string, error)
WatchBuilds(apiURL string, bearerToken string, buildType string, callback func(model.Object) error) error
WatchDeploymentConfigs(apiURL string, bearerToken string, namespaceSuffix string, callback func(model.DCObject) error) error
Reset(apiURL string, bearerToken string, namespace string) error
ResetPod(apiURL string, bearerToken string, namespace string, podName string) error
ResetNSPods(apiURL string, bearerToken string, namespace string) error
PodState(apiURL string, bearerToken string, namespace string, selector string) (map[string]v1.PodStatus, error)
}

type user struct {
Expand Down Expand Up @@ -145,10 +147,10 @@ func (o openShift) Idle(apiURL string, bearerToken string, namespace string, ser
return
}

// Reset deletes a pod and start a new one
func (o *openShift) Reset(apiURL string, bearerToken string, namespace string) error {
logger.Infof("resetting pods in " + namespace)
// ResetPod deletes a pod and start a new one
func (o *openShift) ResetPod(apiURL string, bearerToken string, namespace string, podName string) error {

/*
req, err := o.reqAPI(apiURL, bearerToken, "GET", namespace, "pods", nil)
if err != nil {
return err
Expand All @@ -167,24 +169,60 @@ func (o *openShift) Reset(apiURL string, bearerToken string, namespace string) e
}

for _, element := range podList.Items {

podName := element.GetName()
*/
if strings.Contains(podName, "deploy") {
continue
return nil;
}

log.Infof("Resetting the pod %q", podName)
log.Infof("resetting pod %q in %q", podName, namespace)
req, err := o.reqAPI(apiURL, bearerToken, "DELETE", namespace, "pods/"+podName, nil)
if err != nil {
return err
}

resp, err = o.do(req)
resp, err := o.do(req)
if err != nil {
return err
}
defer bodyClose(resp)


return nil
}

// ResetNSPods lists the pods of the given namespace and resets each of them
// through ResetPod. Pods whose name contains "deploy" (presumably deployer
// pods) are skipped.
//
// It returns the first error hit while building or sending the list request,
// decoding the pod list, or resetting a pod; pods after a failing one are not
// processed.
func (o *openShift) ResetNSPods(apiURL string, bearerToken string, namespace string) error {
	req, err := o.reqAPI(apiURL, bearerToken, "GET", namespace, "pods", nil)
	if err != nil {
		return err
	}

	resp, err := o.do(req)
	if err != nil {
		return err
	}
	defer bodyClose(resp)

	podList := &v1.PodList{}
	if err = json.NewDecoder(resp.Body).Decode(podList); err != nil {
		return err
	}

	for _, pod := range podList.Items {
		podName := pod.Name
		if strings.Contains(podName, "deploy") {
			// Skip only this pod. Returning here would leave every pod
			// listed after it un-reset.
			continue
		}

		if err = o.ResetPod(apiURL, bearerToken, namespace, podName); err != nil {
			return err
		}
	}

	return nil
}

Expand Down Expand Up @@ -515,6 +553,35 @@ func (o *openShift) patch(req *http.Request) (b []byte, err error) {
return
}

// PodState returns the status of every pod in the namespace that matches the
// label selector deploymentconfig=<selector>, keyed by pod name.
//
// When building or sending the request, or decoding the response, fails, the
// error is returned together with an empty map.
func (o *openShift) PodState(apiURL string, bearerToken string, namespace string, selector string) (map[string]v1.PodStatus, error) {
	status := make(map[string]v1.PodStatus)

	// NOTE(review): selector is placed into the query string unescaped;
	// confirm callers only pass plain service names.
	labelSelector := fmt.Sprintf("deploymentconfig=%s", selector)

	// Pods are requested through reqAPI, as in ResetPod and ResetNSPods,
	// because pods are core API resources rather than OpenShift ones.
	req, err := o.reqAPI(apiURL, bearerToken, "GET", namespace, "pods?labelSelector="+labelSelector, nil)
	if err != nil {
		return status, err
	}

	resp, err := o.do(req)
	if err != nil {
		return status, err
	}
	defer bodyClose(resp)

	podList := &v1.PodList{}
	if err = json.NewDecoder(resp.Body).Decode(podList); err != nil {
		return status, err
	}

	for _, pod := range podList.Items {
		status[pod.Name] = pod.Status
	}

	return status, nil
}

func bodyClose(resp *http.Response) {
io.Copy(ioutil.Discard, resp.Body)
resp.Body.Close()
Expand Down
4 changes: 2 additions & 2 deletions internal/testutils/mock/mock_idler_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ func (i *IdlerAPI) Status(w http.ResponseWriter, r *http.Request, ps httprouter.
w.WriteHeader(http.StatusOK)
}

// Reset mock resets pods
// ResetNSPods mock resets pods
func (i *IdlerAPI) Reset(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
w.WriteHeader(http.StatusOK)
w.Write([]byte("Reset"))
w.Write([]byte("ResetNSPods"))
}

// ClusterDNSView writes a JSON representation of the current cluster state to the response writer.
Expand Down
15 changes: 14 additions & 1 deletion internal/testutils/mock/mock_openshift_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"

"github.com/fabric8-services/fabric8-jenkins-idler/internal/model"
"k8s.io/api/core/v1"
)

// OpenShiftClient is a client for OpenShift API
Expand Down Expand Up @@ -43,7 +44,7 @@ func (c *OpenShiftClient) State(apiURL string, bearerToken string, namespace str
return c.IdleState, nil
}

// Reset deletes a pod and start a new one
// ResetNSPods deletes a pod and start a new one
func (c *OpenShiftClient) Reset(apiURL string, bearerToken string, namespace string) error {
if c.IdleError != "" {
return fmt.Errorf(c.IdleError)
Expand Down Expand Up @@ -77,6 +78,18 @@ func (c *OpenShiftClient) WatchDeploymentConfigs(apiURL string, bearerToken stri
return nil
}

// ResetPod is a mock implementation: it performs no action and always returns nil.
func (c *OpenShiftClient) ResetPod(apiURL string, bearerToken string, namespace string, podName string) error {
	return nil
}

// ResetNSPods is a mock implementation: it performs no action and always returns nil.
func (c *OpenShiftClient) ResetNSPods(apiURL string, bearerToken string, namespace string) error {
return nil
}

// PodState is a mock implementation: it always returns a nil map and a nil error.
func (c *OpenShiftClient) PodState(apiURL string, bearerToken string, namespace string, selector string) (map[string]v1.PodStatus, error) {
	return nil, nil
}

// ResetCounts resets calls made to the idler(idle/unidle) to 0.
func (c *OpenShiftClient) ResetCounts() {
c.UnIdleCallCount = 0
Expand Down