Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

aws-janitor-boskos: add clean time and process time metrics #75

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 45 additions & 3 deletions cmd/aws-janitor-boskos/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,15 @@ import (
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"

"k8s.io/test-infra/pkg/flagutil"
"k8s.io/test-infra/prow/config"
prowflagutil "k8s.io/test-infra/prow/flagutil"
"k8s.io/test-infra/prow/logrusutil"
prowmetrics "k8s.io/test-infra/prow/metrics"

"sigs.k8s.io/boskos/aws-janitor/account"
"sigs.k8s.io/boskos/aws-janitor/regions"
"sigs.k8s.io/boskos/aws-janitor/resources"
Expand All @@ -52,6 +59,19 @@ var (
includeTags common.CommaSeparatedStrings
excludeTM resources.TagMatcher
includeTM resources.TagMatcher

instrumentationOptions prowflagutil.InstrumentationOptions

cleaningTimeHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{
cpanato marked this conversation as resolved.
Show resolved Hide resolved
Name: "aws_janitor_boskos_cleaning_time_seconds",
ConstLabels: prometheus.Labels{},
Buckets: prometheus.ExponentialBuckets(1, 1.4, 30),
}, []string{"resource_type", "status"})

sweepsGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "aws_janitor_boskos_sweeps",
ConstLabels: prometheus.Labels{},
}, []string{"resource_type"})
)

const (
Expand All @@ -64,10 +84,16 @@ func init() {
"Resources with any of these tags will not be managed by the janitor. Given as a comma-separated list of tags in key[=value] format; excluding the value will match any tag with that key. Keys can be repeated.")
flag.Var(&includeTags, "include-tags",
"Resources must include all of these tags in order to be managed by the janitor. Given as a comma-separated list of tags in key[=value] format; excluding the value will match any tag with that key. Keys can be repeated.")

prometheus.MustRegister(cleaningTimeHistogram)
prometheus.MustRegister(sweepsGauge)
}

func main() {
logrusutil.ComponentInit()
for _, o := range []flagutil.OptionGroup{&instrumentationOptions} {
o.AddFlags(flag.CommandLine)
}
flag.Parse()

level, err := logrus.ParseLevel(*logLevel)
Expand All @@ -76,6 +102,13 @@ func main() {
}
logrus.SetLevel(level)

for _, o := range []flagutil.OptionGroup{&instrumentationOptions} {
if err := o.Validate(false); err != nil {
logrus.Fatalf("Invalid options: %v", err)
}
}
prowmetrics.ExposeMetrics("aws-janitor-boskos", config.PushGateway{}, instrumentationOptions.MetricsPort)

if d, err := time.ParseDuration(*sweepSleep); err != nil {
sweepSleepDuration = time.Second * 30
} else {
Expand Down Expand Up @@ -115,13 +148,17 @@ func run(boskos *client.Client) error {
} else if err != nil {
return errors.Wrap(err, "Couldn't retrieve resources from Boskos")
} else {
startProcess := time.Now()
logrus.WithField("name", res.Name).Info("Acquired resource")
if err := cleanResource(res); err != nil {
collectMetric(startProcess, res.Name, "failed-clean")
return errors.Wrapf(err, "Couldn't clean resource %q", res.Name)
}
if err := boskos.ReleaseOne(res.Name, common.Free); err != nil {
collectMetric(startProcess, res.Name, "failed-release")
return errors.Wrapf(err, "Failed to release resoures %q", res.Name)
}
collectMetric(startProcess, res.Name, "released")
logrus.WithField("name", res.Name).Info("Released resource")
}
}
Expand Down Expand Up @@ -165,8 +202,13 @@ func cleanResource(res *common.Resource) error {
}
}

duration := time.Since(start)

logrus.WithFields(logrus.Fields{"name": res.Name, "duration": duration.Seconds()}).Info("Finished cleaning")
sweepsGauge.WithLabelValues(res.Name).Set(float64(*sweepCount))
collectMetric(start, res.Name, "clean")
logrus.WithFields(logrus.Fields{"name": res.Name, "duration": time.Since(start).Seconds(), "sweeps": *sweepCount}).Info("Finished cleaning")
return nil
}

// collectMetric records how long an operation took. It measures the time
// elapsed since startTime, in seconds, and adds it as a single observation to
// cleaningTimeHistogram under the label values (rType, status), which map to
// the histogram's "resource_type" and "status" labels respectively.
func collectMetric(startTime time.Time, rType, status string) {
	elapsed := time.Since(startTime)
	observer := cleaningTimeHistogram.WithLabelValues(rType, status)
	observer.Observe(elapsed.Seconds())
}