Skip to content

Commit

Permalink
in_podman_metrics: Added remove_stale_counters opt
Browse files Browse the repository at this point in the history
For environments where containers are created and removed
often, it can be useful to enable an option that removes
counters for containers that no longer exist. This option defaults
to false, since it increases the plugin's resource consumption.

Signed-off-by: Paweł Cendrzak <[email protected]>
  • Loading branch information
pabloxxl committed Aug 28, 2023
1 parent 1f125c5 commit 4f58516
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 11 deletions.
104 changes: 94 additions & 10 deletions plugins/in_podman_metrics/podman_metrics.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
* that are children to root array, and in them, search for ID and name (which is also
* an array.
*/
static int collect_container_data(struct flb_in_metrics *ctx)
static int collect_container_data(struct flb_in_metrics *ctx, int gather_only)
{
/* Buffers for reading data from JSON */
char *buffer;
Expand All @@ -57,6 +57,8 @@ static int collect_container_data(struct flb_in_metrics *ctx)
jsmn_parser p;
jsmntok_t t[JSON_TOKENS];

struct container_id *cid;

flb_utils_read_file(ctx->config, &buffer, &read_bytes);
if (!read_bytes) {
flb_plg_warn(ctx->ins, "Failed to open %s", ctx->config);
Expand Down Expand Up @@ -119,11 +121,26 @@ static int collect_container_data(struct flb_in_metrics *ctx)
image_name[metadata_token_size] = '\0';

flb_plg_trace(ctx->ins, "Found image name %s", image_name);
add_container_to_list(ctx, id, name, image_name);
if (!gather_only) {
add_container_to_list(ctx, id, name, image_name);
}
}
else {
flb_plg_warn(ctx->ins, "Image name was not found for %s", id);
add_container_to_list(ctx, id, name, "unknown");
if (!gather_only) {
add_container_to_list(ctx, id, name, "unknown");
}
}

if (gather_only) {
cid = flb_malloc(sizeof(struct container_id));
if (!cid) {
flb_errno();
return -1;
}
cid->id = flb_sds_create(id);
mk_list_add(&cid->_head, &ctx->ids);
flb_plg_trace(ctx->ins, "Found id for gather only %s", cid->id);
}
collected_containers++;
}
Expand Down Expand Up @@ -173,18 +190,55 @@ static int destroy_container_list(struct flb_in_metrics *ctx)
struct container *cnt;
struct net_iface *iface;
struct sysfs_path *pth;
struct container_id *id;
struct mk_list *head;
struct mk_list *tmp;
struct mk_list *inner_head;
struct mk_list *inner_tmp;
int can_remove_stale_counters = FLB_FALSE;
int id_found;
int collected;

if (ctx->remove_stale_counters) {
collected = collect_container_data(ctx, FLB_TRUE);
if (collected == -1) {
flb_plg_error(ctx->ins, "Could not collect container ids");
}
else {
can_remove_stale_counters = FLB_TRUE;
flb_plg_debug(ctx->ins, "Collected %d for deletion", collected);
}
}

mk_list_foreach_safe(head, tmp, &ctx->items) {
id_found = FLB_FALSE;
cnt = mk_list_entry(head, struct container, _head);
flb_plg_debug(ctx->ins, "Destroying container data (id: %s, name: %s", cnt->id, cnt->name);

/* If recreation was already triggered, there is no point in determining it again */
if (can_remove_stale_counters && !ctx->recreate_cmt) {
mk_list_foreach_safe(inner_head, inner_tmp, &ctx->ids) {
id = mk_list_entry(inner_head, struct container_id, _head);
if (strcmp(cnt->id, id->id) == 0) {
id_found = FLB_TRUE;
break;
}
}

if (!id_found) {
flb_plg_info(ctx->ins, "Counter will be removed because %s is gone", cnt->name);
ctx->recreate_cmt = FLB_TRUE;
}
else {
flb_plg_debug(ctx->ins, "No need to remove stale counters");
}
}


flb_sds_destroy(cnt->id);
flb_sds_destroy(cnt->name);
flb_sds_destroy(cnt->image_name);

mk_list_foreach_safe(inner_head, inner_tmp, &cnt->net_data) {
iface = mk_list_entry(inner_head, struct net_iface, _head);
flb_sds_destroy(iface->name);
Expand All @@ -194,6 +248,7 @@ static int destroy_container_list(struct flb_in_metrics *ctx)
mk_list_del(&cnt->_head);
flb_free(cnt);
}


mk_list_foreach_safe(head, tmp, &ctx->sysfs_items) {
pth = mk_list_entry(head, struct sysfs_path, _head);
Expand All @@ -202,10 +257,19 @@ static int destroy_container_list(struct flb_in_metrics *ctx)
mk_list_del(&pth->_head);
flb_free(pth);
}

if (ctx->remove_stale_counters) {
mk_list_foreach_safe(head, tmp, &ctx->ids) {
id = mk_list_entry(head, struct container_id, _head);
flb_plg_trace(ctx->ins, "Destroying container id: %s", id->id);
flb_sds_destroy(id->id);
mk_list_del(&id->_head);
flb_free(id);
}
}
return 0;
}


/*
* Create counter for given metric name, using name, image name and value as counter labels. Counters
* are created per counter name, so they are "shared" between multiple containers - counter
Expand All @@ -218,8 +282,8 @@ static int create_counter(struct flb_in_metrics *ctx, struct cmt_counter **count
{
flb_sds_t *labels;
uint64_t fvalue = value;

int label_count;

if (value == UINT64_MAX) {
flb_plg_debug(ctx->ins, "Ignoring invalid counter for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
return -1;
Expand All @@ -246,6 +310,12 @@ static int create_counter(struct flb_in_metrics *ctx, struct cmt_counter **count
*counter = cmt_counter_create(ctx->ins->cmt, COUNTER_PREFIX, metric_prefix, metric_name, description, label_count, fields);
}

if (ctx->recreate_cmt) {
flb_plg_debug(ctx->ins, "Recreating counter for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
cmt_counter_destroy(*counter);
*counter = cmt_counter_create(ctx->ins->cmt, COUNTER_PREFIX, metric_prefix, metric_name, description, label_count, fields);
}

/* Allow setting a value that is not greater than the current one (if, for example, memory usage stays exactly the same) */
cmt_counter_allow_reset(*counter);
flb_plg_debug(ctx->ins, "Set counter for %s, %s_%s_%s: %lu", name, COUNTER_PREFIX, metric_prefix, metric_name, fvalue);
Expand All @@ -268,20 +338,26 @@ static int create_gauge(struct flb_in_metrics *ctx, struct cmt_gauge **gauge, fl
{
flb_sds_t *labels;
int label_count;
labels = (char *[]){id, name, image_name};
label_count = 3;

if (value == UINT64_MAX) {
flb_plg_debug(ctx->ins, "Ignoring invalid gauge for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
return -1;
}

labels = (char *[]){id, name, image_name};
label_count = 3;

/* if gauge was not yet created, it means that this function is called for the first time per counter type */
if (*gauge == NULL) {
flb_plg_debug(ctx->ins, "Creating gauge for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
*gauge = cmt_gauge_create(ctx->ins->cmt, COUNTER_PREFIX, metric_prefix, metric_name, description, label_count, fields);
}

if (ctx->recreate_cmt) {
flb_plg_debug(ctx->ins, "Recreating gauge for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
cmt_gauge_destroy(*gauge);
*gauge = cmt_gauge_create(ctx->ins->cmt, COUNTER_PREFIX, metric_prefix, metric_name, description, label_count, fields);
}

flb_plg_debug(ctx->ins, "Set gauge for %s, %s_%s_%s: %lu", name, COUNTER_PREFIX, metric_prefix, metric_name, value);
if (cmt_gauge_set(*gauge, cfl_time_now(), value, label_count, labels) == -1) {
flb_plg_warn(ctx->ins, "Failed to set gauge for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
Expand Down Expand Up @@ -340,7 +416,12 @@ static int create_counters(struct flb_in_metrics *ctx)
DESCRIPTION_TX_BYTES, iface->name, iface->tx_bytes);
create_counter(ctx, &ctx->tx_errors, cnt->id, cnt->name, cnt->image_name, COUNTER_NETWORK_PREFIX, FIELDS_METRIC_WITH_IFACE, COUNTER_TX_ERRORS,
DESCRIPTION_TX_ERRORS, iface->name, iface->tx_errors);
/* Stop recreating after first iteration, at this point we cleared all counters/gauges */
ctx->recreate_cmt = FLB_FALSE;
}

// Do it again in case of previous loop not looping at all
ctx->recreate_cmt = FLB_FALSE;
}
return 0;
}
Expand All @@ -357,7 +438,7 @@ static int scrape_metrics(struct flb_config *config, struct flb_in_metrics *ctx)
return -1;
}

if (collect_container_data(ctx) == -1) {
if (collect_container_data(ctx, FLB_FALSE) == -1) {
flb_plg_error(ctx->ins, "Could not collect container ids");
return -1;
}
Expand Down Expand Up @@ -429,6 +510,8 @@ static int in_metrics_init(struct flb_input_instance *in, struct flb_config *con
ctx->tx_bytes = NULL;
ctx->tx_errors = NULL;

ctx->recreate_cmt = FLB_FALSE;

if (flb_input_config_map_set(in, (void *) ctx) == -1) {
flb_free(ctx);
return -1;
Expand Down Expand Up @@ -462,6 +545,7 @@ static int in_metrics_init(struct flb_input_instance *in, struct flb_config *con

mk_list_init(&ctx->items);
mk_list_init(&ctx->sysfs_items);
mk_list_init(&ctx->ids);

if (ctx->scrape_interval >= 2 && ctx->scrape_on_start) {
flb_plg_info(ctx->ins, "Generating podman metrics (initial scrape)");
Expand Down Expand Up @@ -490,8 +574,8 @@ static int in_metrics_exit(void *data, struct flb_config *config)
return 0;
}

flb_sds_destroy(ctx->config);
destroy_container_list(ctx);
flb_sds_destroy(ctx->config);
flb_free(ctx);
return 0;
}
Expand Down
7 changes: 6 additions & 1 deletion plugins/in_podman_metrics/podman_metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

#include "podman_metrics_config.h"

static int collect_container_data(struct flb_in_metrics *ctx);
static int collect_container_data(struct flb_in_metrics *ctx, int gather_only);
static int add_container_to_list(struct flb_in_metrics *ctx, flb_sds_t id, flb_sds_t name, flb_sds_t image_name);
static int destroy_container_list(struct flb_in_metrics *ctx);

Expand Down Expand Up @@ -78,6 +78,11 @@ static struct flb_config_map config_map[] = {
0, FLB_TRUE, offsetof(struct flb_in_metrics, procfs_path),
"Path to proc subsystem directory"
},
{
FLB_CONFIG_MAP_BOOL, "remove_stale_counters", "false",
0, FLB_TRUE, offsetof(struct flb_in_metrics, remove_stale_counters),
"Remove counters for removed containers"
},

/* EOF */
{0}
Expand Down
10 changes: 10 additions & 0 deletions plugins/in_podman_metrics/podman_metrics_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,18 +169,28 @@ struct sysfs_path {
struct mk_list _head;
};

/*
 * Node of the container id list stored in flb_in_metrics.ids.
 * Nodes are allocated by collect_container_data() when it is called in
 * gather-only mode and are released by destroy_container_list().
 */
struct container_id {
/* Container id string; created with flb_sds_create() and released with flb_sds_destroy() */
flb_sds_t id;
/* Linkage into the flb_in_metrics.ids list */
struct mk_list _head;
};

struct flb_in_metrics {
/* config map options */
int scrape_on_start;
int scrape_interval;
flb_sds_t podman_config_path;
int remove_stale_counters;
int recreate_cmt;

/* container list */
struct mk_list items;

/* sysfs path list */
struct mk_list sysfs_items;

/* container id list */
struct mk_list ids;

/* counters */
struct cmt_counter *c_memory_usage;
struct cmt_counter *c_memory_max_usage;
Expand Down
1 change: 1 addition & 0 deletions tests/runtime/in_podman_metrics.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ void flb_test_ipm_regular() {
"scrape_on_start", "true",
"path.sysfs", DPATH_PODMAN_REGULAR,
"path.procfs", DPATH_PODMAN_REGULAR,
"remove_stale_counters", "true",
NULL);
TEST_CHECK(flb_start(ctx) == 0);
sleep(1);
Expand Down

0 comments on commit 4f58516

Please sign in to comment.