Skip to content

Commit

Permalink
shim: add support for containerd v2 metrics\nAdd support for v2 conta…
Browse files Browse the repository at this point in the history
…inerd metrics in the shim, v2 metrics are only used when runsc is run with --system-cgroup=true.\nContainerd requires v2 metrics when the host is run with CGroupsV2.\nThis issue was noticed when attempting to gather metrics on AL2023 which defaults to CGroupsV2.\nFixes: google#11472

Signed-off-by: Champ-Goblem <[email protected]>
  • Loading branch information
Champ-Goblem committed Feb 13, 2025
1 parent dd8ea25 commit 37fe865
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 3 deletions.
2 changes: 2 additions & 0 deletions pkg/shim/runsc/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ go_library(
"@com_github_containerd_cgroups//:go_default_library",
"@com_github_containerd_cgroups//stats/v1:go_default_library",
"@com_github_containerd_cgroups//v2:go_default_library",
"@com_github_containerd_cgroups//v2/stats:go_default_library",
"@com_github_containerd_console//:go_default_library",
"@com_github_containerd_containerd//api/events:go_default_library",
"@com_github_containerd_containerd//api/types/task:go_default_library",
Expand All @@ -47,6 +48,7 @@ go_library(
"@com_github_containerd_errdefs//:go_default_library",
"@com_github_containerd_fifo//:go_default_library",
"@com_github_containerd_log//:go_default_library",
"@com_github_containerd_go_runc//:go_default_library",
"@com_github_containerd_typeurl//:go_default_library",
"@com_github_gogo_protobuf//types:go_default_library",
"@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
Expand Down
55 changes: 52 additions & 3 deletions pkg/shim/runsc/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package runsc
import (
"context"
"fmt"
"github.com/containerd/go-runc"
"io"
"os"
"path/filepath"
Expand All @@ -29,6 +30,7 @@ import (
"github.com/containerd/cgroups"
cgroupsstats "github.com/containerd/cgroups/stats/v1"
cgroupsv2 "github.com/containerd/cgroups/v2"
cgroupsv2stats "github.com/containerd/cgroups/v2/stats"
"github.com/containerd/console"
"github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/api/types/task"
Expand All @@ -49,7 +51,7 @@ import (
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/shim/runtimeoptions/v14"
v14 "gvisor.dev/gvisor/pkg/shim/runtimeoptions/v14"

"gvisor.dev/gvisor/pkg/shim/extension"
"gvisor.dev/gvisor/pkg/shim/proc"
Expand Down Expand Up @@ -660,6 +662,18 @@ func (s *runscService) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*tas
// as runc.
//
// [0]: https://github.com/google/gvisor/blob/277a0d5a1fbe8272d4729c01ee4c6e374d047ebc/runsc/boot/events.go#L61-L81
return s.getStats(stats, r)
}

func (s *runscService) getStats(stats *runc.Stats, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) {
if s.opts.RunscConfig["systemd-cgroup"] == "true" {
return s.getV2Stats(stats, r)
} else {
return s.getV1Stats(stats, r)
}
}

func (s *runscService) getV1Stats(stats *runc.Stats, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) {
metrics := &cgroupsstats.Metrics{
CPU: &cgroupsstats.CPUStat{
Usage: &cgroupsstats.CPUUsage{
Expand Down Expand Up @@ -708,10 +722,45 @@ func (s *runscService) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*tas
}
data, err := typeurl.MarshalAny(metrics)
if err != nil {
log.L.Debugf("Stats error, id: %s: %v", r.ID, err)
log.L.Debugf("Stats error v1, id: %s: %v", r.ID, err)
return nil, err
}
log.L.Debugf("Stats success v1, id: %s: %+v", r.ID, data)
return &taskAPI.StatsResponse{
Stats: data,
}, nil
}

func (s *runscService) getV2Stats(stats *runc.Stats, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) {
metrics := &cgroupsv2stats.Metrics{
// The CGroup V2 stats are in microseconds instead of nanoseconds so divide by 1000
CPU: &cgroupsv2stats.CPUStat{
UsageUsec: stats.Cpu.Usage.Total / 1000,
UserUsec: stats.Cpu.Usage.User / 1000,
SystemUsec: stats.Cpu.Usage.Kernel / 1000,
NrPeriods: stats.Cpu.Throttling.Periods,
NrThrottled: stats.Cpu.Throttling.ThrottledPeriods,
ThrottledUsec: stats.Cpu.Throttling.ThrottledTime / 1000,
},
Memory: &cgroupsv2stats.MemoryStat{
Usage: stats.Memory.Usage.Usage,
UsageLimit: stats.Memory.Usage.Limit,
SwapUsage: stats.Memory.Swap.Usage,
SwapLimit: stats.Memory.Swap.Limit,
Slab: stats.Memory.Kernel.Usage,
File: stats.Memory.Cache,
},
Pids: &cgroupsv2stats.PidsStat{
Current: stats.Pids.Current,
Limit: stats.Pids.Limit,
},
}
data, err := typeurl.MarshalAny(metrics)
if err != nil {
log.L.Debugf("Stats error v2, id: %s: %v", r.ID, err)
return nil, err
}
log.L.Debugf("Stats success, id: %s: %+v", r.ID, data)
log.L.Debugf("Stats success v2, id: %s: %+v", r.ID, data)
return &taskAPI.StatsResponse{
Stats: data,
}, nil
Expand Down

0 comments on commit 37fe865

Please sign in to comment.