Skip to content

Commit

Permalink
Add profiler address parameter on node-agent
Browse files Browse the repository at this point in the history
This allows us to enable the profiler endpoints on both the
server and the node agent.
This helps me in troubleshooting the high memory usage when
restoring lots of small files.

Refs: vmware-tanzu#8582

Signed-off-by: Rob Kenis <[email protected]>
  • Loading branch information
Rob Kenis authored and RobKenis committed Jan 20, 2025
1 parent 0543750 commit 6cb6b48
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
1 change: 1 addition & 0 deletions changelogs/unreleased/8618-RobKenis
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add profiler address parameter on node-agent
27 changes: 26 additions & 1 deletion pkg/cmd/cli/nodeagent/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"math"
"net/http"
"net/http/pprof"
"os"
"strings"
"time"
Expand Down Expand Up @@ -86,6 +87,7 @@ const (

type nodeAgentServerConfig struct {
metricsAddress string
profilerAddress string
resourceTimeout time.Duration
dataMoverPrepareTimeout time.Duration
nodeAgentConfig string
Expand Down Expand Up @@ -124,7 +126,8 @@ func NewServerCommand(f client.Factory) *cobra.Command {
command.Flags().Var(formatFlag, "log-format", fmt.Sprintf("The format for log output. Valid values are %s.", strings.Join(formatFlag.AllowedValues(), ", ")))
command.Flags().DurationVar(&config.resourceTimeout, "resource-timeout", config.resourceTimeout, "How long to wait for resource processes which are not covered by other specific timeout parameters. Default is 10 minutes.")
command.Flags().DurationVar(&config.dataMoverPrepareTimeout, "data-mover-prepare-timeout", config.dataMoverPrepareTimeout, "How long to wait for preparing a DataUpload/DataDownload. Default is 30 minutes.")
command.Flags().StringVar(&config.metricsAddress, "metrics-address", config.metricsAddress, "The address to expose prometheus metrics")
command.Flags().StringVar(&config.metricsAddress, "metrics-address", config.metricsAddress, "The address to expose prometheus metrics.")
command.Flags().StringVar(&config.profilerAddress, "profiler-address", config.profilerAddress, "The address to expose the pprof profiler.")
command.Flags().StringVar(&config.nodeAgentConfig, "node-agent-configmap", config.nodeAgentConfig, "The name of ConfigMap containing node-agent configurations.")

return command
Expand Down Expand Up @@ -263,6 +266,10 @@ func newNodeAgentServer(logger logrus.FieldLogger, factory client.Factory, confi
func (s *nodeAgentServer) run() {
signals.CancelOnShutdown(s.cancelFunc, s.logger)

if s.config.profilerAddress != "" {
go s.runProfiler()
}

go func() {
metricsMux := http.NewServeMux()
metricsMux.Handle("/metrics", promhttp.Handler())
Expand Down Expand Up @@ -386,6 +393,24 @@ func (s *nodeAgentServer) run() {
}
}

func (s *nodeAgentServer) runProfiler() {
mux := http.NewServeMux()
mux.HandleFunc("/debug/pprof/", pprof.Index)
mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
mux.HandleFunc("/debug/pprof/trace", pprof.Trace)

server := &http.Server{
Addr: s.config.profilerAddress,
Handler: mux,
ReadHeaderTimeout: 3 * time.Second,
}
if err := server.ListenAndServe(); err != nil {
s.logger.WithError(errors.WithStack(err)).Error("error running profiler http server")
}
}

func (s *nodeAgentServer) waitCacheForResume() error {
podInformer, err := s.mgr.GetCache().GetInformer(s.ctx, &v1.Pod{})
if err != nil {
Expand Down

0 comments on commit 6cb6b48

Please sign in to comment.