From 022a3e6d975f04e24f05565d6673f3a3a5c4be11 Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Tue, 14 Jan 2025 11:16:55 -0800 Subject: [PATCH] refactor PiperOrigin-RevId: 715454790 --- pkg/sentry/inet/inet.go | 8 -- pkg/sentry/kernel/kernel.go | 8 -- pkg/sentry/socket/hostinet/stack.go | 3 - pkg/sentry/socket/netstack/stack.go | 12 +-- pkg/tcpip/stack/save_restore.go | 4 + pkg/tcpip/stack/stack.go | 34 +------- runsc/boot/controller.go | 9 +++ runsc/boot/loader.go | 3 + runsc/boot/restore.go | 16 ++++ runsc/config/flags.go | 2 +- runsc/sandbox/network.go | 119 ++++++++++++++++++---------- runsc/sandbox/sandbox.go | 17 +++- 12 files changed, 128 insertions(+), 107 deletions(-) diff --git a/pkg/sentry/inet/inet.go b/pkg/sentry/inet/inet.go index 8b4ef8848e..7b6af3c9e9 100644 --- a/pkg/sentry/inet/inet.go +++ b/pkg/sentry/inet/inet.go @@ -100,14 +100,6 @@ type Stack interface { // Restore restarts the network stack after restore. Restore() - // ReplaceConfig replaces the new network stack configuration to the - // loaded or saved network stack after restore. - // TODO(b/379115439): This method is a workaround to update netstack config - // during restore. It should be removed after a new method is added to - // extract the complete config from the spec and update it in the loaded - // stack during restore. - ReplaceConfig(st Stack) - // Destroy the network stack. Destroy() diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index d9e0a9c421..3fb054ad25 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -836,14 +836,6 @@ func (k *Kernel) LoadFrom(ctx context.Context, r, pagesMetadata io.Reader, pages if saveRestoreNet { log.Infof("netstack save restore is enabled") - s := k.rootNetworkNamespace.Stack() - if s == nil { - panic("inet.Stack cannot be nil when netstack s/r is enabled") - } - if net != nil { - s.ReplaceConfig(net) - } - s.Restore() } else if net != nil { net.Restore() } diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go index 4d1facb0af..cc9f35f88e 100644 --- a/pkg/sentry/socket/hostinet/stack.go +++ b/pkg/sentry/socket/hostinet/stack.go @@ -398,9 +398,6 @@ func (*Stack) Pause() {} // Restore implements inet.Stack.Restore. func (*Stack) Restore() {} -// ReplaceConfig implements inet.Stack.ReplaceConfig. -func (s *Stack) ReplaceConfig(_ inet.Stack) {} - // Resume implements inet.Stack.Resume. func (*Stack) Resume() {} diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go index fc21771bd3..17927bcbb3 100644 --- a/pkg/sentry/socket/netstack/stack.go +++ b/pkg/sentry/socket/netstack/stack.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/socket/netfilter" "gvisor.dev/gvisor/pkg/sentry/socket/netlink/nlmsg" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" @@ -922,15 +923,8 @@ func (s *Stack) Pause() { // Restore implements inet.Stack.Restore. func (s *Stack) Restore() { - s.Stack.Restore() -} - -// ReplaceConfig implements inet.Stack.ReplaceConfig. -func (s *Stack) ReplaceConfig(st inet.Stack) { - if _, ok := st.(*Stack); !ok { - panic("netstack.Stack cannot be nil when netstack s/r is enabled") - } - s.Stack.ReplaceConfig(st.(*Stack).Stack) + defaultIPTables := netfilter.DefaultLinuxTables + s.Stack.Restore(defaultIPTables) } // Resume implements inet.Stack.Resume. diff --git a/pkg/tcpip/stack/save_restore.go b/pkg/tcpip/stack/save_restore.go index 838cf5f4fd..58961ba027 100644 --- a/pkg/tcpip/stack/save_restore.go +++ b/pkg/tcpip/stack/save_restore.go @@ -20,10 +20,14 @@ import ( "time" cryptorand "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/tcpip" ) // afterLoad is invoked by stateify. func (s *Stack) afterLoad(context.Context) { s.insecureRNG = rand.New(rand.NewSource(time.Now().UnixNano())) s.secureRNG = cryptorand.RNGFrom(cryptorand.Reader) + s.mu.Lock() + s.nics = make(map[tcpip.NICID]*nic) + s.mu.Unlock() } diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 472b6d4b03..66b9cd9028 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -1966,45 +1966,17 @@ func (s *Stack) Pause() { } } -func (s *Stack) getNICs() map[tcpip.NICID]*nic { - s.mu.RLock() - defer s.mu.RUnlock() - - nics := s.nics - return nics -} - -// ReplaceConfig replaces config in the loaded stack. -func (s *Stack) ReplaceConfig(st *Stack) { - if st == nil { - panic("stack.Stack cannot be nil when netstack s/r is enabled") - } - - // Update route table. - s.SetRouteTable(st.GetRouteTable()) - - // Update NICs. - nics := st.getNICs() - s.mu.Lock() - defer s.mu.Unlock() - s.nics = make(map[tcpip.NICID]*nic) - for id, nic := range nics { - nic.stack = s - s.nics[id] = nic - _ = s.NextNICID() - } - s.tables = st.tables -} - // Restore restarts the stack after a restore. This must be called after the // entire system has been restored. -func (s *Stack) Restore() { +func (s *Stack) Restore(defaultIPTables func(clock tcpip.Clock, rand *rand.Rand) *IPTables) { // RestoredEndpoint.Restore() may call other methods on s, so we can't hold // s.mu while restoring the endpoints. s.mu.Lock() eps := s.restoredEndpoints s.restoredEndpoints = nil saveRestoreEnabled := s.saveRestoreEnabled + s.icmpRateLimiter = NewICMPRateLimiter(s.clock) + s.tables = defaultIPTables(s.clock, s.insecureRNG) s.mu.Unlock() for _, e := range eps { e.Restore(s) diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 217280b1e0..e26bee7eb6 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -120,6 +120,10 @@ const ( // ContMgrContainerRuntimeState returns the runtime state of a container. ContMgrContainerRuntimeState = "containerManager.ContainerRuntimeState" + + // ContMgrStoreNetworkArgs stores the network config which are required + // during restore in the loader. + ContMgrStoreNetworkArgs = "containerManager.StoreNetworkArgs" ) const ( @@ -943,3 +947,8 @@ func (cm *containerManager) ContainerRuntimeState(cid *string, state *ContainerR *state = cm.l.containerRuntimeState(*cid) return nil } + +func (cm *containerManager) StoreNetworkArgs(args *CreateLinksAndRoutesArgs, _ *struct{}) error { + cm.l.networkArgs = args + return nil +} diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 4ae3b787e5..966e04bd17 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -255,6 +255,9 @@ type Loader struct { // saveRestoreNet indicates if the saved network stack should be used // during restore. saveRestoreNet bool + + // networkArgs contains the network configuration required during restore. + networkArgs *CreateLinksAndRoutesArgs } // execID uniquely identifies a sentry process that is executed in a container. diff --git a/runsc/boot/restore.go b/runsc/boot/restore.go index 1a40804c05..241813b12b 100644 --- a/runsc/boot/restore.go +++ b/runsc/boot/restore.go @@ -714,6 +714,22 @@ func (r *restorer) restore(l *Loader, unsafeSkipRestoreSpecValidation bool) erro // Release `l.mu` before calling into callbacks. cu.Clean() + if eps, ok := l.k.RootNetworkNamespace().Stack().(*netstack.Stack); ok { + // The network stack will be loaded from the state file, we do + // not need this network stack anymore. + oldInetStack.Destroy() + + n := &Network{ + Stack: eps.Stack, + Kernel: l.k, + } + if err := n.CreateLinksAndRoutes(l.networkArgs, nil); err != nil { + return fmt.Errorf("restore network error: %w", err) + } + log.Infof("network Args: %+v", l.networkArgs) + l.k.RootNetworkNamespace().Stack().Restore() + } + // r.restoreDone() signals and waits for the sandbox to start. if err := r.restoreDone(); err != nil { return fmt.Errorf("restorer.restoreDone callback failed: %w", err) diff --git a/runsc/config/flags.go b/runsc/config/flags.go index b8fce2236b..88b5c44ad8 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -162,7 +162,7 @@ func RegisterFlags(flagSet *flag.FlagSet) { flagSet.Bool("TESTONLY-afs-syscall-panic", false, "TEST ONLY; do not ever use! Used for tests exercising gVisor panic reporting.") flagSet.String("TESTONLY-autosave-image-path", "", "TEST ONLY; enable auto save for syscall tests and set path for state file.") flagSet.Bool("TESTONLY-autosave-resume", false, "TEST ONLY; enable auto save and resume for syscall tests and set path for state file.") - flagSet.Bool("TESTONLY-save-restore-netstack", false, "TEST ONLY; enable save/restore for netstack.") + flagSet.Bool("TESTONLY-save-restore-netstack", true, "TEST ONLY; enable save/restore for netstack.") } // overrideAllowlist lists all flags that can be changed using OCI diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go index 7d4a3b5c67..a86ed62f9d 100644 --- a/runsc/sandbox/network.go +++ b/runsc/sandbox/network.go @@ -80,13 +80,19 @@ func setupNetwork(conn *urpc.Client, pid int, conf *config.Config) error { return nil } -func createDefaultLoopbackInterface(conf *config.Config, conn *urpc.Client) error { +func getLoopbackArgs(conf *config.Config) boot.CreateLinksAndRoutesArgs { link := boot.DefaultLoopbackLink link.GVisorGRO = conf.GVisorGRO - if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &boot.CreateLinksAndRoutesArgs{ + args := boot.CreateLinksAndRoutesArgs{ LoopbackLinks: []boot.LoopbackLink{link}, DisconnectOk: conf.NetDisconnectOk, - }, nil); err != nil { + } + return args +} + +func createDefaultLoopbackInterface(conf *config.Config, conn *urpc.Client) error { + args := getLoopbackArgs(conf) + if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &args, nil); err != nil { return fmt.Errorf("creating loopback link and routes: %v", err) } return nil @@ -122,52 +128,31 @@ func isRootNetNS() (bool, error) { } } -// createInterfacesAndRoutesFromNS scrapes the interface and routes from the -// net namespace with the given path, creates them in the sandbox, and removes -// them from the host. -func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, conf *config.Config) error { - switch conf.XDP.Mode { - case config.XDPModeOff: - case config.XDPModeNS: - case config.XDPModeRedirect: - if err := createRedirectInterfacesAndRoutes(conn, conf); err != nil { - return fmt.Errorf("failed to create XDP redirect interface: %w", err) - } - return nil - case config.XDPModeTunnel: - if err := createXDPTunnel(conn, nsPath, conf); err != nil { - return fmt.Errorf("failed to create XDP tunnel: %w", err) - } - return nil - default: - return fmt.Errorf("unknown XDP mode: %v", conf.XDP.Mode) - } - +func getNSArgs(nsPath string, conf *config.Config) (boot.CreateLinksAndRoutesArgs, error) { + args := boot.CreateLinksAndRoutesArgs{} // Join the network namespace that we will be copying. restore, err := joinNetNS(nsPath) if err != nil { - return err + return args, err } defer restore() // Get all interfaces in the namespace. ifaces, err := net.Interfaces() if err != nil { - return fmt.Errorf("querying interfaces: %w", err) + return args, fmt.Errorf("querying interfaces: %w", err) } isRoot, err := isRootNetNS() if err != nil { - return err + return args, err } if isRoot { - return fmt.Errorf("cannot run with network enabled in root network namespace") + return args, fmt.Errorf("cannot run with network enabled in root network namespace") } // Collect addresses and routes from the interfaces. - args := boot.CreateLinksAndRoutesArgs{ - DisconnectOk: conf.NetDisconnectOk, - } + args.DisconnectOk = conf.NetDisconnectOk for _, iface := range ifaces { if iface.Flags&net.FlagUp == 0 { log.Infof("Skipping down interface: %+v", iface) @@ -176,14 +161,14 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, conf *con allAddrs, err := iface.Addrs() if err != nil { - return fmt.Errorf("fetching interface addresses for %q: %w", iface.Name, err) + return args, fmt.Errorf("fetching interface addresses for %q: %w", iface.Name, err) } // We build our own loopback device. if iface.Flags&net.FlagLoopback != 0 { link, err := loopbackLink(conf, iface, allAddrs) if err != nil { - return fmt.Errorf("getting loopback link for iface %q: %w", iface.Name, err) + return args, fmt.Errorf("getting loopback link for iface %q: %w", iface.Name, err) } args.LoopbackLinks = append(args.LoopbackLinks, link) continue @@ -193,7 +178,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, conf *con for _, ifaddr := range allAddrs { ipNet, ok := ifaddr.(*net.IPNet) if !ok { - return fmt.Errorf("address is not IPNet: %+v", ifaddr) + return args, fmt.Errorf("address is not IPNet: %+v", ifaddr) } ipAddrs = append(ipAddrs, ipNet) } @@ -205,7 +190,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, conf *con // Collect data from the ARP table. dump, err := netlink.NeighList(iface.Index, 0) if err != nil { - return fmt.Errorf("fetching ARP table for %q: %w", iface.Name, err) + return args, fmt.Errorf("fetching ARP table for %q: %w", iface.Name, err) } var neighbors []boot.Neighbor @@ -223,11 +208,11 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, conf *con // will remove the routes as well. routes, defv4, defv6, err := routesForIface(iface) if err != nil { - return fmt.Errorf("getting routes for interface %q: %v", iface.Name, err) + return args, fmt.Errorf("getting routes for interface %q: %v", iface.Name, err) } if defv4 != nil { if !args.Defaultv4Gateway.Route.Empty() { - return fmt.Errorf("more than one default route found, interface: %v, route: %v, default route: %+v", iface.Name, defv4, args.Defaultv4Gateway) + return args, fmt.Errorf("more than one default route found, interface: %v, route: %v, default route: %+v", iface.Name, defv4, args.Defaultv4Gateway) } args.Defaultv4Gateway.Route = *defv4 args.Defaultv4Gateway.Name = iface.Name @@ -235,7 +220,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, conf *con if defv6 != nil { if !args.Defaultv6Gateway.Route.Empty() { - return fmt.Errorf("more than one default route found, interface: %v, route: %v, default route: %+v", iface.Name, defv6, args.Defaultv6Gateway) + return args, fmt.Errorf("more than one default route found, interface: %v, route: %v, default route: %+v", iface.Name, defv6, args.Defaultv6Gateway) } args.Defaultv6Gateway.Route = *defv6 args.Defaultv6Gateway.Name = iface.Name @@ -244,7 +229,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, conf *con // Get the link for the interface. ifaceLink, err := netlink.LinkByName(iface.Name) if err != nil { - return fmt.Errorf("getting link for interface %q: %w", iface.Name, err) + return args, fmt.Errorf("getting link for interface %q: %w", iface.Name, err) } linkAddress := ifaceLink.Attrs().HardwareAddr @@ -260,18 +245,18 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, conf *con // If we encounter an error while deleting the ip, // verify the ip is still present on the interface. if present, err := isAddressOnInterface(iface.Name, addr); err != nil { - return fmt.Errorf("checking if address %v is on interface %q: %w", addr, iface.Name, err) + return args, fmt.Errorf("checking if address %v is on interface %q: %w", addr, iface.Name, err) } else if !present { continue } - return fmt.Errorf("removing address %v from device %q: %w", addr, iface.Name, err) + return args, fmt.Errorf("removing address %v from device %q: %w", addr, iface.Name, err) } } if conf.XDP.Mode == config.XDPModeNS { xdpSockFDs, err := createSocketXDP(iface) if err != nil { - return fmt.Errorf("failed to create XDP socket: %v", err) + return args, fmt.Errorf("failed to create XDP socket: %v", err) } args.FilePayload.Files = append(args.FilePayload.Files, xdpSockFDs...) args.XDPLinks = append(args.XDPLinks, boot.XDPLink{ @@ -308,13 +293,13 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, conf *con log.Debugf("Creating Channel %d", i) socketEntry, err := createSocket(iface, ifaceLink, conf.HostGSO) if err != nil { - return fmt.Errorf("failed to createSocket for %s : %w", iface.Name, err) + return args, fmt.Errorf("failed to createSocket for %s : %w", iface.Name, err) } if i == 0 { link.GSOMaxSize = socketEntry.gsoMaxSize } else { if link.GSOMaxSize != socketEntry.gsoMaxSize { - return fmt.Errorf("inconsistent gsoMaxSize %d and %d when creating multiple channels for same interface: %s", + return args, fmt.Errorf("inconsistent gsoMaxSize %d and %d when creating multiple channels for same interface: %s", link.GSOMaxSize, socketEntry.gsoMaxSize, iface.Name) } } @@ -333,6 +318,35 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, conf *con } if err := pcapAndNAT(&args, conf); err != nil { + return args, err + } + + return args, nil +} + +// createInterfacesAndRoutesFromNS scrapes the interface and routes from the +// net namespace with the given path, creates them in the sandbox, and removes +// them from the host. +func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, conf *config.Config) error { + switch conf.XDP.Mode { + case config.XDPModeOff: + case config.XDPModeNS: + case config.XDPModeRedirect: + if err := createRedirectInterfacesAndRoutes(conn, conf); err != nil { + return fmt.Errorf("failed to create XDP redirect interface: %w", err) + } + return nil + case config.XDPModeTunnel: + if err := createXDPTunnel(conn, nsPath, conf); err != nil { + return fmt.Errorf("failed to create XDP tunnel: %w", err) + } + return nil + default: + return fmt.Errorf("unknown XDP mode: %v", conf.XDP.Mode) + } + + args, err := getNSArgs(nsPath, conf) + if err != nil { return err } @@ -688,3 +702,20 @@ func checkNftables() (*os.File, error) { return writeNATBlob() } + +func setupRestoreNetworkArgs(pid int, conf *config.Config) (boot.CreateLinksAndRoutesArgs, error) { + log.Infof("Restore network args") + + switch conf.Network { + case config.NetworkNone: + log.Infof("Network is disabled, create loopback interface only") + return getLoopbackArgs(conf), nil + case config.NetworkSandbox: + // Build the path to the net namespace of the sandbox process. + // This is what we will copy. + nsPath := filepath.Join("/proc", strconv.Itoa(pid), "ns/net") + return getNSArgs(nsPath, conf) + default: + return boot.CreateLinksAndRoutesArgs{}, fmt.Errorf("invalid network type during restore: %v", conf.Network) + } +} diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 4695003a53..c471551703 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -528,9 +528,20 @@ func (s *Sandbox) Restore(conf *config.Config, cid string, imagePath string, dir } defer conn.Close() - // Configure the network. - if err := setupNetwork(conn, s.Pid.load(), conf); err != nil { - return fmt.Errorf("setting up network: %v", err) + if conf.Network == config.NetworkNone || conf.Network == config.NetworkSandbox { + args, err := setupRestoreNetworkArgs(s.Pid.load(), conf) + if err != nil { + return fmt.Errorf("setting restore network args: %v", err) + } + // Store the network args in the loader. + if err := conn.Call(boot.ContMgrStoreNetworkArgs, &args, nil); err != nil { + return fmt.Errorf("storing network args %q: %v", cid, err) + } + } else { + // Configure the network. + if err := setupNetwork(conn, s.Pid.load(), conf); err != nil { + return fmt.Errorf("setting up network: %v", err) + } } // Restore the container and start the root container.