diff --git a/CHANGELOG.md b/CHANGELOG.md index cffc9e2f633..321317fd75b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Deprecated + + * `runc` option `--criu` is now ignored (with a warning), and the option will + be removed entirely in a future release. Users who need a non-standard + `criu` binary should rely on the standard way of looking up binaries in + `$PATH`. (#3316) + ### Changed * When Intel RDT feature is not available, its initialization is skipped, diff --git a/contrib/completions/bash/runc b/contrib/completions/bash/runc index a4cd8993745..0b8bda9b652 100644 --- a/contrib/completions/bash/runc +++ b/contrib/completions/bash/runc @@ -231,12 +231,11 @@ _runc_runc() { --log --log-format --root - --criu --rootless " case "$prev" in - --log | --root | --criu) + --log | --root) case "$cur" in *:*) ;; # TODO somehow do _filedir for stuff inside the image, if it's already specified (which is also somewhat difficult to determine) '') diff --git a/delete.go b/delete.go index 746b0df547f..dd3041f8722 100644 --- a/delete.go +++ b/delete.go @@ -13,7 +13,7 @@ import ( "golang.org/x/sys/unix" ) -func killContainer(container libcontainer.Container) error { +func killContainer(container *libcontainer.Container) error { _ = container.Signal(unix.SIGKILL, false) for i := 0; i < 100; i++ { time.Sleep(100 * time.Millisecond) diff --git a/init.go b/init.go index bddc237f6e5..79176e2f14f 100644 --- a/init.go +++ b/init.go @@ -3,11 +3,9 @@ package main import ( "os" "runtime" - "strconv" "github.com/opencontainers/runc/libcontainer" _ "github.com/opencontainers/runc/libcontainer/nsenter" - "github.com/sirupsen/logrus" ) func init() { @@ -17,23 +15,7 @@ func init() { runtime.GOMAXPROCS(1) runtime.LockOSThread() - level, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_LOGLEVEL")) - if err != nil { - panic(err) - } - - logPipeFd, err := 
strconv.Atoi(os.Getenv("_LIBCONTAINER_LOGPIPE")) - if err != nil { - panic(err) - } - - logrus.SetLevel(logrus.Level(level)) - logrus.SetOutput(os.NewFile(uintptr(logPipeFd), "logpipe")) - logrus.SetFormatter(new(logrus.JSONFormatter)) - logrus.Debug("child process in init()") - - factory, _ := libcontainer.New("") - if err := factory.StartInitialization(); err != nil { + if err := libcontainer.StartInitialization(); err != nil { // as the error is sent back to the parent there is no need to log // or write it to stderr because the parent process will handle this os.Exit(1) diff --git a/libcontainer/README.md b/libcontainer/README.md index 13eee49d4b9..5b086f1efa9 100644 --- a/libcontainer/README.md +++ b/libcontainer/README.md @@ -32,8 +32,7 @@ func init() { if len(os.Args) > 1 && os.Args[1] == "init" { runtime.GOMAXPROCS(1) runtime.LockOSThread() - factory, _ := libcontainer.New("") - if err := factory.StartInitialization(); err != nil { + if err := libcontainer.StartInitialization(); err != nil { logrus.Fatal(err) } panic("--this line should have never been executed, congratulations--") @@ -45,7 +44,7 @@ Then to create a container you first have to initialize an instance of a factory that will handle the creation and initialization for a container. 
```go -factory, err := libcontainer.New("/var/lib/container", libcontainer.Cgroupfs, libcontainer.InitArgs(os.Args[0], "init")) +factory, err := libcontainer.New("/var/lib/container") if err != nil { logrus.Fatal(err) return diff --git a/libcontainer/capabilities/capabilities.go b/libcontainer/capabilities/capabilities.go index d38b8a7cd89..06304473766 100644 --- a/libcontainer/capabilities/capabilities.go +++ b/libcontainer/capabilities/capabilities.go @@ -7,7 +7,7 @@ import ( "sort" "strings" - "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" "github.com/syndtr/gocapability/capability" ) @@ -49,7 +49,7 @@ func KnownCapabilities() []string { // New creates a new Caps from the given Capabilities config. Unknown Capabilities // or Capabilities that are unavailable in the current environment are ignored, // printing a warning instead. -func New(capConfig *configs.Capabilities) (*Caps, error) { +func New(capConfig *specs.LinuxCapabilities) (*Caps, error) { var ( err error c Caps diff --git a/libcontainer/capabilities/capabilities_linux_test.go b/libcontainer/capabilities/capabilities_linux_test.go index dfbb44b4a3f..0d8f0079acd 100644 --- a/libcontainer/capabilities/capabilities_linux_test.go +++ b/libcontainer/capabilities/capabilities_linux_test.go @@ -5,7 +5,7 @@ import ( "os" "testing" - "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" "github.com/sirupsen/logrus/hooks/test" "github.com/syndtr/gocapability/capability" @@ -13,7 +13,7 @@ import ( func TestNew(t *testing.T) { cs := []string{"CAP_CHOWN", "CAP_UNKNOWN", "CAP_UNKNOWN2"} - conf := configs.Capabilities{ + conf := specs.LinuxCapabilities{ Bounding: cs, Effective: cs, Inheritable: cs, diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index c1b4a0041c2..f7350cd0ad8 100644 --- a/libcontainer/configs/config.go +++ 
b/libcontainer/configs/config.go @@ -83,9 +83,6 @@ type Syscall struct { Args []*Arg `json:"args"` } -// TODO Windows. Many of these fields should be factored out into those parts -// which are common across platforms, and those which are platform specific. - // Config defines configuration options for executing a process inside a contained environment. type Config struct { // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs @@ -127,7 +124,7 @@ type Config struct { // Capabilities specify the capabilities to keep when executing the process inside the container // All capabilities not specified will be dropped from the processes capability mask - Capabilities *Capabilities `json:"capabilities"` + Capabilities *specs.LinuxCapabilities `json:"capabilities"` // Networks specifies the container's network setup to be created Networks []*Network `json:"networks"` @@ -264,19 +261,6 @@ func KnownHookNames() []string { } } -type Capabilities struct { - // Bounding is the set of capabilities checked by the kernel. - Bounding []string - // Effective is the set of capabilities checked by the kernel. - Effective []string - // Inheritable is the capabilities preserved across execve. - Inheritable []string - // Permitted is the limiting superset for effective capabilities. - Permitted []string - // Ambient is the ambient set of capabilities that are kept. 
- Ambient []string -} - func (hooks HookList) RunHooks(state *specs.State) error { for i, h := range hooks { if err := h.Run(state); err != nil { diff --git a/libcontainer/configs/validate/rootless.go b/libcontainer/configs/validate/rootless.go index 9a6e5eb32a3..7afdb4310f9 100644 --- a/libcontainer/configs/validate/rootless.go +++ b/libcontainer/configs/validate/rootless.go @@ -8,9 +8,9 @@ import ( "github.com/opencontainers/runc/libcontainer/configs" ) -// rootlessEUID makes sure that the config can be applied when runc +// rootlessEUIDCheck makes sure that the config can be applied when runc // is being executed as a non-root user (euid != 0) in the current user namespace. -func (v *ConfigValidator) rootlessEUID(config *configs.Config) error { +func rootlessEUIDCheck(config *configs.Config) error { if !config.RootlessEUID { return nil } diff --git a/libcontainer/configs/validate/rootless_test.go b/libcontainer/configs/validate/rootless_test.go index 59d15575dd7..0657abf48dc 100644 --- a/libcontainer/configs/validate/rootless_test.go +++ b/libcontainer/configs/validate/rootless_test.go @@ -34,10 +34,8 @@ func rootlessEUIDConfig() *configs.Config { } func TestValidateRootlessEUID(t *testing.T) { - validator := New() - config := rootlessEUIDConfig() - if err := validator.Validate(config); err != nil { + if err := Validate(config); err != nil { t.Errorf("Expected error to not occur: %+v", err) } } @@ -45,31 +43,25 @@ func TestValidateRootlessEUID(t *testing.T) { /* rootlessEUIDMappings */ func TestValidateRootlessEUIDUserns(t *testing.T) { - validator := New() - config := rootlessEUIDConfig() config.Namespaces = nil - if err := validator.Validate(config); err == nil { + if err := Validate(config); err == nil { t.Errorf("Expected error to occur if user namespaces not set") } } func TestValidateRootlessEUIDMappingUid(t *testing.T) { - validator := New() - config := rootlessEUIDConfig() config.UidMappings = nil - if err := validator.Validate(config); err == nil { + if 
err := Validate(config); err == nil { t.Errorf("Expected error to occur if no uid mappings provided") } } func TestValidateNonZeroEUIDMappingGid(t *testing.T) { - validator := New() - config := rootlessEUIDConfig() config.GidMappings = nil - if err := validator.Validate(config); err == nil { + if err := Validate(config); err == nil { t.Errorf("Expected error to occur if no gid mappings provided") } } @@ -78,8 +70,6 @@ func TestValidateNonZeroEUIDMappingGid(t *testing.T) { func TestValidateRootlessEUIDMountUid(t *testing.T) { config := rootlessEUIDConfig() - validator := New() - config.Mounts = []*configs.Mount{ { Source: "devpts", @@ -88,37 +78,35 @@ func TestValidateRootlessEUIDMountUid(t *testing.T) { }, } - if err := validator.Validate(config); err != nil { + if err := Validate(config); err != nil { t.Errorf("Expected error to not occur when uid= not set in mount options: %+v", err) } config.Mounts[0].Data = "uid=5" - if err := validator.Validate(config); err == nil { + if err := Validate(config); err == nil { t.Errorf("Expected error to occur when setting uid=5 in mount options") } config.Mounts[0].Data = "uid=0" - if err := validator.Validate(config); err != nil { + if err := Validate(config); err != nil { t.Errorf("Expected error to not occur when setting uid=0 in mount options: %+v", err) } config.Mounts[0].Data = "uid=2" config.UidMappings[0].Size = 10 - if err := validator.Validate(config); err != nil { + if err := Validate(config); err != nil { t.Errorf("Expected error to not occur when setting uid=2 in mount options and UidMapping[0].size is 10") } config.Mounts[0].Data = "uid=20" config.UidMappings[0].Size = 10 - if err := validator.Validate(config); err == nil { + if err := Validate(config); err == nil { t.Errorf("Expected error to occur when setting uid=20 in mount options and UidMapping[0].size is 10") } } func TestValidateRootlessEUIDMountGid(t *testing.T) { config := rootlessEUIDConfig() - validator := New() - config.Mounts = []*configs.Mount{ { 
Source: "devpts", @@ -127,29 +115,29 @@ func TestValidateRootlessEUIDMountGid(t *testing.T) { }, } - if err := validator.Validate(config); err != nil { + if err := Validate(config); err != nil { t.Errorf("Expected error to not occur when gid= not set in mount options: %+v", err) } config.Mounts[0].Data = "gid=5" - if err := validator.Validate(config); err == nil { + if err := Validate(config); err == nil { t.Errorf("Expected error to occur when setting gid=5 in mount options") } config.Mounts[0].Data = "gid=0" - if err := validator.Validate(config); err != nil { + if err := Validate(config); err != nil { t.Errorf("Expected error to not occur when setting gid=0 in mount options: %+v", err) } config.Mounts[0].Data = "gid=5" config.GidMappings[0].Size = 10 - if err := validator.Validate(config); err != nil { + if err := Validate(config); err != nil { t.Errorf("Expected error to not occur when setting gid=5 in mount options and GidMapping[0].size is 10") } config.Mounts[0].Data = "gid=11" config.GidMappings[0].Size = 10 - if err := validator.Validate(config); err == nil { + if err := Validate(config); err == nil { t.Errorf("Expected error to occur when setting gid=11 in mount options and GidMapping[0].size is 10") } } diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go index 6493124a3f2..6972ec9c7ed 100644 --- a/libcontainer/configs/validate/validator.go +++ b/libcontainer/configs/validate/validator.go @@ -16,30 +16,19 @@ import ( "golang.org/x/sys/unix" ) -type Validator interface { - Validate(*configs.Config) error -} - -func New() Validator { - return &ConfigValidator{} -} - -type ConfigValidator struct{} - type check func(config *configs.Config) error -func (v *ConfigValidator) Validate(config *configs.Config) error { +func Validate(config *configs.Config) error { checks := []check{ - v.cgroups, - v.rootfs, - v.network, - v.hostname, - v.security, - v.usernamespace, - v.cgroupnamespace, - v.sysctl, - v.intelrdt, - 
v.rootlessEUID, + cgroupsCheck, + rootfs, + network, + hostname, + security, + namespaces, + sysctl, + intelrdtCheck, + rootlessEUIDCheck, } for _, c := range checks { if err := c(config); err != nil { @@ -48,7 +37,7 @@ func (v *ConfigValidator) Validate(config *configs.Config) error { } // Relaxed validation rules for backward compatibility warns := []check{ - v.mounts, // TODO (runc v1.x.x): make this an error instead of a warning + mounts, // TODO (runc v1.x.x): make this an error instead of a warning } for _, c := range warns { if err := c(config); err != nil { @@ -60,7 +49,7 @@ func (v *ConfigValidator) Validate(config *configs.Config) error { // rootfs validates if the rootfs is an absolute path and is not a symlink // to the container's root filesystem. -func (v *ConfigValidator) rootfs(config *configs.Config) error { +func rootfs(config *configs.Config) error { if _, err := os.Stat(config.Rootfs); err != nil { return fmt.Errorf("invalid rootfs: %w", err) } @@ -77,7 +66,7 @@ func (v *ConfigValidator) rootfs(config *configs.Config) error { return nil } -func (v *ConfigValidator) network(config *configs.Config) error { +func network(config *configs.Config) error { if !config.Namespaces.Contains(configs.NEWNET) { if len(config.Networks) > 0 || len(config.Routes) > 0 { return errors.New("unable to apply network settings without a private NET namespace") @@ -86,14 +75,14 @@ func (v *ConfigValidator) network(config *configs.Config) error { return nil } -func (v *ConfigValidator) hostname(config *configs.Config) error { +func hostname(config *configs.Config) error { if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) { return errors.New("unable to set hostname without a private UTS namespace") } return nil } -func (v *ConfigValidator) security(config *configs.Config) error { +func security(config *configs.Config) error { // restrict sys without mount namespace if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) && 
!config.Namespaces.Contains(configs.NEWNS) { @@ -106,7 +95,7 @@ func (v *ConfigValidator) security(config *configs.Config) error { return nil } -func (v *ConfigValidator) usernamespace(config *configs.Config) error { +func namespaces(config *configs.Config) error { if config.Namespaces.Contains(configs.NEWUSER) { if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { return errors.New("USER namespaces aren't enabled in the kernel") @@ -116,15 +105,13 @@ func (v *ConfigValidator) usernamespace(config *configs.Config) error { return errors.New("User namespace mappings specified, but USER namespace isn't enabled in the config") } } - return nil -} -func (v *ConfigValidator) cgroupnamespace(config *configs.Config) error { if config.Namespaces.Contains(configs.NEWCGROUP) { if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) { return errors.New("cgroup namespaces aren't enabled in the kernel") } } + return nil } @@ -161,7 +148,7 @@ func convertSysctlVariableToDotsSeparator(val string) string { // sysctl validates that the specified sysctl keys are valid or not. // /proc/sys isn't completely namespaced and depending on which namespaces // are specified, a subset of sysctls are permitted. 
-func (v *ConfigValidator) sysctl(config *configs.Config) error { +func sysctl(config *configs.Config) error { validSysctlMap := map[string]bool{ "kernel.msgmax": true, "kernel.msgmnb": true, @@ -227,7 +214,7 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error { return nil } -func (v *ConfigValidator) intelrdt(config *configs.Config) error { +func intelrdtCheck(config *configs.Config) error { if config.IntelRdt != nil { if !intelrdt.IsCATEnabled() && !intelrdt.IsMBAEnabled() { return errors.New("intelRdt is specified in config, but Intel RDT is not supported or enabled") @@ -248,7 +235,7 @@ func (v *ConfigValidator) intelrdt(config *configs.Config) error { return nil } -func (v *ConfigValidator) cgroups(config *configs.Config) error { +func cgroupsCheck(config *configs.Config) error { c := config.Cgroups if c == nil { return nil @@ -277,7 +264,7 @@ func (v *ConfigValidator) cgroups(config *configs.Config) error { return nil } -func (v *ConfigValidator) mounts(config *configs.Config) error { +func mounts(config *configs.Config) error { for _, m := range config.Mounts { if !filepath.IsAbs(m.Destination) { return fmt.Errorf("invalid mount %+v: mount destination not absolute", m) diff --git a/libcontainer/configs/validate/validator_test.go b/libcontainer/configs/validate/validator_test.go index 5181333fb12..59a4033899e 100644 --- a/libcontainer/configs/validate/validator_test.go +++ b/libcontainer/configs/validate/validator_test.go @@ -14,8 +14,7 @@ func TestValidate(t *testing.T) { Rootfs: "/var", } - validator := New() - err := validator.Validate(config) + err := Validate(config) if err != nil { t.Errorf("Expected error to not occur: %+v", err) } @@ -32,8 +31,7 @@ func TestValidateWithInvalidRootfs(t *testing.T) { Rootfs: dir, } - validator := New() - err := validator.Validate(config) + err := Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } @@ -47,8 +45,7 @@ func TestValidateNetworkWithoutNETNamespace(t *testing.T) { 
Networks: []*configs.Network{network}, } - validator := New() - err := validator.Validate(config) + err := Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } @@ -62,8 +59,7 @@ func TestValidateNetworkRoutesWithoutNETNamespace(t *testing.T) { Routes: []*configs.Route{route}, } - validator := New() - err := validator.Validate(config) + err := Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } @@ -80,8 +76,7 @@ func TestValidateHostname(t *testing.T) { ), } - validator := New() - err := validator.Validate(config) + err := Validate(config) if err != nil { t.Errorf("Expected error to not occur: %+v", err) } @@ -93,8 +88,7 @@ func TestValidateHostnameWithoutUTSNamespace(t *testing.T) { Hostname: "runc", } - validator := New() - err := validator.Validate(config) + err := Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } @@ -111,8 +105,7 @@ func TestValidateSecurityWithMaskPaths(t *testing.T) { ), } - validator := New() - err := validator.Validate(config) + err := Validate(config) if err != nil { t.Errorf("Expected error to not occur: %+v", err) } @@ -129,8 +122,7 @@ func TestValidateSecurityWithROPaths(t *testing.T) { ), } - validator := New() - err := validator.Validate(config) + err := Validate(config) if err != nil { t.Errorf("Expected error to not occur: %+v", err) } @@ -143,8 +135,7 @@ func TestValidateSecurityWithoutNEWNS(t *testing.T) { ReadonlyPaths: []string{"/proc/sys"}, } - validator := New() - err := validator.Validate(config) + err := Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } @@ -163,8 +154,7 @@ func TestValidateUsernamespace(t *testing.T) { ), } - validator := New() - err := validator.Validate(config) + err := Validate(config) if err != nil { t.Errorf("expected error to not occur %+v", err) } @@ -177,8 +167,7 @@ func TestValidateUsernamespaceWithoutUserNS(t *testing.T) { UidMappings: []configs.IDMap{uidMap}, } - 
validator := New() - err := validator.Validate(config) + err := Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } @@ -226,8 +215,7 @@ func TestValidateSysctl(t *testing.T) { Sysctl: map[string]string{k: v}, } - validator := New() - err := validator.Validate(config) + err := Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } @@ -259,8 +247,7 @@ func TestValidateValidSysctl(t *testing.T) { }, } - validator := New() - err := validator.Validate(config) + err := Validate(config) if err != nil { t.Errorf("Expected error to not occur with {%s=%s} but got: %q", k, v, err) } @@ -281,8 +268,7 @@ func TestValidateSysctlWithSameNs(t *testing.T) { ), } - validator := New() - err := validator.Validate(config) + err := Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } @@ -323,8 +309,7 @@ func TestValidateSysctlWithBindHostNetNS(t *testing.T) { ), } - validator := New() - if err := validator.Validate(config); err == nil { + if err := Validate(config); err == nil { t.Error("Expected error to occur but it was nil") } } @@ -336,8 +321,7 @@ func TestValidateSysctlWithoutNETNamespace(t *testing.T) { Namespaces: []configs.Namespace{}, } - validator := New() - err := validator.Validate(config) + err := Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } @@ -358,8 +342,6 @@ func TestValidateMounts(t *testing.T) { {isErr: false, dest: "/abs/but/../unclean"}, } - validator := New() - for _, tc := range testCases { config := &configs.Config{ Rootfs: "/var", @@ -368,7 +350,7 @@ func TestValidateMounts(t *testing.T) { }, } - err := validator.Validate(config) + err := Validate(config) if tc.isErr && err == nil { t.Errorf("mount dest: %s, expected error, got nil", tc.dest) } diff --git a/libcontainer/container.go b/libcontainer/container.go index 300c9526cf9..f4e972705d2 100644 --- a/libcontainer/container.go +++ b/libcontainer/container.go @@ -5,11 +5,9 @@ 
package libcontainer import ( - "os" "time" "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runtime-spec/specs-go" ) // Status is the status of a container. @@ -63,68 +61,3 @@ type BaseState struct { // Config is the container's configuration. Config configs.Config `json:"config"` } - -// BaseContainer is a libcontainer container object. -// -// Each container is thread-safe within the same process. Since a container can -// be destroyed by a separate process, any function may return that the container -// was not found. BaseContainer includes methods that are platform agnostic. -type BaseContainer interface { - // Returns the ID of the container - ID() string - - // Returns the current status of the container. - Status() (Status, error) - - // State returns the current container's state information. - State() (*State, error) - - // OCIState returns the current container's state information. - OCIState() (*specs.State, error) - - // Returns the current config of the container. - Config() configs.Config - - // Returns the PIDs inside this container. The PIDs are in the namespace of the calling process. - // - // Some of the returned PIDs may no longer refer to processes in the Container, unless - // the Container state is PAUSED in which case every PID in the slice is valid. - Processes() ([]int, error) - - // Returns statistics for the container. - Stats() (*Stats, error) - - // Set resources of container as configured - // - // We can use this to change resources when containers are running. - // - Set(config configs.Config) error - - // Start a process inside the container. Returns error if process fails to - // start. You can track process lifecycle with passed Process structure. - Start(process *Process) (err error) - - // Run immediately starts the process inside the container. Returns error if process - // fails to start. 
It does not block waiting for the exec fifo after start returns but - // opens the fifo after start returns. - Run(process *Process) (err error) - - // Destroys the container, if its in a valid state, after killing any - // remaining running processes. - // - // Any event registrations are removed before the container is destroyed. - // No error is returned if the container is already destroyed. - // - // Running containers must first be stopped using Signal(..). - // Paused containers must first be resumed using Resume(..). - Destroy() error - - // Signal sends the provided signal code to the container's initial process. - // - // If all is specified the signal is sent to all processes in the container - // including the initial process. - Signal(s os.Signal, all bool) error - - // Exec signals the container to exec the users process at the end of the init. - Exec() error -} diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index f6877b7429f..9f3be47014a 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -35,19 +35,14 @@ import ( const stdioFdCount = 3 -type linuxContainer struct { +type Container struct { id string root string config *configs.Config cgroupManager cgroups.Manager - intelRdtManager intelrdt.Manager - initPath string - initArgs []string + intelRdtManager *intelrdt.Manager initProcess parentProcess initProcessStartTime uint64 - criuPath string - newuidmapPath string - newgidmapPath string m sync.Mutex criuVersion int state containerState @@ -84,69 +79,44 @@ type State struct { IntelRdtPath string `json:"intel_rdt_path"` } -// Container is a libcontainer container object. -// -// Each container is thread-safe within the same process. Since a container can -// be destroyed by a separate process, any function may return that the container -// was not found. 
-type Container interface { - BaseContainer - - // Methods below here are platform specific - - // Checkpoint checkpoints the running container's state to disk using the criu(8) utility. - Checkpoint(criuOpts *CriuOpts) error - - // Restore restores the checkpointed container to a running state using the criu(8) utility. - Restore(process *Process, criuOpts *CriuOpts) error - - // If the Container state is RUNNING or CREATED, sets the Container state to PAUSING and pauses - // the execution of any user processes. Asynchronously, when the container finished being paused the - // state is changed to PAUSED. - // If the Container state is PAUSED, do nothing. - Pause() error - - // If the Container state is PAUSED, resumes the execution of any user processes in the - // Container before setting the Container state to RUNNING. - // If the Container state is RUNNING, do nothing. - Resume() error - - // NotifyOOM returns a read-only channel signaling when the container receives an OOM notification. - NotifyOOM() (<-chan struct{}, error) - - // NotifyMemoryPressure returns a read-only channel signaling when the container reaches a given pressure level - NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) -} - -// ID returns the container's unique ID -func (c *linuxContainer) ID() string { +// ID returns the container's unique ID. +func (c *Container) ID() string { return c.id } -// Config returns the container's configuration -func (c *linuxContainer) Config() configs.Config { +// Config returns the container's configuration. +func (c *Container) Config() configs.Config { return *c.config } -func (c *linuxContainer) Status() (Status, error) { +// Status returns the current status of the container. +func (c *Container) Status() (Status, error) { c.m.Lock() defer c.m.Unlock() return c.currentStatus() } -func (c *linuxContainer) State() (*State, error) { +// State returns the current container's state information. 
+func (c *Container) State() (*State, error) { c.m.Lock() defer c.m.Unlock() return c.currentState() } -func (c *linuxContainer) OCIState() (*specs.State, error) { +// OCIState returns the current container's state information. +func (c *Container) OCIState() (*specs.State, error) { c.m.Lock() defer c.m.Unlock() return c.currentOCIState() } -func (c *linuxContainer) Processes() ([]int, error) { +// Processes returns the PIDs inside this container. The PIDs are in the +// namespace of the calling process. +// +// Some of the returned PIDs may no longer refer to processes in the container, +// unless the container state is PAUSED in which case every PID in the slice is +// valid. +func (c *Container) Processes() ([]int, error) { var pids []int status, err := c.currentStatus() if err != nil { @@ -164,7 +134,8 @@ func (c *linuxContainer) Processes() ([]int, error) { return pids, nil } -func (c *linuxContainer) Stats() (*Stats, error) { +// Stats returns statistics for the container. +func (c *Container) Stats() (*Stats, error) { var ( err error stats = &Stats{} @@ -190,7 +161,9 @@ func (c *linuxContainer) Stats() (*Stats, error) { return stats, nil } -func (c *linuxContainer) Set(config configs.Config) error { +// Set resources of container as configured. Can be used to change resources +// when container is running. +func (c *Container) Set(config configs.Config) error { c.m.Lock() defer c.m.Unlock() status, err := c.currentStatus() @@ -225,7 +198,9 @@ func (c *linuxContainer) Set(config configs.Config) error { return err } -func (c *linuxContainer) Start(process *Process) error { +// Start starts a process inside the container. Returns error if process fails +// to start. You can track process lifecycle with passed Process structure. 
+func (c *Container) Start(process *Process) error { c.m.Lock() defer c.m.Unlock() if c.config.Cgroups.Resources.SkipDevices { @@ -245,7 +220,10 @@ func (c *linuxContainer) Start(process *Process) error { return nil } -func (c *linuxContainer) Run(process *Process) error { +// Run immediately starts the process inside the container. Returns error if process +// fails to start. It does not block waiting for the exec fifo after start returns but +// opens the fifo after start returns. +func (c *Container) Run(process *Process) error { if err := c.Start(process); err != nil { return err } @@ -255,13 +233,14 @@ func (c *linuxContainer) Run(process *Process) error { return nil } -func (c *linuxContainer) Exec() error { +// Exec signals the container to exec the user's process at the end of the init. +func (c *Container) Exec() error { c.m.Lock() defer c.m.Unlock() return c.exec() } -func (c *linuxContainer) exec() error { +func (c *Container) exec() error { path := filepath.Join(c.root, execFifoFilename) pid := c.initProcess.pid() blockingFifoOpenCh := awaitFifoOpen(path) @@ -333,7 +312,7 @@ type openResult struct { err error } -func (c *linuxContainer) start(process *Process) (retErr error) { +func (c *Container) start(process *Process) (retErr error) { parent, err := c.newParentProcess(process) if err != nil { return fmt.Errorf("unable to create new parent process: %w", err) @@ -374,7 +353,11 @@ func (c *linuxContainer) start(process *Process) (retErr error) { return nil } -func (c *linuxContainer) Signal(s os.Signal, all bool) error { +// Signal sends the provided signal code to the container's initial process. +// +// If all is specified the signal is sent to all processes in the container +// including the initial process.
+func (c *Container) Signal(s os.Signal, all bool) error { c.m.Lock() defer c.m.Unlock() status, err := c.currentStatus() @@ -406,7 +389,7 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error { return ErrNotRunning } -func (c *linuxContainer) createExecFifo() error { +func (c *Container) createExecFifo() error { rootuid, err := c.Config().HostRootUID() if err != nil { return err @@ -429,7 +412,7 @@ func (c *linuxContainer) createExecFifo() error { return os.Chown(fifoName, rootuid, rootgid) } -func (c *linuxContainer) deleteExecFifo() { +func (c *Container) deleteExecFifo() { fifoName := filepath.Join(c.root, execFifoFilename) os.Remove(fifoName) } @@ -438,7 +421,7 @@ func (c *linuxContainer) deleteExecFifo() { // container cannot access the statedir (and the FIFO itself remains // un-opened). It then adds the FifoFd to the given exec.Cmd as an inherited // fd, with _LIBCONTAINER_FIFOFD set to its fd number. -func (c *linuxContainer) includeExecFifo(cmd *exec.Cmd) error { +func (c *Container) includeExecFifo(cmd *exec.Cmd) error { fifoName := filepath.Join(c.root, execFifoFilename) fifo, err := os.OpenFile(fifoName, unix.O_PATH|unix.O_CLOEXEC, 0) if err != nil { @@ -452,7 +435,7 @@ func (c *linuxContainer) includeExecFifo(cmd *exec.Cmd) error { return nil } -func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) { +func (c *Container) newParentProcess(p *Process) (parentProcess, error) { parentInitPipe, childInitPipe, err := utils.NewSockPair("init") if err != nil { return nil, fmt.Errorf("unable to create init pipe: %w", err) @@ -481,9 +464,9 @@ func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) { return c.newInitProcess(p, cmd, messageSockPair, logFilePair) } -func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, childLogPipe *os.File) *exec.Cmd { - cmd := exec.Command(c.initPath, c.initArgs[1:]...) 
- cmd.Args[0] = c.initArgs[0] +func (c *Container) commandTemplate(p *Process, childInitPipe *os.File, childLogPipe *os.File) *exec.Cmd { + cmd := exec.Command("/proc/self/exe", "init") + cmd.Args[0] = os.Args[0] cmd.Stdin = p.Stdin cmd.Stdout = p.Stdout cmd.Stderr = p.Stderr @@ -507,9 +490,10 @@ func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, chi cmd.ExtraFiles = append(cmd.ExtraFiles, childLogPipe) cmd.Env = append(cmd.Env, - "_LIBCONTAINER_LOGPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1), - "_LIBCONTAINER_LOGLEVEL="+p.LogLevel, - ) + "_LIBCONTAINER_LOGPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1)) + if p.LogLevel != "" { + cmd.Env = append(cmd.Env, "_LIBCONTAINER_LOGLEVEL="+p.LogLevel) + } // NOTE: when running a container with no PID namespace and the parent process spawning the container is // PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason @@ -523,7 +507,7 @@ func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, chi // shouldSendMountSources says whether the child process must setup bind mounts with // the source pre-opened (O_PATH) in the host user namespace. // See https://github.com/opencontainers/runc/issues/2484 -func (c *linuxContainer) shouldSendMountSources() bool { +func (c *Container) shouldSendMountSources() bool { // Passing the mount sources via SCM_RIGHTS is only necessary when // both userns and mntns are active. 
if !c.config.Namespaces.Contains(configs.NEWUSER) || @@ -547,7 +531,7 @@ func (c *linuxContainer) shouldSendMountSources() bool { return false } -func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) { +func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) { cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard)) nsMaps := make(map[configs.NamespaceType]string) for _, ns := range c.config.Namespaces { @@ -606,7 +590,7 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPa return init, nil } -func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*setnsProcess, error) { +func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*setnsProcess, error) { cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns)) state, err := c.currentState() if err != nil { @@ -655,7 +639,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP return proc, nil } -func (c *linuxContainer) newInitConfig(process *Process) *initConfig { +func (c *Container) newInitConfig(process *Process) *initConfig { cfg := &initConfig{ Config: c.config, Args: process.Args, @@ -665,7 +649,7 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig { Cwd: process.Cwd, Capabilities: process.Capabilities, PassedFilesCount: len(process.ExtraFiles), - ContainerId: c.ID(), + ContainerID: c.ID(), NoNewPrivileges: c.config.NoNewPrivileges, RootlessEUID: c.config.RootlessEUID, RootlessCgroups: c.config.RootlessCgroups, @@ -695,13 +679,25 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig { return cfg } -func (c *linuxContainer) Destroy() error { +// Destroys the container, if its in a valid state, after killing any +// remaining running processes. 
+// +// Any event registrations are removed before the container is destroyed. +// No error is returned if the container is already destroyed. +// +// Running containers must first be stopped using Signal(..). +// Paused containers must first be resumed using Resume(..). +func (c *Container) Destroy() error { c.m.Lock() defer c.m.Unlock() return c.state.destroy() } -func (c *linuxContainer) Pause() error { +// If the Container state is RUNNING or CREATED, sets the Container +// state to PAUSING and pauses the execution of any user processes. +// Asynchronously, when the container finished being paused the state +// is changed to PAUSED. If the Container state is PAUSED, do nothing. +func (c *Container) Pause() error { c.m.Lock() defer c.m.Unlock() status, err := c.currentStatus() @@ -720,7 +716,11 @@ func (c *linuxContainer) Pause() error { return ErrNotRunning } -func (c *linuxContainer) Resume() error { +// Resume resumes the execution of any user processes in the +// container before setting the container state to RUNNING. +// This is only performed if the current state is PAUSED. +// If the Container state is RUNNING, does nothing. +func (c *Container) Resume() error { c.m.Lock() defer c.m.Unlock() status, err := c.currentStatus() @@ -738,7 +738,9 @@ func (c *linuxContainer) Resume() error { }) } -func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) { +// NotifyOOM returns a read-only channel signaling when the container receives +// an OOM notification. +func (c *Container) NotifyOOM() (<-chan struct{}, error) { // XXX(cyphar): This requires cgroups. 
if c.config.RootlessCgroups { logrus.Warn("getting OOM notifications may fail if you don't have the full access to cgroups") @@ -750,7 +752,9 @@ func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) { return notifyOnOOM(path) } -func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) { +// NotifyMemoryPressure returns a read-only channel signaling when the +// container reaches a given pressure level. +func (c *Container) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) { // XXX(cyphar): This requires cgroups. if c.config.RootlessCgroups { logrus.Warn("getting memory pressure notifications may fail if you don't have the full access to cgroups") @@ -760,7 +764,7 @@ func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struc var criuFeatures *criurpc.CriuFeatures -func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.CriuOpts, criuFeat *criurpc.CriuFeatures) error { +func (c *Container) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.CriuOpts, criuFeat *criurpc.CriuFeatures) error { t := criurpc.CriuReqType_FEATURE_CHECK // make sure the features we are looking for are really not from @@ -821,7 +825,7 @@ func compareCriuVersion(criuVersion int, minVersion int) error { } // checkCriuVersion checks Criu version greater than or equal to minVersion -func (c *linuxContainer) checkCriuVersion(minVersion int) error { +func (c *Container) checkCriuVersion(minVersion int) error { // If the version of criu has already been determined there is no need // to ask criu for the version again. Use the value from c.criuVersion. 
if c.criuVersion != 0 { @@ -829,7 +833,6 @@ func (c *linuxContainer) checkCriuVersion(minVersion int) error { } criu := criu.MakeCriu() - criu.SetCriuPath(c.criuPath) var err error c.criuVersion, err = criu.GetCriuVersion() if err != nil { @@ -841,7 +844,7 @@ func (c *linuxContainer) checkCriuVersion(minVersion int) error { const descriptorsFilename = "descriptors.json" -func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) { +func (c *Container) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) { mountDest := strings.TrimPrefix(m.Destination, c.config.Rootfs) if dest, err := securejoin.SecureJoin(c.config.Rootfs, mountDest); err == nil { mountDest = dest[len(c.config.Rootfs):] @@ -853,7 +856,7 @@ func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt) } -func (c *linuxContainer) addMaskPaths(req *criurpc.CriuReq) error { +func (c *Container) addMaskPaths(req *criurpc.CriuReq) error { for _, path := range c.config.MaskPaths { fi, err := os.Stat(fmt.Sprintf("/proc/%d/root/%s", c.initProcess.pid(), path)) if err != nil { @@ -875,7 +878,7 @@ func (c *linuxContainer) addMaskPaths(req *criurpc.CriuReq) error { return nil } -func (c *linuxContainer) handleCriuConfigurationFile(rpcOpts *criurpc.CriuOpts) { +func (c *Container) handleCriuConfigurationFile(rpcOpts *criurpc.CriuOpts) { // CRIU will evaluate a configuration starting with release 3.11. // Settings in the configuration file will overwrite RPC settings. // Look for annotations. 
The annotation 'org.criu.config' @@ -901,7 +904,7 @@ func (c *linuxContainer) handleCriuConfigurationFile(rpcOpts *criurpc.CriuOpts) } } -func (c *linuxContainer) criuSupportsExtNS(t configs.NamespaceType) bool { +func (c *Container) criuSupportsExtNS(t configs.NamespaceType) bool { var minVersion int switch t { case configs.NEWNET: @@ -921,7 +924,7 @@ func criuNsToKey(t configs.NamespaceType) string { return "extRoot" + strings.Title(configs.NsName(t)) + "NS" } -func (c *linuxContainer) handleCheckpointingExternalNamespaces(rpcOpts *criurpc.CriuOpts, t configs.NamespaceType) error { +func (c *Container) handleCheckpointingExternalNamespaces(rpcOpts *criurpc.CriuOpts, t configs.NamespaceType) error { if !c.criuSupportsExtNS(t) { return nil } @@ -943,7 +946,7 @@ func (c *linuxContainer) handleCheckpointingExternalNamespaces(rpcOpts *criurpc. return nil } -func (c *linuxContainer) handleRestoringNamespaces(rpcOpts *criurpc.CriuOpts, extraFiles *[]*os.File) error { +func (c *Container) handleRestoringNamespaces(rpcOpts *criurpc.CriuOpts, extraFiles *[]*os.File) error { for _, ns := range c.config.Namespaces { switch ns.Type { case configs.NEWNET, configs.NEWPID: @@ -981,7 +984,7 @@ func (c *linuxContainer) handleRestoringNamespaces(rpcOpts *criurpc.CriuOpts, ex return nil } -func (c *linuxContainer) handleRestoringExternalNamespaces(rpcOpts *criurpc.CriuOpts, extraFiles *[]*os.File, t configs.NamespaceType) error { +func (c *Container) handleRestoringExternalNamespaces(rpcOpts *criurpc.CriuOpts, extraFiles *[]*os.File, t configs.NamespaceType) error { if !c.criuSupportsExtNS(t) { return nil } @@ -1012,7 +1015,9 @@ func (c *linuxContainer) handleRestoringExternalNamespaces(rpcOpts *criurpc.Criu return nil } -func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { +// Checkpoint checkpoints the running container's state to disk using the +// criu(8) utility. 
+func (c *Container) Checkpoint(criuOpts *CriuOpts) error { c.m.Lock() defer c.m.Unlock() @@ -1220,7 +1225,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { return nil } -func (c *linuxContainer) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mount) { +func (c *Container) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mount) { mountDest := strings.TrimPrefix(m.Destination, c.config.Rootfs) if dest, err := securejoin.SecureJoin(c.config.Rootfs, mountDest); err == nil { mountDest = dest[len(c.config.Rootfs):] @@ -1232,7 +1237,7 @@ func (c *linuxContainer) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mo req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt) } -func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts) { +func (c *Container) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts) { for _, iface := range c.config.Networks { switch iface.Type { case "veth": @@ -1255,7 +1260,7 @@ func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts // makeCriuRestoreMountpoints makes the actual mountpoints for the // restore using CRIU. This function is inspired from the code in // rootfs_linux.go -func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error { +func (c *Container) makeCriuRestoreMountpoints(m *configs.Mount) error { switch m.Device { case "cgroup": // No mount point(s) need to be created: @@ -1307,7 +1312,7 @@ func isPathInPrefixList(path string, prefix []string) bool { // runc modifies the rootfs to add mountpoints which do not exist. // This function also creates missing mountpoints as long as they // are not on top of a tmpfs, as CRIU will restore tmpfs content anyway. 
-func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error { +func (c *Container) prepareCriuRestoreMounts(mounts []*configs.Mount) error { // First get a list of a all tmpfs mounts tmpfs := []string{} for _, m := range mounts { @@ -1364,7 +1369,9 @@ func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error return nil } -func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { +// Restore restores the checkpointed container to a running state using the +// criu(8) utility. +func (c *Container) Restore(process *Process, criuOpts *CriuOpts) error { c.m.Lock() defer c.m.Unlock() @@ -1538,7 +1545,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { return err } -func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { +func (c *Container) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { // need to apply cgroups only on restore if req.GetType() != criurpc.CriuReqType_RESTORE { return nil @@ -1575,7 +1582,7 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { return nil } -func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, extraFiles []*os.File) error { +func (c *Container) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, extraFiles []*os.File) error { fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0) if err != nil { return err @@ -1606,9 +1613,9 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * if c.criuVersion != 0 { // If the CRIU Version is still '0' then this is probably // the initial CRIU run to detect the version. Skip it. - logrus.Debugf("Using CRIU %d at: %s", c.criuVersion, c.criuPath) + logrus.Debugf("Using CRIU %d", c.criuVersion) } - cmd := exec.Command(c.criuPath, args...) + cmd := exec.Command("criu", args...) 
if process != nil { cmd.Stdin = process.Stdin cmd.Stdout = process.Stdout @@ -1794,7 +1801,7 @@ func unlockNetwork(config *configs.Config) error { return nil } -func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, cmd *exec.Cmd, opts *CriuOpts, fds []string, oob []byte) error { +func (c *Container) criuNotifications(resp *criurpc.CriuResp, process *Process, cmd *exec.Cmd, opts *CriuOpts, fds []string, oob []byte) error { notify := resp.GetNotify() if notify == nil { return fmt.Errorf("invalid response: %s", resp.String()) @@ -1892,7 +1899,7 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc return nil } -func (c *linuxContainer) updateState(process parentProcess) (*State, error) { +func (c *Container) updateState(process parentProcess) (*State, error) { if process != nil { c.initProcess = process } @@ -1907,7 +1914,7 @@ func (c *linuxContainer) updateState(process parentProcess) (*State, error) { return state, nil } -func (c *linuxContainer) saveState(s *State) (retErr error) { +func (c *Container) saveState(s *State) (retErr error) { tmpFile, err := os.CreateTemp(c.root, "state-") if err != nil { return err @@ -1933,7 +1940,7 @@ func (c *linuxContainer) saveState(s *State) (retErr error) { return os.Rename(tmpFile.Name(), stateFilePath) } -func (c *linuxContainer) currentStatus() (Status, error) { +func (c *Container) currentStatus() (Status, error) { if err := c.refreshState(); err != nil { return -1, err } @@ -1944,7 +1951,7 @@ func (c *linuxContainer) currentStatus() (Status, error) { // container is what is true. Because consumers of libcontainer can use it // out of process we need to verify the container's status based on runtime // information and not rely on our in process info. 
-func (c *linuxContainer) refreshState() error { +func (c *Container) refreshState() error { paused, err := c.isPaused() if err != nil { return err @@ -1962,7 +1969,7 @@ func (c *linuxContainer) refreshState() error { return c.state.transition(&stoppedState{c: c}) } -func (c *linuxContainer) runType() Status { +func (c *Container) runType() Status { if c.initProcess == nil { return Stopped } @@ -1982,7 +1989,7 @@ func (c *linuxContainer) runType() Status { return Running } -func (c *linuxContainer) isPaused() (bool, error) { +func (c *Container) isPaused() (bool, error) { state, err := c.cgroupManager.GetFreezerState() if err != nil { return false, err @@ -1990,7 +1997,7 @@ func (c *linuxContainer) isPaused() (bool, error) { return state == configs.Frozen, nil } -func (c *linuxContainer) currentState() (*State, error) { +func (c *Container) currentState() (*State, error) { var ( startTime uint64 externalDescriptors []string @@ -2037,7 +2044,7 @@ func (c *linuxContainer) currentState() (*State, error) { return state, nil } -func (c *linuxContainer) currentOCIState() (*specs.State, error) { +func (c *Container) currentOCIState() (*specs.State, error) { bundle, annotations := utils.Annotations(c.config.Labels) state := &specs.State{ Version: specs.Version, @@ -2060,7 +2067,7 @@ func (c *linuxContainer) currentOCIState() (*specs.State, error) { // orderNamespacePaths sorts namespace paths into a list of paths that we // can setns in order. -func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) { +func (c *Container) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) { paths := []string{} for _, ns := range configs.NamespaceTypes() { @@ -2113,7 +2120,7 @@ type netlinkError struct{ error } // such as one that uses nsenter package to bootstrap the container's // init process correctly, i.e. with correct namespaces, uid/gid // mapping etc. 
-func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, it initType) (_ io.Reader, Err error) { +func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, it initType) (_ io.Reader, Err error) { // create the netlink message r := nl.NewNetlinkRequest(int(InitMsg), 0) @@ -2153,11 +2160,16 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na if !joinExistingUser { // write uid mappings if len(c.config.UidMappings) > 0 { - if c.config.RootlessEUID && c.newuidmapPath != "" { - r.AddData(&Bytemsg{ - Type: UidmapPathAttr, - Value: []byte(c.newuidmapPath), - }) + if c.config.RootlessEUID { + // We resolve the paths for new{u,g}idmap from + // the context of runc to avoid doing a path + // lookup in the nsexec context. + if path, err := exec.LookPath("newuidmap"); err == nil { + r.AddData(&Bytemsg{ + Type: UidmapPathAttr, + Value: []byte(path), + }) + } } b, err := encodeIDMapping(c.config.UidMappings) if err != nil { @@ -2179,11 +2191,13 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na Type: GidmapAttr, Value: b, }) - if c.config.RootlessEUID && c.newgidmapPath != "" { - r.AddData(&Bytemsg{ - Type: GidmapPathAttr, - Value: []byte(c.newgidmapPath), - }) + if c.config.RootlessEUID { + if path, err := exec.LookPath("newgidmap"); err == nil { + r.AddData(&Bytemsg{ + Type: GidmapPathAttr, + Value: []byte(path), + }) + } } if requiresRootOrMappingTool(c.config) { r.AddData(&Boolmsg{ diff --git a/libcontainer/container_linux_test.go b/libcontainer/container_linux_test.go index 3eb6e5affa7..6551de8085f 100644 --- a/libcontainer/container_linux_test.go +++ b/libcontainer/container_linux_test.go @@ -7,22 +7,15 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/intelrdt" "github.com/opencontainers/runc/libcontainer/system" 
) type mockCgroupManager struct { pids []int allPids []int - stats *cgroups.Stats paths map[string]string } -type mockIntelRdtManager struct { - stats *intelrdt.Stats - path string -} - func (m *mockCgroupManager) GetPids() ([]int, error) { return m.pids, nil } @@ -32,7 +25,7 @@ func (m *mockCgroupManager) GetAllPids() ([]int, error) { } func (m *mockCgroupManager) GetStats() (*cgroups.Stats, error) { - return m.stats, nil + return nil, nil } func (m *mockCgroupManager) Apply(pid int) error { @@ -76,30 +69,6 @@ func (m *mockCgroupManager) GetFreezerState() (configs.FreezerState, error) { return configs.Thawed, nil } -func (m *mockIntelRdtManager) Apply(pid int) error { - return nil -} - -func (m *mockIntelRdtManager) GetStats() (*intelrdt.Stats, error) { - return m.stats, nil -} - -func (m *mockIntelRdtManager) Destroy() error { - return nil -} - -func (m *mockIntelRdtManager) GetPath() string { - return m.path -} - -func (m *mockIntelRdtManager) Set(container *configs.Config) error { - return nil -} - -func (m *mockIntelRdtManager) GetCgroups() (*configs.Cgroup, error) { - return nil, nil -} - type mockProcess struct { _pid int started uint64 @@ -146,7 +115,7 @@ func TestGetContainerPids(t *testing.T) { if err != nil { t.Fatalf("can't stat pid %d, got %v", pid, err) } - container := &linuxContainer{ + container := &Container{ id: "myid", config: &configs.Config{}, cgroupManager: &mockCgroupManager{ @@ -173,63 +142,13 @@ func TestGetContainerPids(t *testing.T) { } } -func TestGetContainerStats(t *testing.T) { - container := &linuxContainer{ - id: "myid", - config: &configs.Config{}, - cgroupManager: &mockCgroupManager{ - pids: []int{1, 2, 3}, - stats: &cgroups.Stats{ - MemoryStats: cgroups.MemoryStats{ - Usage: cgroups.MemoryData{ - Usage: 1024, - }, - }, - }, - }, - intelRdtManager: &mockIntelRdtManager{ - stats: &intelrdt.Stats{ - L3CacheSchema: "L3:0=f;1=f0", - MemBwSchema: "MB:0=20;1=70", - }, - }, - } - stats, err := container.Stats() - if err != nil { - 
t.Fatal(err) - } - if stats.CgroupStats == nil { - t.Fatal("cgroup stats are nil") - } - if stats.CgroupStats.MemoryStats.Usage.Usage != 1024 { - t.Fatalf("expected memory usage 1024 but received %d", stats.CgroupStats.MemoryStats.Usage.Usage) - } - if intelrdt.IsCATEnabled() { - if stats.IntelRdtStats == nil { - t.Fatal("intel rdt stats are nil") - } - if stats.IntelRdtStats.L3CacheSchema != "L3:0=f;1=f0" { - t.Fatalf("expected L3CacheSchema L3:0=f;1=f0 but received %s", stats.IntelRdtStats.L3CacheSchema) - } - } - if intelrdt.IsMBAEnabled() { - if stats.IntelRdtStats == nil { - t.Fatal("intel rdt stats are nil") - } - if stats.IntelRdtStats.MemBwSchema != "MB:0=20;1=70" { - t.Fatalf("expected MemBwSchema MB:0=20;1=70 but received %s", stats.IntelRdtStats.MemBwSchema) - } - } -} - func TestGetContainerState(t *testing.T) { var ( - pid = os.Getpid() - expectedMemoryPath = "/sys/fs/cgroup/memory/myid" - expectedNetworkPath = fmt.Sprintf("/proc/%d/ns/net", pid) - expectedIntelRdtPath = "/sys/fs/resctrl/myid" + pid = os.Getpid() + expectedMemoryPath = "/sys/fs/cgroup/memory/myid" + expectedNetworkPath = fmt.Sprintf("/proc/%d/ns/net", pid) ) - container := &linuxContainer{ + container := &Container{ id: "myid", config: &configs.Config{ Namespaces: []configs.Namespace{ @@ -248,24 +167,10 @@ func TestGetContainerState(t *testing.T) { }, cgroupManager: &mockCgroupManager{ pids: []int{1, 2, 3}, - stats: &cgroups.Stats{ - MemoryStats: cgroups.MemoryStats{ - Usage: cgroups.MemoryData{ - Usage: 1024, - }, - }, - }, paths: map[string]string{ "memory": expectedMemoryPath, }, }, - intelRdtManager: &mockIntelRdtManager{ - stats: &intelrdt.Stats{ - L3CacheSchema: "L3:0=f0;1=f", - MemBwSchema: "MB:0=70;1=20", - }, - path: expectedIntelRdtPath, - }, } container.state = &createdState{c: container} state, err := container.State() @@ -285,15 +190,6 @@ func TestGetContainerState(t *testing.T) { if memPath := paths["memory"]; memPath != expectedMemoryPath { t.Fatalf("expected memory path 
%q but received %q", expectedMemoryPath, memPath) } - if intelrdt.IsCATEnabled() || intelrdt.IsMBAEnabled() { - intelRdtPath := state.IntelRdtPath - if intelRdtPath == "" { - t.Fatal("intel rdt path should not be empty") - } - if intelRdtPath != expectedIntelRdtPath { - t.Fatalf("expected intel rdt path %q but received %q", expectedIntelRdtPath, intelRdtPath) - } - } for _, ns := range container.config.Namespaces { path := state.NamespacePaths[ns.Type] if path == "" { @@ -336,7 +232,7 @@ func TestGetContainerStateAfterUpdate(t *testing.T) { t.Fatal(err) } - container := &linuxContainer{ + container := &Container{ root: t.TempDir(), id: "myid", config: &configs.Config{ diff --git a/libcontainer/factory.go b/libcontainer/factory.go index 9f9e8fc583c..9b5b461bf30 100644 --- a/libcontainer/factory.go +++ b/libcontainer/factory.go @@ -1,30 +1,212 @@ package libcontainer import ( + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "regexp" + + securejoin "github.com/cyphar/filepath-securejoin" + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/libcontainer/cgroups/manager" "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/configs/validate" + "github.com/opencontainers/runc/libcontainer/intelrdt" + "github.com/opencontainers/runc/libcontainer/utils" + "github.com/sirupsen/logrus" +) + +const ( + stateFilename = "state.json" + execFifoFilename = "exec.fifo" ) -type Factory interface { - // Creates a new container with the given id and starts the initial process inside it. - // id must be a string containing only letters, digits and underscores and must contain - // between 1 and 1024 characters, inclusive. - // - // The id must not already be in use by an existing container. Containers created using - // a factory with the same path (and filesystem) must have distinct ids. - // - // Returns the new container with a running process. 
- // - // On error, any partially created container parts are cleaned up (the operation is atomic). - Create(id string, config *configs.Config) (Container, error) - - // Load takes an ID for an existing container and returns the container information - // from the state. This presents a read only view of the container. - Load(id string) (Container, error) - - // StartInitialization is an internal API to libcontainer used during the reexec of the - // container. - StartInitialization() error - - // Type returns info string about factory type (e.g. lxc, libcontainer...) - Type() string +var idRegex = regexp.MustCompile(`^[\w+-\.]+$`) + +// New returns a linux based container factory based in the root directory. +func New(root string) (*Factory, error) { + absRoot, err := filepath.Abs(root) + if err != nil { + return nil, err + } + if err := os.MkdirAll(root, 0o700); err != nil { + return nil, err + } + return &Factory{ + Root: absRoot, + }, nil +} + +// Factory implements the default factory interface for linux based systems. +type Factory struct { + // Root directory for the factory to store state. + Root string +} + +// Creates a new container with the given id and starts the initial process inside it. +// id must be a string containing only letters, digits and underscores and must contain +// between 1 and 1024 characters, inclusive. +// +// The id must not already be in use by an existing container. Containers created using +// a factory with the same path (and filesystem) must have distinct ids. +// +// Returns the new container with a running process. +// +// On error, any partially created container parts are cleaned up (the operation is atomic). 
+func (l *Factory) Create(id string, config *configs.Config) (*Container, error) { + if l.Root == "" { + return nil, errors.New("root not set") + } + if err := l.validateID(id); err != nil { + return nil, err + } + if err := validate.Validate(config); err != nil { + return nil, err + } + containerRoot, err := securejoin.SecureJoin(l.Root, id) + if err != nil { + return nil, err + } + if _, err := os.Stat(containerRoot); err == nil { + return nil, ErrExist + } else if !os.IsNotExist(err) { + return nil, err + } + + cm, err := manager.New(config.Cgroups) + if err != nil { + return nil, err + } + + // Check that cgroup does not exist or empty (no processes). + // Note for cgroup v1 this check is not thorough, as there are multiple + // separate hierarchies, while both Exists() and GetAllPids() only use + // one for "devices" controller (assuming others are the same, which is + // probably true in almost all scenarios). Checking all the hierarchies + // would be too expensive. + if cm.Exists() { + pids, err := cm.GetAllPids() + // Reading PIDs can race with cgroups removal, so ignore ENOENT and ENODEV. + if err != nil && !errors.Is(err, os.ErrNotExist) && !errors.Is(err, unix.ENODEV) { + return nil, fmt.Errorf("unable to get cgroup PIDs: %w", err) + } + if len(pids) != 0 { + // TODO: return an error. + logrus.Warnf("container's cgroup is not empty: %d process(es) found", len(pids)) + logrus.Warn("DEPRECATED: running container in a non-empty cgroup won't be supported in runc 1.2; https://github.com/opencontainers/runc/issues/3132") + } + } + + // Check that cgroup is not frozen. Do not use Exists() here + // since in cgroup v1 it only checks "devices" controller. 
+ st, err := cm.GetFreezerState() + if err != nil { + return nil, fmt.Errorf("unable to get cgroup freezer state: %w", err) + } + if st == configs.Frozen { + return nil, errors.New("container's cgroup unexpectedly frozen") + } + + if err := os.MkdirAll(containerRoot, 0o711); err != nil { + return nil, err + } + c := &Container{ + id: id, + root: containerRoot, + config: config, + cgroupManager: cm, + intelRdtManager: intelrdt.NewManager(config, id, ""), + } + c.state = &stoppedState{c: c} + return c, nil +} + +// Load takes an ID for an existing container and returns the container information +// from the state. This presents a read only view of the container. +func (l *Factory) Load(id string) (*Container, error) { + if l.Root == "" { + return nil, errors.New("root not set") + } + // when load, we need to check id is valid or not. + if err := l.validateID(id); err != nil { + return nil, err + } + containerRoot, err := securejoin.SecureJoin(l.Root, id) + if err != nil { + return nil, err + } + state, err := l.loadState(containerRoot) + if err != nil { + return nil, err + } + r := &nonChildProcess{ + processPid: state.InitProcessPid, + processStartTime: state.InitProcessStartTime, + fds: state.ExternalDescriptors, + } + cm, err := manager.NewWithPaths(state.Config.Cgroups, state.CgroupPaths) + if err != nil { + return nil, err + } + c := &Container{ + initProcess: r, + initProcessStartTime: state.InitProcessStartTime, + id: id, + config: &state.Config, + cgroupManager: cm, + intelRdtManager: intelrdt.NewManager(&state.Config, id, state.IntelRdtPath), + root: containerRoot, + created: state.Created, + } + c.state = &loadedState{c: c} + if err := c.refreshState(); err != nil { + return nil, err + } + return c, nil +} + +func (l *Factory) loadState(root string) (*State, error) { + stateFilePath, err := securejoin.SecureJoin(root, stateFilename) + if err != nil { + return nil, err + } + f, err := os.Open(stateFilePath) + if err != nil { + if os.IsNotExist(err) { + 
return nil, ErrNotExist + } + return nil, err + } + defer f.Close() + var state *State + if err := json.NewDecoder(f).Decode(&state); err != nil { + return nil, err + } + return state, nil +} + +func (l *Factory) validateID(id string) error { + if !idRegex.MatchString(id) || string(os.PathSeparator)+id != utils.CleanPath(string(os.PathSeparator)+id) { + return ErrInvalidID + } + + return nil +} + +func parseMountFds() ([]int, error) { + fdsJSON := os.Getenv("_LIBCONTAINER_MOUNT_FDS") + if fdsJSON == "" { + // Always return the nil slice if no fd is present. + return nil, nil + } + + var mountFds []int + if err := json.Unmarshal([]byte(fdsJSON), &mountFds); err != nil { + return nil, fmt.Errorf("Error unmarshalling _LIBCONTAINER_MOUNT_FDS: %w", err) + } + + return mountFds, nil } diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go deleted file mode 100644 index 9fc32694f3a..00000000000 --- a/libcontainer/factory_linux.go +++ /dev/null @@ -1,410 +0,0 @@ -package libcontainer - -import ( - "encoding/json" - "errors" - "fmt" - "os" - "path/filepath" - "regexp" - "runtime/debug" - "strconv" - - securejoin "github.com/cyphar/filepath-securejoin" - "github.com/moby/sys/mountinfo" - "golang.org/x/sys/unix" - - "github.com/opencontainers/runc/libcontainer/cgroups/manager" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/configs/validate" - "github.com/opencontainers/runc/libcontainer/intelrdt" - "github.com/opencontainers/runc/libcontainer/utils" - "github.com/sirupsen/logrus" -) - -const ( - stateFilename = "state.json" - execFifoFilename = "exec.fifo" -) - -var idRegex = regexp.MustCompile(`^[\w+-\.]+$`) - -// InitArgs returns an options func to configure a LinuxFactory with the -// provided init binary path and arguments. 
-func InitArgs(args ...string) func(*LinuxFactory) error { - return func(l *LinuxFactory) (err error) { - if len(args) > 0 { - // Resolve relative paths to ensure that its available - // after directory changes. - if args[0], err = filepath.Abs(args[0]); err != nil { - // The only error returned from filepath.Abs is - // the one from os.Getwd, i.e. a system error. - return err - } - } - - l.InitArgs = args - return nil - } -} - -// IntelRdtfs is an options func to configure a LinuxFactory to return -// containers that use the Intel RDT "resource control" filesystem to -// create and manage Intel RDT resources (e.g., L3 cache, memory bandwidth). -func IntelRdtFs(l *LinuxFactory) error { - if !intelrdt.IsCATEnabled() && !intelrdt.IsMBAEnabled() { - l.NewIntelRdtManager = nil - } else { - l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager { - return intelrdt.NewManager(config, id, path) - } - } - return nil -} - -// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs. -func TmpfsRoot(l *LinuxFactory) error { - mounted, err := mountinfo.Mounted(l.Root) - if err != nil { - return err - } - if !mounted { - if err := mount("tmpfs", l.Root, "", "tmpfs", 0, ""); err != nil { - return err - } - } - return nil -} - -// CriuPath returns an option func to configure a LinuxFactory with the -// provided criupath -func CriuPath(criupath string) func(*LinuxFactory) error { - return func(l *LinuxFactory) error { - l.CriuPath = criupath - return nil - } -} - -// New returns a linux based container factory based in the root directory and -// configures the factory with the provided option funcs. 
-func New(root string, options ...func(*LinuxFactory) error) (Factory, error) { - if root != "" { - if err := os.MkdirAll(root, 0o700); err != nil { - return nil, err - } - } - l := &LinuxFactory{ - Root: root, - InitPath: "/proc/self/exe", - InitArgs: []string{os.Args[0], "init"}, - Validator: validate.New(), - CriuPath: "criu", - } - - for _, opt := range options { - if opt == nil { - continue - } - if err := opt(l); err != nil { - return nil, err - } - } - return l, nil -} - -// LinuxFactory implements the default factory interface for linux based systems. -type LinuxFactory struct { - // Root directory for the factory to store state. - Root string - - // InitPath is the path for calling the init responsibilities for spawning - // a container. - InitPath string - - // InitArgs are arguments for calling the init responsibilities for spawning - // a container. - InitArgs []string - - // CriuPath is the path to the criu binary used for checkpoint and restore of - // containers. - CriuPath string - - // New{u,g}idmapPath is the path to the binaries used for mapping with - // rootless containers. - NewuidmapPath string - NewgidmapPath string - - // Validator provides validation to container configurations. - Validator validate.Validator - - // NewIntelRdtManager returns an initialized Intel RDT manager for a single container. 
- NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager -} - -func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) { - if l.Root == "" { - return nil, errors.New("root not set") - } - if err := l.validateID(id); err != nil { - return nil, err - } - if err := l.Validator.Validate(config); err != nil { - return nil, err - } - containerRoot, err := securejoin.SecureJoin(l.Root, id) - if err != nil { - return nil, err - } - if _, err := os.Stat(containerRoot); err == nil { - return nil, ErrExist - } else if !os.IsNotExist(err) { - return nil, err - } - - cm, err := manager.New(config.Cgroups) - if err != nil { - return nil, err - } - - // Check that cgroup does not exist or empty (no processes). - // Note for cgroup v1 this check is not thorough, as there are multiple - // separate hierarchies, while both Exists() and GetAllPids() only use - // one for "devices" controller (assuming others are the same, which is - // probably true in almost all scenarios). Checking all the hierarchies - // would be too expensive. - if cm.Exists() { - pids, err := cm.GetAllPids() - // Reading PIDs can race with cgroups removal, so ignore ENOENT and ENODEV. - if err != nil && !errors.Is(err, os.ErrNotExist) && !errors.Is(err, unix.ENODEV) { - return nil, fmt.Errorf("unable to get cgroup PIDs: %w", err) - } - if len(pids) != 0 { - // TODO: return an error. - logrus.Warnf("container's cgroup is not empty: %d process(es) found", len(pids)) - logrus.Warn("DEPRECATED: running container in a non-empty cgroup won't be supported in runc 1.2; https://github.com/opencontainers/runc/issues/3132") - } - } - - // Check that cgroup is not frozen. Do not use Exists() here - // since in cgroup v1 it only checks "devices" controller. 
- st, err := cm.GetFreezerState() - if err != nil { - return nil, fmt.Errorf("unable to get cgroup freezer state: %w", err) - } - if st == configs.Frozen { - return nil, errors.New("container's cgroup unexpectedly frozen") - } - - if err := os.MkdirAll(containerRoot, 0o711); err != nil { - return nil, err - } - c := &linuxContainer{ - id: id, - root: containerRoot, - config: config, - initPath: l.InitPath, - initArgs: l.InitArgs, - criuPath: l.CriuPath, - newuidmapPath: l.NewuidmapPath, - newgidmapPath: l.NewgidmapPath, - cgroupManager: cm, - } - if l.NewIntelRdtManager != nil { - c.intelRdtManager = l.NewIntelRdtManager(config, id, "") - } - c.state = &stoppedState{c: c} - return c, nil -} - -func (l *LinuxFactory) Load(id string) (Container, error) { - if l.Root == "" { - return nil, errors.New("root not set") - } - // when load, we need to check id is valid or not. - if err := l.validateID(id); err != nil { - return nil, err - } - containerRoot, err := securejoin.SecureJoin(l.Root, id) - if err != nil { - return nil, err - } - state, err := l.loadState(containerRoot) - if err != nil { - return nil, err - } - r := &nonChildProcess{ - processPid: state.InitProcessPid, - processStartTime: state.InitProcessStartTime, - fds: state.ExternalDescriptors, - } - cm, err := manager.NewWithPaths(state.Config.Cgroups, state.CgroupPaths) - if err != nil { - return nil, err - } - c := &linuxContainer{ - initProcess: r, - initProcessStartTime: state.InitProcessStartTime, - id: id, - config: &state.Config, - initPath: l.InitPath, - initArgs: l.InitArgs, - criuPath: l.CriuPath, - newuidmapPath: l.NewuidmapPath, - newgidmapPath: l.NewgidmapPath, - cgroupManager: cm, - root: containerRoot, - created: state.Created, - } - if l.NewIntelRdtManager != nil { - c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath) - } - c.state = &loadedState{c: c} - if err := c.refreshState(); err != nil { - return nil, err - } - return c, nil -} - -func (l *LinuxFactory) 
Type() string { - return "libcontainer" -} - -// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state -// This is a low level implementation detail of the reexec and should not be consumed externally -func (l *LinuxFactory) StartInitialization() (err error) { - // Get the INITPIPE. - envInitPipe := os.Getenv("_LIBCONTAINER_INITPIPE") - pipefd, err := strconv.Atoi(envInitPipe) - if err != nil { - err = fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE: %w", err) - logrus.Error(err) - return err - } - pipe := os.NewFile(uintptr(pipefd), "pipe") - defer pipe.Close() - - defer func() { - // We have an error during the initialization of the container's init, - // send it back to the parent process in the form of an initError. - if werr := writeSync(pipe, procError); werr != nil { - fmt.Fprintln(os.Stderr, err) - return - } - if werr := utils.WriteJSON(pipe, &initError{Message: err.Error()}); werr != nil { - fmt.Fprintln(os.Stderr, err) - return - } - }() - - // Only init processes have FIFOFD. - fifofd := -1 - envInitType := os.Getenv("_LIBCONTAINER_INITTYPE") - it := initType(envInitType) - if it == initStandard { - envFifoFd := os.Getenv("_LIBCONTAINER_FIFOFD") - if fifofd, err = strconv.Atoi(envFifoFd); err != nil { - return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD: %w", err) - } - } - - var consoleSocket *os.File - if envConsole := os.Getenv("_LIBCONTAINER_CONSOLE"); envConsole != "" { - console, err := strconv.Atoi(envConsole) - if err != nil { - return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE: %w", err) - } - consoleSocket = os.NewFile(uintptr(console), "console-socket") - defer consoleSocket.Close() - } - - logPipeFdStr := os.Getenv("_LIBCONTAINER_LOGPIPE") - logPipeFd, err := strconv.Atoi(logPipeFdStr) - if err != nil { - return fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE: %w", err) - } - - // Get mount files (O_PATH). 
- mountFds, err := parseMountFds() - if err != nil { - return err - } - - // clear the current process's environment to clean any libcontainer - // specific env vars. - os.Clearenv() - - defer func() { - if e := recover(); e != nil { - err = fmt.Errorf("panic from initialization: %w, %v", e, string(debug.Stack())) - } - }() - - i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd, mountFds) - if err != nil { - return err - } - - // If Init succeeds, syscall.Exec will not return, hence none of the defers will be called. - return i.Init() -} - -func (l *LinuxFactory) loadState(root string) (*State, error) { - stateFilePath, err := securejoin.SecureJoin(root, stateFilename) - if err != nil { - return nil, err - } - f, err := os.Open(stateFilePath) - if err != nil { - if os.IsNotExist(err) { - return nil, ErrNotExist - } - return nil, err - } - defer f.Close() - var state *State - if err := json.NewDecoder(f).Decode(&state); err != nil { - return nil, err - } - return state, nil -} - -func (l *LinuxFactory) validateID(id string) error { - if !idRegex.MatchString(id) || string(os.PathSeparator)+id != utils.CleanPath(string(os.PathSeparator)+id) { - return ErrInvalidID - } - - return nil -} - -// NewuidmapPath returns an option func to configure a LinuxFactory with the -// provided .. -func NewuidmapPath(newuidmapPath string) func(*LinuxFactory) error { - return func(l *LinuxFactory) error { - l.NewuidmapPath = newuidmapPath - return nil - } -} - -// NewgidmapPath returns an option func to configure a LinuxFactory with the -// provided .. -func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error { - return func(l *LinuxFactory) error { - l.NewgidmapPath = newgidmapPath - return nil - } -} - -func parseMountFds() ([]int, error) { - fdsJson := os.Getenv("_LIBCONTAINER_MOUNT_FDS") - if fdsJson == "" { - // Always return the nil slice if no fd is present. 
- return nil, nil - } - - var mountFds []int - if err := json.Unmarshal([]byte(fdsJson), &mountFds); err != nil { - return nil, fmt.Errorf("Error unmarshalling _LIBCONTAINER_MOUNT_FDS: %w", err) - } - - return mountFds, nil -} diff --git a/libcontainer/factory_linux_test.go b/libcontainer/factory_test.go similarity index 54% rename from libcontainer/factory_linux_test.go rename to libcontainer/factory_test.go index d29c32e9dbf..80a2ee2f020 100644 --- a/libcontainer/factory_linux_test.go +++ b/libcontainer/factory_test.go @@ -7,12 +7,9 @@ import ( "reflect" "testing" - "github.com/moby/sys/mountinfo" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runtime-spec/specs-go" - - "golang.org/x/sys/unix" ) func TestFactoryNew(t *testing.T) { @@ -24,85 +21,8 @@ func TestFactoryNew(t *testing.T) { if factory == nil { t.Fatal("factory should not be nil") } - lfactory, ok := factory.(*LinuxFactory) - if !ok { - t.Fatal("expected linux factory returned on linux based systems") - } - if lfactory.Root != root { - t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root) - } - - if factory.Type() != "libcontainer" { - t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer") - } -} - -func TestFactoryNewIntelRdt(t *testing.T) { - root := t.TempDir() - factory, err := New(root, IntelRdtFs) - if err != nil { - t.Fatal(err) - } - if factory == nil { - t.Fatal("factory should not be nil") - } - lfactory, ok := factory.(*LinuxFactory) - if !ok { - t.Fatal("expected linux factory returned on linux based systems") - } - if lfactory.Root != root { - t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root) - } - - if factory.Type() != "libcontainer" { - t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer") - } -} - -func TestFactoryNewTmpfs(t *testing.T) { - root := t.TempDir() - factory, err := New(root, 
TmpfsRoot) - if err != nil { - t.Fatal(err) - } - if factory == nil { - t.Fatal("factory should not be nil") - } - lfactory, ok := factory.(*LinuxFactory) - if !ok { - t.Fatal("expected linux factory returned on linux based systems") - } - if lfactory.Root != root { - t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root) - } - - if factory.Type() != "libcontainer" { - t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer") - } - mounted, err := mountinfo.Mounted(lfactory.Root) - if err != nil { - t.Fatal(err) - } - if !mounted { - t.Fatalf("Factory Root is not mounted") - } - mounts, err := mountinfo.GetMounts(mountinfo.SingleEntryFilter(lfactory.Root)) - if err != nil { - t.Fatal(err) - } - if len(mounts) != 1 { - t.Fatalf("Factory Root is not listed in mounts list") - } - m := mounts[0] - if m.FSType != "tmpfs" { - t.Fatalf("FSType of root: %s, expected %s", m.FSType, "tmpfs") - } - if m.Source != "tmpfs" { - t.Fatalf("Source of root: %s, expected %s", m.Source, "tmpfs") - } - err = unix.Unmount(root, unix.MNT_DETACH) - if err != nil { - t.Error("failed to unmount root:", err) + if factory.Root != root { + t.Fatalf("expected factory root to be %q but received %q", root, factory.Root) } } @@ -157,7 +77,7 @@ func TestFactoryLoadContainer(t *testing.T) { if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil { t.Fatal(err) } - factory, err := New(root, IntelRdtFs) + factory, err := New(root) if err != nil { t.Fatal(err) } @@ -176,12 +96,8 @@ func TestFactoryLoadContainer(t *testing.T) { if !reflect.DeepEqual(config.Hooks, expectedHooks) { t.Fatalf("expects hooks %q but received %q", expectedHooks, config.Hooks) } - lcontainer, ok := container.(*linuxContainer) - if !ok { - t.Fatal("expected linux container on linux based systems") - } - if lcontainer.initProcess.pid() != expectedState.InitProcessPid { - t.Fatalf("expected init pid %d but received %d", expectedState.InitProcessPid, 
lcontainer.initProcess.pid()) + if container.initProcess.pid() != expectedState.InitProcessPid { + t.Fatalf("expected init pid %d but received %d", expectedState.InitProcessPid, container.initProcess.pid()) } } diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index cb862a6a5be..911349163de 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -8,6 +8,7 @@ import ( "io" "net" "os" + "runtime/debug" "strconv" "strings" "unsafe" @@ -49,56 +50,146 @@ type network struct { // initConfig is used for transferring parameters from Exec() to Init() type initConfig struct { - Args []string `json:"args"` - Env []string `json:"env"` - Cwd string `json:"cwd"` - Capabilities *configs.Capabilities `json:"capabilities"` - ProcessLabel string `json:"process_label"` - AppArmorProfile string `json:"apparmor_profile"` - NoNewPrivileges bool `json:"no_new_privileges"` - User string `json:"user"` - AdditionalGroups []string `json:"additional_groups"` - Config *configs.Config `json:"config"` - Networks []*network `json:"network"` - PassedFilesCount int `json:"passed_files_count"` - ContainerId string `json:"containerid"` - Rlimits []configs.Rlimit `json:"rlimits"` - CreateConsole bool `json:"create_console"` - ConsoleWidth uint16 `json:"console_width"` - ConsoleHeight uint16 `json:"console_height"` - RootlessEUID bool `json:"rootless_euid,omitempty"` - RootlessCgroups bool `json:"rootless_cgroups,omitempty"` - SpecState *specs.State `json:"spec_state,omitempty"` - Cgroup2Path string `json:"cgroup2_path,omitempty"` + Args []string `json:"args"` + Env []string `json:"env"` + Cwd string `json:"cwd"` + Capabilities *specs.LinuxCapabilities `json:"capabilities"` + ProcessLabel string `json:"process_label"` + AppArmorProfile string `json:"apparmor_profile"` + NoNewPrivileges bool `json:"no_new_privileges"` + User string `json:"user"` + AdditionalGroups []string `json:"additional_groups"` + Config *configs.Config `json:"config"` + Networks []*network 
`json:"network"` + PassedFilesCount int `json:"passed_files_count"` + ContainerID string `json:"containerid"` + Rlimits []configs.Rlimit `json:"rlimits"` + CreateConsole bool `json:"create_console"` + ConsoleWidth uint16 `json:"console_width"` + ConsoleHeight uint16 `json:"console_height"` + RootlessEUID bool `json:"rootless_euid,omitempty"` + RootlessCgroups bool `json:"rootless_cgroups,omitempty"` + SpecState *specs.State `json:"spec_state,omitempty"` + Cgroup2Path string `json:"cgroup2_path,omitempty"` } -type initer interface { - Init() error +// StartInitialization loads a container by opening the pipe fd from the parent +// to read the configuration and state. This is a low level implementation +// detail of the reexec and should not be consumed externally. +func StartInitialization() (err error) { + // Set up logging. + level := int(logrus.DebugLevel) // default to debug + // Passing log level is optional; currently libcontainer/integration does not do it. + if levelStr := os.Getenv("_LIBCONTAINER_LOGLEVEL"); levelStr != "" { + level, err = strconv.Atoi(levelStr) + if err != nil { + panic(fmt.Errorf("unable to convert _LIBCONTAINER_LOGLEVEL: %w", err)) + } + } + + logPipeFd, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_LOGPIPE")) + if err != nil { + panic(fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE: %w", err)) + } + + logrus.SetLevel(logrus.Level(level)) + logrus.SetOutput(os.NewFile(uintptr(logPipeFd), "logpipe")) + logrus.SetFormatter(new(logrus.JSONFormatter)) + logrus.Debug("child process in init()") + + // Once logging is all set, we can use logrus to log errors. + + // Get the INITPIPE. + envInitPipe := os.Getenv("_LIBCONTAINER_INITPIPE") + pipefd, err := strconv.Atoi(envInitPipe) + if err != nil { + err = fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE: %w", err) + logrus.Error(err) + return err + } + pipe := os.NewFile(uintptr(pipefd), "pipe") + defer pipe.Close() + + // Once init pipe is set, we can send error back to parent. 
If this + // defer is ever called, this means initialization has failed. + defer func() { + // We have an error during the initialization of the container's init, + // send it back to the parent process in the form of an initError. + if werr := writeSync(pipe, procError); werr != nil { + fmt.Fprintln(os.Stderr, err) + return + } + if werr := utils.WriteJSON(pipe, &initError{Message: err.Error()}); werr != nil { + fmt.Fprintln(os.Stderr, err) + return + } + }() + + // Only init processes have FIFOFD. + fifofd := -1 + envInitType := os.Getenv("_LIBCONTAINER_INITTYPE") + it := initType(envInitType) + if it == initStandard { + envFifoFd := os.Getenv("_LIBCONTAINER_FIFOFD") + if fifofd, err = strconv.Atoi(envFifoFd); err != nil { + return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD: %w", err) + } + } + + var consoleSocket *os.File + if envConsole := os.Getenv("_LIBCONTAINER_CONSOLE"); envConsole != "" { + console, err := strconv.Atoi(envConsole) + if err != nil { + return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE: %w", err) + } + consoleSocket = os.NewFile(uintptr(console), "console-socket") + defer consoleSocket.Close() + } + + // Get mount files (O_PATH). + mountFds, err := parseMountFds() + if err != nil { + return err + } + + // clear the current process's environment to clean any libcontainer + // specific env vars. + os.Clearenv() + + defer func() { + if e := recover(); e != nil { + err = fmt.Errorf("panic from initialization: %w, %v", e, string(debug.Stack())) + } + }() + + // If init succeeds, it will not return, hence none of the defers will be called. 
+ return containerInit(it, pipe, consoleSocket, fifofd, logPipeFd, mountFds) } -func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int, mountFds []int) (initer, error) { +func containerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int, mountFds []int) error { var config *initConfig if err := json.NewDecoder(pipe).Decode(&config); err != nil { - return nil, err + return err } if err := populateProcessEnvironment(config.Env); err != nil { - return nil, err + return err } switch t { case initSetns: // mountFds must be nil in this case. We don't mount while doing runc exec. if mountFds != nil { - return nil, errors.New("mountFds must be nil. Can't mount while doing runc exec.") + return errors.New("mountFds must be nil; can't mount from exec") } - return &linuxSetnsInit{ + i := &linuxSetnsInit{ pipe: pipe, consoleSocket: consoleSocket, config: config, logFd: logFd, - }, nil + } + return i.Init() case initStandard: - return &linuxStandardInit{ + i := &linuxStandardInit{ pipe: pipe, consoleSocket: consoleSocket, parentPid: unix.Getppid(), @@ -106,9 +197,10 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, fifoFd: fifoFd, logFd: logFd, mountFds: mountFds, - }, nil + } + return i.Init() } - return nil, fmt.Errorf("unknown init type %q", t) + return fmt.Errorf("unknown init type %q", t) } // populateProcessEnvironment loads the provided environment variables into the @@ -167,7 +259,7 @@ func finalizeNamespace(config *initConfig) error { } } - caps := &configs.Capabilities{} + caps := &specs.LinuxCapabilities{} if config.Capabilities != nil { caps = config.Capabilities } else if config.Config.Capabilities != nil { diff --git a/libcontainer/integration/exec_test.go b/libcontainer/integration/exec_test.go index ada4f854b0d..8807a8f067c 100644 --- a/libcontainer/integration/exec_test.go +++ b/libcontainer/integration/exec_test.go @@ -358,7 +358,7 @@ func TestProcessCaps(t *testing.T) { 
Env: standardEnvironment, Stdin: nil, Stdout: &stdout, - Capabilities: &configs.Capabilities{}, + Capabilities: &specs.LinuxCapabilities{}, Init: true, } pconfig.Capabilities.Bounding = append(config.Capabilities.Bounding, "CAP_NET_ADMIN") @@ -1402,7 +1402,7 @@ func TestRootfsPropagationSharedMount(t *testing.T) { Env: standardEnvironment, Stdin: stdinR2, Stdout: &stdout2, - Capabilities: &configs.Capabilities{}, + Capabilities: &specs.LinuxCapabilities{}, } // Provide CAP_SYS_ADMIN diff --git a/libcontainer/integration/execin_test.go b/libcontainer/integration/execin_test.go index f8a6a9c6996..58519a419c0 100644 --- a/libcontainer/integration/execin_test.go +++ b/libcontainer/integration/execin_test.go @@ -215,12 +215,10 @@ func TestExecInError(t *testing.T) { ok(t, err) for i := 0; i < 42; i++ { - var out bytes.Buffer unexistent := &libcontainer.Process{ - Cwd: "/", - Args: []string{"unexistent"}, - Env: standardEnvironment, - Stderr: &out, + Cwd: "/", + Args: []string{"unexistent"}, + Env: standardEnvironment, } err = container.Run(unexistent) if err == nil { @@ -229,9 +227,6 @@ func TestExecInError(t *testing.T) { if !strings.Contains(err.Error(), "executable file not found") { t.Fatalf("Should be error about not found executable, got %s", err) } - if !bytes.Contains(out.Bytes(), []byte("executable file not found")) { - t.Fatalf("executable file not found error not delivered to stdio:\n%s", out.String()) - } } } diff --git a/libcontainer/integration/init_test.go b/libcontainer/integration/init_test.go index effcde06d60..c0d2fb448aa 100644 --- a/libcontainer/integration/init_test.go +++ b/libcontainer/integration/init_test.go @@ -7,31 +7,25 @@ import ( "github.com/opencontainers/runc/libcontainer" _ "github.com/opencontainers/runc/libcontainer/nsenter" - - "github.com/sirupsen/logrus" ) -// init runs the libcontainer initialization code because of the busybox style needs -// to work around the go runtime and the issues with forking +// Same as ../../init.go but 
for libcontainer/integration. func init() { if len(os.Args) < 2 || os.Args[1] != "init" { return } + // This is the golang entry point for runc init, executed + // before TestMain() but after libcontainer/nsenter's nsexec(). runtime.GOMAXPROCS(1) runtime.LockOSThread() - factory, err := libcontainer.New("") - if err != nil { - logrus.Fatalf("unable to initialize for container: %s", err) - } - if err := factory.StartInitialization(); err != nil { - logrus.Fatal(err) + if err := libcontainer.StartInitialization(); err != nil { + // as the error is sent back to the parent there is no need to log + // or write it to stderr because the parent process will handle this + os.Exit(1) } } func TestMain(m *testing.M) { - logrus.SetOutput(os.Stderr) - logrus.SetLevel(logrus.InfoLevel) - ret := m.Run() os.Exit(ret) } diff --git a/libcontainer/integration/template_test.go b/libcontainer/integration/template_test.go index f56db8956a7..2f784878298 100644 --- a/libcontainer/integration/template_test.go +++ b/libcontainer/integration/template_test.go @@ -9,6 +9,7 @@ import ( "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/devices" "github.com/opencontainers/runc/libcontainer/specconv" + "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" ) @@ -42,7 +43,7 @@ func newTemplateConfig(t *testing.T, p *tParam) *configs.Config { } config := &configs.Config{ Rootfs: newRootfs(t), - Capabilities: &configs.Capabilities{ + Capabilities: &specs.LinuxCapabilities{ Bounding: []string{ "CAP_CHOWN", "CAP_DAC_OVERRIDE", diff --git a/libcontainer/integration/utils_test.go b/libcontainer/integration/utils_test.go index def29fc0cd6..e401753f675 100644 --- a/libcontainer/integration/utils_test.go +++ b/libcontainer/integration/utils_test.go @@ -165,7 +165,7 @@ func copyBusybox(dest string) error { return nil } -func newContainer(t *testing.T, config *configs.Config) (libcontainer.Container, error) { +func newContainer(t *testing.T, 
config *configs.Config) (*libcontainer.Container, error) { name := strings.ReplaceAll(t.Name(), "/", "_") + strconv.FormatInt(-int64(time.Now().Nanosecond()), 35) root := t.TempDir() @@ -216,6 +216,6 @@ func runContainer(t *testing.T, config *configs.Config, args ...string) (buffers return } -func destroyContainer(container libcontainer.Container) { +func destroyContainer(container *libcontainer.Container) { _ = container.Destroy() } diff --git a/libcontainer/intelrdt/intelrdt.go b/libcontainer/intelrdt/intelrdt.go index 1fe1ec3e86c..7251c0dc112 100644 --- a/libcontainer/intelrdt/intelrdt.go +++ b/libcontainer/intelrdt/intelrdt.go @@ -146,34 +146,28 @@ import ( * } */ -type Manager interface { - // Applies Intel RDT configuration to the process with the specified pid - Apply(pid int) error - - // Returns statistics for Intel RDT - GetStats() (*Stats, error) - - // Destroys the Intel RDT container-specific 'container_id' group - Destroy() error - - // Returns Intel RDT path to save in a state file and to be able to - // restore the object later - GetPath() string - - // Set Intel RDT "resource control" filesystem as configured. - Set(container *configs.Config) error -} - -// This implements interface Manager -type intelRdtManager struct { +type Manager struct { mu sync.Mutex config *configs.Config id string path string } -func NewManager(config *configs.Config, id string, path string) Manager { - return &intelRdtManager{ +// NewManager returns a new instance of Manager, or nil, if the Intel RDT +// functionality is not available from hardware or not enabled in the kernel. +func NewManager(config *configs.Config, id string, path string) *Manager { + // Quick check whether Intel RDT is available. + // TODO: replace with Root() once PR 3306 is merged. 
+ if unix.Access("/sys/fs/resctrl", unix.F_OK) != nil { + return nil + } + return newManager(config, id, path) +} + +// newManager is the same as NewManager, except it does not check if the feature +// is actually available. To be used from unit tests that mock intelrdt paths. +func newManager(config *configs.Config, id string, path string) *Manager { + return &Manager{ config: config, id: id, path: path, @@ -507,7 +501,7 @@ func IsMBAScEnabled() bool { } // Get the path of the clos group in "resource control" filesystem that the container belongs to -func (m *intelRdtManager) getIntelRdtPath() (string, error) { +func (m *Manager) getIntelRdtPath() (string, error) { rootPath, err := Root() if err != nil { return "", err @@ -522,7 +516,7 @@ func (m *intelRdtManager) getIntelRdtPath() (string, error) { } // Applies Intel RDT configuration to the process with the specified pid -func (m *intelRdtManager) Apply(pid int) (err error) { +func (m *Manager) Apply(pid int) (err error) { // If intelRdt is not specified in config, we do nothing if m.config.IntelRdt == nil { return nil @@ -557,7 +551,7 @@ func (m *intelRdtManager) Apply(pid int) (err error) { } // Destroys the Intel RDT container-specific 'container_id' group -func (m *intelRdtManager) Destroy() error { +func (m *Manager) Destroy() error { // Don't remove resctrl group if closid has been explicitly specified. The // group is likely externally managed, i.e. by some other entity than us. // There are probably other containers/tasks sharing the same group. 
@@ -574,7 +568,7 @@ func (m *intelRdtManager) Destroy() error { // Returns Intel RDT path to save in a state file and to be able to // restore the object later -func (m *intelRdtManager) GetPath() string { +func (m *Manager) GetPath() string { if m.path == "" { m.path, _ = m.getIntelRdtPath() } @@ -582,7 +576,7 @@ func (m *intelRdtManager) GetPath() string { } // Returns statistics for Intel RDT -func (m *intelRdtManager) GetStats() (*Stats, error) { +func (m *Manager) GetStats() (*Stats, error) { // If intelRdt is not specified in config if m.config.IntelRdt == nil { return nil, nil @@ -668,7 +662,7 @@ func (m *intelRdtManager) GetStats() (*Stats, error) { } // Set Intel RDT "resource control" filesystem as configured. -func (m *intelRdtManager) Set(container *configs.Config) error { +func (m *Manager) Set(container *configs.Config) error { // About L3 cache schema: // It has allocation bitmasks/values for L3 cache on each socket, // which contains L3 cache id and capacity bitmask (CBM). 
diff --git a/libcontainer/intelrdt/intelrdt_test.go b/libcontainer/intelrdt/intelrdt_test.go index 2184a1468df..c127cd8f7c6 100644 --- a/libcontainer/intelrdt/intelrdt_test.go +++ b/libcontainer/intelrdt/intelrdt_test.go @@ -20,7 +20,7 @@ func TestIntelRdtSetL3CacheSchema(t *testing.T) { }) helper.config.IntelRdt.L3CacheSchema = l3CacheSchemeAfter - intelrdt := NewManager(helper.config, "", helper.IntelRdtPath) + intelrdt := newManager(helper.config, "", helper.IntelRdtPath) if err := intelrdt.Set(helper.config); err != nil { t.Fatal(err) } @@ -50,7 +50,7 @@ func TestIntelRdtSetMemBwSchema(t *testing.T) { }) helper.config.IntelRdt.MemBwSchema = memBwSchemeAfter - intelrdt := NewManager(helper.config, "", helper.IntelRdtPath) + intelrdt := newManager(helper.config, "", helper.IntelRdtPath) if err := intelrdt.Set(helper.config); err != nil { t.Fatal(err) } @@ -80,7 +80,7 @@ func TestIntelRdtSetMemBwScSchema(t *testing.T) { }) helper.config.IntelRdt.MemBwSchema = memBwScSchemeAfter - intelrdt := NewManager(helper.config, "", helper.IntelRdtPath) + intelrdt := newManager(helper.config, "", helper.IntelRdtPath) if err := intelrdt.Set(helper.config); err != nil { t.Fatal(err) } @@ -103,7 +103,7 @@ func TestApply(t *testing.T) { const closID = "test-clos" helper.config.IntelRdt.ClosID = closID - intelrdt := NewManager(helper.config, "", helper.IntelRdtPath) + intelrdt := newManager(helper.config, "", helper.IntelRdtPath) if err := intelrdt.Apply(1234); err == nil { t.Fatal("unexpected success when applying pid") } @@ -112,7 +112,7 @@ func TestApply(t *testing.T) { } // Dir should be created if some schema has been specified - intelrdt.(*intelRdtManager).config.IntelRdt.L3CacheSchema = "L3:0=f" + intelrdt.config.IntelRdt.L3CacheSchema = "L3:0=f" if err := intelrdt.Apply(1235); err != nil { t.Fatalf("Apply() failed: %v", err) } diff --git a/libcontainer/process.go b/libcontainer/process.go index 8a5d340dacd..9a3ff34b0c8 100644 --- a/libcontainer/process.go +++ 
b/libcontainer/process.go @@ -7,6 +7,7 @@ import ( "os" "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runtime-spec/specs-go" ) var errInvalidProcess = errors.New("invalid process") @@ -55,7 +56,7 @@ type Process struct { // Capabilities specify the capabilities to keep when executing the process inside the container // All capabilities not specified will be dropped from the processes capability mask - Capabilities *configs.Capabilities + Capabilities *specs.LinuxCapabilities // AppArmorProfile specifies the profile to apply to the process and is // changed at the time the process is execed diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index e025445d330..3c9187008a3 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -192,7 +192,7 @@ func (p *setnsProcess) start() (retErr error) { Metadata: p.config.Config.Seccomp.ListenerMetadata, State: specs.State{ Version: specs.Version, - ID: p.config.ContainerId, + ID: p.config.ContainerID, Status: specs.StateRunning, Pid: p.initProcessPid, Bundle: bundle, @@ -303,8 +303,8 @@ type initProcess struct { logFilePair filePair config *initConfig manager cgroups.Manager - intelRdtManager intelrdt.Manager - container *linuxContainer + intelRdtManager *intelrdt.Manager + container *Container fds []string process *Process bootstrapData io.Reader diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go index 09ab552b3d1..da31110aeee 100644 --- a/libcontainer/setns_init_linux.go +++ b/libcontainer/setns_init_linux.go @@ -26,7 +26,7 @@ type linuxSetnsInit struct { } func (l *linuxSetnsInit) getSessionRingName() string { - return "_ses." + l.config.ContainerId + return "_ses." 
+ l.config.ContainerID } func (l *linuxSetnsInit) Init() error { diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index d8a4d026b6d..a43785bf06c 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -481,15 +481,7 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { config.NoNewPrivileges = spec.Process.NoNewPrivileges config.Umask = spec.Process.User.Umask config.ProcessLabel = spec.Process.SelinuxLabel - if spec.Process.Capabilities != nil { - config.Capabilities = &configs.Capabilities{ - Bounding: spec.Process.Capabilities.Bounding, - Effective: spec.Process.Capabilities.Effective, - Permitted: spec.Process.Capabilities.Permitted, - Inheritable: spec.Process.Capabilities.Inheritable, - Ambient: spec.Process.Capabilities.Ambient, - } - } + config.Capabilities = spec.Process.Capabilities } createHooks(spec, config) config.Version = specs.Version diff --git a/libcontainer/specconv/spec_linux_test.go b/libcontainer/specconv/spec_linux_test.go index 56d808699c6..1068c55a8cb 100644 --- a/libcontainer/specconv/spec_linux_test.go +++ b/libcontainer/specconv/spec_linux_test.go @@ -540,8 +540,7 @@ func TestSpecconvExampleValidate(t *testing.T) { spec.Process.NoNewPrivileges, config.NoNewPrivileges) } - validator := validate.New() - if err := validator.Validate(config); err != nil { + if err := validate.Validate(config); err != nil { t.Errorf("Expected specconv to produce valid container config: %v", err) } } @@ -562,8 +561,7 @@ func TestSpecconvNoLinuxSection(t *testing.T) { t.Errorf("Couldn't create libcontainer config: %v", err) } - validator := validate.New() - if err := validator.Validate(config); err != nil { + if err := validate.Validate(config); err != nil { t.Errorf("Expected specconv to produce valid container config: %v", err) } } @@ -617,8 +615,7 @@ func TestNonZeroEUIDCompatibleSpecconvValidate(t *testing.T) { t.Errorf("Couldn't create libcontainer config: 
%v", err) } - validator := validate.New() - if err := validator.Validate(config); err != nil { + if err := validate.Validate(config); err != nil { t.Errorf("Expected specconv to produce valid rootless container config: %v", err) } } diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go index 585a04fa080..1a9c4979c26 100644 --- a/libcontainer/standard_init_linux.go +++ b/libcontainer/standard_init_linux.go @@ -42,7 +42,7 @@ func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) { // Create a unique per session container name that we can join in setns; // However, other containers can also join it. - return "_ses." + l.config.ContainerId, 0xffffffff, newperms + return "_ses." + l.config.ContainerID, 0xffffffff, newperms } func (l *linuxStandardInit) Init() error { diff --git a/libcontainer/state_linux.go b/libcontainer/state_linux.go index aa6259b157d..4895612e257 100644 --- a/libcontainer/state_linux.go +++ b/libcontainer/state_linux.go @@ -35,7 +35,7 @@ type containerState interface { status() Status } -func destroy(c *linuxContainer) error { +func destroy(c *Container) error { if !c.config.Namespaces.Contains(configs.NEWPID) || c.config.Namespaces.PathOf(configs.NEWPID) != "" { if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil { @@ -59,7 +59,7 @@ func destroy(c *linuxContainer) error { return err } -func runPoststopHooks(c *linuxContainer) error { +func runPoststopHooks(c *Container) error { hooks := c.config.Hooks if hooks == nil { return nil @@ -80,7 +80,7 @@ func runPoststopHooks(c *linuxContainer) error { // stoppedState represents a container is a stopped/destroyed state. type stoppedState struct { - c *linuxContainer + c *Container } func (b *stoppedState) status() Status { @@ -104,7 +104,7 @@ func (b *stoppedState) destroy() error { // runningState represents a container that is currently running. 
type runningState struct { - c *linuxContainer + c *Container } func (r *runningState) status() Status { @@ -136,7 +136,7 @@ func (r *runningState) destroy() error { } type createdState struct { - c *linuxContainer + c *Container } func (i *createdState) status() Status { @@ -162,7 +162,7 @@ func (i *createdState) destroy() error { // pausedState represents a container that is currently pause. It cannot be destroyed in a // paused state and must transition back to running first. type pausedState struct { - c *linuxContainer + c *Container } func (p *pausedState) status() Status { @@ -195,7 +195,7 @@ func (p *pausedState) destroy() error { // information that maybe need destroyed when the container is stopped and destroy is called. type restoredState struct { imageDir string - c *linuxContainer + c *Container } func (r *restoredState) status() Status { @@ -222,7 +222,7 @@ func (r *restoredState) destroy() error { // loadedState is used whenever a container is restored, loaded, or setting additional // processes inside and it should not be destroyed when it is exiting. 
type loadedState struct { - c *linuxContainer + c *Container s Status } diff --git a/libcontainer/state_linux_test.go b/libcontainer/state_linux_test.go index 413626d5118..b57e6eff716 100644 --- a/libcontainer/state_linux_test.go +++ b/libcontainer/state_linux_test.go @@ -53,7 +53,7 @@ func testTransitions(t *testing.T, initialState containerState, valid []containe func TestStoppedStateTransition(t *testing.T) { testTransitions( t, - &stoppedState{c: &linuxContainer{}}, + &stoppedState{c: &Container{}}, []containerState{ &stoppedState{}, &runningState{}, @@ -65,7 +65,7 @@ func TestStoppedStateTransition(t *testing.T) { func TestPausedStateTransition(t *testing.T) { testTransitions( t, - &pausedState{c: &linuxContainer{}}, + &pausedState{c: &Container{}}, []containerState{ &pausedState{}, &runningState{}, @@ -77,7 +77,7 @@ func TestPausedStateTransition(t *testing.T) { func TestRestoredStateTransition(t *testing.T) { testTransitions( t, - &restoredState{c: &linuxContainer{}}, + &restoredState{c: &Container{}}, []containerState{ &stoppedState{}, &runningState{}, @@ -88,7 +88,7 @@ func TestRestoredStateTransition(t *testing.T) { func TestRunningStateTransition(t *testing.T) { testTransitions( t, - &runningState{c: &linuxContainer{}}, + &runningState{c: &Container{}}, []containerState{ &stoppedState{}, &pausedState{}, @@ -100,7 +100,7 @@ func TestRunningStateTransition(t *testing.T) { func TestCreatedStateTransition(t *testing.T) { testTransitions( t, - &createdState{c: &linuxContainer{}}, + &createdState{c: &Container{}}, []containerState{ &stoppedState{}, &pausedState{}, diff --git a/list.go b/list.go index 3503dcd2f5e..80ee465b456 100644 --- a/list.go +++ b/list.go @@ -111,7 +111,7 @@ To list containers created using a non-default value for "--root": } func getContainers(context *cli.Context) ([]containerState, error) { - factory, err := loadFactory(context) + factory, err := libcontainer.New(context.GlobalString("root")) if err != nil { return nil, err } diff --git 
a/main.go b/main.go index 4d666382740..f7ae6b4ade8 100644 --- a/main.go +++ b/main.go @@ -101,9 +101,9 @@ func main() { Usage: "root directory for storage of container state (this should be located in tmpfs)", }, cli.StringFlag{ - Name: "criu", - Value: "criu", - Usage: "path to the criu binary used for checkpoint and restore", + Name: "criu", + Usage: "(obsoleted; do not use)", + Hidden: true, }, cli.BoolFlag{ Name: "systemd-cgroup", @@ -152,7 +152,15 @@ func main() { return err } - return configLogrus(context) + if err := configLogrus(context); err != nil { + return err + } + + // TODO: remove this in runc 1.3.0. + if context.IsSet("criu") { + logrus.Warn("--criu value ignored (criu binary from $PATH is used); do not use") + } + return nil } // If the command returns an error, cli takes upon itself to print diff --git a/man/runc.8.md b/man/runc.8.md index 09db1ef023e..ed5cd3a0fe0 100644 --- a/man/runc.8.md +++ b/man/runc.8.md @@ -110,10 +110,6 @@ These options can be used with any command, and must precede the **command**. located on tmpfs. Default is */run/runc*, or *$XDG_RUNTIME_DIR/runc* for rootless containers. -**--criu** _path_ -: Set the path to the **criu**(8) binary used for checkpoint and restore. -Default is **criu**. - **--systemd-cgroup** : Enable systemd cgroup support. 
If this is set, the container spec (_config.json_) is expected to have **cgroupsPath** value in the diff --git a/notify_socket.go b/notify_socket.go index 76aa27ca518..28c6c0ae65a 100644 --- a/notify_socket.go +++ b/notify_socket.go @@ -91,12 +91,12 @@ func notifySocketStart(context *cli.Context, notifySocketHost, id string) (*noti return notifySocket, nil } -func (n *notifySocket) waitForContainer(container libcontainer.Container) error { - s, err := container.State() +func (n *notifySocket) waitForContainer(container *libcontainer.Container) error { + state, err := container.State() if err != nil { return err } - return n.run(s.InitProcessPid) + return n.run(state.InitProcessPid) } func (n *notifySocket) run(pid1 int) error { diff --git a/tests/integration/checkpoint.bats b/tests/integration/checkpoint.bats index 4b7e442bb1a..9bf9994004c 100644 --- a/tests/integration/checkpoint.bats +++ b/tests/integration/checkpoint.bats @@ -84,7 +84,7 @@ function runc_restore_with_pipes() { shift ret=0 - __runc --criu "$CRIU" restore -d --work-path "$workdir" --image-path ./image-dir "$@" "$name" <&${in_r} >&${out_w} 2>&${err_w} || ret=$? + __runc restore -d --work-path "$workdir" --image-path ./image-dir "$@" "$name" <&${in_r} >&${out_w} 2>&${err_w} || ret=$? 
if [ "$ret" -ne 0 ]; then echo "__runc restore $name failed (status: $ret)" exec {err_w}>&- @@ -109,7 +109,7 @@ function simple_cr() { for _ in $(seq 2); do # checkpoint the running container - runc --criu "$CRIU" "$@" checkpoint --work-path ./work-dir test_busybox + runc "$@" checkpoint --work-path ./work-dir test_busybox grep -B 5 Error ./work-dir/dump.log || true [ "$status" -eq 0 ] @@ -117,7 +117,7 @@ function simple_cr() { testcontainer test_busybox checkpointed # restore from checkpoint - runc --criu "$CRIU" "$@" restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox + runc "$@" restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox grep -B 5 Error ./work-dir/restore.log || true [ "$status" -eq 0 ] @@ -162,12 +162,12 @@ function simple_cr() { testcontainer test_busybox running # runc should fail with absolute parent image path. - runc --criu "$CRIU" checkpoint --parent-path "$(pwd)"/parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox + runc checkpoint --parent-path "$(pwd)"/parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox [[ "${output}" == *"--parent-path"* ]] [ "$status" -ne 0 ] # runc should fail with invalid parent image path. 
- runc --criu "$CRIU" checkpoint --parent-path ./parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox + runc checkpoint --parent-path ./parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox [[ "${output}" == *"--parent-path"* ]] [ "$status" -ne 0 ] } @@ -178,7 +178,7 @@ function simple_cr() { #test checkpoint pre-dump mkdir parent-dir - runc --criu "$CRIU" checkpoint --pre-dump --image-path ./parent-dir test_busybox + runc checkpoint --pre-dump --image-path ./parent-dir test_busybox [ "$status" -eq 0 ] # busybox should still be running @@ -187,7 +187,7 @@ function simple_cr() { # checkpoint the running container mkdir image-dir mkdir work-dir - runc --criu "$CRIU" checkpoint --parent-path ../parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox + runc checkpoint --parent-path ../parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox grep -B 5 Error ./work-dir/dump.log || true [ "$status" -eq 0 ] @@ -203,7 +203,7 @@ function simple_cr() { @test "checkpoint --lazy-pages and restore" { # check if lazy-pages is supported - if ! "${CRIU}" check --feature uffd-noncoop; then + if ! criu check --feature uffd-noncoop; then skip "this criu does not support lazy migration" fi @@ -224,7 +224,7 @@ function simple_cr() { # TCP port for lazy migration port=27277 - __runc --criu "$CRIU" checkpoint --lazy-pages --page-server 0.0.0.0:${port} --status-fd ${lazy_w} --work-path ./work-dir --image-path ./image-dir test_busybox & + __runc checkpoint --lazy-pages --page-server 0.0.0.0:${port} --status-fd ${lazy_w} --work-path ./work-dir --image-path ./image-dir test_busybox & cpt_pid=$! # wait for lazy page server to be ready @@ -242,7 +242,7 @@ function simple_cr() { [ -e image-dir/inventory.img ] # Start CRIU in lazy-daemon mode - ${CRIU} lazy-pages --page-server --address 127.0.0.1 --port ${port} -D image-dir & + criu lazy-pages --page-server --address 127.0.0.1 --port ${port} -D image-dir & lp_pid=$! 
# Restore lazily from checkpoint. @@ -264,7 +264,7 @@ function simple_cr() { @test "checkpoint and restore in external network namespace" { # check if external_net_ns is supported; only with criu 3.10++ - if ! "${CRIU}" check --feature external_net_ns; then + if ! criu check --feature external_net_ns; then # this criu does not support external_net_ns; skip the test skip "this criu does not support external network namespaces" fi @@ -290,7 +290,7 @@ function simple_cr() { for _ in $(seq 2); do # checkpoint the running container; this automatically tells CRIU to # handle the network namespace defined in config.json as an external - runc --criu "$CRIU" checkpoint --work-path ./work-dir test_busybox + runc checkpoint --work-path ./work-dir test_busybox grep -B 5 Error ./work-dir/dump.log || true [ "$status" -eq 0 ] @@ -298,7 +298,7 @@ function simple_cr() { testcontainer test_busybox checkpointed # restore from checkpoint; this should restore the container into the existing network namespace - runc --criu "$CRIU" restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox + runc restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox grep -B 5 Error ./work-dir/restore.log || true [ "$status" -eq 0 ] @@ -341,7 +341,7 @@ function simple_cr() { testcontainer test_busybox running # checkpoint the running container - runc --criu "$CRIU" checkpoint --work-path ./work-dir test_busybox + runc checkpoint --work-path ./work-dir test_busybox grep -B 5 Error ./work-dir/dump.log || true [ "$status" -eq 0 ] ! test -f ./work-dir/"$tmplog1" @@ -352,7 +352,7 @@ function simple_cr() { test -f ./work-dir/"$tmplog2" && unlink ./work-dir/"$tmplog2" # restore from checkpoint - runc --criu "$CRIU" restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox + runc restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox grep -B 5 Error ./work-dir/restore.log || true [ "$status" -eq 0 ] ! 
test -f ./work-dir/"$tmplog1" @@ -386,7 +386,7 @@ function simple_cr() { testcontainer test_busybox running # checkpoint the running container - runc --criu "$CRIU" checkpoint --work-path ./work-dir test_busybox + runc checkpoint --work-path ./work-dir test_busybox grep -B 5 Error ./work-dir/dump.log || true [ "$status" -eq 0 ] @@ -398,7 +398,7 @@ function simple_cr() { rm -rf "${bind1:?}"/* # restore from checkpoint - runc --criu "$CRIU" restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox + runc restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox grep -B 5 Error ./work-dir/restore.log || true [ "$status" -eq 0 ] diff --git a/tests/integration/helpers.bash b/tests/integration/helpers.bash index aaa68dd7c33..b6d2e244cf6 100644 --- a/tests/integration/helpers.bash +++ b/tests/integration/helpers.bash @@ -23,8 +23,8 @@ SECCOMP_AGENT="${INTEGRATION_ROOT}/../../contrib/cmd/seccompagent/seccompagent" # shellcheck disable=SC2034 TESTDATA="${INTEGRATION_ROOT}/testdata" -# CRIU PATH -CRIU="$(which criu 2>/dev/null || true)" +# Whether we have criu binary. +command -v criu &>/dev/null && HAVE_CRIU=yes # Kernel version KERNEL_VERSION="$(uname -r)" @@ -350,7 +350,7 @@ function requires() { local skip_me case $var in criu) - if [ ! -e "$CRIU" ]; then + if [ -z "$HAVE_CRIU" ]; then skip_me=1 fi ;; diff --git a/utils_linux.go b/utils_linux.go index a9badf20f8b..abdf4ca1185 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -5,7 +5,6 @@ import ( "fmt" "net" "os" - "os/exec" "path/filepath" "strconv" @@ -17,49 +16,20 @@ import ( "golang.org/x/sys/unix" "github.com/opencontainers/runc/libcontainer" - "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/specconv" "github.com/opencontainers/runc/libcontainer/utils" ) var errEmptyID = errors.New("container id cannot be empty") -// loadFactory returns the configured factory instance for execing containers.
-func loadFactory(context *cli.Context) (libcontainer.Factory, error) { - root := context.GlobalString("root") - abs, err := filepath.Abs(root) - if err != nil { - return nil, err - } - - intelRdtManager := libcontainer.IntelRdtFs - - // We resolve the paths for {newuidmap,newgidmap} from the context of runc, - // to avoid doing a path lookup in the nsexec context. TODO: The binary - // names are not currently configurable. - newuidmap, err := exec.LookPath("newuidmap") - if err != nil { - newuidmap = "" - } - newgidmap, err := exec.LookPath("newgidmap") - if err != nil { - newgidmap = "" - } - - return libcontainer.New(abs, intelRdtManager, - libcontainer.CriuPath(context.GlobalString("criu")), - libcontainer.NewuidmapPath(newuidmap), - libcontainer.NewgidmapPath(newgidmap)) -} - // getContainer returns the specified container instance by loading it from state // with the default factory. -func getContainer(context *cli.Context) (libcontainer.Container, error) { +func getContainer(context *cli.Context) (*libcontainer.Container, error) { id := context.Args().First() if id == "" { return nil, errEmptyID } - factory, err := loadFactory(context) + factory, err := libcontainer.New(context.GlobalString("root")) if err != nil { return nil, err } @@ -86,6 +56,7 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) { Label: p.SelinuxLabel, NoNewPrivileges: &p.NoNewPrivileges, AppArmorProfile: p.ApparmorProfile, + Capabilities: p.Capabilities, } if p.ConsoleSize != nil { @@ -93,14 +64,6 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) { lp.ConsoleHeight = uint16(p.ConsoleSize.Height) } - if p.Capabilities != nil { - lp.Capabilities = &configs.Capabilities{} - lp.Capabilities.Bounding = p.Capabilities.Bounding - lp.Capabilities.Effective = p.Capabilities.Effective - lp.Capabilities.Inheritable = p.Capabilities.Inheritable - lp.Capabilities.Permitted = p.Capabilities.Permitted - lp.Capabilities.Ambient = p.Capabilities.Ambient - } for _, gid := 
range p.User.AdditionalGids { lp.AdditionalGroups = append(lp.AdditionalGroups, strconv.FormatUint(uint64(gid), 10)) } @@ -114,7 +77,7 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) { return lp, nil } -func destroy(container libcontainer.Container) { +func destroy(container *libcontainer.Container) { if err := container.Destroy(); err != nil { logrus.Error(err) } @@ -194,7 +157,7 @@ func createPidFile(path string, process *libcontainer.Process) error { return os.Rename(tmpName, path) } -func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcontainer.Container, error) { +func createContainer(context *cli.Context, id string, spec *specs.Spec) (*libcontainer.Container, error) { rootlessCg, err := shouldUseRootlessCgroupManager(context) if err != nil { return nil, err @@ -212,7 +175,7 @@ func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcont return nil, err } - factory, err := loadFactory(context) + factory, err := libcontainer.New(context.GlobalString("root")) if err != nil { return nil, err } @@ -228,7 +191,7 @@ type runner struct { preserveFDs int pidFile string consoleSocket string - container libcontainer.Container + container *libcontainer.Container action CtAct notifySocket *notifySocket criuOpts *libcontainer.CriuOpts