diff --git a/libcontainer/cgroups/fs/cpuset.go b/libcontainer/cgroups/fs/cpuset.go index e7b5cf6726b..ec241f541b6 100644 --- a/libcontainer/cgroups/fs/cpuset.go +++ b/libcontainer/cgroups/fs/cpuset.go @@ -6,12 +6,10 @@ import ( "os" "path/filepath" - "github.com/moby/sys/mountinfo" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" "github.com/opencontainers/runc/libcontainer/configs" - libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" - "github.com/pkg/errors" + "golang.org/x/sys/unix" ) type CpusetGroup struct { @@ -43,46 +41,19 @@ func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } -// Get the source mount point of directory passed in as argument. -func getMount(dir string) (string, error) { - mi, err := mountinfo.GetMounts(mountinfo.ParentsFilter(dir)) - if err != nil { - return "", err - } - if len(mi) < 1 { - return "", errors.Errorf("Can't find mount point of %s", dir) - } - - // find the longest mount point - var idx, maxlen int - for i := range mi { - if len(mi[i].Mountpoint) > maxlen { - maxlen = len(mi[i].Mountpoint) - idx = i - } - } - - return mi[idx].Mountpoint, nil -} - func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error { // This might happen if we have no cpuset cgroup mounted. // Just do nothing and don't fail. if dir == "" { return nil } - root, err := getMount(dir) - if err != nil { - return err - } - root = filepath.Dir(root) // 'ensureParent' start with parent because we don't want to // explicitly inherit from parent, it could conflict with // 'cpuset.cpu_exclusive'. - if err := cpusetEnsureParent(filepath.Dir(dir), root); err != nil { + if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil { return err } - if err := os.MkdirAll(dir, 0755); err != nil { + if err := os.Mkdir(dir, 0755); err != nil && !os.IsExist(err) { return err } // We didn't inherit cpuset configs from parent, but we have @@ -111,22 +82,28 @@ func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) { return cpus, mems, nil } -// cpusetEnsureParent makes sure that the parent directory of current is created -// and populated with the proper cpus and mems files copied from -// its parent. -func cpusetEnsureParent(current, root string) error { +// cpusetEnsureParent makes sure that the parent directories of current +// are created and populated with the proper cpus and mems files copied +// from their respective parent. It does that recursively, starting from +// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point). +func cpusetEnsureParent(current string) error { + var st unix.Statfs_t + parent := filepath.Dir(current) - if libcontainerUtils.CleanPath(parent) == root { + err := unix.Statfs(parent, &st) + if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC { return nil } - // Avoid infinite recursion. - if parent == current { - return errors.New("cpuset: cgroup parent path outside cgroup root") + // Treat non-existing directory as cgroupfs as it will be created, + // and the root cpuset directory obviously exists. + if err != nil && err != unix.ENOENT { + return &os.PathError{Op: "statfs", Path: parent, Err: err} } - if err := cpusetEnsureParent(parent, root); err != nil { + + if err := cpusetEnsureParent(parent); err != nil { return err } - if err := os.MkdirAll(current, 0755); err != nil { + if err := os.Mkdir(current, 0755); err != nil && !os.IsExist(err) { return err } return cpusetCopyIfNeeded(current, parent) diff --git a/libcontainer/cgroups/fs/fs.go b/libcontainer/cgroups/fs/fs.go index d4c0e7c2005..a42ce4535e9 100644 --- a/libcontainer/cgroups/fs/fs.go +++ b/libcontainer/cgroups/fs/fs.go @@ -3,11 +3,9 @@ package fs import ( - "bufio" "fmt" "os" "path/filepath" - "strings" "sync" "github.com/opencontainers/runc/libcontainer/cgroups" @@ -133,46 +131,19 @@ func getCgroupRoot() (string, error) { return cgroupRoot, nil } - // slow path: parse mountinfo, find the first mount where fs=cgroup - // (e.g. "/sys/fs/cgroup/memory"), use its parent. - f, err := os.Open("/proc/self/mountinfo") + // slow path: parse mountinfo + mi, err := cgroups.GetCgroupMounts(false) if err != nil { return "", err } - defer f.Close() - - var root string - scanner := bufio.NewScanner(f) - for scanner.Scan() { - text := scanner.Text() - fields := strings.Split(text, " ") - // Safe as mountinfo encodes mountpoints with spaces as \040. - index := strings.Index(text, " - ") - postSeparatorFields := strings.Fields(text[index+3:]) - numPostFields := len(postSeparatorFields) - - // This is an error as we can't detect if the mount is for "cgroup" - if numPostFields == 0 { - return "", fmt.Errorf("mountinfo: found no fields post '-' in %q", text) - } - - if postSeparatorFields[0] == "cgroup" { - // Check that the mount is properly formatted. - if numPostFields < 3 { - return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) - } - - root = filepath.Dir(fields[4]) - break - } - } - if err := scanner.Err(); err != nil { - return "", err - } - if root == "" { + if len(mi) < 1 { return "", errors.New("no cgroup mount found in mountinfo") } + // Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"), + // use its parent directory. + root := filepath.Dir(mi[0].Mountpoint) + if _, err := os.Stat(root); err != nil { return "", err } diff --git a/libcontainer/cgroups/utils_test.go b/libcontainer/cgroups/utils_test.go index 1645ae7b0ec..9e91bac5058 100644 --- a/libcontainer/cgroups/utils_test.go +++ b/libcontainer/cgroups/utils_test.go @@ -9,6 +9,7 @@ import ( "strings" "testing" + "github.com/moby/sys/mountinfo" "github.com/sirupsen/logrus" ) @@ -244,7 +245,13 @@ func TestGetCgroupMounts(t *testing.T) { }, } for _, td := range testTable { - mi := bytes.NewBufferString(td.mountInfo) + mi, err := mountinfo.GetMountsFromReader( + bytes.NewBufferString(td.mountInfo), + mountinfo.FSTypeFilter("cgroup"), + ) + if err != nil { + t.Fatal(err) + } cgMounts, err := getCgroupMountsHelper(td.subsystems, mi, false) if err != nil { t.Fatal(err) @@ -281,7 +288,6 @@ func TestGetCgroupMounts(t *testing.T) { // Test the all=true case. // Reset the test input. - mi = bytes.NewBufferString(td.mountInfo) for k := range td.subsystems { td.subsystems[k] = false } @@ -317,11 +323,15 @@ func BenchmarkGetCgroupMounts(b *testing.B) { "perf_event": false, "hugetlb": false, } + mi, err := mountinfo.GetMountsFromReader( + bytes.NewBufferString(fedoraMountinfo), + mountinfo.FSTypeFilter("cgroup"), + ) + if err != nil { + b.Fatal(err) + } b.ResetTimer() for i := 0; i < b.N; i++ { - b.StopTimer() - mi := bytes.NewBufferString(fedoraMountinfo) - b.StartTimer() if _, err := getCgroupMountsHelper(subsystems, mi, false); err != nil { b.Fatal(err) } @@ -396,7 +406,13 @@ func TestIgnoreCgroup2Mount(t *testing.T) { "name=systemd": false, } - mi := bytes.NewBufferString(cgroup2Mountinfo) + mi, err := mountinfo.GetMountsFromReader( + bytes.NewBufferString(cgroup2Mountinfo), + mountinfo.FSTypeFilter("cgroup"), + ) + if err != nil { + t.Fatal(err) + } cgMounts, err := getCgroupMountsHelper(subsystems, mi, false) if err != nil { t.Fatal(err) @@ -409,10 +425,8 @@ func TestIgnoreCgroup2Mount(t *testing.T) { } func TestFindCgroupMountpointAndRoot(t *testing.T) { - fakeMountInfo := ` -35 27 0:29 / /foo rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,devices -35 27 0:29 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,devices -` + fakeMountInfo := `35 27 0:29 / /foo rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,devices +35 27 0:29 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,devices` testCases := []struct { cgroupPath string output string @@ -421,8 +435,16 @@ func TestFindCgroupMountpointAndRoot(t *testing.T) { {cgroupPath: "", output: "/foo"}, } + mi, err := mountinfo.GetMountsFromReader( + bytes.NewBufferString(fakeMountInfo), + mountinfo.FSTypeFilter("cgroup"), + ) + if err != nil { + t.Fatal(err) + } + for _, c := range testCases { - mountpoint, _, _ := findCgroupMountpointAndRootFromReader(strings.NewReader(fakeMountInfo), c.cgroupPath, "devices") + mountpoint, _, _ := findCgroupMountpointAndRootFromMI(mi, c.cgroupPath, "devices") if mountpoint != c.output { t.Errorf("expected %s, got %s", c.output, mountpoint) } diff --git a/libcontainer/cgroups/v1_utils.go b/libcontainer/cgroups/v1_utils.go index f610ed8c475..95ec9dff028 100644 --- a/libcontainer/cgroups/v1_utils.go +++ b/libcontainer/cgroups/v1_utils.go @@ -1,16 +1,16 @@ package cgroups import ( - "bufio" "errors" "fmt" - "io" "os" "path/filepath" "strings" + "sync" "syscall" securejoin "github.com/cyphar/filepath-securejoin" + "github.com/moby/sys/mountinfo" "golang.org/x/sys/unix" ) @@ -25,6 +25,10 @@ const ( var ( errUnified = errors.New("not implemented for cgroup v2 unified hierarchy") ErrV1NoUnified = errors.New("invalid configuration: cannot use unified on cgroup v1") + + readMountinfoOnce sync.Once + readMountinfoErr error + cgroupMountinfo []*mountinfo.Info ) type NotFoundError struct { @@ -91,6 +95,21 @@ func tryDefaultPath(cgroupPath, subsystem string) string { return path } +// readCgroupMountinfo returns a list of cgroup v1 mounts (i.e. the ones +// with fstype of "cgroup") for the current running process. +// +// The results are cached (to avoid re-reading mountinfo which is relatively +// expensive), so it is assumed that cgroup mounts are not being changed. +func readCgroupMountinfo() ([]*mountinfo.Info, error) { + readMountinfoOnce.Do(func() { + cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts( + mountinfo.FSTypeFilter("cgroup"), + ) + }) + + return cgroupMountinfo, readMountinfoErr +} + // https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) { if IsCgroup2UnifiedMode() { @@ -111,56 +130,28 @@ func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, return "", "", errUnified } - // Avoid parsing mountinfo by checking if subsystem is valid/available. - if !isSubsystemAvailable(subsystem) { - return "", "", NewNotFoundError(subsystem) - } - - f, err := os.Open("/proc/self/mountinfo") + mi, err := readCgroupMountinfo() if err != nil { return "", "", err } - defer f.Close() - return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem) + return findCgroupMountpointAndRootFromMI(mi, cgroupPath, subsystem) } -func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) { - scanner := bufio.NewScanner(reader) - for scanner.Scan() { - txt := scanner.Text() - fields := strings.Fields(txt) - if len(fields) < 9 { - continue - } - if strings.HasPrefix(fields[4], cgroupPath) { - for _, opt := range strings.Split(fields[len(fields)-1], ",") { +func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, subsystem string) (string, string, error) { + for _, mi := range mounts { + if strings.HasPrefix(mi.Mountpoint, cgroupPath) { + for _, opt := range strings.Split(mi.VFSOptions, ",") { if opt == subsystem { - return fields[4], fields[3], nil + return mi.Mountpoint, mi.Root, nil } } } } - if err := scanner.Err(); err != nil { - return "", "", err - } return "", "", NewNotFoundError(subsystem) } -func isSubsystemAvailable(subsystem string) bool { - if IsCgroup2UnifiedMode() { - panic("don't call isSubsystemAvailable from cgroupv2 code") - } - - cgroups, err := ParseCgroupFile("/proc/self/cgroup") - if err != nil { - return false - } - _, avail := cgroups[subsystem] - return avail -} - func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) { if len(m.Subsystems) == 0 { return "", fmt.Errorf("no subsystem for mount") @@ -169,25 +160,15 @@ func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) { return getControllerPath(m.Subsystems[0], cgroups) } -func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) { +func getCgroupMountsHelper(ss map[string]bool, mounts []*mountinfo.Info, all bool) ([]Mount, error) { res := make([]Mount, 0, len(ss)) - scanner := bufio.NewScanner(mi) numFound := 0 - for scanner.Scan() && (all || numFound < len(ss)) { - txt := scanner.Text() - sepIdx := strings.Index(txt, " - ") - if sepIdx == -1 { - return nil, fmt.Errorf("invalid mountinfo format") - } - if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" { - continue - } - fields := strings.Split(txt, " ") + for _, mi := range mounts { m := Mount{ - Mountpoint: fields[4], - Root: fields[3], + Mountpoint: mi.Mountpoint, + Root: mi.Root, } - for _, opt := range strings.Split(fields[len(fields)-1], ",") { + for _, opt := range strings.Split(mi.VFSOptions, ",") { seen, known := ss[opt] if !known || (!all && seen) { continue @@ -200,19 +181,18 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, if len(m.Subsystems) > 0 || all { res = append(res, m) } - } - if err := scanner.Err(); err != nil { - return nil, err + if !all && numFound >= len(ss) { + break + } } return res, nil } func getCgroupMountsV1(all bool) ([]Mount, error) { - f, err := os.Open("/proc/self/mountinfo") + mi, err := readCgroupMountinfo() if err != nil { return nil, err } - defer f.Close() allSubsystems, err := ParseCgroupFile("/proc/self/cgroup") if err != nil { @@ -223,7 +203,8 @@ func getCgroupMountsV1(all bool) ([]Mount, error) { for s := range allSubsystems { allMap[s] = false } - return getCgroupMountsHelper(allMap, f, all) + + return getCgroupMountsHelper(allMap, mi, all) } // GetOwnCgroup returns the relative path to the cgroup docker is running in.