diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6d0d741..edcc1b2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,12 +62,12 @@ jobs: - name: Build test dependencies (healthworker) run: go build ./testdata/healthworker/... - - name: Run cgroup integration tests (as root) + - name: Run cgroup + namespace integration tests (as root) if: env.CGROUP_AVAILABLE == 'true' run: | sudo --preserve-env=PATH,GOPATH,GOCACHE,HOME \ env HERD_CGROUP_TEST=1 \ - $(which go) test -v -run TestSandbox -timeout 60s ./... + $(which go) test -v -run 'TestSandbox|TestNamespace' -timeout 90s ./... env: GOPATH: ${{ env.GOPATH }} GOCACHE: ${{ env.GOCACHE }} diff --git a/factory_cgroup_test.go b/factory_cgroup_test.go index 384c6ec..829853b 100644 --- a/factory_cgroup_test.go +++ b/factory_cgroup_test.go @@ -25,6 +25,13 @@ func TestNewProcessFactory_DefaultMemoryCPUUnlimited(t *testing.T) { } } +func TestNewProcessFactory_DefaultNamespaceFlags(t *testing.T) { + f := NewProcessFactory("./fake-binary") + if f.namespaceCloneFlags != defaultNamespaceCloneFlags() { + t.Errorf("expected default namespaceCloneFlags=%d, got %d", defaultNamespaceCloneFlags(), f.namespaceCloneFlags) + } +} + func TestWithMemoryLimit_StoresBytes(t *testing.T) { const limit = 512 * 1024 * 1024 // 512 MB f := NewProcessFactory("./fake-binary").WithMemoryLimit(limit) diff --git a/process_worker_factory.go b/process_worker_factory.go index 38eae8b..c08e04b 100644 --- a/process_worker_factory.go +++ b/process_worker_factory.go @@ -172,6 +172,7 @@ type ProcessFactory struct { startTimeout time.Duration // maximum time to wait for the first successful health check startHealthCheckDelay time.Duration // delay the health check for the first time. enableSandbox bool // true by default for isolation + namespaceCloneFlags uintptr // Linux namespaces to enable for sandboxed workers cgroupMemory int64 // bytes; 0 means unlimited cgroupCPU int64 // quota in micros per 100ms period; 0 means unlimited cgroupPIDs int64 // max pids; -1 means unlimited @@ -193,6 +194,7 @@ func NewProcessFactory(binary string, args ...string) *ProcessFactory { startTimeout: 30 * time.Second, startHealthCheckDelay: 1 * time.Second, enableSandbox: true, + namespaceCloneFlags: defaultNamespaceCloneFlags(), cgroupPIDs: 100, } } @@ -331,6 +333,7 @@ func (f *ProcessFactory) Spawn(ctx context.Context) (Worker[*http.Client], error memoryMaxBytes: f.cgroupMemory, cpuMaxMicros: f.cgroupCPU, pidsMax: f.cgroupPIDs, + cloneFlags: f.namespaceCloneFlags, }) if err != nil { return nil, fmt.Errorf("herd: ProcessFactory: failed to apply sandbox: %w", err) diff --git a/sandbox.go b/sandbox.go index ff3f9ed..d961323 100644 --- a/sandbox.go +++ b/sandbox.go @@ -6,6 +6,7 @@ type sandboxConfig struct { memoryMaxBytes int64 cpuMaxMicros int64 pidsMax int64 + cloneFlags uintptr } // sandboxHandle owns post-start and cleanup hooks for sandbox resources. diff --git a/sandbox_integration_test.go b/sandbox_integration_test.go index f04493c..93876bf 100644 --- a/sandbox_integration_test.go +++ b/sandbox_integration_test.go @@ -19,6 +19,7 @@ import ( "os" "os/exec" "path/filepath" + "strconv" "strings" "testing" "time" @@ -240,3 +241,65 @@ func TestSandbox_MemoryLimitFileWritten(t *testing.T) { t.Logf("memory limits confirmed: max=%s swap=%s", strings.TrimSpace(string(memMax)), strings.TrimSpace(string(swapMax))) } + +func TestNamespace_PIDIsolation(t *testing.T) { + requireCgroupIntegration(t) + + bin := buildHealthWorker(t) + + factory := NewProcessFactory(bin). + WithHealthPath("/health"). + WithStartTimeout(10 * time.Second). + WithStartHealthCheckDelay(100 * time.Millisecond) + + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + worker, err := factory.Spawn(ctx) + if err != nil { + t.Fatalf("Spawn: %v", err) + } + defer worker.Close() + + pw, ok := worker.(*processWorker) + if !ok { + t.Fatal("expected *processWorker from Spawn") + } + pw.mu.Lock() + hostPID := pw.cmd.Process.Pid + pw.mu.Unlock() + + statusFile := fmt.Sprintf("/proc/%d/status", hostPID) + status, err := os.ReadFile(statusFile) + if err != nil { + t.Fatalf("read %s: %v", statusFile, err) + } + + insidePID, ok := parseInnermostNSpid(string(status)) + if !ok { + t.Fatalf("NSpid line not found in %s:\n%s", statusFile, string(status)) + } + if insidePID != 1 { + t.Fatalf("expected worker to be PID 1 inside its namespace, got %d", insidePID) + } + t.Logf("NSpid verified: host pid=%d, namespace pid=%d", hostPID, insidePID) +} + +func parseInnermostNSpid(status string) (int, bool) { + for _, line := range strings.Split(status, "\n") { + if !strings.HasPrefix(line, "NSpid:") { + continue + } + fields := strings.Fields(strings.TrimPrefix(line, "NSpid:")) + if len(fields) == 0 { + return 0, false + } + last := fields[len(fields)-1] + pid, err := strconv.Atoi(last) + if err != nil { + return 0, false + } + return pid, true + } + return 0, false +} diff --git a/sandbox_linux.go b/sandbox_linux.go index 3694edd..e2a6a50 100644 --- a/sandbox_linux.go +++ b/sandbox_linux.go @@ -112,6 +112,9 @@ func applySandboxFlags(cmd *exec.Cmd, workerID string, cfg sandboxConfig) (sandb if sys == nil { sys = &syscall.SysProcAttr{} } + if cfg.cloneFlags != 0 { + sys.Cloneflags |= cfg.cloneFlags + } sys.CgroupFD = int(dir.Fd()) sys.UseCgroupFD = true cmd.SysProcAttr = sys @@ -119,6 +122,10 @@ func applySandboxFlags(cmd *exec.Cmd, workerID string, cfg sandboxConfig) (sandb return &cgroupHandle{path: cgroupPath, fd: dir}, nil } +func defaultNamespaceCloneFlags() uintptr { + return uintptr(syscall.CLONE_NEWPID | syscall.CLONE_NEWNS | syscall.CLONE_NEWIPC) +} + func writeCgroupFile(cgroupPath, filename, value string) error { path := filepath.Join(cgroupPath, filename) return os.WriteFile(path, []byte(value), 0o644) diff --git a/sandbox_linux_test.go b/sandbox_linux_test.go index 5e4f74e..e977f2e 100644 --- a/sandbox_linux_test.go +++ b/sandbox_linux_test.go @@ -12,6 +12,7 @@ import ( "os/exec" "path/filepath" "strings" + "syscall" "testing" ) @@ -163,6 +164,32 @@ func TestApplySandboxFlags_SysProcAttrWired(t *testing.T) { } } +func TestApplySandboxFlags_CloneFlagsMergedWithCgroup(t *testing.T) { + withTempCgroupRoot(t) + cmd := newFakeCmd() + + flags := uintptr(syscall.CLONE_NEWPID | syscall.CLONE_NEWNS | syscall.CLONE_NEWIPC) + h, err := applySandboxFlags(cmd, "worker-ns", sandboxConfig{cloneFlags: flags}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if h == nil { + t.Fatal("expected non-nil handle") + } + if cmd.SysProcAttr == nil { + t.Fatal("SysProcAttr should be set after applySandboxFlags") + } + if cmd.SysProcAttr.Cloneflags&flags != flags { + t.Errorf("expected Cloneflags to include %#x, got %#x", flags, cmd.SysProcAttr.Cloneflags) + } + if !cmd.SysProcAttr.UseCgroupFD { + t.Error("UseCgroupFD should remain true after namespace merge") + } + if cmd.SysProcAttr.CgroupFD <= 0 { + t.Errorf("CgroupFD should remain set after namespace merge, got %d", cmd.SysProcAttr.CgroupFD) + } +} + func TestApplySandboxFlags_SoftFailOnBadRoot(t *testing.T) { // Point to a path that cannot be created (inside /proc which is read-only). old := activeCgroupRoot diff --git a/sandbox_unsupported.go b/sandbox_unsupported.go index 908aa65..6fbf85d 100644 --- a/sandbox_unsupported.go +++ b/sandbox_unsupported.go @@ -27,3 +27,7 @@ var ErrSandboxUnsupported = errors.New( func applySandboxFlags(cmd *exec.Cmd, workerID string, cfg sandboxConfig) (sandboxHandle, error) { return nil, ErrSandboxUnsupported } + +func defaultNamespaceCloneFlags() uintptr { + return 0 +}