From d507bed23903553bb6ec7b34a096e3d7f21d7611 Mon Sep 17 00:00:00 2001 From: Sankalp Narula Date: Thu, 12 Mar 2026 21:17:56 -0400 Subject: [PATCH] security: add no-new-privs and seccomp BPF syscall filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## No-New-Privs Set PR_SET_NO_NEW_PRIVS on the spawning OS thread via unix.Prctl before forking each worker. Workers inherit the bit and cannot gain privileges through setuid/setgid binaries (sudo, su, etc.). Enabled by default when sandbox is enabled. sandboxConfig gains a noNewPrivs bool field. ProcessFactory passes noNewPrivs: true in the default sandboxConfig. ## Seccomp BPF filter Add a syscall allowlist filter using github.com/elastic/go-seccomp-bpf (pure Go, no libseccomp dependency). Key properties: - SECCOMP_FILTER_FLAG_TSYNC syncs the filter to all Go runtime threads - Default policy: SeccompPolicyErrno (blocked calls return EPERM) - Blocked because they are absent from the allowlist (there are no explicit deny rules): ptrace, mount, bpf, kexec_load, keyctl, io_uring_*, userfaultfd, perf_event_open, setns, process_vm_* Design: ProcessFactory injects HERD_SECCOMP_PROFILE env var into the worker. Go worker binaries call herd.EnterSandbox() at the top of main() to install the filter. Opaque binaries (ollama, playwright) use WithSeccompPolicy(SeccompPolicyOff) to skip seccomp while still benefiting from cgroup + namespace isolation. New API: - EnterSandbox() error — called by worker binaries at startup - ProcessFactory.WithSeccompPolicy(SeccompPolicy) *ProcessFactory - SeccompPolicy: Off | Log | Errno | Kill New files: - seccomp_linux.go: EnterSandbox, httpWorkerSyscalls allowlist - seccomp_unsupported.go: cross-platform no-op stubs - seccomp_linux_test.go: unit tests for BPF construction and policy Updated examples to use WithSeccompPolicy(SeccompPolicyOff) for opaque binaries (ollama, playwright) with explanatory comments. 
--- examples/ollama/main.go | 9 +- examples/playwright/main.go | 9 +- go.mod | 9 +- go.sum | 14 ++ process_worker_factory.go | 28 +++- sandbox.go | 2 + sandbox_linux.go | 11 ++ sandbox_linux_test.go | 38 ++++- seccomp_linux.go | 280 ++++++++++++++++++++++++++++++++++++ seccomp_linux_test.go | 137 ++++++++++++++++++ seccomp_unsupported.go | 24 ++++ 11 files changed, 556 insertions(+), 5 deletions(-) create mode 100644 go.sum create mode 100644 seccomp_linux.go create mode 100644 seccomp_linux_test.go create mode 100644 seccomp_unsupported.go diff --git a/examples/ollama/main.go b/examples/ollama/main.go index 2cb4b54..d3df3ab 100644 --- a/examples/ollama/main.go +++ b/examples/ollama/main.go @@ -58,12 +58,19 @@ func main() { // you only ever pull a model once. // CUDA_VISIBLE_DEVICES is intentionally left empty here; set it if you // want to pin different workers to different GPUs. + // + // Seccomp note: Ollama is an opaque binary — it cannot call + // herd.EnterSandbox() to install its own syscall filter. We opt out of + // seccomp here. Namespace + cgroup isolation still applies. + // For Go worker binaries you control, call herd.EnterSandbox() at the top + // of main() and remove WithSeccompPolicy to use the default (errno) policy. factory := herd.NewProcessFactory("ollama", "serve"). WithEnv("OLLAMA_HOST=127.0.0.1:{{.Port}}"). WithEnv("OLLAMA_MODELS=" + *modelsDir). WithHealthPath("/"). // ollama: GET / → 200 "Ollama is running" WithStartTimeout(2 * time.Minute). - WithStartHealthCheckDelay(1 * time.Second) + WithStartHealthCheckDelay(1 * time.Second). 
+ WithSeccompPolicy(herd.SeccompPolicyOff) // opaque binary — cannot call EnterSandbox() // ── Pool ─────────────────────────────────────────────────────────────── pool, err := herd.New(factory, diff --git a/examples/playwright/main.go b/examples/playwright/main.go index be4677b..6bf0fcb 100644 --- a/examples/playwright/main.go +++ b/examples/playwright/main.go @@ -40,6 +40,12 @@ func main() { // // We use the CLI directly inside ProcessFactory: // npx playwright run-server --port {{.Port}} --host 127.0.0.1 + // + // Seccomp note: Playwright is an opaque Node.js binary — it cannot call + // herd.EnterSandbox() to install its own syscall filter. We opt out of + // seccomp here. Namespace + cgroup isolation still applies. + // For Go worker binaries you control, call herd.EnterSandbox() at the top + // of main() and remove WithSeccompPolicy to use the default (errno) policy. factory := herd.NewProcessFactory( "npx", "playwright", "run-server", "--port", "{{.Port}}", @@ -47,7 +53,8 @@ func main() { ). WithHealthPath("/"). WithStartTimeout(1 * time.Minute). - WithStartHealthCheckDelay(500 * time.Millisecond) + WithStartHealthCheckDelay(500 * time.Millisecond). 
+ WithSeccompPolicy(herd.SeccompPolicyOff) // opaque binary — cannot call EnterSandbox() // ── Pool ─────────────────────────────────────────────────────────────── // To make a bulletproof multi-tenant tool and avoid shared fate, state leaks, diff --git a/go.mod b/go.mod index d73be28..73d3490 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,10 @@ module github.com/hackstrix/herd -go 1.22 +go 1.25.0 + +require ( + github.com/elastic/go-seccomp-bpf v1.6.0 + golang.org/x/net v0.52.0 +) + +require golang.org/x/sys v0.42.0 // indirect diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..c9cb6e8 --- /dev/null +++ b/go.sum @@ -0,0 +1,14 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/elastic/go-seccomp-bpf v1.6.0 h1:NYduiYxRJ0ZkIyQVwlSskcqPPSg6ynu5pK0/d7SQATs= +github.com/elastic/go-seccomp-bpf v1.6.0/go.mod h1:5tFsTvH4NtWGfpjsOQD53H8HdVQ+zSZFRUDSGevC0Kc= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= +golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/process_worker_factory.go b/process_worker_factory.go index c08e04b..9ae82fa 100644 --- a/process_worker_factory.go +++ b/process_worker_factory.go @@ -176,6 
+176,7 @@ type ProcessFactory struct { cgroupMemory int64 // bytes; 0 means unlimited cgroupCPU int64 // quota in micros per 100ms period; 0 means unlimited cgroupPIDs int64 // max pids; -1 means unlimited + seccompPolicy SeccompPolicy // syscall filter enforcement mode; default SeccompPolicyErrno counter atomic.Int64 } @@ -196,6 +197,7 @@ func NewProcessFactory(binary string, args ...string) *ProcessFactory { enableSandbox: true, namespaceCloneFlags: defaultNamespaceCloneFlags(), cgroupPIDs: 100, + seccompPolicy: SeccompPolicyErrno, } } @@ -284,6 +286,21 @@ func (f *ProcessFactory) WithInsecureSandbox() *ProcessFactory { return f } +// WithSeccompPolicy sets the seccomp syscall-filter enforcement mode for +// workers spawned by this factory. +// +// The filter is installed by the worker binary itself at startup via +// [EnterSandbox]. The factory injects HERD_SECCOMP_PROFILE into the worker +// environment to communicate the chosen policy. +// +// Defaults to [SeccompPolicyErrno] (unauthorized syscalls return EPERM). +// Use [SeccompPolicyOff] to disable seccomp (e.g. when the worker binary +// does not call EnterSandbox). +func (f *ProcessFactory) WithSeccompPolicy(p SeccompPolicy) *ProcessFactory { + f.seccompPolicy = p + return f +} + func streamLogs(workerID string, pipe io.ReadCloser, isError bool) { // bufio.Scanner guarantees we read line-by-line, preventing torn logs. scanner := bufio.NewScanner(pipe) @@ -325,7 +342,14 @@ func (f *ProcessFactory) Spawn(ctx context.Context) (Worker[*http.Client], error // During program exits, this should be cleaned up by the Shutdown method cmd := exec.Command(f.binary, resolvedArgs...) - cmd.Env = append(os.Environ(), append([]string{"PORT=" + portStr}, resolvedEnv...)...) 
+ + // Base environment: inherit parent + port + user extras + seccomp profile + baseEnv := []string{"PORT=" + portStr} + if f.enableSandbox && f.seccompPolicy != SeccompPolicyOff { + baseEnv = append(baseEnv, "HERD_SECCOMP_PROFILE="+f.seccompPolicy.envValue()) + } + cmd.Env = append(os.Environ(), append(baseEnv, resolvedEnv...)...) + var cgroupHandle sandboxHandle if f.enableSandbox { @@ -334,6 +358,8 @@ func (f *ProcessFactory) Spawn(ctx context.Context) (Worker[*http.Client], error cpuMaxMicros: f.cgroupCPU, pidsMax: f.cgroupPIDs, cloneFlags: f.namespaceCloneFlags, + noNewPrivs: true, + seccompPolicy: f.seccompPolicy, }) if err != nil { return nil, fmt.Errorf("herd: ProcessFactory: failed to apply sandbox: %w", err) diff --git a/sandbox.go b/sandbox.go index d961323..9f88994 100644 --- a/sandbox.go +++ b/sandbox.go @@ -7,6 +7,8 @@ type sandboxConfig struct { cpuMaxMicros int64 pidsMax int64 cloneFlags uintptr + noNewPrivs bool // prevent privilege escalation via setuid binaries + seccompPolicy SeccompPolicy // syscall filter enforcement mode } // sandboxHandle owns post-start and cleanup hooks for sandbox resources. diff --git a/sandbox_linux.go b/sandbox_linux.go index e2a6a50..cb68367 100644 --- a/sandbox_linux.go +++ b/sandbox_linux.go @@ -11,6 +11,8 @@ import ( "path/filepath" "strconv" "syscall" + + "golang.org/x/sys/unix" ) const ( @@ -119,6 +121,15 @@ func applySandboxFlags(cmd *exec.Cmd, workerID string, cfg sandboxConfig) (sandb sys.UseCgroupFD = true cmd.SysProcAttr = sys + if cfg.noNewPrivs { + // Set no_new_privs on the calling OS thread; once set it cannot be cleared. + // NOTE(review): no runtime.LockOSThread is visible here, so the fork may run on + // another thread, and the bit stays on this parent thread — confirm intent. 
+ if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { + log.Printf("[sandbox:%s] WARNING: prctl PR_SET_NO_NEW_PRIVS failed: %v; continuing without no_new_privs", workerID, err) + } + } + return &cgroupHandle{path: cgroupPath, fd: dir}, nil } diff --git a/sandbox_linux_test.go b/sandbox_linux_test.go index e977f2e..433b34e 100644 --- a/sandbox_linux_test.go +++ b/sandbox_linux_test.go @@ -264,7 +264,43 @@ func TestCgroupHandle_Cleanup_Idempotent(t *testing.T) { h.Cleanup() // dir already gone — should not panic or log error as warning } -func TestCgroupHandle_Cleanup_NilSafe(t *testing.T) { +func TestApplySandboxFlags_NilSafe(t *testing.T) { var h *cgroupHandle h.Cleanup() // must not panic } + +// --------------------------------------------------------------------------- +// No-New-Privs tests +// --------------------------------------------------------------------------- + +func TestApplySandboxFlags_NoNewPrivs(t *testing.T) { + withTempCgroupRoot(t) + cmd := newFakeCmd() + + _, err := applySandboxFlags(cmd, "worker-nnp", sandboxConfig{noNewPrivs: true}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cmd.SysProcAttr == nil { + t.Fatal("SysProcAttr should be set") + } + if !cmd.SysProcAttr.NoNewPrivs { + t.Error("NoNewPrivs should be true when noNewPrivs=true") + } +} + +func TestApplySandboxFlags_NoNewPrivsOff(t *testing.T) { + withTempCgroupRoot(t) + cmd := newFakeCmd() + + _, err := applySandboxFlags(cmd, "worker-nnp-off", sandboxConfig{noNewPrivs: false}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cmd.SysProcAttr == nil { + t.Fatal("SysProcAttr should be set") + } + if cmd.SysProcAttr.NoNewPrivs { + t.Error("NoNewPrivs should be false when noNewPrivs=false") + } +} diff --git a/seccomp_linux.go b/seccomp_linux.go new file mode 100644 index 0000000..9b82829 --- /dev/null +++ b/seccomp_linux.go @@ -0,0 +1,280 @@ +//go:build linux + +// seccomp_linux.go — seccomp BPF syscall filter for herd workers. 
+// +// # Overview +// +// Seccomp (Secure Computing mode) restricts which syscalls a process can +// invoke. Herd uses an allowlist: the worker process is permitted only the +// syscalls a typical HTTP server needs. Every other syscall returns EPERM +// (SeccompPolicyErrno) or kills the process (SeccompPolicyKill). +// +// # Threading note +// +// Go multiplexes goroutines onto OS threads (M:N model). The BPF filter must +// be installed with SECCOMP_FILTER_FLAG_TSYNC so it is applied to all OS +// threads the Go runtime creates — not just the one that called the syscall. +// go-seccomp-bpf handles this automatically. +// +// # Worker integration +// +// [ProcessFactory] injects HERD_SECCOMP_PROFILE into the worker's environment. +// The worker binary calls [EnterSandbox] at the top of main() to install the +// filter. This design keeps the filter in the child process and avoids +// contaminating the herd parent process. +package herd + +import ( + "fmt" + "os" + + seccomp "github.com/elastic/go-seccomp-bpf" + "golang.org/x/net/bpf" +) + +// SeccompPolicy controls how unauthorized syscalls are handled. +type SeccompPolicy int + +const ( + // SeccompPolicyOff disables seccomp filtering entirely. + // Workers that do not call EnterSandbox should use this policy. + SeccompPolicyOff SeccompPolicy = iota + + // SeccompPolicyLog logs blocked syscalls via the kernel audit subsystem + // but does not block them. Requires Linux 4.14+. Useful during development + // to discover which syscalls a new worker needs. + SeccompPolicyLog + + // SeccompPolicyErrno returns EPERM for any unauthorized syscall. + // This is the default for production workers. + SeccompPolicyErrno + + // SeccompPolicyKill terminates the process immediately on any unauthorized + // syscall. Strictest policy; no error is returned to the caller. + SeccompPolicyKill +) + +// envValue returns the string written into HERD_SECCOMP_PROFILE. 
+func (p SeccompPolicy) envValue() string { + switch p { + case SeccompPolicyLog: + return "log" + case SeccompPolicyErrno: + return "errno" + case SeccompPolicyKill: + return "kill" + default: + return "off" + } +} + +// parsePolicyEnv converts the HERD_SECCOMP_PROFILE value back to a SeccompPolicy. +func parsePolicyEnv(s string) SeccompPolicy { + switch s { + case "log": + return SeccompPolicyLog + case "errno": + return SeccompPolicyErrno + case "kill": + return SeccompPolicyKill + default: + return SeccompPolicyOff + } +} + +// defaultSeccompAction maps a SeccompPolicy to a go-seccomp-bpf Action. +func defaultSeccompAction(p SeccompPolicy) seccomp.Action { + switch p { + case SeccompPolicyLog: + return seccomp.ActionLog + case SeccompPolicyKill: + return seccomp.ActionKillProcess + default: // SeccompPolicyErrno + return seccomp.ActionErrno + } +} + +// httpWorkerSyscalls is the allowlist for a typical HTTP server process. +// Generated by combining: +// - Docker's default seccomp profile (moby/moby) +// - Go runtime syscall requirements (strace on a minimal Go HTTP server) +// - Playwright/Chromium additional requirements removed (use WithSeccompPolicy(SeccompPolicyOff)) +// +// Missing a syscall? Run strace -f -e trace=all on your worker and add +// any blocked-but-needed calls via WithSeccompPolicy or a custom policy. 
+var httpWorkerSyscalls = []string{ + // ---- File I/O ---- + "read", "write", "readv", "writev", + "pread64", "pwrite64", "sendfile", + "open", "openat", "openat2", "creat", + "close", "close_range", + "stat", "fstat", "lstat", "newfstatat", + "statx", "statfs", "fstatfs", + "lseek", "dup", "dup2", "dup3", + "fcntl", "ioctl", + "access", "faccessat", "faccessat2", + "readlink", "readlinkat", + "getcwd", "chdir", "fchdir", + "mkdir", "mkdirat", "rmdir", + "unlink", "unlinkat", + "rename", "renameat", "renameat2", + "chmod", "fchmod", "fchmodat", + "chown", "fchown", "lchown", "fchownat", + "truncate", "ftruncate", + "link", "linkat", "symlink", "symlinkat", + "sync", "fsync", "fdatasync", "syncfs", + "getdents", "getdents64", + + // ---- Networking ---- + "socket", "bind", "listen", + "accept", "accept4", + "connect", + "sendto", "recvfrom", + "sendmsg", "recvmsg", + "sendmmsg", "recvmmsg", + "setsockopt", "getsockopt", + "getsockname", "getpeername", + "socketpair", "shutdown", + + // ---- Memory management ---- + "mmap", "munmap", "mprotect", + "madvise", "brk", + "mremap", "msync", + "mincore", "mlock", "mlock2", "munlock", + "mlockall", "munlockall", + "mbind", "set_mempolicy", "get_mempolicy", + + // ---- Polling & I/O multiplexing ---- + "poll", "ppoll", + "select", "pselect6", + "epoll_create", "epoll_create1", + "epoll_ctl", "epoll_wait", "epoll_pwait", "epoll_pwait2", + "pipe", "pipe2", + "eventfd", "eventfd2", + "timerfd_create", "timerfd_settime", "timerfd_gettime", + "inotify_init", "inotify_init1", + "inotify_add_watch", "inotify_rm_watch", + "signalfd", "signalfd4", + + // ---- Go runtime — threading ---- + // clone and clone3 are allowed unconditionally: this list carries no + // argument conditions, so fork-style clones (new processes) also pass. + // TODO: restrict clone to thread-creation flags if workers never fork. 
+ "clone", "clone3", + "futex", "futex_waitv", + "set_robust_list", "get_robust_list", + "tkill", "tgkill", + "sched_yield", + "sched_getaffinity", "sched_setaffinity", + "sched_getparam", "sched_setparam", + "sched_getscheduler", "sched_setscheduler", + + // ---- Signals ---- + "rt_sigaction", "rt_sigreturn", + "rt_sigprocmask", "rt_sigsuspend", + "rt_sigpending", "rt_sigtimedwait", + "rt_sigqueueinfo", "rt_tgsigqueueinfo", + "sigaltstack", "kill", "pause", + + // ---- Process info ---- + "getpid", "getppid", "gettid", + "getuid", "getgid", "geteuid", "getegid", + "getgroups", + "getpgrp", "getpgid", "getsid", + "getrlimit", "setrlimit", "prlimit64", + "getrusage", + "uname", "sysinfo", + + // ---- Time ---- + "clock_gettime", "clock_getres", "clock_nanosleep", + "gettimeofday", "settimeofday", + "nanosleep", "time", "times", + "adjtimex", "clock_adjtime", + + // ---- Exit ---- + "exit", "exit_group", + "wait4", "waitid", "waitpid", + + // ---- Misc Go runtime ---- + "arch_prctl", // set thread-local storage (x86-64) + "set_tid_address", // Go runtime init + "restart_syscall", // signal restart + "getrandom", // crypto/rand + "rseq", // restartable sequences (Go 1.21+) + "membarrier", // Go runtime memory barrier + "prctl", // Go runtime uses PR_SET_NAME etc. + + // ---- Misc ---- + "capget", // reading own capabilities is harmless + "syslog", // NOTE(review): syslog(2) reads/controls the kernel log buffer; userspace loggers write to /dev/log instead — confirm this is needed + "umask", +} + +// EnterSandbox installs the seccomp syscall filter for the current process. +// Worker binaries should call this at the very top of main(), before starting +// any goroutines or HTTP servers. +// +// The filter policy is read from the HERD_SECCOMP_PROFILE environment variable, +// which [ProcessFactory] injects automatically. If the variable is absent or +// set to "off", EnterSandbox is a no-op. 
+// +// EnterSandbox installs the filter with SECCOMP_FILTER_FLAG_TSYNC, which +// synchronises the filter across all OS threads created by the Go runtime. 
+// +// An unrecognised profile value is rejected with an error. Example worker main(): +// +// func main() { +// if err := herd.EnterSandbox(); err != nil { +// log.Fatalf("seccomp: %v", err) +// } +// // ... start HTTP server ... +// } +func EnterSandbox() error { + profileEnv := os.Getenv("HERD_SECCOMP_PROFILE") + if profileEnv == "" || profileEnv == "off" { // no policy requested: nothing to install + return nil + } + + policy := parsePolicyEnv(profileEnv) + if policy == SeccompPolicyOff { // unknown value (e.g. a typo): fail closed instead of silently running unsandboxed + return fmt.Errorf("herd: EnterSandbox: unknown HERD_SECCOMP_PROFILE %q", profileEnv) + } + + filter := seccomp.Filter{ + NoNewPrivs: true, // belt-and-suspenders: the parent also requests it via prctl before forking + Flag: seccomp.FilterFlagTSync, + Policy: seccomp.Policy{ + DefaultAction: defaultSeccompAction(policy), + Syscalls: []seccomp.SyscallGroup{ + { + Action: seccomp.ActionAllow, + Names: httpWorkerSyscalls, + }, + }, + }, + } + + if err := seccomp.LoadFilter(filter); err != nil { + return fmt.Errorf("herd: EnterSandbox: load seccomp filter: %w", err) + } + return nil +} + +// buildHTTPWorkerFilter builds but does not install the BPF filter for the +// http-worker policy. Used in tests to validate the BPF program structure. 
+func buildHTTPWorkerFilter(policy SeccompPolicy) ([]bpf.Instruction, error) { + f := seccomp.Filter{ + NoNewPrivs: false, + Flag: seccomp.FilterFlagTSync, + Policy: seccomp.Policy{ + DefaultAction: defaultSeccompAction(policy), + Syscalls: []seccomp.SyscallGroup{ + { + Action: seccomp.ActionAllow, + Names: httpWorkerSyscalls, + }, + }, + }, + } + return f.Policy.Assemble() +} diff --git a/seccomp_linux_test.go b/seccomp_linux_test.go new file mode 100644 index 0000000..5cc16fd --- /dev/null +++ b/seccomp_linux_test.go @@ -0,0 +1,137 @@ +//go:build linux + +package herd + +import ( + "os" + "testing" +) + +// --------------------------------------------------------------------------- +// SeccompPolicy tests +// --------------------------------------------------------------------------- + +func TestSeccompPolicyEnvValue(t *testing.T) { + tests := []struct { + policy SeccompPolicy + want string + }{ + {SeccompPolicyOff, "off"}, + {SeccompPolicyLog, "log"}, + {SeccompPolicyErrno, "errno"}, + {SeccompPolicyKill, "kill"}, + } + for _, tc := range tests { + if got := tc.policy.envValue(); got != tc.want { + t.Errorf("policy %d envValue: want %q, got %q", tc.policy, tc.want, got) + } + } +} + +func TestParsePolicyEnv(t *testing.T) { + tests := []struct { + input string + want SeccompPolicy + }{ + {"off", SeccompPolicyOff}, + {"log", SeccompPolicyLog}, + {"errno", SeccompPolicyErrno}, + {"kill", SeccompPolicyKill}, + {"", SeccompPolicyOff}, + {"unknown", SeccompPolicyOff}, + } + for _, tc := range tests { + if got := parsePolicyEnv(tc.input); got != tc.want { + t.Errorf("parsePolicyEnv(%q): want %d, got %d", tc.input, tc.want, got) + } + } +} + +// --------------------------------------------------------------------------- +// buildHTTPWorkerFilter tests +// --------------------------------------------------------------------------- + +func TestBuildHTTPWorkerFilter_ReturnsNilForPolicyOff(t *testing.T) { + // SeccompPolicyOff means EnterSandbox is a no-op; filter should 
not be built. + // We test that EnterSandbox returns nil without loading anything. + if err := os.Unsetenv("HERD_SECCOMP_PROFILE"); err != nil { + t.Fatal(err) + } + if err := EnterSandbox(); err != nil { + t.Errorf("EnterSandbox with unset env should be no-op, got: %v", err) + } + + t.Setenv("HERD_SECCOMP_PROFILE", "off") + if err := EnterSandbox(); err != nil { + t.Errorf("EnterSandbox with PROFILE=off should be no-op, got: %v", err) + } +} + +func TestBuildHTTPWorkerFilter_ValidBPF(t *testing.T) { + instructions, err := buildHTTPWorkerFilter(SeccompPolicyErrno) + if err != nil { + t.Fatalf("buildHTTPWorkerFilter: %v", err) + } + if len(instructions) == 0 { + t.Error("expected non-empty BPF instructions for SeccompPolicyErrno") + } + // Sanity check: at least 10 instructions (arch check + syscall checks + return) + const minExpectedInstructions = 10 + if len(instructions) < minExpectedInstructions { + t.Errorf("expected at least %d BPF instructions, got %d", minExpectedInstructions, len(instructions)) + } +} + +func TestBuildHTTPWorkerFilter_KillPolicy(t *testing.T) { + instructions, err := buildHTTPWorkerFilter(SeccompPolicyKill) + if err != nil { + t.Fatalf("buildHTTPWorkerFilter(Kill): %v", err) + } + if len(instructions) == 0 { + t.Error("expected non-empty BPF instructions for SeccompPolicyKill") + } +} + +// --------------------------------------------------------------------------- +// EnterSandbox no-op tests (safe to run without kernel seccomp support) +// --------------------------------------------------------------------------- + +func TestEnterSandbox_NoopWhenEnvUnset(t *testing.T) { + t.Setenv("HERD_SECCOMP_PROFILE", "") + if err := EnterSandbox(); err != nil { + t.Errorf("expected no-op when env unset, got: %v", err) + } +} + +func TestEnterSandbox_NoopWhenOff(t *testing.T) { + t.Setenv("HERD_SECCOMP_PROFILE", "off") + if err := EnterSandbox(); err != nil { + t.Errorf("expected no-op when PROFILE=off, got: %v", err) + } +} + +// 
--------------------------------------------------------------------------- +// ProcessFactory seccomp defaults +// --------------------------------------------------------------------------- + +func TestProcessFactory_DefaultSeccompPolicyIsErrno(t *testing.T) { + f := NewProcessFactory("echo") + if f.seccompPolicy != SeccompPolicyErrno { + t.Errorf("expected default seccompPolicy == SeccompPolicyErrno (%d), got %d", + SeccompPolicyErrno, f.seccompPolicy) + } +} + +func TestProcessFactory_WithSeccompPolicy(t *testing.T) { + f := NewProcessFactory("echo").WithSeccompPolicy(SeccompPolicyOff) + if f.seccompPolicy != SeccompPolicyOff { + t.Errorf("expected SeccompPolicyOff after WithSeccompPolicy, got %d", f.seccompPolicy) + } +} + +func TestProcessFactory_WithSeccompPolicy_Kill(t *testing.T) { + f := NewProcessFactory("echo").WithSeccompPolicy(SeccompPolicyKill) + if f.seccompPolicy != SeccompPolicyKill { + t.Errorf("expected SeccompPolicyKill after WithSeccompPolicy, got %d", f.seccompPolicy) + } +} diff --git a/seccomp_unsupported.go b/seccomp_unsupported.go new file mode 100644 index 0000000..5ebd5fa --- /dev/null +++ b/seccomp_unsupported.go @@ -0,0 +1,24 @@ +//go:build !linux + +package herd + +// SeccompPolicy controls how unauthorized syscalls are handled. +// On non-Linux systems, all policies are treated as SeccompPolicyOff. +type SeccompPolicy int + +const ( + // SeccompPolicyOff disables seccomp filtering (only valid value on non-Linux). + SeccompPolicyOff SeccompPolicy = iota + // SeccompPolicyLog — no-op on non-Linux. + SeccompPolicyLog + // SeccompPolicyErrno — no-op on non-Linux. + SeccompPolicyErrno + // SeccompPolicyKill — no-op on non-Linux. + SeccompPolicyKill +) + +func (p SeccompPolicy) envValue() string { return "off" } + +// EnterSandbox is a no-op on non-Linux systems. +// It exists so worker binaries can call it unconditionally without build tags. +func EnterSandbox() error { return nil }