diff --git a/BOOKMARKS.md b/BOOKMARKS.md index 30586ad1d..cdf3bd201 100644 --- a/BOOKMARKS.md +++ b/BOOKMARKS.md @@ -30,6 +30,7 @@ Desired state of the system, organized by capability domain. | [Runner](specs/agents/runner.spec.md) | agents | Runner subprocess lifecycle, bridges, gRPC/HTTP endpoints | | [MCP Server](specs/integrations/mcp-server.spec.md) | integrations | MCP tool definitions, sidecar and public endpoint modes | | [Security](specs/security/security.spec.md) | security | Identity boundaries, credential authorization, per-session isolation, design decisions | +| [OpenShell Sandbox](specs/security/openshell-sandbox.spec.md) | security | Agent subprocess sandbox: network namespace, Landlock, seccomp, TLS proxy, OPA policy | Feature specs remain in numbered directories under `specs/` (e.g., `specs/001-*/spec.md`). diff --git a/components/ambient-control-plane/cmd/ambient-control-plane/main.go b/components/ambient-control-plane/cmd/ambient-control-plane/main.go index bac7d22d4..ec1ed01c4 100644 --- a/components/ambient-control-plane/cmd/ambient-control-plane/main.go +++ b/components/ambient-control-plane/cmd/ambient-control-plane/main.go @@ -158,6 +158,8 @@ func runKubeMode(ctx context.Context, cfg *config.ControlPlaneConfig) error { ImagePullSecret: cfg.ImagePullSecret, PlatformMode: cfg.PlatformMode, MPPConfigNamespace: cfg.MPPConfigNamespace, + OpenShellEnabled: cfg.OpenShellEnabled, + OpenShellPolicyName: cfg.OpenShellPolicyName, } conn, err := grpc.NewClient(cfg.GRPCServerAddr, grpc.WithTransportCredentials(grpcCredentials(cfg.GRPCUseTLS))) diff --git a/components/ambient-control-plane/internal/config/config.go b/components/ambient-control-plane/internal/config/config.go index dd3772e08..ad8956401 100755 --- a/components/ambient-control-plane/internal/config/config.go +++ b/components/ambient-control-plane/internal/config/config.go @@ -47,6 +47,8 @@ type ControlPlaneConfig struct { HTTPSProxy string NoProxy string ImagePullSecret string + 
OpenShellEnabled bool + OpenShellPolicyName string } func Load() (*ControlPlaneConfig, error) { @@ -91,6 +93,8 @@ func Load() (*ControlPlaneConfig, error) { HTTPSProxy: os.Getenv("HTTPS_PROXY"), NoProxy: os.Getenv("NO_PROXY"), ImagePullSecret: os.Getenv("IMAGE_PULL_SECRET"), + OpenShellEnabled: os.Getenv("OPENSHELL_ENABLED") == "true", + OpenShellPolicyName: envOrDefault("OPENSHELL_POLICY_CONFIGMAP", "openshell-policy"), } if cfg.MCPAPIServerURL == "" { diff --git a/components/ambient-control-plane/internal/kubeclient/kubeclient.go b/components/ambient-control-plane/internal/kubeclient/kubeclient.go index 2bc71863c..aeeba3691 100644 --- a/components/ambient-control-plane/internal/kubeclient/kubeclient.go +++ b/components/ambient-control-plane/internal/kubeclient/kubeclient.go @@ -64,6 +64,12 @@ var NetworkPolicyGVR = schema.GroupVersionResource{ Resource: "networkpolicies", } +var ConfigMapGVR = schema.GroupVersionResource{ + Group: "", + Version: "v1", + Resource: "configmaps", +} + type KubeClient struct { dynamic dynamic.Interface logger zerolog.Logger @@ -327,6 +333,14 @@ func (kc *KubeClient) ListTenantNamespaces(ctx context.Context, namespace, label return kc.dynamic.Resource(gvr).Namespace(namespace).List(ctx, opts) } +func (kc *KubeClient) GetConfigMap(ctx context.Context, namespace, name string) (*unstructured.Unstructured, error) { + return kc.dynamic.Resource(ConfigMapGVR).Namespace(namespace).Get(ctx, name, metav1.GetOptions{}) +} + +func (kc *KubeClient) CreateConfigMap(ctx context.Context, obj *unstructured.Unstructured) (*unstructured.Unstructured, error) { + return kc.dynamic.Resource(ConfigMapGVR).Namespace(obj.GetNamespace()).Create(ctx, obj, metav1.CreateOptions{}) +} + func (kc *KubeClient) GetResource(ctx context.Context, gvr schema.GroupVersionResource, namespace, name string) (*unstructured.Unstructured, error) { return kc.dynamic.Resource(gvr).Namespace(namespace).Get(ctx, name, metav1.GetOptions{}) } diff --git 
a/components/ambient-control-plane/internal/reconciler/kube_reconciler.go b/components/ambient-control-plane/internal/reconciler/kube_reconciler.go
index b51733c56..3dfb9e131 100644
--- a/components/ambient-control-plane/internal/reconciler/kube_reconciler.go
+++ b/components/ambient-control-plane/internal/reconciler/kube_reconciler.go
@@ -74,6 +74,8 @@ type KubeReconcilerConfig struct {
 	ImagePullSecret     string
 	PlatformMode        string
 	MPPConfigNamespace  string
+	OpenShellEnabled    bool
+	OpenShellPolicyName string
 }
 
 type SimpleKubeReconciler struct {
@@ -178,6 +180,12 @@ func (r *SimpleKubeReconciler) provisionSession(ctx context.Context, session typ
 		}
 	}
 
+	if r.cfg.OpenShellEnabled {
+		if err := r.ensureOpenShellPolicy(ctx, namespace); err != nil {
+			return fmt.Errorf("ensuring openshell policy: %w", err)
+		}
+	}
+
 	if err := r.ensureServiceAccount(ctx, namespace, session, sessionLabel); err != nil {
 		return fmt.Errorf("ensuring service account: %w", err)
 	}
@@ -522,12 +530,7 @@ func (r *SimpleKubeReconciler) ensurePod(ctx context.Context, namespace string,
 					"memory": "4Gi",
 				},
 			},
-			"securityContext": map[string]interface{}{
-				"allowPrivilegeEscalation": false,
-				"capabilities": map[string]interface{}{
-					"drop": []interface{}{"ALL"},
-				},
-			},
+			"securityContext": r.buildRunnerSecurityContext(),
 		},
 	}
 
@@ -588,6 +591,14 @@ func (r *SimpleKubeReconciler) ensurePod(ctx context.Context, namespace string,
 		},
 	}
 
+	if r.cfg.OpenShellEnabled {
+		pod.Object["spec"].(map[string]interface{})["securityContext"] = map[string]interface{}{
+			"seccompProfile": map[string]interface{}{
+				"type": "Unconfined",
+			},
+		}
+	}
+
 	if r.cfg.ImagePullSecret != "" {
 		pod.Object["spec"].(map[string]interface{})["imagePullSecrets"] = []interface{}{
 			map[string]interface{}{"name": r.cfg.ImagePullSecret},
@@ -602,6 +613,25 @@ func (r *SimpleKubeReconciler) ensurePod(ctx context.Context, namespace string,
 	return nil
 }
 
+func (r *SimpleKubeReconciler) buildRunnerSecurityContext() map[string]interface{} {
+	sc := map[string]interface{}{
+		"allowPrivilegeEscalation": false,
+		"capabilities": map[string]interface{}{
+			"drop": []interface{}{"ALL"},
+		},
+	}
+	if r.cfg.OpenShellEnabled {
+		sc["allowPrivilegeEscalation"] = true
+		sc["runAsUser"] = int64(0)
+		sc["runAsNonRoot"] = false
+		sc["capabilities"] = map[string]interface{}{
+			"drop": []interface{}{"ALL"},
+			"add":  []interface{}{"NET_ADMIN", "SYS_ADMIN", "SYS_PTRACE", "SETUID", "SETGID", "CHOWN", "DAC_OVERRIDE"},
+		}
+	}
+	return sc
+}
+
 func (r *SimpleKubeReconciler) buildVolumes(extraVolumes []interface{}) []interface{} {
 	vols := []interface{}{
 		map[string]interface{}{
@@ -624,6 +654,14 @@ func (r *SimpleKubeReconciler) buildVolumes(extraVolumes []interface{}) []interf
 			},
 		})
 	}
+	if r.cfg.OpenShellEnabled {
+		vols = append(vols, map[string]interface{}{
+			"name": "openshell-policy",
+			"configMap": map[string]interface{}{
+				"name": r.cfg.OpenShellPolicyName,
+			},
+		})
+	}
 	vols = append(vols, extraVolumes...)
 	return vols
 }
@@ -648,6 +686,13 @@ func (r *SimpleKubeReconciler) buildVolumeMounts() []interface{} {
 			"readOnly":  true,
 		})
 	}
+	if r.cfg.OpenShellEnabled {
+		mounts = append(mounts, map[string]interface{}{
+			"name":      "openshell-policy",
+			"mountPath": "/etc/openshell",
+			"readOnly":  true,
+		})
+	}
 	return mounts
 }
 
@@ -688,6 +733,70 @@ func (r *SimpleKubeReconciler) ensureVertexSecret(ctx context.Context, namespace
 	return nil
 }
 
+// ensureOpenShellPolicy makes sure the OpenShell policy ConfigMap named by
+// r.cfg.OpenShellPolicyName exists in namespace, copying its data from the
+// ConfigMap of the same name in r.cfg.CPRuntimeNamespace when it is missing.
+//
+// An existing ConfigMap is left untouched. Lookup failures other than NotFound
+// are returned instead of being treated as "missing", and a source ConfigMap
+// whose data is malformed or empty is rejected: the runner is pointed at
+// /etc/openshell/policy.rego and policy.yaml inside this ConfigMap, so copying
+// an empty one would leave the sandbox without a policy while making every
+// later call return early because the (empty) copy already exists.
+func (r *SimpleKubeReconciler) ensureOpenShellPolicy(ctx context.Context, namespace string) error {
+	policyName := r.cfg.OpenShellPolicyName
+
+	_, err := r.nsKube().GetConfigMap(ctx, namespace, policyName)
+	if err == nil {
+		return nil
+	}
+	if !k8serrors.IsNotFound(err) {
+		return fmt.Errorf("checking openshell policy configmap %s/%s: %w", namespace, policyName, err)
+	}
+
+	src, err := r.nsKube().GetConfigMap(ctx, r.cfg.CPRuntimeNamespace, policyName)
+	if err != nil {
+		return fmt.Errorf("reading openshell policy configmap %s/%s: %w", r.cfg.CPRuntimeNamespace, policyName, err)
+	}
+
+	data, found, err := unstructured.NestedStringMap(src.Object, "data")
+	if err != nil {
+		return fmt.Errorf("parsing openshell policy configmap %s/%s: %w", r.cfg.CPRuntimeNamespace, policyName, err)
+	}
+	if !found || len(data) == 0 {
+		return fmt.Errorf("openshell policy configmap %s/%s has no data", r.cfg.CPRuntimeNamespace, policyName)
+	}
+
+	dst := &unstructured.Unstructured{
+		Object: map[string]interface{}{
+			"apiVersion": "v1",
+			"kind":       "ConfigMap",
+			"metadata": map[string]interface{}{
+				"name":      policyName,
+				"namespace": namespace,
+				"labels": map[string]interface{}{
+					LabelManaged:   "true",
+					LabelManagedBy: "ambient-control-plane",
+				},
+			},
+		},
+	}
+	if err := unstructured.SetNestedStringMap(dst.Object, data, "data"); err != nil {
+		return fmt.Errorf("building openshell policy configmap for %s: %w", namespace, err)
+	}
+
+	if _, err := r.nsKube().CreateConfigMap(ctx, dst); err != nil {
+		if k8serrors.IsAlreadyExists(err) {
+			// Another writer created it between the lookup and the create; nothing was copied here.
+			return nil
+		}
+		return fmt.Errorf("copying openshell policy configmap to %s: %w", namespace, err)
+	}
+
+	r.logger.Debug().Str("namespace", namespace).Str("configmap", policyName).Msg("openshell policy configmap copied")
+	return nil
+}
+
 func (r *SimpleKubeReconciler) buildEnv(ctx context.Context, session types.Session, sdk *sdkclient.Client, useMCPSidecar bool, credentialIDs map[string]string) []interface{} {
 	useVertex := "0"
 	if r.cfg.VertexEnabled {
@@ -770,6 +879,14 @@ func (r *SimpleKubeReconciler) buildEnv(ctx context.Context, session types.Sessi
 		env = append(env, envVar("NO_PROXY", r.cfg.NoProxy))
 	}
 
+	if r.cfg.OpenShellEnabled {
+		env = append(env,
+			envVar("OPENSHELL_ENABLED", "true"),
+			envVar("OPENSHELL_POLICY_RULES", "/etc/openshell/policy.rego"),
+			envVar("OPENSHELL_POLICY_DATA", "/etc/openshell/policy.yaml"),
+		)
+	}
+
 	return env
 }
 
diff --git a/components/runners/ambient-runner/.openshell-ref/policy.rego b/components/runners/ambient-runner/.openshell-ref/policy.rego
new file mode 100644
index 000000000..afcd28863
--- /dev/null
+++ b/components/runners/ambient-runner/.openshell-ref/policy.rego
@@ -0,0 +1,740 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 + +package openshell.sandbox + +default allow_network = false + +# --- Static policy data passthrough (queried at sandbox startup) --- + +filesystem_policy := data.filesystem_policy + +landlock_policy := data.landlock + +process_policy := data.process + +# --- Network access decision (queried per-CONNECT request) --- + +allow_network if { + network_policy_for_request +} + +# --- Deny reasons (specific diagnostics for debugging policy denials) --- + +deny_reason := "missing input.network" if { + not input.network +} + +deny_reason := "missing input.exec" if { + input.network + not input.exec +} + +deny_reason := reason if { + input.network + input.exec + not network_policy_for_request + not endpoint_policy_for_request + count(data.network_policies) > 0 + reason := sprintf("endpoint %s:%d is not allowed by any policy", [input.network.host, input.network.port]) +} + +deny_reason := reason if { + input.network + input.exec + not network_policy_for_request + endpoint_policy_for_request + ancestors_str := concat(" -> ", input.exec.ancestors) + cmdline_str := concat(", ", input.exec.cmdline_paths) + binary_misses := [r | + some name + policy := data.network_policies[name] + endpoint_allowed(policy, input.network) + not binary_allowed(policy, input.exec) + r := sprintf("binary '%s' not allowed in policy '%s' (ancestors: [%s], cmdline: [%s]). SYMLINK HINT: the binary path is the kernel-resolved target from /proc//exe, not the symlink. 
If your policy specifies a symlink (e.g., /usr/bin/python3) but the actual binary is /usr/bin/python3.11, either: (1) use the canonical path in your policy (run 'readlink -f /usr/bin/python3' inside the sandbox), or (2) ensure symlink resolution is working (check sandbox logs for 'Cannot access container filesystem')", [input.exec.path, name, ancestors_str, cmdline_str]) + ] + count(binary_misses) > 0 + reason := concat("; ", binary_misses) +} + +deny_reason := "network connections not allowed by policy" if { + input.network + input.exec + not network_policy_for_request + count(data.network_policies) == 0 +} + +# --- Matched policy name (for audit logging) --- +# +# Collects all matching policy names into a set, then deterministically picks +# the lexicographically smallest. This avoids a "complete rule conflict" when +# multiple policies cover the same endpoint (e.g. after draft approval adds an +# overlapping rule). + +_matching_policy_names contains name if { + some name + policy := data.network_policies[name] + endpoint_allowed(policy, input.network) + binary_allowed(policy, input.exec) +} + +matched_network_policy := min(_matching_policy_names) if { + count(_matching_policy_names) > 0 +} + +# --- Core matching logic --- + +# True when at least one network policy matches the request (endpoint + binary). +# Expressed as a boolean so that multiple matching policies don't cause a +# "complete rule conflict". +network_policy_for_request if { + some name + data.network_policies[name] + endpoint_allowed(data.network_policies[name], input.network) + binary_allowed(data.network_policies[name], input.exec) +} + +endpoint_policy_for_request if { + some name + data.network_policies[name] + endpoint_allowed(data.network_policies[name], input.network) +} + +# Endpoint matching: exact host (case-insensitive) + port in ports list. 
+endpoint_allowed(policy, network) if { + some endpoint + endpoint := policy.endpoints[_] + not contains(endpoint.host, "*") + lower(endpoint.host) == lower(network.host) + endpoint.ports[_] == network.port +} + +# Endpoint matching: glob host pattern + port in ports list. +# Uses "." as delimiter so "*" matches a single DNS label and "**" matches +# across label boundaries — consistent with TLS certificate wildcard semantics. +endpoint_allowed(policy, network) if { + some endpoint + endpoint := policy.endpoints[_] + contains(endpoint.host, "*") + glob.match(lower(endpoint.host), ["."], lower(network.host)) + endpoint.ports[_] == network.port +} + +# Endpoint matching: hostless with allowed_ips — match any host on port. +# When an endpoint has allowed_ips but no host, it matches any hostname on the +# given port. The actual IP validation happens in Rust post-DNS-resolution. +endpoint_allowed(policy, network) if { + some endpoint + endpoint := policy.endpoints[_] + object.get(endpoint, "host", "") == "" + count(object.get(endpoint, "allowed_ips", [])) > 0 + endpoint.ports[_] == network.port +} + +# Binary matching: exact path. +# SHA256 integrity is enforced in Rust via trust-on-first-use (TOFU) cache, +# not in Rego. The proxy computes and caches binary hashes at runtime. +binary_allowed(policy, exec) if { + some b + b := policy.binaries[_] + not contains(b.path, "*") + b.path == exec.path +} + +# Binary matching: ancestor exact path (e.g., claude spawns node). +binary_allowed(policy, exec) if { + some b + b := policy.binaries[_] + not contains(b.path, "*") + ancestor := exec.ancestors[_] + b.path == ancestor +} + +# Binary matching: glob pattern against exe path or any ancestor. +# NOTE: cmdline_paths are intentionally excluded — argv[0] is trivially +# spoofable via execve and must not be used as a grant-access signal. 
+binary_allowed(policy, exec) if { + some b in policy.binaries + contains(b.path, "*") + all_paths := array.concat([exec.path], exec.ancestors) + some p in all_paths + glob.match(b.path, ["/"], p) +} + +user_declared_binary_allowed(policy, exec) if { + some b + b := policy.binaries[_] + not object.get(b, "advisor_proposed", false) + not contains(b.path, "*") + b.path == exec.path +} + +user_declared_binary_allowed(policy, exec) if { + some b + b := policy.binaries[_] + not object.get(b, "advisor_proposed", false) + not contains(b.path, "*") + ancestor := exec.ancestors[_] + b.path == ancestor +} + +user_declared_binary_allowed(policy, exec) if { + some b in policy.binaries + not object.get(b, "advisor_proposed", false) + contains(b.path, "*") + all_paths := array.concat([exec.path], exec.ancestors) + some p in all_paths + glob.match(b.path, ["/"], p) +} + +# --- Network action (allow / deny) --- +# +# These rules are mutually exclusive by construction: +# - "allow" requires `network_policy_for_request` (binary+endpoint matched) +# - default is "deny" when no policy matches. + +default network_action := "deny" + +# Explicitly allowed: endpoint + binary match in a network policy → allow. +network_action := "allow" if { + network_policy_for_request +} + +# =========================================================================== +# L7 request evaluation (queried per-request within a tunnel) +# =========================================================================== + +default allow_request = false + +# Per-policy helper: true when this single policy has at least one endpoint +# matching the L4 request whose L7 rules also permit the specific request. +# Isolating the endpoint iteration inside a function avoids the regorus +# "duplicated definition of local variable" error that occurs when the +# outer `some name` iterates over multiple policies that share a host:port. 
+_policy_allows_l7(policy) if { + some ep + ep := policy.endpoints[_] + endpoint_matches_l7_request(ep, input.network, input.request) + request_allowed_for_endpoint(input.request, ep) +} + +# L7 request allowed if any matching L4 policy also allows the L7 request +# AND no deny rule blocks it. Deny rules take precedence over allow rules. +allow_request if { + some name + policy := data.network_policies[name] + endpoint_allowed(policy, input.network) + binary_allowed(policy, input.exec) + _policy_allows_l7(policy) + not deny_request +} + +# --- L7 deny rules --- +# +# Deny rules are evaluated after allow rules and take precedence. +# If a request matches any deny rule on any matching endpoint, it is blocked +# even if it would otherwise be allowed. + +default deny_request = false + +# Per-policy helper: true when this policy has at least one endpoint matching +# the L4 request whose deny_rules also match the specific L7 request. +_policy_denies_l7(policy) if { + some ep + ep := policy.endpoints[_] + endpoint_matches_l7_request(ep, input.network, input.request) + request_denied_for_endpoint(input.request, ep) +} + +deny_request if { + some name + policy := data.network_policies[name] + endpoint_allowed(policy, input.network) + binary_allowed(policy, input.exec) + _policy_denies_l7(policy) +} + +# --- L7 deny rule matching: REST method + path + query --- + +request_denied_for_endpoint(request, endpoint) if { + some deny_rule + deny_rule := endpoint.deny_rules[_] + deny_rule.method + method_matches(request.method, deny_rule.method) + path_matches(request.path, deny_rule.path) + deny_query_params_match(request, deny_rule) +} + +# --- L7 deny rule matching: SQL command --- + +request_denied_for_endpoint(request, endpoint) if { + some deny_rule + deny_rule := endpoint.deny_rules[_] + deny_rule.command + command_matches(request.command, deny_rule.command) +} + +# --- L7 deny rule matching: GraphQL operation --- + +request_denied_for_endpoint(request, endpoint) if { + 
graphql_request_has_operations(request) + some deny_rule + deny_rule := endpoint.deny_rules[_] + deny_rule.operation_type + op := request.graphql.operations[_] + graphql_deny_rule_matches_operation(op, deny_rule, endpoint) +} + +# A GraphQL endpoint path is authoritative once it matches. If the parsed +# GraphQL request is malformed, hash-only without a trusted registry entry, or +# contains an operation outside the GraphQL allow rules, a broader REST rule on +# the same host:port must not allow it through. +request_denied_for_endpoint(request, endpoint) if { + endpoint.protocol == "graphql" + is_object(request.graphql) + not graphql_request_allowed(request, endpoint) +} + +# The same authority applies when a WebSocket endpoint opts into GraphQL +# operation policy. Once the relay classifies a client text message as a +# GraphQL-over-WebSocket operation, generic WEBSOCKET_TEXT rules must not bypass +# operation_type / operation_name / fields policy. +request_denied_for_endpoint(request, endpoint) if { + endpoint.protocol == "websocket" + is_object(request.graphql) + not graphql_request_allowed(request, endpoint) +} + +# Deny query matching: fail-closed semantics. +# If no query rules on the deny rule, match unconditionally (any query params). +# If query rules present, trigger the deny if ANY value for a configured key +# matches the matcher. This is the inverse of allow-side semantics where ALL +# values must match. For deny logic, a single matching value is enough to block. +deny_query_params_match(request, deny_rule) if { + deny_query_rules := object.get(deny_rule, "query", {}) + count(deny_query_rules) == 0 +} + +deny_query_params_match(request, deny_rule) if { + deny_query_rules := object.get(deny_rule, "query", {}) + count(deny_query_rules) > 0 + not deny_query_key_missing(request, deny_query_rules) + not deny_query_value_mismatch_all(request, deny_query_rules) +} + +# A configured deny query key is missing from the request entirely. 
+# Missing key means the deny rule doesn't apply (fail-open on absence). +deny_query_key_missing(request, query_rules) if { + some key + query_rules[key] + request_query := object.get(request, "query_params", {}) + values := object.get(request_query, key, null) + values == null +} + +# ALL values for a configured key fail to match the matcher. +# If even one value matches, deny fires. This rule checks the opposite: +# true when NO value matches (i.e., every value is a mismatch). +deny_query_value_mismatch_all(request, query_rules) if { + some key + matcher := query_rules[key] + request_query := object.get(request, "query_params", {}) + values := object.get(request_query, key, []) + count(values) > 0 + not deny_any_value_matches(values, matcher) +} + +# True if at least one value in the list matches the matcher. +deny_any_value_matches(values, matcher) if { + some i + query_value_matches(values[i], matcher) +} + +# --- L7 deny reason --- + +request_deny_reason := reason if { + input.request + graphql_request_error(input.request) + reason := sprintf("GraphQL request rejected: %s", [input.request.graphql.error]) +} + +request_deny_reason := reason if { + input.request + not graphql_request_error(input.request) + graphql_request_has_unregistered_persisted_query(input.request, matched_endpoint_config) + reason := "GraphQL persisted query is not registered" +} + +request_deny_reason := reason if { + input.request + deny_request + graphql_request_has_operations(input.request) + not graphql_request_has_unregistered_persisted_query(input.request, matched_endpoint_config) + reason := "GraphQL operation blocked by endpoint policy" +} + +request_deny_reason := reason if { + input.request + not deny_request + not allow_request + graphql_request_has_operations(input.request) + not graphql_request_has_unregistered_persisted_query(input.request, matched_endpoint_config) + reason := "GraphQL operation not permitted by policy" +} + +request_deny_reason := reason if { + input.request 
+ deny_request + not graphql_request_has_operations(input.request) + reason := sprintf("%s %s blocked by deny rule", [input.request.method, input.request.path]) +} + +request_deny_reason := reason if { + input.request + not deny_request + not allow_request + not graphql_request_has_operations(input.request) + reason := sprintf("%s %s not permitted by policy", [input.request.method, input.request.path]) +} + +# --- L7 rule matching: REST method + path --- + +request_allowed_for_endpoint(request, endpoint) if { + some rule + rule := endpoint.rules[_] + rule.allow.method + method_matches(request.method, rule.allow.method) + path_matches(request.path, rule.allow.path) + query_params_match(request, rule) +} + +# --- L7 rule matching: SQL command --- + +request_allowed_for_endpoint(request, endpoint) if { + some rule + rule := endpoint.rules[_] + rule.allow.command + command_matches(request.command, rule.allow.command) +} + +# --- L7 rule matching: GraphQL operation --- + +request_allowed_for_endpoint(request, endpoint) if { + graphql_request_allowed(request, endpoint) +} + +graphql_request_allowed(request, endpoint) if { + graphql_request_has_operations(request) + not graphql_request_error(request) + not graphql_request_has_unregistered_persisted_query(request, endpoint) + not graphql_request_has_unallowed_operation(request, endpoint) +} + +graphql_request_has_operations(request) if { + is_object(request.graphql) + operations := object.get(request.graphql, "operations", []) + count(operations) > 0 +} + +graphql_request_error(request) if { + is_object(request.graphql) + error := object.get(request.graphql, "error", "") + error != "" +} + +graphql_request_has_unallowed_operation(request, endpoint) if { + op := request.graphql.operations[_] + not graphql_operation_allowed(op, endpoint) +} + +graphql_operation_allowed(op, endpoint) if { + rule := endpoint.rules[_] + rule.allow.operation_type + graphql_allow_rule_matches_operation(op, rule.allow, endpoint) +} + 
+graphql_request_has_unregistered_persisted_query(request, endpoint) if { + op := request.graphql.operations[_] + graphql_operation_needs_registry(op) + not graphql_registered_operation(op, endpoint) +} + +graphql_operation_needs_registry(op) if { + object.get(op, "persisted_query", false) == true + object.get(op, "operation_type", "") == "" +} + +graphql_registered_operation(op, endpoint) if { + object.get(endpoint, "persisted_queries", "deny") == "allow_registered" + id := graphql_operation_registry_key(op) + endpoint.graphql_persisted_queries[id] +} + +graphql_operation_registry_key(op) := key if { + key := object.get(op, "persisted_query_hash", "") + key != "" +} + +graphql_operation_registry_key(op) := key if { + object.get(op, "persisted_query_hash", "") == "" + key := object.get(op, "persisted_query_id", "") + key != "" +} + +graphql_effective_operation(op, endpoint) := registered if { + graphql_operation_needs_registry(op) + key := graphql_operation_registry_key(op) + registered := endpoint.graphql_persisted_queries[key] +} + +graphql_effective_operation(op, _) := op if { + not graphql_operation_needs_registry(op) +} + +graphql_allow_rule_matches_operation(op, rule, endpoint) if { + effective := graphql_effective_operation(op, endpoint) + graphql_operation_type_matches(effective, rule) + graphql_operation_name_matches(effective, rule) + graphql_allow_fields_match(effective, rule) +} + +graphql_deny_rule_matches_operation(op, rule, endpoint) if { + effective := graphql_effective_operation(op, endpoint) + graphql_operation_type_matches(effective, rule) + graphql_operation_name_matches(effective, rule) + graphql_deny_fields_match(effective, rule) +} + +graphql_operation_type_matches(_, rule) if { + object.get(rule, "operation_type", "") == "*" +} + +graphql_operation_type_matches(op, rule) if { + expected := object.get(rule, "operation_type", "") + expected != "" + expected != "*" + lower(object.get(op, "operation_type", "")) == lower(expected) +} + 
+graphql_operation_name_matches(_, rule) if { + object.get(rule, "operation_name", "") == "" +} + +graphql_operation_name_matches(op, rule) if { + pattern := object.get(rule, "operation_name", "") + pattern != "" + name := object.get(op, "operation_name", "") + glob.match(pattern, [], name) +} + +# Allow-side field constraints are intentionally all-selected-fields semantics: +# if a rule declares fields, every root field selected by the operation must +# match one of the rule patterns. This prevents mixed-operation requests from +# allowing an unlisted field because one safe field also appeared. +graphql_allow_fields_match(_, rule) if { + count(object.get(rule, "fields", [])) == 0 +} + +graphql_allow_fields_match(op, rule) if { + count(object.get(rule, "fields", [])) > 0 + count(object.get(op, "fields", [])) > 0 + not graphql_operation_has_unmatched_field(op, rule) +} + +graphql_operation_has_unmatched_field(op, rule) if { + field := object.get(op, "fields", [])[_] + not graphql_field_matches_any(field, object.get(rule, "fields", [])) +} + +graphql_deny_fields_match(_, rule) if { + count(object.get(rule, "fields", [])) == 0 +} + +graphql_deny_fields_match(op, rule) if { + field := object.get(op, "fields", [])[_] + graphql_field_matches_any(field, object.get(rule, "fields", [])) +} + +graphql_field_matches_any(field, patterns) if { + pattern := patterns[_] + glob.match(pattern, [], field) +} + +# Wildcard "*" matches any method; otherwise case-insensitive exact match. +# RFC 9110 §9.3.2: HEAD is semantically identical to GET except no response body. +method_matches(_, "*") if true + +method_matches(actual, expected) if { + expected != "*" + upper(actual) == upper(expected) +} + +method_matches(actual, expected) if { + upper(actual) == "HEAD" + upper(expected) == "GET" +} + +# Path matching: "**" matches everything; otherwise glob.match with "/" delimiter. 
+# +# INVARIANT: `input.request.path` is canonicalized by the sandbox before +# policy evaluation — percent-decoded, dot-segments resolved, doubled +# slashes collapsed, `;params` stripped, `%2F` rejected (unless an +# endpoint opts in). Patterns here must therefore match canonical paths; +# do not attempt defensive matching against `..` or `%2e%2e` — those +# inputs are rejected at the L7 parser boundary before this rule runs. +path_matches(_, "**") if true + +path_matches(actual, pattern) if { + pattern != "**" + glob.match(pattern, ["/"], actual) +} + +# Query matching: +# - If no query rules are configured, allow any query params. +# - For configured keys, all request values for that key must match. +# - Matcher shape supports either `glob` or `any`. +query_params_match(request, rule) if { + query_rules := object.get(rule.allow, "query", {}) + not query_mismatch(request, query_rules) +} + +query_mismatch(request, query_rules) if { + some key + matcher := query_rules[key] + not query_key_matches(request, key, matcher) +} + +query_key_matches(request, key, matcher) if { + request_query := object.get(request, "query_params", {}) + values := object.get(request_query, key, null) + values != null + count(values) > 0 + not query_value_mismatch(values, matcher) +} + +query_value_mismatch(values, matcher) if { + some i + value := values[i] + not query_value_matches(value, matcher) +} + +query_value_matches(value, matcher) if { + is_string(matcher) + glob.match(matcher, [], value) +} + +query_value_matches(value, matcher) if { + is_object(matcher) + glob_pattern := object.get(matcher, "glob", "") + glob_pattern != "" + glob.match(glob_pattern, [], value) +} + +query_value_matches(value, matcher) if { + is_object(matcher) + any_patterns := object.get(matcher, "any", []) + count(any_patterns) > 0 + some i + glob.match(any_patterns[i], [], value) +} + +# SQL command matching: "*" matches any; otherwise case-insensitive. 
+command_matches(_, "*") if true + +command_matches(actual, expected) if { + expected != "*" + upper(actual) == upper(expected) +} + +# --- Matched endpoint config (for L7 and allowed_ips extraction) --- +# Returns the raw endpoint object for the matched policy + host:port. +# Used by Rust to extract L7 config (protocol, tls, enforcement, +# allow_encoded_slash) and/or allowed_ips for SSRF allowlist validation. + +# Per-policy helper: returns matching endpoint configs for a single policy. +_policy_endpoint_configs(policy) := [ep | + some ep + ep := policy.endpoints[_] + endpoint_matches_request(ep, input.network) + endpoint_has_extended_config(ep) +] + +# Collect matching endpoint configs across all policies. Iterates over +# _matching_policy_names (a set, safe from regorus variable collisions) +# then collects per-policy configs via the helper function. +_matching_endpoint_configs := [cfg | + some pname + _matching_policy_names[pname] + cfgs := _policy_endpoint_configs(data.network_policies[pname]) + cfg := cfgs[_] +] + +matched_endpoint_config := _matching_endpoint_configs[0] if { + count(_matching_endpoint_configs) > 0 +} + +_policy_has_exact_declared_endpoint(policy) if { + some ep + ep := policy.endpoints[_] + not object.get(ep, "advisor_proposed", false) + not contains(ep.host, "*") + lower(ep.host) == lower(input.network.host) + ep.ports[_] == input.network.port +} + +exact_declared_endpoint_host if { + some pname + policy := data.network_policies[pname] + user_declared_binary_allowed(policy, input.exec) + _policy_has_exact_declared_endpoint(policy) +} + +# Hosted endpoint: exact host match + port in ports list. +endpoint_matches_request(ep, network) if { + not contains(ep.host, "*") + lower(ep.host) == lower(network.host) + ep.ports[_] == network.port +} + +# Hosted endpoint: glob host match + port in ports list. 
+# Both sides are lower-cased and the glob uses "." as its delimiter, so
+# matching is done per DNS label.
+endpoint_matches_request(ep, network) if {
+    contains(ep.host, "*")
+    glob.match(lower(ep.host), ["."], lower(network.host))
+    ep.ports[_] == network.port
+}
+
+# Hostless endpoint with allowed_ips: match on port only.
+# Applies only when `host` is absent or empty and `allowed_ips` is non-empty.
+endpoint_matches_request(ep, network) if {
+    object.get(ep, "host", "") == ""
+    count(object.get(ep, "allowed_ips", [])) > 0
+    ep.ports[_] == network.port
+}
+
+# L7 variant: the endpoint must match at the network level and, when it
+# declares a `path`, the request path must match it as well.
+endpoint_matches_l7_request(ep, network, request) if {
+    endpoint_matches_request(ep, network)
+    endpoint_path_matches_request(ep, request)
+}
+
+# An endpoint with no `path` (or an empty one) accepts every request path.
+endpoint_path_matches_request(ep, request) if {
+    object.get(ep, "path", "") == ""
+}
+
+# An endpoint with a non-empty `path` defers to path_matches.
+endpoint_path_matches_request(ep, request) if {
+    path := object.get(ep, "path", "")
+    path != ""
+    path_matches(request.path, path)
+}
+
+# An endpoint has extended config if it specifies L7 protocol, allowed_ips,
+# or an explicit tls mode (e.g. tls: skip).
+# The bare references `ep.protocol` and `ep.tls` succeed only when the
+# field is defined and not `false`.
+endpoint_has_extended_config(ep) if {
+    ep.protocol
+}
+
+endpoint_has_extended_config(ep) if {
+    count(object.get(ep, "allowed_ips", [])) > 0
+}
+
+endpoint_has_extended_config(ep) if {
+    ep.tls
+}
diff --git a/components/runners/ambient-runner/.openshell-ref/policy.yaml b/components/runners/ambient-runner/.openshell-ref/policy.yaml new file mode 100644 index 000000000..d227a28f2 --- /dev/null +++ b/components/runners/ambient-runner/.openshell-ref/policy.yaml @@ -0,0 +1,84 @@ +version: 1 + +filesystem_policy: + include_workdir: true + read_only: + - /usr + - /lib + - /proc + - /dev/urandom + - /app + - /etc + - /var/log + - /home/sandbox + read_write: + - /workspace + - /tmp + - /dev/null + - /app/.claude + +landlock: + compatibility: best_effort + +process: + run_as_user: sandbox + run_as_group: sandbox + +network_policies: + anthropic_api: + name: anthropic-api + endpoints: + - { host: api.anthropic.com, port: 443 } + - { host: statsig.anthropic.com, port: 443 } + binaries: + - { path: /usr/local/bin/claude } + - { path: /usr/bin/node } + - { path: /usr/bin/curl } + + vertex_ai: + name: vertex-ai +
endpoints: + - { host: us-east5-aiplatform.googleapis.com, port: 443 } + - { host: europe-west1-aiplatform.googleapis.com, port: 443 } + - { host: us-central1-aiplatform.googleapis.com, port: 443 } + - { host: oauth2.googleapis.com, port: 443 } + binaries: + - { path: /usr/local/bin/claude } + - { path: /usr/bin/node } + - { path: /usr/bin/curl } + + github: + name: github + endpoints: + - { host: github.com, port: 443 } + - { host: api.github.com, port: 443 } + binaries: + - { path: /usr/bin/git } + - { path: /usr/local/bin/gh } + - { path: /usr/bin/curl } + + npm_registry: + name: npm-registry + endpoints: + - { host: registry.npmjs.org, port: 443 } + binaries: + - { path: /usr/bin/npm } + - { path: /usr/bin/node } + - { path: /usr/bin/npx } + + pypi: + name: pypi + endpoints: + - { host: pypi.org, port: 443 } + - { host: files.pythonhosted.org, port: 443 } + binaries: + - { path: /usr/bin/pip3 } + - { path: /usr/bin/python3 } + + gitlab: + name: gitlab + endpoints: + - { host: gitlab.com, port: 443 } + binaries: + - { path: /usr/bin/git } + - { path: /usr/local/bin/glab } diff --git a/components/runners/ambient-runner/Dockerfile b/components/runners/ambient-runner/Dockerfile index f036b4a6f..f8f7a6e51 100755 --- a/components/runners/ambient-runner/Dockerfile +++ b/components/runners/ambient-runner/Dockerfile @@ -12,8 +12,9 @@ ARG PRE_COMMIT_VERSION=4.2.0 ARG GEMINI_CLI_VERSION=0.1.17 # Install system packages: Python 3.12, git, jq, Node.js, Go, unzip (needed by CodeRabbit install script) +# iproute provides ip-netns for OpenShell network namespace setup RUN dnf install -y python3 python3-pip python3-devel \ - git jq nodejs npm go-toolset unzip && \ + git jq nodejs npm go-toolset unzip iproute && \ dnf clean all # Install GitHub CLI and GitLab CLI (binary downloads, pinned) @@ -31,17 +32,34 @@ RUN pip3 install --break-system-packages --no-cache-dir uv==${UV_VERSION} pre-co # (uvx downloads + installs into a temp venv which spikes memory to ~9GB) RUN uv tool 
install mcp-server-fetch==2025.4.7 +# OpenShell supervisor binary for sandbox isolation (file mode, no Gateway required) +# Capabilities (NET_ADMIN etc.) come from the pod securityContext, not setcap; /var/run/netns is pre-created for `ip netns add` +COPY --from=ghcr.io/nvidia/openshell/supervisor:0.0.56 /openshell-sandbox /openshell-sandbox +RUN chmod +x /openshell-sandbox && \ + mkdir -p /var/run/netns && chmod 777 /var/run/netns + +# sandbox user required by OpenShell (process.run_as_user constraint) +RUN groupadd -r sandbox && useradd -r -g sandbox -d /home/sandbox -m -s /bin/bash sandbox && \ + chmod 755 /home/sandbox && \ + mkdir -p /workspace && chown sandbox:sandbox /workspace + # Create working directory WORKDIR /app # Copy ambient-runner package COPY . /app/ambient-runner +# OpenShell wrapper script (dispatches to supervisor or direct claude based on OPENSHELL_ENABLED) +COPY openshell-claude-wrapper.sh /app/openshell-claude-wrapper.sh +RUN chmod +x /app/openshell-claude-wrapper.sh + # Install runner as a package, then remove build-only deps in same layer RUN pip3 install --break-system-packages --no-cache-dir '/app/ambient-runner[all]' && \ dnf remove -y python3-devel && \ dnf clean all && \ - rm -rf /var/cache/dnf /var/lib/dnf + rm -rf /var/cache/dnf /var/lib/dnf && \ + BUNDLED=$(python3 -c 'import claude_agent_sdk; from pathlib import Path; print(Path(claude_agent_sdk.__file__).parent / "_bundled" / "claude")') && \ + ln -sf "$BUNDLED" /usr/local/bin/claude # Install Gemini CLI (npm package, Node.js already available) RUN npm install -g @google/gemini-cli@${GEMINI_CLI_VERSION} && \ diff --git a/components/runners/ambient-runner/ambient_runner/bridges/claude/bridge.py b/components/runners/ambient-runner/ambient_runner/bridges/claude/bridge.py index 893e2348c..50a232545 100644 --- a/components/runners/ambient-runner/ambient_runner/bridges/claude/bridge.py +++ b/components/runners/ambient-runner/ambient_runner/bridges/claude/bridge.py @@ -748,6 +748,9 @@ def _stderr_handler(line: str) -> None: "stderr":
_stderr_handler, } + if os.getenv("OPENSHELL_ENABLED") == "true": + options["cli_path"] = "/app/openshell-claude-wrapper.sh" + if self._add_dirs: options["add_dirs"] = self._add_dirs if self._configured_model: diff --git a/components/runners/ambient-runner/openshell-claude-wrapper.sh b/components/runners/ambient-runner/openshell-claude-wrapper.sh new file mode 100644 index 000000000..a6ec77213 --- /dev/null +++ b/components/runners/ambient-runner/openshell-claude-wrapper.sh @@ -0,0 +1,14 @@
+#!/bin/bash
+# Runs the Claude CLI, under the OpenShell supervisor when OPENSHELL_ENABLED=true.
+set -euo pipefail
+
+CLAUDE_BIN="${CLAUDE_CLI_PATH:-/usr/local/bin/claude}"
+
+# Sandbox disabled: hand the process over to the CLI with its arguments untouched.
+[[ "${OPENSHELL_ENABLED:-}" == "true" ]] || exec "$CLAUDE_BIN" "$@"
+
+# Sandbox enabled: the supervisor is given the policy files, then the CLI command.
+exec /openshell-sandbox \
+    --policy-rules "${OPENSHELL_POLICY_RULES:-/etc/openshell/policy.rego}" \
+    --policy-data "${OPENSHELL_POLICY_DATA:-/etc/openshell/policy.yaml}" \
+    --log-level "${OPENSHELL_LOG_LEVEL:-warn}" -- "$CLAUDE_BIN" "$@"
diff --git a/docs/internal/agents/openshell-runner-adaptation.md b/docs/internal/agents/openshell-runner-adaptation.md new file mode 100644 index 000000000..9752e5e24 --- /dev/null +++ b/docs/internal/agents/openshell-runner-adaptation.md @@ -0,0 +1,386 @@ +# OpenShell Runner Adaptation — Implementation Record + +> Initial analysis: 2026-06-03 +> Implementation completed: 2026-06-04 +> Companion doc: [OpenShell Security Model Analysis](openshell-security-analysis.md) +> Formal spec: `specs/security/openshell-sandbox.spec.md` +> Target components: `components/runners/ambient-runner/`, `components/ambient-control-plane/` + +--- + +## Summary + +The runner now wraps the Claude Code subprocess inside NVIDIA OpenShell's +Supervisor binary (`openshell-sandbox` v0.0.56) in **file mode** — no OpenShell +Gateway required. Five defense-in-depth isolation layers are applied: network +namespace, TLS proxy with L7 OPA inspection, Landlock filesystem sandbox, +seccomp-BPF syscall filtering, and privilege drop to an unprivileged user.
+ +The implementation was validated end-to-end on ROSA OpenShift +(kernel 5.14.0-570.99.1.el9_6). All five layers confirmed operational. + +--- + +## Strategy Selection + +Three strategies were evaluated (see original analysis below). **Strategy 1 +(Supervisor wrapping Claude CLI)** was selected and implemented. The key insight +was that file mode eliminates the Gateway dependency entirely — the Supervisor +reads policy from local Rego + YAML files, and the control plane distributes +these via ConfigMap propagation. No gRPC provider registration is needed. + +The credential placeholder/proxy pattern (Phase 2 in the original analysis) is +deferred. The current implementation provides network, filesystem, syscall, and +process isolation without changing credential flow. LLM credentials (Vertex AI +service account) remain in the runner environment because they are necessary for +inference and the SDK loads them before the sandbox starts. + +--- + +## What Was Actually Built + +### Architecture (Implemented) + +``` +Runner Pod (FastAPI + uvicorn) — runs UNSANDBOXED as UID 0 + │ + ├── bridge.py: _ensure_adapter() sets cli_path when OPENSHELL_ENABLED=true + │ options["cli_path"] = "/app/openshell-claude-wrapper.sh" + │ + └── Claude Agent SDK spawns wrapper as subprocess + │ + └── /app/openshell-claude-wrapper.sh + │ reads: OPENSHELL_ENABLED, OPENSHELL_POLICY_RULES, OPENSHELL_POLICY_DATA + │ + └── exec /openshell-sandbox \ + --policy-rules /etc/openshell/policy.rego \ + --policy-data /etc/openshell/policy.yaml \ + --log-level warn \ + -- /usr/local/bin/claude "$@" + │ + ├── Supervisor (Rust, runs as root): + │ 1. Load + validate OPA policy + │ 2. Create network namespace (ip netns add sandbox-{uuid}) + │ 3. Create veth pair (10.200.0.1 ↔ 10.200.0.2) + │ 4. Start TLS proxy on 10.200.0.1:3128 + │ 5. Generate ephemeral CA for MITM TLS + │ 6. Prepare Landlock PathFds while still root + │ + ├── fork() + │ pre_exec closure (child process, before exec): + │ 1. 
setns(CLONE_NEWNET) → enter sandbox network namespace + │ 2. setgroups/setgid/setuid → drop to sandbox:sandbox + │ 3. RLIMIT_CORE=0, PR_SET_DUMPABLE=0, PR_SET_NO_NEW_PRIVS=1 + │ 4. landlock_restrict_self → 12 filesystem rules (8 RO, 4 RW) + │ 5. seccomp::apply → block ptrace, memfd_create, raw sockets + │ + └── exec(/usr/local/bin/claude) → runs as sandbox user + env injected by supervisor: + HTTPS_PROXY=http://10.200.0.1:3128 + HTTP_PROXY=http://10.200.0.1:3128 + ALL_PROXY=http://10.200.0.1:3128 + SSL_CERT_FILE=/etc/openshell-tls/ca-bundle.pem + NODE_EXTRA_CA_CERTS=/etc/openshell-tls/openshell-ca.pem + NODE_USE_ENV_PROXY=1 + GIT_SSL_CAINFO=/etc/openshell-tls/ca-bundle.pem +``` + +### Files Changed + +| File | Component | What Changed | +|------|-----------|-------------| +| `Dockerfile` | Runner | Pin supervisor v0.0.56 from `ghcr.io/nvidia/openshell/supervisor:0.0.56`; add `iproute` (provides `ip netns`); create `sandbox` user/group; pre-create `/workspace` owned by sandbox; symlink bundled claude binary to `/usr/local/bin/claude`; set `/home/sandbox` to 755; create `/var/run/netns` with 777 | +| `openshell-claude-wrapper.sh` | Runner | New file — shell script that dispatches to supervisor or direct claude based on `OPENSHELL_ENABLED` env var | +| `bridges/claude/bridge.py` | Runner | 1 line added in `_ensure_adapter()`: `options["cli_path"] = "/app/openshell-claude-wrapper.sh"` when `OPENSHELL_ENABLED == "true"` | +| `.openshell-ref/policy.rego` | Runner | Official OPA Rego policy from OpenShell repo (`package openshell.sandbox`, ~741 lines) | +| `.openshell-ref/policy.yaml` | Runner | Policy data: filesystem allowlists, Landlock config, process identity, network ACLs for 6 endpoint groups | +| `internal/reconciler/kube_reconciler.go` | Control Plane | `buildRunnerSecurityContext()`: 7 capabilities when OpenShell enabled; `buildVolumes()`: openshell-policy ConfigMap volume; `buildVolumeMounts()`: mount at `/etc/openshell`; `buildEnv()`: inject OPENSHELL_* env 
vars; `ensureOpenShellPolicy()`: copy ConfigMap from CP to runner namespace; `ensurePod()`: pod-level seccompProfile Unconfined | +| `internal/config/config.go` | Control Plane | `OpenShellEnabled` (from `OPENSHELL_ENABLED` env) and `OpenShellPolicyName` (from `OPENSHELL_POLICY_CONFIGMAP`, default `openshell-policy`) | +| `internal/kubeclient/kubeclient.go` | Control Plane | Added `ConfigMapGVR`, `GetConfigMap()`, `CreateConfigMap()` | +| `cmd/ambient-control-plane/main.go` | Control Plane | Thread `OpenShellEnabled` and `OpenShellPolicyName` from config into reconciler | + +### What Did NOT Change + +| Component | Why | +|-----------|-----| +| `platform/auth.py` | Credential placeholder/proxy is deferred; real tokens still injected into runner env | +| `_grpc_client.py` | gRPC client runs in runner process, outside sandbox boundary | +| `middleware/secret_redaction.py` | Retained as-is; still provides output stream defense-in-depth | +| `bridges/claude/session.py` | SessionWorker lifecycle unchanged; supervisor is transparent to the SDK | +| `components/operator/` | OpenShell integration is in the CP, NOT the operator | + +--- + +## Critical Implementation Details + +### Why 7 Capabilities (Not Just NET_ADMIN) + +The original analysis estimated only `NET_ADMIN` was needed. In practice, the +Supervisor (its namespace setup, proxy, and `pre_exec` closure) requires significantly more: + +| Capability | Discovery Method | Required For | +|------------|-----------------|-------------| +| `NET_ADMIN` | Expected (documented) | `ip netns add`, veth pair setup, routing | +| `SYS_ADMIN` | EPERM on `mount --make-shared /var/run/netns` | Mount propagation for netns mount points; `nsenter` for in-namespace `ip` commands | +| `SYS_PTRACE` | Exit code 133 (SIGTRAP) when ptrace wrapper attempted | Binary identity verification via `/proc` inspection | +| `SETUID` | `setgroups(1, ...)
= EPERM` in forked child | `drop_privileges()` calls `setgroups` before `setgid`/`setuid` to switch from root to sandbox | +| `SETGID` | Same as SETUID — discovered together | `drop_privileges()` calls `setgid(sandbox_gid)` | +| `CHOWN` | Supervisor sets ownership on `/workspace` and `/tmp` | `chown sandbox:sandbox` on read-write directories before privilege drop | +| `DAC_OVERRIDE` | Directory access failures during privilege transition | Access directories that don't have world-readable permissions | + +### Why `runAsUser: 0` + +The Supervisor MUST start as root because: +1. Network namespace creation requires `CAP_NET_ADMIN` + effective UID 0 +2. Mount operations on `/var/run/netns` require `CAP_SYS_ADMIN` +3. The `drop_privileges()` call in `pre_exec` transitions from root → sandbox user + +After fork, the child process runs as `sandbox:sandbox` (non-root). The +Supervisor parent process remains as root for the TLS proxy. + +### Why `seccompProfile: Unconfined` + +The Supervisor applies its own three-layer seccomp-BPF filter to the child +process. If the container-level seccomp profile (from the CRI runtime) is more +restrictive, it can interfere with the Supervisor's own syscalls for namespace +setup, mount operations, and process management. Setting `Unconfined` at the pod +level delegates seccomp entirely to the Supervisor. + +### Why File Mode (No Gateway) + +The original analysis assumed Gateway mode with gRPC provider registration. File +mode was chosen instead because: + +1. No additional service to deploy and operate +2. Policy is static per deployment — it doesn't change per-session +3. ConfigMap propagation is a native K8s pattern the CP already uses +4. The Supervisor loads policy from `--policy-rules` and `--policy-data` flags +5. 
Eliminates the mTLS PKI bootstrap that Gateway mode requires + +### The `/usr/local/bin/claude` Symlink + +The Claude Agent SDK bundles its CLI binary at a version-dependent path: +``` +/usr/local/lib/python3.12/site-packages/claude_agent_sdk/_bundled/claude +``` + +This path changes with Python version and SDK version. The policy's `binaries` +list needs a stable path to identify which binary is making network requests. +The Dockerfile creates a symlink at build time: + +```dockerfile +BUNDLED=$(python3 -c 'import claude_agent_sdk; from pathlib import Path; print(Path(claude_agent_sdk.__file__).parent / "_bundled" / "claude")') && \ +ln -sf "$BUNDLED" /usr/local/bin/claude +``` + +The wrapper script and policy both reference `/usr/local/bin/claude`. + +--- + +## Debugging Journey + +### Error Progression (Chronological) + +| # | Error | Root Cause | Fix | +|---|-------|-----------|-----| +| 1 | SCC `restricted` blocking `NET_ADMIN` | Default OpenShift SCC doesn't allow custom capabilities | Created custom SCC `openshell-sandbox` | +| 2 | ConfigMap not found in runner namespace | Policy ConfigMap exists only in CP namespace | Added `ensureOpenShellPolicy()` to reconciler | +| 3 | Invalid Rego policy format | Initial policy was hand-written; supervisor expects official format | Replaced with official Rego from OpenShell repo | +| 4 | `EPERM` on network namespace creation | Missing mount propagation for `/var/run/netns` | Added `SYS_ADMIN` capability, `allowPrivilegeEscalation: true`, `runAsUser: 0` | +| 5 | `EINVAL` from unknown syscall | Initially misattributed to `landlock_restrict_self(fd, flags=1)` needing kernel 6.10+ | Extensive ptrace debugging proved `landlock_restrict_self` was NEVER called; actual cause was `setgroups` EPERM | +| 6 | `setgroups(1, ...) 
= EPERM` | Missing `SETUID`, `SETGID`, `CHOWN`, `DAC_OVERRIDE` capabilities | Added all 4 capabilities to reconciler and SCC | +| 7 | `Permission denied (os error 13)` launching claude | `claude` binary not in PATH inside sandbox | Added `/usr/local/bin/claude` symlink in Dockerfile | + +### The EINVAL Misdiagnosis + +The most significant debugging challenge was error #5. The Supervisor logged +`EINVAL` and the initial hypothesis was that `landlock_restrict_self(fd, flags=1)` +was failing because the `LANDLOCK_RESTRICT_SELF_LOG` flag (bit 0) requires +kernel 6.10+. + +Nine custom C ptrace tracer programs were built and injected into the container +to intercept every syscall from every thread. The definitive finding: +**syscall 446 (`landlock_restrict_self`) was never called by any traced process**. +The EINVAL errors were all from `prctl(23, ...)` in forked `ip`/`nsenter` +subprocesses — non-fatal background noise. + +The actual failing syscall was `setgroups(1, [sandbox_gid]) = -1 (EPERM)` in the +child process after fork, during the `drop_privileges()` sequence. The fix was +adding `SETUID`, `SETGID`, `CHOWN`, and `DAC_OVERRIDE` capabilities. 
+ +--- + +## Verified End-to-End Results + +### Sandbox Layer Confirmation (from Supervisor logs) + +``` +CONFIG:LOADING [INFO] Loading OPA policy engine from local files +CONFIG:VALIDATED [INFO] Validated 'sandbox' user exists in image +CONFIG:ENABLED [INFO] TLS termination enabled: ephemeral CA generated +CONFIG:CREATING [INFO] Creating network namespace [ns:sandbox-*] +CONFIG:CREATED [INFO] Network namespace created [host_ip:10.200.0.1 sandbox_ip:10.200.0.2] +CONFIG:PROBED [INFO] Landlock filesystem sandbox available [abi:v5 compat:BestEffort ro:8 rw:4] +CONFIG:BUILT [INFO] Landlock ruleset built [rules_applied:12 skipped:0] +PROC:LAUNCH [INFO] /usr/local/bin/claude(pid) +``` + +### Network Policy Enforcement (from curl tests inside sandbox) + +| Target | HTTP Status | Policy Match | +|--------|-------------|-------------| +| `api.anthropic.com` | 404 (connected) | `anthropic-api` | +| `us-east5-aiplatform.googleapis.com` | 404 (connected) | `vertex-ai` | +| `oauth2.googleapis.com` | 404 (connected) | `vertex-ai` | +| `api.github.com` | 200 (connected) | `github` | +| `evil.com` | 000 (refused) | No match — **blocked** | + +### Sandbox Environment (injected by Supervisor) + +``` +ALL_PROXY=http://10.200.0.1:3128 +HTTPS_PROXY=http://10.200.0.1:3128 +HTTP_PROXY=http://10.200.0.1:3128 +NO_PROXY=127.0.0.1,localhost,::1 +SSL_CERT_FILE=/etc/openshell-tls/ca-bundle.pem +NODE_EXTRA_CA_CERTS=/etc/openshell-tls/openshell-ca.pem +NODE_USE_ENV_PROXY=1 +GIT_SSL_CAINFO=/etc/openshell-tls/ca-bundle.pem +DENO_CERT=/etc/openshell-tls/openshell-ca.pem +``` + +--- + +## OpenShift SCC Reference + +The custom SCC required on OpenShift clusters: + +```yaml +apiVersion: security.openshift.io/v1 +kind: SecurityContextConstraints +metadata: + name: openshell-sandbox +allowHostDirVolumePlugin: false +allowHostIPC: false +allowHostNetwork: false +allowHostPID: false +allowHostPorts: false +allowPrivilegeEscalation: true +allowPrivilegedContainer: false +allowedCapabilities: + - NET_ADMIN 
+ - SYS_ADMIN + - SYS_PTRACE + - SETUID + - SETGID + - CHOWN + - DAC_OVERRIDE +defaultAddCapabilities: null +fsGroup: + type: RunAsAny +readOnlyRootFilesystem: false +requiredDropCapabilities: + - KILL + - MKNOD +runAsUser: + type: RunAsAny +seLinuxContext: + type: MustRunAs +seccompProfiles: + - '*' +supplementalGroups: + type: RunAsAny +volumes: + - configMap + - downwardAPI + - emptyDir + - persistentVolumeClaim + - projected + - secret +``` + +--- + +## Known Warnings (Non-Fatal) + +| Warning | Source | Impact | +|---------|--------|--------| +| `nft not found; bypass detection rules will not be installed` | Supervisor | `nftables` not in runner image; bypass detection iptables rules not installed. Network namespace still enforces routing. | +| `runtime cgroup pids.max is unlimited` | Supervisor | No PID limit configured at container/cgroup level. Fork bomb protection relies on `RLIMIT_NPROC=512` set by supervisor. | +| `Failed to delete network namespace` | Supervisor | Cleanup race on fast shutdown. Harmless; pod restart clears all. | + +--- + +## Future Work + +### Phase 2: Credential Placeholder/Proxy + +Replace real integration tokens with `openshell:resolve:env:*` placeholders in +the runner environment. The Supervisor's TLS proxy would rewrite placeholders to +real values on outbound HTTP requests. This eliminates LLM credential exposure +in the agent's `/proc/self/environ`. + +### Phase 3: Per-Session OPA Policies + +Generate per-session policy data from the project's credential bindings. A session +with only GitHub credentials would get a tighter network policy than one with +GitHub + GitLab + Jira. + +### Phase 4: nftables Bypass Detection + +Add the `nftables` package to the runner image to enable the Supervisor's bypass +detection iptables rules (LOG + REJECT for direct connections that skip the proxy). + +--- + +## Original Analysis (Preserved for Context) + +The sections below are the original pre-implementation analysis from 2026-06-03. 
+They are preserved for historical context. The actual implementation diverged +from the original analysis in several ways (file mode instead of Gateway mode, +7 capabilities instead of 1, no auth.py changes in Phase 1). + +<details>
+<summary>Original: Current Runner Credential Model (The Problem)</summary> + +The runner puts **real secrets directly into `os.environ`** and the agent's process memory. If the agent inspects its own environment, it sees real credentials. + +### How Secrets Flow Today + +| Mechanism | File | What Happens | +|-----------|------|-------------| +| `populate_runtime_credentials()` | `platform/auth.py` | Fetches real tokens from backend API, writes them into `os.environ`: `GITHUB_TOKEN`, `GITLAB_TOKEN`, `JIRA_API_TOKEN`, `ANTHROPIC_API_KEY`, `CODERABBIT_API_KEY`, etc. | +| Token files on disk | `platform/auth.py` | Writes real tokens to `/tmp/.ambient_github_token`, `/tmp/.ambient_gitlab_token`, `/tmp/.ambient_kubeconfig` for the git credential helper and `gh` wrapper | +| Git credential helper | `platform/auth.py` | Shell script at `/tmp/git-credential-ambient` reads the real token from temp file and pipes it to git | +| `gh` CLI wrapper | `platform/auth.py` | Shell script reads real GitHub token from file, exports `GH_TOKEN`, then exec's the real `gh` | +| Secret redaction middleware | `middleware/secret_redaction.py` | Post-hoc defense: scrubs secrets from *outbound AG-UI events* only — the agent process still has full access to real secrets in memory and on disk | + +### The Gap + +``` +Agent reads /proc/self/environ → sees GITHUB_TOKEN=ghp_real_secret +Agent runs: cat /tmp/.ambient_* → sees real tokens +Agent runs: echo $ANTHROPIC_API_KEY → sees real API key +``` + +The redaction middleware protects the *output stream* (events sent to the frontend), not the agent itself. A compromised or misbehaving agent has unrestricted access to all credentials. + +</details>
+ +<details>
+<summary>Original: Strategy Comparison</summary> + +| Criterion | Strategy 1 (Supervisor) | Strategy 2 (Pod Runtime) | Strategy 3 (Proxy Only) | +|-----------|---------------------|------------------------|------------------------| +| Credential isolation | Full (placeholder/proxy) | Full (placeholder/proxy) | Partial (no netns enforcement) | +| Network isolation | Full (netns + iptables) | Full (netns + iptables) | None | +| Filesystem isolation | Landlock LSM | Landlock LSM | None | +| Syscall filtering | seccomp-BPF | seccomp-BPF | None | +| L7 inspection (OPA) | Yes | Yes | No | +| Runner code changes | Moderate | None | Small | +| Kernel requirements | Linux 5.13+ | Linux 5.13+ | None | +| Defense depth | 5 layers | 5 layers | 1 layer | + +**Strategy 1 was selected.** + +</details>
diff --git a/docs/internal/agents/openshell-security-analysis.md b/docs/internal/agents/openshell-security-analysis.md new file mode 100644 index 000000000..4992024f6 --- /dev/null +++ b/docs/internal/agents/openshell-security-analysis.md @@ -0,0 +1,267 @@ +# NVIDIA OpenShell — Security Model Analysis + +> Research date: 2026-06-03 +> Source: https://github.com/NVIDIA/OpenShell (commit f954e592) +> Implementation status: **Integrated** — Supervisor v0.0.56, file mode, validated on ROSA OpenShift (kernel 5.14+) +> Implementation record: [openshell-runner-adaptation.md](openshell-runner-adaptation.md) + +## Overview + +OpenShell is a Rust-based sandbox runtime for AI agents. It wraps tools like Claude Code, Codex, or Copilot in a hardened execution environment with defense-in-depth isolation. It provides a security cage that the agent runs inside, enforcing filesystem, network, process, and credential policies via declarative YAML. + +- **Language**: Rust (core runtime), Python (SDK/bindings) +- **License**: Apache 2.0 +- **Status**: Alpha (single-developer mode) +- **Compute drivers**: Docker, Podman, Kubernetes, MicroVM + +## Architecture (Three Components) + +| Component | Role | +|-----------|------| +| **Gateway** | Authenticated control plane (gRPC + mTLS). Stores providers, policies, sandbox state in a database. | +| **Supervisor** (`openshell-sandbox`) | The security boundary. Runs inside the container alongside the agent. Enforces policy, manages credentials, runs the proxy. | +| **CLI/SDK/TUI** | User-facing. Creates sandboxes, manages providers, attaches to sessions. | + +## How It Keeps Credentials From Agents — The Placeholder/Proxy Pattern + +OpenShell uses a credential proxy rewrite architecture that ensures agents never see real secrets. + +### Flow + +1. **User registers a provider** (e.g., `openshell provider create claude --from-env`). The real `ANTHROPIC_API_KEY` is stored in the Gateway database. + +2. 
**Sandbox is created** with `--provider claude`. At startup, the Supervisor calls the Gateway's `GetSandboxProviderEnvironment` gRPC endpoint to fetch credentials. + +3. **Real secrets stay in Supervisor memory only.** The Supervisor injects placeholder values into the agent's environment: + ``` + ANTHROPIC_API_KEY=openshell:resolve:env:ANTHROPIC_API_KEY + ``` + The agent process never sees the real token. + +4. **When the agent makes an API call** (e.g., to `api.anthropic.com`), the request goes through the sandbox's HTTP CONNECT proxy. The proxy rewrites the placeholder back to the real secret in the outbound request before forwarding it upstream. + +5. **E2E tests verify** that raw secrets are never present in the child process environment — only the `openshell:resolve:env:*` placeholders. + +Even if the agent dumps its own environment variables, reads `/proc/self/environ`, or logs its env, it only sees placeholder strings. The real credentials exist exclusively in the Supervisor process memory space, which runs at higher privilege. + +### Supported Provider Types + +| Type | Environment Variables Injected | Typical Use | +|------|-------------------------------|-------------| +| `claude` | `ANTHROPIC_API_KEY`, `CLAUDE_API_KEY` | Claude Code, Anthropic API | +| `github` | `GITHUB_TOKEN`, `GH_TOKEN` | GitHub API, `gh` CLI | +| `gitlab` | `GITLAB_TOKEN`, `GLAB_TOKEN`, `CI_JOB_TOKEN` | GitLab API, `glab` CLI | +| `nvidia` | `NVIDIA_API_KEY` | NVIDIA API Catalog | +| `openai` | `OPENAI_API_KEY` | OpenAI SDK | +| `copilot` | `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, `GITHUB_TOKEN` | GitHub Copilot | +| `generic` | User-defined | Any service with custom credentials | + +## Five Isolation Layers + +### 1. Network Isolation (Linux Network Namespaces + veth Pairs) + +The agent runs in a separate network namespace with only one reachable IP: the proxy at `10.200.0.1`. 
+ +- All outbound traffic MUST pass through the HTTP CONNECT proxy +- iptables rules LOG + REJECT any bypass attempts (direct connections ignoring proxy env vars) +- A bypass monitor watches `dmesg` for these events in real-time +- SSRF protection: DNS results are validated against RFC 1918/loopback ranges before establishing upstream connections + +**IP addressing**: + +| Component | IP Address | Purpose | +|-----------|-----------|---------| +| Host veth interface | `10.200.0.1/24` | Proxy listener endpoint | +| Sandbox veth interface | `10.200.0.2/24` | Sandboxed process gateway | +| Default route (in sandbox namespace) | `via 10.200.0.1` | Routes all traffic to proxy | + +**Bypass detection iptables rules** (OUTPUT chain in sandbox namespace): + +| Priority | Rule | Target | Purpose | +|----------|------|--------|---------| +| 1 | `-d 10.200.0.1/32 -p tcp --dport 3128` | `ACCEPT` | Allow connections to proxy | +| 2 | `-o lo` | `ACCEPT` | Allow loopback traffic | +| 3 | `-m conntrack --ctstate ESTABLISHED,RELATED` | `ACCEPT` | Allow response packets | +| 4 | `-p tcp --syn ... --log-prefix "openshell:bypass:{ns}:"` | `LOG` | Log TCP bypass attempts | +| 5 | `-p tcp` | `REJECT` | Reject TCP bypass attempts | +| 6 | `-p udp ... --log-prefix "openshell:bypass:{ns}:"` | `LOG` | Log UDP bypass attempts | +| 7 | `-p udp` | `REJECT` | Reject UDP bypass attempts | + +### 2. Process Isolation (Pre-exec Enforcement) + +After fork but before exec, a strict enforcement sequence runs: + +1. `setpgid` — isolate process group +2. `setns` — enter network namespace +3. `harden_child_process` — apply resource limits +4. `drop_privileges` — switch to unprivileged user +5. 
`sandbox::apply` — Landlock + seccomp + +**Resource hardening**: + +| Mechanism | Setting | Purpose | +|-----------|---------|---------| +| `RLIMIT_CORE` | 0 | No core dumps (prevents sensitive memory leaks) | +| `RLIMIT_NPROC` | 512 | Prevents fork bombs | +| `PR_SET_DUMPABLE` | 0 | Blocks ptrace attach | +| `PR_SET_NO_NEW_PRIVS` | 1 | No setuid escalation | + +**Privilege drop verification**: After `setuid`/`setgid` to the unprivileged user, the supervisor attempts `setuid(0)` and confirms it returns `EPERM` — proving privileges cannot be regained. + +**Environment sanitization**: `scrub_sensitive_env` removes `OPENSHELL_SSH_HANDSHAKE_SECRET` before spawning the child process. + +### 3. Filesystem Isolation (Landlock LSM) + +Landlock provides kernel-level filesystem access control via explicit allowlists. + +**Two-phase application**: +1. **Phase 1 (as root)**: `landlock::prepare()` opens `PathFd`s for all allowed paths while the supervisor still has root privileges +2. **Phase 2 (unprivileged)**: Inside the `pre_exec` closure, after `drop_privileges()`, `restrict_self()` applies the Landlock ruleset — this does not require root + +**Compatibility modes**: +- `best_effort` — logs a warning and continues without filesystem isolation if the kernel lacks Landlock +- `hard_requirement` — aborts sandbox startup if Landlock cannot be enforced + +Credentials are never written to the sandbox filesystem. They exist only in Supervisor memory. + +### 4. 
Syscall Filtering (seccomp-BPF) + +Three layers of seccomp filters: + +| Filter | Target | Blocked Syscalls | +|--------|--------|-----------------| +| **Supervisor Prelude** | Supervisor process | `mount`, `umount2`, `pivot_root`, `init_module`, `delete_module`, `bpf`, `perf_event_open`, `userfaultfd` | +| **Clone3 Filter** | Child process | `clone3` → `ENOSYS` (forces glibc fallback to `clone` where flags can be filtered) | +| **Main Runtime Filter** | Child process | `ptrace`, `memfd_create`, `io_uring_setup`; socket domains `AF_PACKET`, `AF_NETLINK`, `AF_BLUETOOTH`, `AF_VSOCK` | + +In `NetworkMode::Block`, additionally blocks `AF_INET` and `AF_INET6` to deny all network access. + +### 5. L7 Protocol Inspection (OPA + MITM Proxy) + +For endpoints configured with `protocol: rest`, the proxy performs deep application-layer inspection: + +1. **TLS termination**: The Supervisor generates an ephemeral per-sandbox CA at startup. The CA cert is injected into the child process via `SSL_CERT_FILE`. The proxy issues on-the-fly certificates for each intercepted hostname (cached up to 256 entries). + +2. **Request parsing**: Each HTTP request is parsed. Paths are canonicalized to prevent `../` and `%2e%2e` traversal bypasses. + +3. **OPA evaluation**: Each request is evaluated against Rego rules with input containing `(host, port, binary_path, http_method, canonicalized_path)`. + +4. **Credential redaction**: Placeholders in request URIs are resolved by the proxy, but the **redacted** path is sent to OPA to prevent secrets leaking into policy logs. + +5. **Enforcement modes**: `Audit` (log but forward) or `Enforce` (block with 403 Forbidden). 
+ +**Example L7 policy** (GitHub API read-only): + +```yaml +network_policies: + github_api: + name: github-api-readonly + endpoints: + - host: api.github.com + port: 443 + protocol: rest + access: read-only + binaries: + - { path: /usr/bin/curl } +``` + +## Policy Engine (OPA/Rego) + +- Policies are YAML-defined, compiled to OPA Rego rules evaluated by `regorus` (pure-Rust Rego evaluator) +- In production (gRPC mode), policies are fetched from the Gateway and hot-reloaded every 30 seconds +- Binary identity: the proxy resolves which binary is making each request via `/proc` inspection + SHA256 TOFU (Trust On First Use) cache — if a binary is modified after first use, the connection is denied +- Formal verification available via `openshell-prover` crate + +**Key OPA rules**: + +| Rule | Returns | Purpose | +|------|---------|---------| +| `allow_network` | `bool` | L4 allow/deny decision | +| `network_action` | `"allow"` or `"deny"` | L4 routing decision | +| `deny_reason` | `string` | Human-readable denial reason | +| `matched_network_policy` | `string` | Matched policy rule name | +| `matched_endpoint_config` | `object` | L7 inspection configuration | +| `allow_request` | `bool` | Per-request HTTP allow/deny | +| `request_deny_reason` | `string` | L7-specific denial reason | + +## PKI and TLS + +### Gateway mTLS (Default) + +- Three-tier PKI: Cluster CA → Server cert (Gateway TLS termination) → Client cert (CLI + sandbox pods) +- Shared client cert for CLI and all sandbox pods (individual sandbox identity via `x-sandbox-id` gRPC header) +- Ephemeral CA key: used only during generation, not stored in Kubernetes +- Long-lived certificates (effectively never expire) +- Reconciliation: bootstrap checks existing K8s secrets, validates PEM markers, regenerates if malformed, triggers rollout restart on rotation + +### Sandbox MITM CA (L7 Inspection) + +- Generated fresh per sandbox lifecycle — completely separate from the cluster PKI +- `SandboxCa::generate()` creates a 
self-signed root with `KeyCertSign` and `CrlSign` usages +- `CertCache` maintains per-hostname leaf certificates (up to 256) +- Upstream verification uses Mozilla root store + system CA paths +- CA cert written to `/etc/openshell-tls/ca-cert.pem`, injected via `SSL_CERT_FILE` and `NODE_EXTRA_CA_CERTS` + +### Authentication Modes + +| Feature | mTLS | Cloudflare JWT | +|---------|------|----------------| +| **Transport** | HTTPS (mTLS) | HTTPS (Plain) or HTTP | +| **Credential** | Client Certificate | JWT Bearer Token | +| **Gateway Flag** | Default | `allow_unauthenticated=true` | +| **CLI Command** | `gateway add --local` | `gateway login` | + +## Audit Logging (OCSF) + +OpenShell uses the Open Cybersecurity Schema Framework (OCSF) standard for structured security logging: + +- Network decisions (allow/deny) with matched policy name and denial reason +- Process lifecycle events +- Security findings (e.g., Landlock unavailable on kernel) +- Bypass detection events with remediation hints +- Configuration changes + +## Relevance to Ambient + +> **Status: Implemented.** The Supervisor (file mode, v0.0.56) is integrated into the +> runner. See [openshell-runner-adaptation.md](openshell-runner-adaptation.md) for +> full implementation details. + +OpenShell operates at a different layer than Ambient but is directly complementary: + +- **Ambient** orchestrates *which* agents run, *when*, *where*, and *with what prompt/context* +- **OpenShell** provides the *sandbox runtime* that those agents execute inside + +The runner now uses OpenShell's Supervisor to add intra-container isolation +(Landlock, seccomp, network namespace, L7 proxy) that is significantly more +granular than container-level SecurityContext and NetworkPolicy alone. 
+ +### Integration Points (Implemented) + +| Ambient Component | OpenShell Equivalent | Integration Status | +|-------------------|---------------------|-------------------| +| Runner container (SecurityContext) | Supervisor (Landlock + seccomp + netns) | **Implemented** — Supervisor wraps Claude CLI; 7 capabilities granted to runner | +| Runner NetworkPolicy | Network namespace + proxy + OPA | **Implemented** — per-binary network ACLs via Rego policy; TLS proxy enforces endpoint allowlist | +| K8s Secret env var injection | Provider placeholder/proxy rewrite | **Deferred** — LLM credentials still in runner env; integration credentials isolated via MCP sidecars | +| Runner pod RBAC | Binary identity + TOFU cache | **Implemented** — policy `binaries` list restricts which executables can access each endpoint | + +### What We Learned During Implementation + +Key divergences from this analysis that were discovered during implementation: + +1. **File mode eliminates Gateway dependency.** The Supervisor reads policy from + local files (`--policy-rules`, `--policy-data`). No gRPC Gateway, no mTLS PKI, + no provider registration. Policy is distributed via K8s ConfigMap. + +2. **7 capabilities required, not just NET_ADMIN.** Beyond NET_ADMIN for network + namespace setup, the Supervisor's `pre_exec` + closure calls `setgroups`/`setgid`/`setuid` (requires SETUID, SETGID), `chown` + (requires CHOWN), mount operations (requires SYS_ADMIN), and process inspection + (requires SYS_PTRACE); DAC_OVERRIDE is needed to access directories during the + privilege transition. + +3. **Landlock ABI compatibility.** The Supervisor detects the kernel's Landlock ABI + version at runtime (`abi:v5` on kernel 5.14+) and applies rules compatible with + that version. The `best_effort` mode ensures graceful degradation. + +4. **OCSF logging is production-ready.** The structured log format provides clear + diagnostics for each sandbox setup phase, making production troubleshooting + straightforward.
diff --git a/specs/agents/runner.spec.md b/specs/agents/runner.spec.md index e79e50372..d448c538c 100644 --- a/specs/agents/runner.spec.md +++ b/specs/agents/runner.spec.md @@ -1,7 +1,8 @@ # Ambient Runner Spec **Date:** 2026-04-05 -**Status:** Living Document — current state documented +**Last Updated:** 2026-06-03 +**Status:** Living Document — current state documented, desired state (OpenShell) appended **Related:** `../control-plane/control-plane.spec.md` — CP provisioning, token endpoint, start context assembly --- @@ -28,6 +29,7 @@ Runner Pod (FastAPI + uvicorn) └── HTTP endpoints ├── GET /events/{thread_id} ← live SSE tap (drained by backend proxy) ├── POST / ← AG-UI run (HTTP path, backup) + ├── POST /model ← runtime LLM model switch ├── POST /interrupt └── GET /health ``` @@ -59,7 +61,10 @@ ambient_runner/ _session_messages_api.py ← SessionMessagesAPI (hand-rolled proto codec) _inbox_messages_api.py ← InboxMessagesAPI observability.py ← ObservabilityManager (Langfuse) + observability_config.py ← Observability configuration observability_models.py ← Langfuse event model types + observability_privacy.py ← Privacy-aware observability filtering + mlflow_observability.py ← MLflow observability integration platform/ context.py ← RunnerContext dataclass (shared runtime state) @@ -70,7 +75,6 @@ ambient_runner/ utils.py ← Pure helpers (redact_secrets, get_bot_token, url_with_token) security_utils.py ← Input validation helpers feedback.py ← User feedback storage - workspace.py ← Workspace setup and validation bridges/claude/ bridge.py ← ClaudeBridge (PlatformBridge impl) @@ -82,7 +86,9 @@ ambient_runner/ backend_tools.py ← acp_* MCP tools (backend API access for Claude) prompts.py ← SDK system prompt builder corrections.py ← Correction detection and logging + operational_events.py ← Operational event emission (session lifecycle, errors) mock_client.py ← Local dev mock (no Claude subprocess) + fixtures/ ← JSONL fixtures for local dev mock bridges/gemini_cli/ ← Gemini 
CLI bridge (separate impl, same ABC) bridges/langgraph/ ← LangGraph bridge (stub) @@ -99,6 +105,7 @@ ambient_runner/ content.py ← GET /content tasks.py ← GET /tasks feedback.py ← POST /feedback + model.py ← POST /model (runtime LLM model switch) middleware/ grpc_push.py ← grpc_push_middleware (HTTP-path event fan-out) @@ -187,6 +194,12 @@ set_bot_token(token) # cache in utils.py On gRPC `UNAUTHENTICATED`, the listener calls `grpc_client.reconnect()` which re-fetches from the CP endpoint and rebuilds the channel. +### AGUI_TOKEN Session Authentication + +When the `AGUI_TOKEN` env var is set (injected by the Operator), the runner registers an HTTP middleware that requires all non-health requests to include an `X-Ambient-Session-Token` header matching the token. Comparison uses `secrets.compare_digest()` to prevent timing attacks. + +This prevents cross-session attacks in which an attacker who discovers the in-cluster URL of another session's runner sends requests to that runner. Health endpoints (`/health`, `/healthz`) are exempted so liveness/readiness probes continue to work. + --- ## Bridge Layer @@ -455,6 +468,8 @@ All env vars are injected by the CP at pod creation time.
| `AMBIENT_MCP_URL` | Ambient MCP sidecar URL (SSE transport) | | `REPOS_JSON` | JSON array of `{url, branch, autoPush}` repo configs | | `ACTIVE_WORKFLOW_GIT_URL` | Active workflow repo URL (overrides REPOS_JSON workspace setup) | +| `AGUI_TOKEN` | Session-scoped bearer token; when set, all non-health endpoints require `X-Ambient-Session-Token` header (constant-time comparison) | +| `SDK_OPTIONS` | JSON string of additional Claude SDK options | --- @@ -502,3 +517,175 @@ The resolved `(cwd_path, add_dirs)` tuple is passed to the Claude SDK via `Claud | `--resume` via persisted session IDs | Claude Code saves state to `.claude/` on graceful subprocess shutdown; session IDs survive `mark_dirty()` rebuilds via JSON file and `_saved_session_ids` snapshot | | Credential URL validated to cluster-local hostname | Prevents exfiltration of user tokens to external hosts if `BACKEND_API_URL` is tampered with | | LLM credentials (Anthropic/Vertex) remain in runner | These are necessary for inference and cannot be moved to sidecars without changing the SDK contract | +| `AGUI_TOKEN` session auth middleware | Prevents cross-session attacks where an attacker uses another session's runner URL; uses `secrets.compare_digest()` for constant-time comparison | +| Runtime model switching via `POST /model` | Allows the frontend/CLI to change `LLM_MODEL` without restarting the pod; acquires a lock to prevent concurrent switches and rejects if agent is mid-generation | + +--- + +## OpenShell Sandbox Isolation + +> **Status:** Implemented — validated end-to-end on ROSA OpenShift (kernel 5.14+) +> **Companion docs:** `docs/internal/agents/openshell-runner-adaptation.md` (implementation details), `docs/internal/agents/openshell-security-analysis.md` (threat model) +> **Formal requirements:** `specs/security/openshell-sandbox.spec.md` + +The runner wraps the Claude Code subprocess inside NVIDIA OpenShell's Supervisor +binary (`openshell-sandbox` v0.0.56), applying five defense-in-depth 
isolation +layers. The Supervisor operates in **file mode** — policy is provided via local +Rego + YAML files mounted from a ConfigMap. No OpenShell Gateway is required. + +### Architecture + +``` +Runner Pod (FastAPI + uvicorn) — runs UNSANDBOXED + │ + └── bridge.py sets cli_path = /app/openshell-claude-wrapper.sh + │ + └── Claude Agent SDK spawns wrapper as subprocess + │ + └── openshell-claude-wrapper.sh + │ + └── exec /openshell-sandbox \ + --policy-rules /etc/openshell/policy.rego \ + --policy-data /etc/openshell/policy.yaml \ + -- /usr/local/bin/claude "$@" + │ + ├── fork() + │ pre_exec closure (in child, before exec): + │ 1. setns(CLONE_NEWNET) → enter sandbox network namespace + │ 2. drop_privileges(setgroups/setgid/setuid → sandbox:sandbox) + │ 3. harden_child_process(RLIMIT_CORE=0, PR_SET_DUMPABLE=0, PR_SET_NO_NEW_PRIVS=1) + │ 4. landlock::enforce(restrict_self) → filesystem allowlist + │ 5. seccomp::apply(bpf_filter) → syscall blocklist + │ + └── exec(/usr/local/bin/claude) ← runs as sandbox user in isolated netns +``` + +The runner process (FastAPI, gRPC client, credential fetching) runs outside the +sandbox boundary. Only the Claude CLI subprocess is sandboxed. This means the +gRPC client, SSE tap, and health endpoints are unaffected. + +### Five Isolation Layers (All Verified Working) + +| Layer | Mechanism | Verified Evidence | +|-------|-----------|-------------------| +| **1. Network namespace** | `ip netns add` + veth pair (`10.200.0.1`↔`10.200.0.2`), default route via proxy | `OCSF CONFIG:CREATED [INFO] Network namespace created [ns:sandbox-* host_ip:10.200.0.1 sandbox_ip:10.200.0.2]` | +| **2. TLS proxy (L7)** | HTTP CONNECT proxy at `10.200.0.1:3128`, ephemeral per-sandbox CA, `HTTPS_PROXY`/`SSL_CERT_FILE`/`NODE_EXTRA_CA_CERTS` injected | `HTTP/1.1 200 Connection Established` for policy-allowed hosts; `000` (refused) for blocked hosts | +| **3. 
Landlock LSM** | Filesystem allowlist via `landlock_restrict_self` (12 rules: 8 read-only, 4 read-write) | `OCSF CONFIG:BUILT [INFO] Landlock ruleset built [rules_applied:12 skipped:0]` | +| **4. seccomp-BPF** | Three-layer filter: supervisor prelude → clone3 ENOSYS → main runtime (blocks `ptrace`, `memfd_create`, raw sockets) | `Blocking socket domain via seccomp` (3 domains blocked) | +| **5. OPA policy enforcement** | Per-binary network ACLs via Rego rules; binary identity checked per-request | Allowed endpoints return HTTP status; blocked hosts return connection refused | + +### Policy Files + +Policy is stored in a ConfigMap (`openshell-policy`) in the CP namespace and +propagated to each runner namespace by the reconciler's `ensureOpenShellPolicy()`. + +**Filesystem policy** (`policy.yaml`): + +| Access | Paths | +|--------|-------| +| Read-only | `/usr`, `/lib`, `/proc`, `/dev/urandom`, `/app`, `/etc`, `/var/log`, `/home/sandbox` | +| Read-write | `/workspace`, `/tmp`, `/dev/null`, `/app/.claude` | + +**Network policy** (`policy.yaml`): + +| Policy | Endpoints | Allowed Binaries | +|--------|-----------|-----------------| +| `anthropic-api` | `api.anthropic.com:443`, `statsig.anthropic.com:443` | `claude`, `node`, `curl` | +| `vertex-ai` | `us-east5-aiplatform.googleapis.com:443`, `europe-west1-aiplatform.googleapis.com:443`, `us-central1-aiplatform.googleapis.com:443`, `oauth2.googleapis.com:443` | `claude`, `node`, `curl` | +| `github` | `github.com:443`, `api.github.com:443` | `git`, `gh`, `curl` | +| `npm-registry` | `registry.npmjs.org:443` | `npm`, `node`, `npx` | +| `pypi` | `pypi.org:443`, `files.pythonhosted.org:443` | `pip3`, `python3` | +| `gitlab` | `gitlab.com:443` | `git`, `glab` | + +**Rego rules** (`policy.rego`): Official policy from the OpenShell repository +(`package openshell.sandbox`). 
Evaluates `allow_network`, `network_action`, +`deny_reason`, and `allow_request` based on host, port, binary path, HTTP method, +and canonicalized request path. + +### Required Linux Capabilities + +The Supervisor needs elevated capabilities for sandbox setup. These are granted +only when `OPENSHELL_ENABLED=true` in the CP config: + +| Capability | Required For | +|------------|-------------| +| `NET_ADMIN` | Create network namespace (`ip netns add`), configure veth pair and routing | +| `SYS_ADMIN` | Mount propagation for `/var/run/netns`, `nsenter` for in-namespace commands | +| `SYS_PTRACE` | Process tracing for binary identity verification | +| `SETUID` | `drop_privileges()`: switch from root to `sandbox` user via `setuid` | +| `SETGID` | `drop_privileges()`: switch group via `setgid`/`setgroups` | +| `CHOWN` | Set ownership on sandbox directories (`/workspace`, `/tmp`) | +| `DAC_OVERRIDE` | Access directories during privilege transition | + +The container also requires: +- `allowPrivilegeEscalation: true` (needed for `setuid`/`setns` in the pre_exec closure) +- `runAsUser: 0` (Supervisor must start as root to set up netns and drop privileges) +- `seccompProfile.type: Unconfined` at the pod level (Supervisor applies its own seccomp filter) + +### OpenShift SCC + +On OpenShift clusters, a custom SecurityContextConstraints object (`openshell-sandbox`) +MUST be created and bound to the runner service account. The SCC allows the seven +capabilities listed above, `allowPrivilegeEscalation: true`, `runAsUser: RunAsAny`, +and all seccomp profiles.
+ +### Control Plane Integration + +The CP reconciler (`kube_reconciler.go`) conditionally enables OpenShell via the +`OPENSHELL_ENABLED` environment variable: + +| CP Config | Env Var | Default | Purpose | +|-----------|---------|---------|---------| +| `OpenShellEnabled` | `OPENSHELL_ENABLED` | `false` | Master toggle for sandbox isolation | +| `OpenShellPolicyName` | `OPENSHELL_POLICY_CONFIGMAP` | `openshell-policy` | ConfigMap name for policy files | + +When enabled, the reconciler: +1. Copies the policy ConfigMap from the CP namespace to the runner namespace (`ensureOpenShellPolicy`) +2. Adds the policy ConfigMap as a volume + mount at `/etc/openshell` +3. Injects `OPENSHELL_ENABLED=true`, `OPENSHELL_POLICY_RULES`, `OPENSHELL_POLICY_DATA` env vars +4. Overrides the runner security context with elevated capabilities and root UID +5. Sets pod-level seccomp profile to `Unconfined` + +### Environment Variables (OpenShell-specific) + +| Var | Injected By | Purpose | +|-----|-------------|---------| +| `OPENSHELL_ENABLED` | CP reconciler | Enables sandbox wrapper in `bridge.py` | +| `OPENSHELL_POLICY_RULES` | CP reconciler | Path to Rego policy file (`/etc/openshell/policy.rego`) | +| `OPENSHELL_POLICY_DATA` | CP reconciler | Path to YAML policy data (`/etc/openshell/policy.yaml`) | +| `OPENSHELL_LOG_LEVEL` | Wrapper script default | Supervisor log level (`warn` default) | + +### Files Modified + +| File | Component | Change | +|------|-----------|--------| +| `Dockerfile` | Runner | Added `openshell-sandbox` v0.0.56 binary, `sandbox` user, `/workspace` dir, `/usr/local/bin/claude` symlink, `iproute` package | +| `openshell-claude-wrapper.sh` | Runner | Wrapper script: dispatches to supervisor or direct claude based on `OPENSHELL_ENABLED` | +| `bridges/claude/bridge.py` | Runner | `cli_path = "/app/openshell-claude-wrapper.sh"` when OpenShell enabled | +| `.openshell-ref/policy.rego` | Runner | Official OPA Rego policy from OpenShell repository | +| 
`.openshell-ref/policy.yaml` | Runner | Network + filesystem + process policy data | +| `internal/reconciler/kube_reconciler.go` | Control Plane | `buildRunnerSecurityContext`, `buildVolumes`, `buildVolumeMounts`, `buildEnv`, `ensureOpenShellPolicy` | +| `internal/config/config.go` | Control Plane | `OpenShellEnabled`, `OpenShellPolicyName` config fields | +| `internal/kubeclient/kubeclient.go` | Control Plane | `ConfigMapGVR`, `GetConfigMap`, `CreateConfigMap` methods | +| `cmd/ambient-control-plane/main.go` | Control Plane | Thread OpenShell config into reconciler | + +### Known Limitations + +| Limitation | Impact | Mitigation | +|------------|--------|------------| +| `nftables` not installed in runner image | Bypass detection iptables rules not installed; supervisor logs `DEGRADED` warning | Network namespace still enforces proxy routing via default route; add `nftables` package to Dockerfile in a future iteration | +| `cgroup pids.max` unlimited | Supervisor warns about missing PID limit | Configure pod resource limits or cgroup constraints at the node level | +| Network namespace cleanup on crash | If the supervisor crashes, leftover netns/veth pairs may cause `Address in use` on next start | Pod restart cleans up; the supervisor's cleanup logic handles most cases | +| Credential proxy pattern not yet implemented | Agent still has LLM credentials in environment (Vertex AI service account) | LLM credentials are necessary for inference; placeholder/proxy rewrite is a future phase | +| Kernel 5.14+ required for Landlock ABI v2+ | Landlock `restrict_self` with flags requires kernel 6.10+; v0.0.56 uses flags=0 on older kernels | `best_effort` compatibility mode ensures graceful degradation | + +### Design Decisions + +| Decision | Rationale | +|----------|-----------| +| File mode (no Gateway) | Eliminates operational dependency on OpenShell Gateway; policy is static per-deployment and distributed via ConfigMap | +| Wrapper script instead of direct SDK 
modification | Minimal change surface in bridge.py (1 line); wrapper handles supervisor dispatch vs. direct execution | +| Supervisor v0.0.56 pinned | Reproducible builds; version tested end-to-end on ROSA | +| Root UID for runner when sandbox enabled | Supervisor must create network namespaces and drop privileges to sandbox user; running as non-root prevents netns setup | +| ConfigMap propagation from CP namespace | Runner namespace may not exist when the CP starts; propagation on session provision ensures policy availability | +| `/usr/local/bin/claude` symlink | Claude SDK bundles its CLI at a version-dependent path; symlink provides a stable path for the policy's `binaries` list | diff --git a/specs/security/openshell-sandbox.spec.md b/specs/security/openshell-sandbox.spec.md new file mode 100644 index 000000000..9aee8247a --- /dev/null +++ b/specs/security/openshell-sandbox.spec.md @@ -0,0 +1,311 @@ +# OpenShell Sandbox Isolation Specification + +**Date:** 2026-06-04 +**Status:** Implemented — validated end-to-end on ROSA OpenShift (kernel 5.14.0-570.99.1.el9_6) +**Related:** `specs/agents/runner.spec.md` § OpenShell Sandbox Isolation, `specs/control-plane/control-plane.spec.md` + +--- + +## Purpose + +This specification defines the requirements for sandboxing the Claude Code agent +subprocess using NVIDIA OpenShell's Supervisor binary. The sandbox prevents a +compromised or misbehaving agent from accessing credentials, filesystem regions, +network endpoints, or syscalls outside its declared policy. + +--- + +## Requirements + +### Requirement: Sandbox Activation + +The sandbox SHALL be activated when the control plane environment variable +`OPENSHELL_ENABLED` is set to `true`. When not enabled, the runner SHALL launch +Claude Code directly without any sandbox wrapper. 
+ +#### Scenario: Sandbox enabled + +- GIVEN the CP config has `OpenShellEnabled = true` +- WHEN a session pod is provisioned +- THEN the runner container SHALL have `OPENSHELL_ENABLED=true` in its environment +- AND the Claude CLI SHALL be launched through the OpenShell Supervisor wrapper + +#### Scenario: Sandbox disabled (default) + +- GIVEN the CP config has `OpenShellEnabled = false` (or unset) +- WHEN a session pod is provisioned +- THEN the runner container SHALL NOT have OpenShell environment variables +- AND the Claude CLI SHALL be launched directly by the Claude Agent SDK + +--- + +### Requirement: File Mode Operation + +The Supervisor SHALL operate in file mode using local policy files. The system +SHALL NOT require an OpenShell Gateway service. + +#### Scenario: Policy file delivery + +- GIVEN an `openshell-policy` ConfigMap exists in the CP namespace +- WHEN a session is provisioned in a runner namespace +- THEN the reconciler SHALL copy the ConfigMap to the runner namespace +- AND mount it as a read-only volume at `/etc/openshell` + +#### Scenario: Policy file format + +- GIVEN the ConfigMap contains `policy.rego` and `policy.yaml` +- WHEN the Supervisor starts +- THEN it SHALL load the Rego rules from `--policy-rules` +- AND load the YAML data from `--policy-data` +- AND validate the policy before spawning the child process + +--- + +### Requirement: Network Namespace Isolation + +The agent subprocess SHALL run in a separate Linux network namespace. All network +traffic from the agent SHALL route through the Supervisor's TLS proxy. 
+ +#### Scenario: Network namespace creation + +- GIVEN the Supervisor starts with network policy configured +- WHEN it creates the sandbox environment +- THEN it SHALL create a new network namespace with a veth pair +- AND the host side SHALL listen on `10.200.0.1:3128` (HTTP CONNECT proxy) +- AND the sandbox side SHALL have `10.200.0.2/24` with default route via `10.200.0.1` +- AND the child process SHALL have `HTTPS_PROXY`, `HTTP_PROXY`, `ALL_PROXY` set to `http://10.200.0.1:3128` + +#### Scenario: Blocked endpoint + +- GIVEN an endpoint is NOT listed in any `network_policies` entry +- WHEN the agent attempts to connect to that endpoint +- THEN the proxy SHALL refuse the connection +- AND the agent SHALL receive a connection error + +#### Scenario: Allowed endpoint + +- GIVEN an endpoint IS listed in a `network_policies` entry +- AND the requesting binary matches the policy's `binaries` list +- WHEN the agent connects to that endpoint +- THEN the proxy SHALL establish an HTTP CONNECT tunnel +- AND perform TLS termination with the ephemeral per-sandbox CA +- AND forward the request to the upstream server + +--- + +### Requirement: TLS Proxy + +The Supervisor SHALL generate an ephemeral CA certificate per sandbox lifetime and +inject it into the child process via `SSL_CERT_FILE`, `NODE_EXTRA_CA_CERTS`, and +`GIT_SSL_CAINFO` environment variables. + +#### Scenario: TLS trust chain + +- GIVEN the Supervisor generates an ephemeral CA at startup +- WHEN the agent makes an HTTPS request through the proxy +- THEN the proxy SHALL issue a per-hostname leaf certificate signed by the ephemeral CA +- AND the agent's TLS client SHALL trust the certificate via the injected CA bundle +- AND the proxy SHALL verify upstream certificates against the system CA store + +--- + +### Requirement: Filesystem Isolation (Landlock LSM) + +The agent subprocess SHALL be confined to a filesystem allowlist enforced by +Landlock LSM. 
+ +#### Scenario: Read-only paths + +- GIVEN the policy declares `/usr`, `/lib`, `/proc`, `/dev/urandom`, `/app`, `/etc`, `/var/log`, `/home/sandbox` as read-only +- WHEN the agent attempts to write to any of these paths +- THEN the write SHALL be denied by the kernel + +#### Scenario: Read-write paths + +- GIVEN the policy declares `/workspace`, `/tmp`, `/dev/null`, `/app/.claude` as read-write +- WHEN the agent writes to these paths +- THEN the write SHALL succeed + +#### Scenario: Undeclared paths + +- GIVEN a path is not listed in either read-only or read-write lists +- WHEN the agent attempts to access that path +- THEN access SHALL be denied by the kernel + +#### Scenario: Landlock compatibility + +- GIVEN the kernel supports Landlock ABI v2 or higher +- WHEN the Supervisor applies the Landlock ruleset +- THEN it SHALL apply all rules +- AND report the number of rules applied and skipped + +- GIVEN the kernel does NOT support Landlock +- AND the policy has `landlock.compatibility: best_effort` +- WHEN the Supervisor attempts to apply Landlock +- THEN it SHALL log a warning and continue without filesystem isolation + +--- + +### Requirement: Process Privilege Drop + +The Supervisor SHALL drop privileges before executing the agent binary. 
+ +#### Scenario: Privilege drop sequence + +- GIVEN the Supervisor starts as root (UID 0) +- WHEN it forks the child process +- THEN the pre_exec closure SHALL call `setgroups`, `setgid`, `setuid` to switch to the `sandbox` user +- AND set `RLIMIT_CORE` to 0 (no core dumps) +- AND set `PR_SET_DUMPABLE` to 0 (blocks ptrace attach) +- AND set `PR_SET_NO_NEW_PRIVS` to 1 (no setuid escalation) + +#### Scenario: Privilege drop verification + +- GIVEN the child has called `setuid(sandbox_uid)` +- WHEN the Supervisor verifies the drop +- THEN it SHALL attempt `setuid(0)` and confirm it returns `EPERM` + +--- + +### Requirement: Syscall Filtering (seccomp-BPF) + +The agent subprocess SHALL have a seccomp-BPF filter applied that blocks +dangerous syscalls. + +#### Scenario: Blocked syscalls + +- GIVEN the seccomp filter is applied +- WHEN the agent attempts `ptrace`, `memfd_create`, or `io_uring_setup` +- THEN the syscall SHALL be blocked + +#### Scenario: Blocked socket domains + +- GIVEN the seccomp filter is applied +- WHEN the agent attempts to create sockets with `AF_PACKET`, `AF_NETLINK`, or `AF_BLUETOOTH` +- THEN the socket creation SHALL be blocked + +--- + +### Requirement: Container Security Context + +The reconciler SHALL configure the runner container's security context based on +the `OpenShellEnabled` flag. 
+ +#### Scenario: OpenShell enabled + +- GIVEN `OpenShellEnabled = true` +- WHEN the reconciler builds the pod spec +- THEN the container security context SHALL include: + - `allowPrivilegeEscalation: true` + - `runAsUser: 0` + - `runAsNonRoot: false` + - `capabilities.drop: [ALL]` + - `capabilities.add: [NET_ADMIN, SYS_ADMIN, SYS_PTRACE, SETUID, SETGID, CHOWN, DAC_OVERRIDE]` +- AND the pod-level security context SHALL include `seccompProfile.type: Unconfined` + +#### Scenario: OpenShell disabled + +- GIVEN `OpenShellEnabled = false` +- WHEN the reconciler builds the pod spec +- THEN the container security context SHALL include: + - `allowPrivilegeEscalation: false` + - `capabilities.drop: [ALL]` +- AND the pod-level security context SHALL NOT override seccomp + +--- + +### Requirement: Policy ConfigMap Propagation + +The reconciler SHALL propagate the OpenShell policy ConfigMap from the control +plane namespace to each runner namespace. + +#### Scenario: ConfigMap already exists + +- GIVEN the policy ConfigMap already exists in the runner namespace +- WHEN the reconciler provisions a session +- THEN it SHALL skip the copy +- AND proceed with pod creation + +#### Scenario: ConfigMap does not exist + +- GIVEN the policy ConfigMap does NOT exist in the runner namespace +- AND the ConfigMap exists in the CP namespace +- WHEN the reconciler provisions a session +- THEN it SHALL create a copy in the runner namespace +- AND the copy SHALL contain the same `data` keys as the source + +--- + +### Requirement: Runner Image Prerequisites + +The runner container image SHALL include all dependencies required for sandbox +operation. 
+ +#### Scenario: Image contents + +- GIVEN the runner Dockerfile +- WHEN the image is built +- THEN it SHALL contain: + - `/openshell-sandbox` binary (pinned to a specific version) + - `iproute` package (provides `ip netns` for network namespace management) + - A `sandbox` user and group (for privilege drop target) + - `/var/run/netns` directory with mode 777 (for network namespace mount points) + - `/workspace` directory owned by `sandbox:sandbox` + - `/usr/local/bin/claude` symlink to the bundled Claude CLI binary + - `/app/openshell-claude-wrapper.sh` wrapper script + +--- + +### Requirement: Wrapper Script Dispatch + +The wrapper script SHALL dispatch to the Supervisor or directly to Claude based +on the `OPENSHELL_ENABLED` environment variable. + +#### Scenario: OpenShell enabled + +- GIVEN `OPENSHELL_ENABLED=true` +- WHEN the wrapper script executes +- THEN it SHALL exec the Supervisor with `--policy-rules`, `--policy-data`, `--log-level` flags +- AND pass the Claude binary path and all arguments after `--` + +#### Scenario: OpenShell disabled + +- GIVEN `OPENSHELL_ENABLED` is unset or not `true` +- WHEN the wrapper script executes +- THEN it SHALL exec the Claude binary directly + +--- + +## Operational Notes + +### Supervisor Log Messages (OCSF Format) + +The Supervisor emits structured logs in OCSF (Open Cybersecurity Schema Framework) format: + +| Log Entry | Severity | Meaning | +|-----------|----------|---------| +| `CONFIG:LOADING` | INFO | Loading policy from local files | +| `CONFIG:VALIDATED` | INFO | Sandbox user validated in image | +| `CONFIG:ENABLED` | INFO | TLS termination enabled, ephemeral CA generated | +| `CONFIG:CREATING` | INFO | Creating network namespace | +| `CONFIG:CREATED` | INFO | Network namespace created with IP addresses | +| `CONFIG:DEGRADED` | MEDIUM | `nft` not found; bypass detection rules not installed | +| `CONFIG:PROBED` | INFO | Landlock availability probed | +| `CONFIG:BUILT` | INFO | Landlock ruleset built with rule 
counts | +| `NET:LISTEN` | INFO | Proxy listening on address | +| `PROC:LAUNCH` | INFO | Child process spawned | +| `CONFIG:CLEANED_UP` | INFO | Network namespace cleaned up | + +### Debugging + +Set `OPENSHELL_LOG_LEVEL=debug` in the wrapper script or environment to enable +verbose Supervisor logging. Debug output includes individual Landlock rule +applications, `ip` command invocations, and certificate processing details. + +### OpenShift Cluster Setup + +1. Create a custom SCC named `openshell-sandbox` with the required capabilities +2. Bind the SCC to the runner service account via a ClusterRoleBinding or + namespace-scoped RoleBinding to the `system:openshift:scc:openshell-sandbox` ClusterRole +3. Verify with `oc get pod <runner-pod> -n <runner-namespace> -o jsonpath='{.metadata.annotations.openshift\.io/scc}'` + — it should show `openshell-sandbox`