diff --git a/.claude/commands/submit-pr.md b/.claude/commands/submit-pr.md index 5d2b88033..3e9700233 100644 --- a/.claude/commands/submit-pr.md +++ b/.claude/commands/submit-pr.md @@ -165,8 +165,8 @@ This triggers Mergify to add the PR to the merge queue once all conditions are m After creating/updating PR and queuing, show: ``` -āœ… PR # created: -šŸ”„ Queued for auto-merge via Mergify +PR # created: +Queued for auto-merge via Mergify Title: (<X>/72 chars) Tags: <tag1>, <tag2>, <tag3> diff --git a/cmd/prismctl/cmd/local.go b/cmd/prismctl/cmd/local.go index da90637ed..5207bd3e5 100644 --- a/cmd/prismctl/cmd/local.go +++ b/cmd/prismctl/cmd/local.go @@ -4,14 +4,13 @@ package cmd import ( "context" "fmt" + "net/http" "os" - "os/exec" "path/filepath" "strings" - "syscall" "time" - configv1 "github.com/jrepp/prism-data-layer/pkg/plugin/gen/prism/config/v1" + "github.com/jrepp/prism-data-layer/pkg/launcher" pb "github.com/jrepp/prism-data-layer/pkg/plugin/gen/prism" "github.com/spf13/cobra" "google.golang.org/grpc" @@ -53,34 +52,39 @@ var localCmd = &cobra.Command{ Long: `Manage a local Prism stack for development and testing. The local stack includes: -- prism-admin: Admin server managing proxy configurations -- prism-proxy (2 instances): Data plane proxies -- pattern-launcher: Pattern lifecycle manager +- prism-admin (3 instances): HA admin cluster managing proxy configurations +- prism-proxy (3 instances): Data plane proxies +- prism-launcher: Pattern lifecycle manager - keyvalue-runner: KeyValue pattern with MemStore backend - -All components run from the build/binaries/ directory.`, +- mailbox-runner: Mailbox pattern for $admin namespace +- prism-web-console: Web admin console + +All components run from the build/binaries/ directory with hierarchical startup: + 1. Admin HA cluster (admin-1, admin-2, admin-3) + 2. Proxies (connected to admin cluster) + 3. Launcher and runners (connected to proxy) + 4. Web console (connected to admin via launcher)`, } // localStartCmd starts the local Prism stack var localStartCmd = &cobra.Command{ Use: "start", Short: "Start the local Prism stack", - Long: `Start a complete local Prism development stack. - -This starts: -1. prism-admin with: - - gRPC API on :8981 (admin control plane) - - Web UI on :8080 (admin dashboard at http://localhost:8080) -2. pattern-launcher on :7070 (connected to prism-admin) -3. keyvalue-runner with memstore backend - -All processes run in the background and logs are captured. - -The Admin Web UI provides: - - Real-time dashboard with system metrics - - Proxy status and management - - Launcher status and capacity monitoring - - Namespace management`, + Long: `Start a complete local Prism development stack with hierarchical startup. + +Startup sequence: +1. prism-admin-1 on :8981 (admin HA instance 1) +2. prism-admin-2 on :8982 (admin HA instance 2) +3. prism-admin-3 on :8983 (admin HA instance 3) +4. prism-proxy-1 on :9090 (connected to admin cluster) +5. prism-proxy-2 on :9091 (connected to admin cluster) +6. prism-proxy-3 on :9092 (connected to admin cluster) +7. prism-launcher on :7070 (pattern lifecycle manager) +8. keyvalue-runner (memstore backend, launcher-managed) +9. mailbox-runner (admin namespace, launcher-managed) +10. prism-web-console on :8000 (web admin UI, launcher-managed) + +All processes run in the background and logs are captured.`, RunE: func(cmd *cobra.Command, args []string) error { return startLocalStack() }, @@ -112,12 +116,13 @@ var localLogsCmd = &cobra.Command{ Short: "Show logs from local Prism components", Long: `Show logs from local Prism stack components. -Components: admin, proxy1, proxy2, launcher, keyvalue +Components: admin-1, admin-2, admin-3, proxy, launcher, keyvalue, web-console Example: - prismctl local logs admin - prismctl local logs proxy1`, - ValidArgs: []string{"admin", "proxy1", "proxy2", "launcher", "keyvalue"}, + prismctl local logs admin-1 + prismctl local logs proxy + prismctl local logs web-console`, + ValidArgs: []string{"admin-1", "admin-2", "admin-3", "proxy", "launcher", "keyvalue", "web-console"}, Args: cobra.MaximumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { component := "all" @@ -128,16 +133,16 @@ Example: }, } -// startLocalStack starts all components of the local stack +// startLocalStack starts all components of the local stack using the orchestrator func startLocalStack() error { + ctx := context.Background() + // Ensure we're in or can find the binaries directory binDir, err := findBinariesDir() if err != nil { return fmt.Errorf("cannot find binaries directory: %w", err) } - fmt.Printf("šŸš€ Starting local Prism stack from %s\n\n", binDir) - // Create logs directory logsDir := filepath.Join(binDir, "..", "logs") if err := os.MkdirAll(logsDir, 0755); err != nil { @@ -156,103 +161,28 @@ func startLocalStack() error { return fmt.Errorf("patterns directory not found at %s", patternsDir) } - // Start components in order - components := []struct { - name string - binary string - args []string - logFile string - delay time.Duration - }{ - { - name: "prism-admin", - binary: filepath.Join(absBinDir, "prism-admin"), - args: []string{"serve", "--port=8981"}, - logFile: filepath.Join(logsDir, "admin.log"), - delay: 2 * time.Second, - }, - { - name: "prism-launcher", - binary: filepath.Join(absBinDir, "prism-launcher"), - args: []string{"--admin-endpoint=localhost:8981", "--launcher-id=launcher-01", "--grpc-port=7070", "--patterns-dir=" + patternsDir}, - logFile: filepath.Join(logsDir, "launcher.log"), - delay: 2 * time.Second, - }, - { - name: "keyvalue-runner", - binary: filepath.Join(absBinDir, "keyvalue-runner"), - args: []string{"--proxy-addr=localhost:9090"}, - logFile: filepath.Join(logsDir, "keyvalue.log"), - delay: 1 * time.Second, - }, + // Create local stack configuration + config := &launcher.LocalStackConfig{ + BinDir: absBinDir, + LogsDir: logsDir, + PatternsDir: patternsDir, + AdminCount: 3, // HA with 3 admin instances (Raft quorum) + AdminBasePorts: []int{8981, 8982, 8983}, // Ports for admin instances + ProxyCount: 3, // 3 proxy instances for load distribution + ProxyPort: 9090, // Proxy data plane base port (9090, 9091, 9092) + RunnerPort: 9095, // KeyValue runner port } - for _, comp := range components { - fmt.Printf(" Starting %s...\n", comp.name) - - // Check if binary exists - if _, err := os.Stat(comp.binary); os.IsNotExist(err) { - return fmt.Errorf("binary not found: %s (run 'make build' first)", comp.binary) - } - - // Create log file - logFile, err := os.Create(comp.logFile) - if err != nil { - return fmt.Errorf("failed to create log file for %s: %w", comp.name, err) - } - - // Start process - use exec.Command (not CommandContext) so process survives parent exit - cmd := exec.Command(comp.binary, comp.args...) - cmd.Stdout = logFile - cmd.Stderr = logFile - cmd.Dir = binDir - - // Detach process from parent so it continues running after prismctl exits - // This creates a new process group for the child - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, - } - - if err := cmd.Start(); err != nil { - logFile.Close() - return fmt.Errorf("failed to start %s: %w", comp.name, err) - } - - // Close the log file handle in the parent process - // The child process has its own handle and will continue writing - logFile.Close() - - // Store PID - localStackPIDs[comp.name] = cmd.Process.Pid - fmt.Printf(" āœ… %s started (PID: %d)\n", comp.name, cmd.Process.Pid) - - // Save PID to file for stop command - pidFile := filepath.Join(logsDir, fmt.Sprintf("%s.pid", comp.name)) - if err := os.WriteFile(pidFile, []byte(fmt.Sprintf("%d", cmd.Process.Pid)), 0644); err != nil { - fmt.Printf(" āš ļø Warning: Could not save PID file: %v\n", err) - } - - // Wait before starting next component - if comp.delay > 0 { - time.Sleep(comp.delay) - } + // Create and start local stack + stack := launcher.NewLocalStack(config) + if err := stack.Start(ctx); err != nil { + return err } - fmt.Printf("\nāœ… Local Prism stack started successfully!\n\n") - fmt.Println("šŸ“Š Stack Overview:") - fmt.Println(" • Admin Control Plane: localhost:8981 (gRPC)") - fmt.Println(" • Admin Web UI: http://localhost:8080 🌐") - fmt.Println(" • Pattern Launcher: localhost:7070") - fmt.Println(" • KeyValue: Ready (MemStore backend)") - fmt.Println() - fmt.Println("šŸ“ View logs: prismctl local logs [component]") - fmt.Println("šŸ›‘ Stop stack: prismctl local stop") - return nil } -// stopLocalStack stops all components of the local stack in proper order -// Pattern runners → Launchers → Admin (last) +// stopLocalStack stops all components of the local stack func stopLocalStack() error { binDir, err := findBinariesDir() if err != nil { @@ -261,119 +191,57 @@ func stopLocalStack() error { logsDir := filepath.Join(binDir, "..", "logs") - fmt.Println("šŸ›‘ Stopping local Prism stack (graceful shutdown)...") - fmt.Println() - - // Step 1: Check admin connectivity for coordinated shutdown - adminRunning := checkAdminConnectivity() - if adminRunning { - fmt.Println(" āœ… Admin control plane is running - coordinating graceful shutdown") - } else { - fmt.Println(" āš ļø Admin not reachable - proceeding with direct shutdown") - } - fmt.Println() - - // Step 2: Stop pattern runners first (they depend on launchers/proxies) - fmt.Println(" Stopping pattern runners...") - patternRunners := []string{"keyvalue-runner"} - for _, comp := range patternRunners { - if err := stopComponent(logsDir, comp); err != nil { - fmt.Printf(" āš ļø %s: %v\n", comp, err) - } - } - fmt.Println() - - // Step 3: Stop launchers (they depend on admin) - fmt.Println(" Stopping launchers...") - launchers := []string{"prism-launcher"} - for _, comp := range launchers { - if err := stopComponent(logsDir, comp); err != nil { - fmt.Printf(" āš ļø %s: %v\n", comp, err) - } - } - fmt.Println() - - // Step 4: Wait a moment for graceful shutdown - fmt.Println(" Waiting for graceful shutdown...") - time.Sleep(1 * time.Second) - fmt.Println() + fmt.Println("šŸ›‘ Stopping local Prism stack...") - // Step 5: Finally, stop admin itself (no dependencies) - fmt.Println(" Stopping admin control plane...") - if err := stopComponent(logsDir, "prism-admin"); err != nil { - fmt.Printf(" āš ļø prism-admin: %v\n", err) + // Stop in reverse order: web console → runners → launcher → proxies → admins + components := []string{ + "prism-web-console", + "mailbox-runner", + "keyvalue-runner", + "prism-launcher", + "prism-proxy-3", + "prism-proxy-2", + "prism-proxy-1", + "prism-admin-3", + "prism-admin-2", + "prism-admin-1", } - fmt.Println() - fmt.Println("āœ… Local Prism stack stopped cleanly") - return nil -} + for _, comp := range components { + pidFile := filepath.Join(logsDir, fmt.Sprintf("%s.pid", comp)) + pidData, err := os.ReadFile(pidFile) + if err != nil { + fmt.Printf(" āš ļø %s: No PID file found\n", comp) + continue + } -// checkAdminConnectivity checks if admin is reachable -func checkAdminConnectivity() bool { - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() + var pid int + if _, err := fmt.Sscanf(string(pidData), "%d", &pid); err != nil { + fmt.Printf(" āš ļø %s: Invalid PID file\n", comp) + continue + } - conn, err := grpc.DialContext( - ctx, - "localhost:8981", - grpc.WithTransportCredentials(insecure.NewCredentials()), - grpc.WithBlock(), - ) - if err != nil { - return false - } - defer conn.Close() + fmt.Printf(" Stopping %s (PID: %d)...\n", comp, pid) - return true -} + // Send SIGTERM to process + process, err := os.FindProcess(pid) + if err != nil { + fmt.Printf(" āš ļø Process not found\n") + continue + } -// stopComponent stops a single component by PID file -func stopComponent(logsDir, component string) error { - pidFile := filepath.Join(logsDir, fmt.Sprintf("%s.pid", component)) - pidData, err := os.ReadFile(pidFile) - if err != nil { - return fmt.Errorf("no PID file found") - } + if err := process.Signal(os.Interrupt); err != nil { + fmt.Printf(" āš ļø Failed to stop: %v\n", err) + continue + } - var pid int - if _, err := fmt.Sscanf(string(pidData), "%d", &pid); err != nil { - return fmt.Errorf("invalid PID file") - } + fmt.Printf(" āœ… Stopped\n") - // Check if process exists first - if err := syscall.Kill(pid, syscall.Signal(0)); err != nil { + // Remove PID file os.Remove(pidFile) - return fmt.Errorf("not running (PID: %d)", pid) } - fmt.Printf(" Stopping %s (PID: %d)...\n", component, pid) - - // Send SIGTERM to process - process, err := os.FindProcess(pid) - if err != nil { - return fmt.Errorf("process not found") - } - - if err := process.Signal(os.Interrupt); err != nil { - return fmt.Errorf("failed to send signal: %v", err) - } - - // Wait for process to exit (up to 5 seconds) - for i := 0; i < 50; i++ { - if err := syscall.Kill(pid, syscall.Signal(0)); err != nil { - // Process has exited - os.Remove(pidFile) - fmt.Printf(" āœ… %s stopped\n", component) - return nil - } - time.Sleep(100 * time.Millisecond) - } - - // If still running after 5 seconds, force kill - fmt.Printf(" āš ļø %s did not stop gracefully, sending SIGKILL...\n", component) - process.Signal(syscall.SIGKILL) - os.Remove(pidFile) + fmt.Println("\nāœ… Local Prism stack stopped") return nil } @@ -388,7 +256,18 @@ func showLocalStackStatus() error { fmt.Println("šŸ“Š Local Prism Stack Status") - components := []string{"prism-admin", "prism-launcher", "keyvalue-runner"} + components := []string{ + "prism-admin-1", + "prism-admin-2", + "prism-admin-3", + "prism-proxy-1", + "prism-proxy-2", + "prism-proxy-3", + "prism-launcher", + "keyvalue-runner", + "mailbox-runner", + "prism-web-console", + } for _, comp := range components { pidFile := filepath.Join(logsDir, fmt.Sprintf("%s.pid", comp)) @@ -404,18 +283,16 @@ func showLocalStackStatus() error { continue } - // Check if process is running using signal 0 (null signal) - // This checks for process existence without actually sending a signal - err = syscall.Kill(pid, syscall.Signal(0)) + // Check if process is running + process, err := os.FindProcess(pid) if err != nil { - if err == syscall.ESRCH { - fmt.Printf(" āŒ %s: Not running (PID: %d not found)\n", comp, pid) - } else if err == syscall.EPERM { - // Process exists but we don't have permission (means it's running) - fmt.Printf(" āœ… %s: Running (PID: %d)\n", comp, pid) - } else { - fmt.Printf(" āŒ %s: Not running (PID: %d exited)\n", comp, pid) - } + fmt.Printf(" āŒ %s: Not running (PID: %d not found)\n", comp, pid) + continue + } + + // Try to signal the process to check if it's alive + if err := process.Signal(os.Signal(nil)); err != nil { + fmt.Printf(" āŒ %s: Not running (PID: %d exited)\n", comp, pid) continue } @@ -435,7 +312,7 @@ func showLocalStackLogs(component string) error { logsDir := filepath.Join(binDir, "..", "logs") if component == "all" { - components := []string{"admin", "proxy1", "proxy2", "launcher", "keyvalue"} + components := []string{"admin-1", "admin-2", "admin-3", "proxy-1", "proxy-2", "proxy-3", "launcher", "keyvalue", "mailbox", "web-console"} for _, comp := range components { fmt.Printf("\n=== %s ===\n", comp) showComponentLog(logsDir, comp) @@ -450,11 +327,16 @@ func showLocalStackLogs(component string) error { func showComponentLog(logsDir, component string) error { // Map component name to log file name logMap := map[string]string{ - "admin": "admin.log", - "proxy1": "proxy1.log", - "proxy2": "proxy2.log", - "launcher": "launcher.log", - "keyvalue": "keyvalue.log", + "admin-1": "prism-admin-1.log", + "admin-2": "prism-admin-2.log", + "admin-3": "prism-admin-3.log", + "proxy-1": "prism-proxy-1.log", + "proxy-2": "prism-proxy-2.log", + "proxy-3": "prism-proxy-3.log", + "launcher": "prism-launcher.log", + "keyvalue": "keyvalue-runner.log", + "mailbox": "mailbox-runner.log", + "web-console": "prism-web-console.log", } logFile, ok := logMap[component] @@ -509,6 +391,59 @@ func isInBinariesDir(dir string) bool { return true } +// waitForHTTPHealth polls an HTTP health endpoint until it's ready or timeout +func waitForHTTPHealth(url string, timeout time.Duration) error { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() + + client := &http.Client{Timeout: 1 * time.Second} + + for { + select { + case <-ctx.Done(): + return fmt.Errorf("timeout waiting for %s", url) + case <-ticker.C: + resp, err := client.Get(url) + if err == nil && resp.StatusCode == http.StatusOK { + resp.Body.Close() + return nil + } + if resp != nil { + resp.Body.Close() + } + } + } +} + +// waitForGRPCHealth polls a gRPC endpoint until it's ready or timeout +func waitForGRPCHealth(addr string, timeout time.Duration) error { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return fmt.Errorf("timeout waiting for %s", addr) + case <-ticker.C: + conn, err := grpc.NewClient( + addr, + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + if err != nil { + continue + } + conn.Close() + return nil + } + } +} + // provisionNamespace creates a namespace via the control plane func provisionNamespace(namespace string) error { fmt.Printf("šŸ“¦ Provisioning namespace: %s\n", namespace) @@ -528,28 +463,30 @@ func provisionNamespace(namespace string) error { client := pb.NewControlPlaneClient(conn) - // Send CreateNamespace request using the new unified API - req := &configv1.NamespaceRequest{ - Namespace: namespace, - Team: "local-dev", - Description: "Local development namespace created by prismctl", - Patterns: []*configv1.Pattern{ - { - Name: "keyvalue", - Type: configv1.PatternType_PATTERN_TYPE_KEYVALUE, - Requires: []*configv1.Slot{ - { - Name: "store", - PatternType: configv1.PatternType_PATTERN_TYPE_KEYVALUE, - BackendType: configv1.BackendType_BACKEND_TYPE_MEMSTORE, - }, + // Send CreateNamespace request + req := &pb.CreateNamespaceRequest{ + Namespace: namespace, + RequestingProxy: "prismctl-local", + Principal: "local-user", + Config: &pb.NamespaceConfig{ + Backends: map[string]*pb.BackendConfig{ + "memstore": { + BackendType: "memstore", + ConnectionString: "memory://local", + Credentials: map[string]string{}, + Options: map[string]string{}, }, }, + Patterns: map[string]*pb.PatternConfig{ + "keyvalue": { + PatternName: "keyvalue", + Settings: map[string]string{}, + RequiredInterfaces: []string{"KeyValue"}, + }, + }, + Auth: &pb.AuthConfig{Enabled: false}, + Metadata: map[string]string{"source": "prismctl-local"}, }, - Auth: &configv1.AuthConfig{ - Enabled: false, - }, - Metadata: map[string]string{"source": "prismctl-local"}, } resp, err := client.CreateNamespace(ctx, req) @@ -582,19 +519,8 @@ func provisionNamespace(namespace string) error { fmt.Printf("āœ… Namespace Created Successfully\n") fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n") fmt.Printf(" Namespace: %s\n", namespace) - fmt.Printf(" ID: %s\n", resp.NamespaceId) - if len(resp.AssignedPatterns) > 0 { - fmt.Printf(" Patterns:\n") - for _, p := range resp.AssignedPatterns { - fmt.Printf(" - %s (%s)\n", p.Name, p.PatternType) - } - } - if len(resp.SlotBindings) > 0 { - fmt.Printf(" Slots:\n") - for _, sb := range resp.SlotBindings { - fmt.Printf(" - %s/%s → %s\n", sb.PatternName, sb.SlotName, sb.BackendId) - } - } + fmt.Printf(" Partition: %d\n", resp.AssignedPartition) + fmt.Printf(" Proxy: %s\n", resp.AssignedProxy) fmt.Printf(" Message: %s\n", resp.Message) fmt.Printf("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n") diff --git a/docs-cms/adr/adr-060-web-console-separation.md b/docs-cms/adr/adr-060-web-console-separation.md index a252cee09..ae4d974f0 100644 --- a/docs-cms/adr/adr-060-web-console-separation.md +++ b/docs-cms/adr/adr-060-web-console-separation.md @@ -1,18 +1,18 @@ --- +author: Jacob Repp +created: 2025-10-22 deciders: Core Team doc_uuid: a8f9d2c1-4b7e-4d3a-9f2c-8e1d5a6b7c9d id: adr-060 project_id: prism-data-layer status: Accepted -created: 2025-10-22 -updated: 2025-10-22 tags: - admin - web-console - architecture - separation-of-concerns title: Separate Web Console from Admin Control Plane -author: Jacob Repp +updated: 2025-10-22 --- ## Context