Skip to content

Commit a3fc3aa

Browse files
committed
chore: rebase on rc15 candidate
1 parent 5a982a1 commit a3fc3aa

5 files changed

Lines changed: 351 additions & 9 deletions

File tree

cmd/obol/sell.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"syscall"
2323
"time"
2424

25+
"github.com/ObolNetwork/obol-stack/internal/agentcrd"
2526
"github.com/ObolNetwork/obol-stack/internal/config"
2627
"github.com/ObolNetwork/obol-stack/internal/enclave"
2728
"github.com/ObolNetwork/obol-stack/internal/erc8004"
@@ -3827,11 +3828,11 @@ offers survive in etcd with UpstreamHealthy=False, so the public catalog
38273828
directly so a reboot does not require a full stack-up to recover.
38283829
38293830
Idempotent: offers whose gateway is still running are skipped, and the
3830-
kubectl applies re-assert existing objects. A replayed agent offer needs
3831-
its Agent CR: still present after a reboot, but after a full stack
3832-
recreation the offer waits (controller reports the missing agent) until
3833-
'obol agent new' recreates it. 'sell mcp' servers are foreground
3834-
processes with no ServiceOffer and are not resumed.
3831+
kubectl applies re-assert existing objects. Recorded Agent CRs
3832+
($OBOL_CONFIG_DIR/agents/) are re-applied BEFORE offers so agent-backed
3833+
offers resolve their agent.ref even after a full stack recreation.
3834+
'sell mcp' servers are foreground processes with no ServiceOffer and are
3835+
not resumed.
38353836
38363837
Examples:
38373838
obol sell resume Resume all persisted offers now
@@ -3852,6 +3853,9 @@ Examples:
38523853
if err := waitForClusterAPI(ctx, cfg, u, 3*time.Minute); err != nil {
38533854
u.Warnf("cluster API not ready: %v (continuing — per-offer applies may fail)", err)
38543855
}
3856+
// Recorded Agent CRs first: agent-backed offers resolve
3857+
// agent.ref and would dangle on a freshly-recreated cluster.
3858+
agentcrd.ResumeAll(cfg, u)
38553859
if err := resumeSellOffers(ctx, cfg, u); err != nil {
38563860
return err
38573861
}

cmd/obol/stackup_resume_guard_test.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,30 @@ func TestStackUpAction_ReplaysRecordedState(t *testing.T) {
4040
t.Error("agentcrd.ResumeAll must run BEFORE resumeSellOffers — agent-backed ServiceOffers need their Agent CR first")
4141
}
4242
}
43+
44+
// TestSellResumeAction_ReplaysAgentsBeforeOffers extends the same guard to
45+
// `obol sell resume` (the reboot-recovery path, incl. the systemd boot
46+
// unit): after a full stack recreation the ledger replays agent-backed
47+
// offers, which dangle unless recorded Agent CRs are re-applied first.
48+
func TestSellResumeAction_ReplaysAgentsBeforeOffers(t *testing.T) {
49+
src, err := os.ReadFile("sell.go")
50+
if err != nil {
51+
t.Fatalf("read sell.go: %v", err)
52+
}
53+
body := string(src)
54+
55+
agentsIdx := strings.Index(body, "agentcrd.ResumeAll(")
56+
if agentsIdx < 0 {
57+
t.Fatal("cmd/obol/sell.go (sell resume action) must call agentcrd.ResumeAll before replaying offers")
58+
}
59+
// The resume action's offer replay is the only call site that returns
60+
// the error (`if err := resumeSellOffers(...)`); main.go's stack-up
61+
// call warns instead.
62+
offersIdx := strings.Index(body, "if err := resumeSellOffers(ctx, cfg, u); err != nil")
63+
if offersIdx < 0 {
64+
t.Fatal("expected the sell resume action's resumeSellOffers call in sell.go")
65+
}
66+
if agentsIdx > offersIdx {
67+
t.Error("agentcrd.ResumeAll must run BEFORE resumeSellOffers in the sell resume action")
68+
}
69+
}

internal/agentcrd/manifest.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ func ResumeAll(cfg *config.Config, u *ui.UI) {
8989
u.Warnf("Could not read recorded agent %s: %v", name, err)
9090
continue
9191
}
92+
warnIfWalletWouldRegenerate(cfg, name, data, u)
9293
nsErr := kubectl.PipeCommands(bin, kubeconfig,
9394
[]string{"create", "namespace", Namespace(name), "--dry-run=client", "-o", "yaml"},
9495
[]string{"apply", "-f", "-"})
@@ -102,3 +103,37 @@ func ResumeAll(cfg *config.Config, u *ui.UI) {
102103
u.Successf("Re-applied agent %s", name)
103104
}
104105
}
106+
107+
// warnIfWalletWouldRegenerate flags the funds-stranding edge: replaying a
108+
// wallet-bearing agent against a cluster that lost its keystore Secret
109+
// (full recreation without a prior `obol stack import`) makes the
110+
// controller mint a FRESH wallet — anything held at the old address is
111+
// stranded unless the operator restores the keystore first. Best-effort:
112+
// an unreachable cluster or a wallet-less agent stays silent.
113+
func warnIfWalletWouldRegenerate(cfg *config.Config, name string, manifest []byte, u *ui.UI) {
114+
var doc struct {
115+
Spec struct {
116+
Wallet struct {
117+
Create bool `yaml:"create"`
118+
} `yaml:"wallet"`
119+
} `yaml:"spec"`
120+
}
121+
if err := yaml.Unmarshal(manifest, &doc); err != nil || !doc.Spec.Wallet.Create {
122+
return
123+
}
124+
bin, kubeconfig := kubectl.Paths(cfg)
125+
if err := kubectl.RunSilent(bin, kubeconfig,
126+
"get", "namespace", Namespace(name)); err != nil {
127+
// Namespace itself is gone — full recreation. The keystore Secret
128+
// check below would also fail, but distinguish nothing: same warning.
129+
u.Warnf("Agent %s declares a wallet but its namespace is gone — the controller will mint a NEW wallet on replay. "+
130+
"If the old wallet held funds, restore the keystore first: 'obol stack import <backup> --cluster-only' or 'obol agent wallet restore'.", name)
131+
return
132+
}
133+
if err := kubectl.RunSilent(bin, kubeconfig,
134+
"get", "secret", "remote-signer-keystore", "-n", Namespace(name)); err != nil {
135+
u.Warnf("Agent %s declares a wallet but namespace %s has no keystore Secret — the controller will mint a NEW wallet. "+
136+
"If the old wallet held funds, restore it first: 'obol stack import <backup> --cluster-only' or 'obol agent wallet restore'.",
137+
name, Namespace(name))
138+
}
139+
}

0 commit comments

Comments
 (0)