Skip to content

Commit 8115e32

Browse files
poszu and moshababo committed
Query poet proofs instead of relying on broadcasting (#3865)
## Motivation
Part of spacemeshos/pm#173
Closes #3746
Closes #3814

## Changes
- removed broadcasting method from `GatewayService`
- removed p2p listeners for broadcasted poet proofs
- changed `NIPostBuilder` to query poets for proofs after the rounds end

## Test Plan
- added a system test in which nodes use different poets to verify that poet proofs are properly propagated between nodes

## TODO
- [ ] Bump poet to a released version in go.mod after spacemeshos/poet#187 is merged

## DevOps Notes
- [x] This PR does not require configuration changes (e.g., environment variables, GitHub secrets, VM resources)
- [ ] ~This PR does not affect public APIs~ Proof broadcasting was removed
- [ ] ~This PR does not rely on a new version of external services (PoET, elasticsearch, etc.)~ - It relies on a new Poet version
- [ ] ~This PR does not make changes to log messages (which monitoring infrastructure may rely on)~

Co-authored-by: moshababo <[email protected]>
1 parent e42272d commit 8115e32

36 files changed

+681
-588
lines changed

activation/activation.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,9 @@ func (b *Builder) createAtx(ctx context.Context) (*types.ActivationTx, error) {
659659
challenge.InitialPost = b.initialPost
660660
challenge.InitialPostMetadata = b.initialPostMeta
661661
}
662-
nipost, postDuration, err := b.nipostBuilder.BuildNIPost(ctx, &challenge, poetProofDeadline)
662+
buildingNipostCtx, cancel := context.WithDeadline(ctx, nextPoetRoundStart)
663+
defer cancel()
664+
nipost, postDuration, err := b.nipostBuilder.BuildNIPost(buildingNipostCtx, &challenge, poetProofDeadline)
663665
if err != nil {
664666
return nil, fmt.Errorf("failed to build NIPost: %w", err)
665667
}

activation/activation_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -1408,7 +1408,7 @@ func TestBuilder_UpdatePoets(t *testing.T) {
14081408
atxHdlr := newAtxHandler(t, cdb)
14091409
b := newBuilder(t, cdb, atxHdlr, WithPoETClientInitializer(func(string) PoetProvingServiceClient {
14101410
poet := NewMockPoetProvingServiceClient(gomock.NewController(t))
1411-
poet.EXPECT().PoetServiceID(gomock.Any()).Times(1).Return([]byte("poetid"), nil)
1411+
poet.EXPECT().PoetServiceID(gomock.Any()).AnyTimes().Return([]byte("poetid"), nil)
14121412
return poet
14131413
}))
14141414

@@ -1430,7 +1430,7 @@ func TestBuilder_UpdatePoetsUnstable(t *testing.T) {
14301430
atxHdlr := newAtxHandler(t, cdb)
14311431
b := newBuilder(t, cdb, atxHdlr, WithPoETClientInitializer(func(string) PoetProvingServiceClient {
14321432
poet := NewMockPoetProvingServiceClient(gomock.NewController(t))
1433-
poet.EXPECT().PoetServiceID(gomock.Any()).Times(1).Return([]byte("poetid"), errors.New("ERROR"))
1433+
poet.EXPECT().PoetServiceID(gomock.Any()).AnyTimes().Return([]byte("poetid"), errors.New("ERROR"))
14341434
return poet
14351435
}))
14361436

activation/challenge_verifier.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ func (v *challengeVerifier) Verify(ctx context.Context, challengeBytes, signatur
8484
}
8585

8686
func (v *challengeVerifier) verifyChallenge(ctx context.Context, challenge *types.PoetChallenge, nodeID types.NodeID) error {
87-
log.With().Info("verifying challenge", log.Object("challenge", challenge))
87+
log.GetLogger().WithContext(ctx).With().Info("Verifying challenge", log.Object("challenge", challenge))
8888

8989
if err := validateNumUnits(&v.cfg, challenge.NumUnits); err != nil {
9090
return fmt.Errorf("%w: %v", ErrChallengeInvalid, err)

activation/interface.go

-6
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,6 @@ type atxReceiver interface {
1313
OnAtx(*types.ActivationTxHeader)
1414
}
1515

16-
type poetValidatorPersister interface {
17-
HasProof(types.PoetProofRef) bool
18-
Validate(types.PoetProof, []byte, string, []byte) error
19-
StoreProof(context.Context, types.PoetProofRef, *types.PoetProofMessage) error
20-
}
21-
2216
type nipostValidator interface {
2317
Validate(nodeId types.NodeID, atxId types.ATXID, NIPost *types.NIPost, expectedChallenge types.Hash32, numUnits uint32) (uint64, error)
2418
ValidatePost(nodeId types.NodeID, atxId types.ATXID, Post *types.Post, PostMetadata *types.PostMetadata, numUnits uint32) error

activation/mocks.go

-65
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

activation/nipost.go

+110-53
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ type PoetProvingServiceClient interface {
2727
Submit(ctx context.Context, challenge []byte, signature []byte) (*types.PoetRound, error)
2828

2929
// PoetServiceID returns the public key of the PoET proving service.
30-
PoetServiceID(context.Context) ([]byte, error)
30+
PoetServiceID(context.Context) (types.PoetServiceID, error)
31+
32+
GetProof(ctx context.Context, roundID string) (*types.PoetProofMessage, error)
3133
}
3234

3335
func (nb *NIPostBuilder) load(challenge types.Hash32) {
@@ -62,9 +64,8 @@ type NIPostBuilder struct {
6264
}
6365

6466
type poetDbAPI interface {
65-
GetMembershipMap(proofRef types.PoetProofRef) (map[types.Hash32]bool, error)
6667
GetProof(types.PoetProofRef) (*types.PoetProof, error)
67-
GetProofRef(poetID []byte, roundID string) (types.PoetProofRef, error)
68+
ValidateAndStore(ctx context.Context, proofMessage *types.PoetProofMessage) error
6869
}
6970

7071
// NewNIPostBuilder returns a NIPostBuilder.
@@ -129,10 +130,10 @@ func (nb *NIPostBuilder) BuildNIPost(ctx context.Context, challenge *types.PoetC
129130

130131
validPoetRequests := make([]types.PoetRequest, 0, len(poetRequests))
131132
for _, req := range poetRequests {
132-
if !bytes.Equal(req.PoetRound.ChallengeHash, challengeHash[:]) {
133+
if !bytes.Equal(req.PoetRound.ChallengeHash[:], challengeHash[:]) {
133134
nb.log.With().Info(
134135
"poet returned invalid challenge hash",
135-
log.Binary("hash", req.PoetRound.ChallengeHash),
136+
req.PoetRound.ChallengeHash,
136137
log.String("poet_id", hex.EncodeToString(req.PoetServiceID)),
137138
)
138139
} else {
@@ -148,17 +149,16 @@ func (nb *NIPostBuilder) BuildNIPost(ctx context.Context, challenge *types.PoetC
148149
nb.persist()
149150
}
150151

151-
// Phase 1: receive proofs from PoET services
152+
// Phase 1: query PoET services for proofs
152153
if nb.state.PoetProofRef == nil {
153-
select {
154-
case <-time.After(time.Until(poetProofDeadline)):
155-
case <-ctx.Done():
156-
return nil, 0, ctx.Err()
154+
getProofsCtx, cancel := context.WithDeadline(ctx, poetProofDeadline)
155+
defer cancel()
156+
poetProofRef, err := nb.getBestProof(getProofsCtx, challengeHash)
157+
if err != nil {
158+
return nil, 0, &PoetSvcUnstableError{msg: "getBestProof failed", source: err}
157159
}
158-
poetProofRef := nb.getBestProof(ctx, challengeHash)
159160
if poetProofRef == nil {
160-
// Time is up - ATX challenge is expired.
161-
return nil, 0, ErrPoetProofNotReceived
161+
return nil, 0, &PoetSvcUnstableError{source: ErrPoetProofNotReceived}
162162
}
163163
nb.state.PoetProofRef = poetProofRef
164164
nb.persist()
@@ -194,26 +194,20 @@ func (nb *NIPostBuilder) BuildNIPost(ctx context.Context, challenge *types.PoetC
194194
}
195195

196196
// Submit the challenge to a single PoET.
197-
func submitPoetChallenge(ctx context.Context, logger log.Log, poet PoetProvingServiceClient, challenge []byte, signature []byte) (*types.PoetRequest, error) {
197+
func (nb *NIPostBuilder) submitPoetChallenge(ctx context.Context, poet PoetProvingServiceClient, challenge []byte, signature []byte) (*types.PoetRequest, error) {
198198
poetServiceID, err := poet.PoetServiceID(ctx)
199199
if err != nil {
200200
return nil, &PoetSvcUnstableError{msg: "failed to get PoET service ID", source: err}
201201
}
202-
203-
logger.With().Debug("submitting challenge to poet proving service",
204-
log.String("poet_id", hex.EncodeToString(poetServiceID)))
202+
logger := nb.log.WithFields(log.String("poet_id", hex.EncodeToString(poetServiceID)))
203+
logger.Debug("submitting challenge to poet proving service")
205204

206205
round, err := poet.Submit(ctx, challenge, signature)
207206
if err != nil {
208-
logger.With().Error("failed to submit challenge to poet proving service",
209-
log.String("poet_id", hex.EncodeToString(poetServiceID)),
210-
log.Err(err))
211207
return nil, &PoetSvcUnstableError{msg: "failed to submit challenge to poet service", source: err}
212208
}
213209

214-
logger.With().Info("challenge submitted to poet proving service",
215-
log.String("poet_id", hex.EncodeToString(poetServiceID)),
216-
log.String("round_id", round.ID))
210+
logger.With().Info("challenge submitted to poet proving service", log.String("round", round.ID))
217211

218212
return &types.PoetRequest{
219213
PoetRound: round,
@@ -228,7 +222,7 @@ func (nb *NIPostBuilder) submitPoetChallenges(ctx context.Context, challenge []b
228222
for _, poetProver := range nb.poetProvers {
229223
poet := poetProver
230224
g.Go(func() error {
231-
if poetRequest, err := submitPoetChallenge(ctx, nb.log, poet, challenge, signature); err == nil {
225+
if poetRequest, err := nb.submitPoetChallenge(ctx, poet, challenge, signature); err == nil {
232226
poetRequestsChannel <- *poetRequest
233227
} else {
234228
nb.log.With().Warning("failed to submit challenge to PoET", log.Err(err))
@@ -246,47 +240,110 @@ func (nb *NIPostBuilder) submitPoetChallenges(ctx context.Context, challenge []b
246240
return poetRequests
247241
}
248242

249-
func (nb *NIPostBuilder) getBestProof(ctx context.Context, challenge *types.Hash32) types.PoetProofRef {
250-
type poetProof struct {
251-
ref types.PoetProofRef
252-
leafCount uint64
243+
func (nb *NIPostBuilder) getPoetClient(ctx context.Context, id types.PoetServiceID) PoetProvingServiceClient {
244+
for _, client := range nb.poetProvers {
245+
if clientId, err := client.PoetServiceID(ctx); err == nil && bytes.Equal(id, clientId) {
246+
return client
247+
}
253248
}
254-
var bestProof *poetProof
249+
return nil
250+
}
255251

256-
for _, poetSubmission := range nb.state.PoetRequests {
257-
ref, err := nb.poetDB.GetProofRef(poetSubmission.PoetServiceID, poetSubmission.PoetRound.ID)
258-
if err != nil {
259-
continue
252+
func membersContain(members [][]byte, challenge *types.Hash32) bool {
253+
for _, member := range members {
254+
if bytes.Equal(member, challenge.Bytes()) {
255+
return true
260256
}
261-
// We are interested only in proofs that we are members of
262-
membership, err := nb.poetDB.GetMembershipMap(ref)
263-
if err != nil {
264-
nb.log.With().Panic("failed to fetch membership for poet proof", log.Binary("challenge", challenge[:]))
257+
}
258+
return false
259+
}
260+
261+
func (nb *NIPostBuilder) getProofWithRetry(ctx context.Context, client PoetProvingServiceClient, roundID string, retryInterval time.Duration) (*types.PoetProofMessage, error) {
262+
for {
263+
proof, err := client.GetProof(ctx, roundID)
264+
switch {
265+
case err == nil:
266+
return proof, nil
267+
case errors.Is(err, ErrUnavailable) || errors.Is(err, ErrNotFound):
268+
nb.log.With().Debug("Proof not found, retrying", log.Duration("interval", retryInterval))
269+
select {
270+
case <-ctx.Done():
271+
return nil, fmt.Errorf("retry was canceled: %w", ctx.Err())
272+
case <-time.After(retryInterval):
273+
}
274+
default:
275+
return nil, err
265276
}
266-
if !membership[*challenge] {
267-
nb.log.With().Debug("poet proof membership doesn't contain the challenge", log.Binary("challenge", challenge[:]))
277+
}
278+
}
279+
280+
func (nb *NIPostBuilder) getBestProof(ctx context.Context, challenge *types.Hash32) (types.PoetProofRef, error) {
281+
proofs := make(chan *types.PoetProofMessage, len(nb.state.PoetRequests))
282+
283+
var eg errgroup.Group
284+
for _, r := range nb.state.PoetRequests {
285+
logger := nb.log.WithFields(log.String("poet_id", hex.EncodeToString(r.PoetServiceID)), log.String("round", r.PoetRound.ID))
286+
client := nb.getPoetClient(ctx, r.PoetServiceID)
287+
if client == nil {
288+
logger.Warning("Poet client not found")
268289
continue
269290
}
270-
proof, err := nb.poetDB.GetProof(ref)
271-
if err != nil {
272-
nb.log.Panic("Inconsistent state of poetDB. Received poetProofRef which doesn't exist in poetDB.")
273-
}
274-
nb.log.With().Info("Got a new PoET proof", log.Uint64("leafCount", proof.LeafCount), log.Binary("ref", ref))
291+
round := r.PoetRound.ID
292+
// Time to wait before querying for the proof
293+
// The additional second is an optimization to be nicer to poet
294+
// and don't accidentally ask it too soon and have to retry.
295+
waitTime := time.Until(r.PoetRound.End.IntoTime()) + time.Second
296+
eg.Go(func() error {
297+
logger.With().Info("Waiting till poet round end", log.Duration("wait time", waitTime))
298+
select {
299+
case <-ctx.Done():
300+
logger.With().Info("Waiting interrupted", log.Err(ctx.Err()))
301+
return ctx.Err()
302+
case <-time.After(waitTime):
303+
}
304+
proof, err := nb.getProofWithRetry(ctx, client, round, time.Second)
305+
if err != nil {
306+
logger.With().Warning("Failed to get proof from Poet", log.Err(err))
307+
return nil
308+
}
275309

276-
if bestProof == nil || bestProof.leafCount < proof.LeafCount {
277-
bestProof = &poetProof{
278-
ref: ref,
279-
leafCount: proof.LeafCount,
310+
if err := nb.poetDB.ValidateAndStore(ctx, proof); err != nil && !errors.Is(err, ErrObjectExists) {
311+
logger.With().Warning("Failed to validate and store proof", log.Err(err), log.Object("proof", proof))
312+
return nil
280313
}
314+
315+
// We are interested only in proofs that we are members of
316+
if !membersContain(proof.Members, challenge) {
317+
logger.With().Warning("poet proof membership doesn't contain the challenge", challenge)
318+
return nil
319+
}
320+
321+
proofs <- proof
322+
return nil
323+
})
324+
}
325+
if err := eg.Wait(); err != nil {
326+
return nil, fmt.Errorf("querying for proofs failed: %w", err)
327+
}
328+
close(proofs)
329+
330+
var bestProof *types.PoetProofMessage
331+
332+
for proof := range proofs {
333+
nb.log.With().Info("Got a new PoET proof", log.Uint64("leafCount", proof.LeafCount))
334+
if bestProof == nil || bestProof.LeafCount < proof.LeafCount {
335+
bestProof = proof
281336
}
282337
}
283338

284339
if bestProof != nil {
285-
nb.log.With().Debug("Selected the best PoET proof",
286-
log.Uint64("leafCount", bestProof.leafCount),
287-
log.Binary("ref", bestProof.ref))
288-
return bestProof.ref
340+
ref, err := bestProof.Ref()
341+
if err != nil {
342+
return nil, fmt.Errorf("failed to get proof ref: %w", err)
343+
}
344+
nb.log.With().Info("Selected the best proof", log.Uint64("leafCount", bestProof.LeafCount), log.Binary("ref", ref))
345+
return ref, nil
289346
}
290347

291-
return nil
348+
return nil, ErrPoetProofNotReceived
292349
}

0 commit comments

Comments
 (0)