@@ -17,6 +17,7 @@ import (
17
17
"github.com/libp2p/go-libp2p/core/peer"
18
18
"github.com/libp2p/go-libp2p/core/peerstore"
19
19
"github.com/libp2p/go-libp2p/core/transport"
20
+ "golang.org/x/exp/slices"
20
21
21
22
logging "github.com/ipfs/go-log/v2"
22
23
ma "github.com/multiformats/go-multiaddr"
@@ -172,6 +173,11 @@ type Swarm struct {
172
173
m map [network.Notifiee ]struct {}
173
174
}
174
175
176
+ directConnNotifs struct {
177
+ sync.Mutex
178
+ m map [peer.ID ][]chan struct {}
179
+ }
180
+
175
181
transports struct {
176
182
sync.RWMutex
177
183
m map [int ]transport.Transport
@@ -231,6 +237,7 @@ func NewSwarm(local peer.ID, peers peerstore.Peerstore, eventBus event.Bus, opts
231
237
s .listeners .m = make (map [transport.Listener ]struct {})
232
238
s .transports .m = make (map [int ]transport.Transport )
233
239
s .notifs .m = make (map [network.Notifiee ]struct {})
240
+ s .directConnNotifs .m = make (map [peer.ID ][]chan struct {})
234
241
235
242
for _ , opt := range opts {
236
243
if err := opt (s ); err != nil {
@@ -390,6 +397,19 @@ func (s *Swarm) addConn(tc transport.CapableConn, dir network.Direction) (*Conn,
390
397
c .notifyLk .Lock ()
391
398
s .conns .Unlock ()
392
399
400
+ // Notify goroutines waiting for a direct connection
401
+
402
+ // Goroutines waiting for a direct connection first acquire this lock and then
403
+ // acquire conns.RLock. Do not acquire this lock before conns.Unlock to prevent deadlock.
404
+ s .directConnNotifs .Lock ()
405
+ if ! c .Stat ().Transient {
406
+ for _ , ch := range s .directConnNotifs .m [p ] {
407
+ close (ch )
408
+ }
409
+ delete (s .directConnNotifs .m , p )
410
+ }
411
+ s .directConnNotifs .Unlock ()
412
+
393
413
// Emit event after releasing `s.conns` lock so that a consumer can still
394
414
// use swarm methods that need the `s.conns` lock.
395
415
if isFirstConnection {
@@ -435,46 +455,101 @@ func (s *Swarm) NewStream(ctx context.Context, p peer.ID) (network.Stream, error
435
455
436
456
// Algorithm:
437
457
// 1. Find the best connection, otherwise, dial.
438
- // 2. Try opening a stream.
439
- // 3. If the underlying connection is, in fact, closed, close the outer
458
+ // 2. If the best connection is transient, wait for a direct conn via conn
459
+ // reversal or hole punching.
460
+ // 3. Try opening a stream.
461
+ // 4. If the underlying connection is, in fact, closed, close the outer
440
462
// connection and try again. We do this in case we have a closed
441
463
// connection but don't notice it until we actually try to open a
442
464
// stream.
443
465
//
444
- // Note: We only dial once.
445
- //
446
466
// TODO: Try all connections even if we get an error opening a stream on
447
467
// a non-closed connection.
448
- dials := 0
449
- for {
450
- // will prefer direct connections over relayed connections for opening streams
451
- c := s .bestAcceptableConnToPeer (ctx , p )
452
-
468
+ dialed := false
469
+ for i := 0 ; i < 1 ; i ++ {
470
+ c := s .bestConnToPeer (p )
453
471
if c == nil {
454
- if nodial , _ := network .GetNoDial (ctx ); nodial {
472
+ if nodial , _ := network .GetNoDial (ctx ); ! nodial {
473
+ if dialed {
474
+ return nil , errors .New ("max dial attempts exceeded" )
475
+ }
476
+ dialed = true
477
+ var err error
478
+ c , err = s .dialPeer (ctx , p )
479
+ if err != nil {
480
+ return nil , err
481
+ }
482
+ } else {
455
483
return nil , network .ErrNoConn
456
484
}
485
+ }
457
486
458
- if dials >= DialAttempts {
459
- return nil , errors .New ("max dial attempts exceeded" )
460
- }
461
- dials ++
462
-
487
+ useTransient , _ := network .GetUseTransient (ctx )
488
+ if ! useTransient && c .Stat ().Transient {
463
489
var err error
464
- c , err = s .dialPeer (ctx , p )
490
+ c , err = s .waitForDirectConn (ctx , p )
465
491
if err != nil {
466
492
return nil , err
467
493
}
468
494
}
469
495
470
- s , err := c .NewStream (ctx )
496
+ str , err := c .NewStream (ctx )
471
497
if err != nil {
472
498
if c .conn .IsClosed () {
473
499
continue
474
500
}
475
501
return nil , err
476
502
}
477
- return s , nil
503
+ return str , nil
504
+ }
505
+ return nil , network .ErrNoConn
506
+ }
507
+
508
+ // waitForDirectConn waits for a direct connection established through hole punching or connection reversal.
509
+ func (s * Swarm ) waitForDirectConn (ctx context.Context , p peer.ID ) (* Conn , error ) {
510
+ s .directConnNotifs .Lock ()
511
+ c := s .bestConnToPeer (p )
512
+ if c == nil {
513
+ s .directConnNotifs .Unlock ()
514
+ return nil , network .ErrNoConn
515
+ } else if ! c .Stat ().Transient {
516
+ s .directConnNotifs .Unlock ()
517
+ return c , nil
518
+ }
519
+
520
+ // Wait for transient connection to upgrade to a direct connection either by
521
+ // connection reversal or hole punching.
522
+ ch := make (chan struct {})
523
+ s .directConnNotifs .m [p ] = append (s .directConnNotifs .m [p ], ch )
524
+ s .directConnNotifs .Unlock ()
525
+
526
+ // Wait for notification.
527
+ // There's no point waiting for more than a minute here.
528
+ ctx , cancel := context .WithTimeout (ctx , time .Minute )
529
+ defer cancel ()
530
+ select {
531
+ case <- ctx .Done ():
532
+ // Remove ourselves from the notification list
533
+ s .directConnNotifs .Lock ()
534
+ s .directConnNotifs .m [p ] = slices .DeleteFunc (
535
+ s .directConnNotifs .m [p ],
536
+ func (c chan struct {}) bool { return c == ch },
537
+ )
538
+ if len (s .directConnNotifs .m [p ]) == 0 {
539
+ delete (s .directConnNotifs .m , p )
540
+ }
541
+ s .directConnNotifs .Unlock ()
542
+ return nil , ctx .Err ()
543
+ case <- ch :
544
+ // We do not need to remove ourselves from the list here as the notifier
545
+ // clears the map
546
+ c := s .bestConnToPeer (p )
547
+ if c == nil {
548
+ return nil , network .ErrNoConn
549
+ } else if c .Stat ().Transient {
550
+ return nil , network .ErrTransientConn
551
+ }
552
+ return c , nil
478
553
}
479
554
}
480
555
0 commit comments