Skip to content

Commit aff1577

Browse files
committed
Remove a deadlock when no pingresp
There was a deadlock when we didn't receive an expected pingresp: keepalive() called internalConnLost() synchronously, and internalConnLost() waits for all workers to exit before it returns, but keepalive() is itself one of those workers. There was also an issue when returning from keepalive(): we'd call the waitgroup broadcasts before c.stop had been closed, so the waiting goroutines fell through their stop check and got stuck at the Wait() again, which in turn prevented keepalive() from finishing its return and calling workers.Done() — deadlocked again. Changed this to use a pingStop channel for the timer-reset go funcs; it is closed at the beginning of keepalive()'s exit func, ensuring the broadcasts that follow cause them all to exit properly. Resolves #129.
1 parent db7be0c commit aff1577

File tree

2 files changed

+7
-5
lines changed

2 files changed

+7
-5
lines changed

net.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ func errorWatch(c *client) {
319319
return
320320
case err := <-c.errors:
321321
ERROR.Println(NET, "error triggered, stopping")
322-
c.internalConnLost(err)
322+
go c.internalConnLost(err)
323323
return
324324
}
325325
}

ping.go

+6-4
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@ func keepalive(c *client) {
2626
DEBUG.Println(PNG, "keepalive starting")
2727

2828
var condWG sync.WaitGroup
29+
pingStop := make(chan struct{})
2930

3031
defer func() {
32+
close(pingStop)
3133
c.keepaliveReset.Broadcast()
3234
c.pingResp.Broadcast()
3335
c.packetResp.Broadcast()
@@ -50,7 +52,7 @@ func keepalive(c *client) {
5052
c.pingResp.Wait()
5153
c.pingResp.L.Unlock()
5254
select {
53-
case <-c.stop:
55+
case <-pingStop:
5456
return
5557
default:
5658
}
@@ -68,7 +70,7 @@ func keepalive(c *client) {
6870
c.packetResp.Wait()
6971
c.packetResp.L.Unlock()
7072
select {
71-
case <-c.stop:
73+
case <-pingStop:
7274
return
7375
default:
7476
}
@@ -84,7 +86,7 @@ func keepalive(c *client) {
8486
c.keepaliveReset.Wait()
8587
c.keepaliveReset.L.Unlock()
8688
select {
87-
case <-c.stop:
89+
case <-pingStop:
8890
return
8991
default:
9092
}
@@ -107,7 +109,7 @@ func keepalive(c *client) {
107109
case <-pingRespTimer.C:
108110
pingRespTimer.SetRead(true)
109111
CRITICAL.Println(PNG, "pingresp not received, disconnecting")
110-
c.internalConnLost(errors.New("pingresp not received, disconnecting"))
112+
c.errors <- errors.New("pingresp not received, disconnecting")
111113
pingTimer.Stop()
112114
return
113115
}

0 commit comments

Comments
 (0)