Skip to content

Commit cdb7ac9

Browse files
committed
Fix cluster discovery process.
1 parent 4cd724d commit cdb7ac9

File tree

8 files changed

+95
-76
lines changed

8 files changed

+95
-76
lines changed

Diff for: db-client-java/src/main/java/com/eventstore/dbclient/ConnectionService.java

+1
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ public void createChannel(UUID previousId, InetSocketAddress candidate) {
142142

143143
if (this.loadServerFeatures()) {
144144
this.channelId = UUID.randomUUID();
145+
this.connection.confirmChannel();
145146
logger.info("Connection to endpoint [{}] created successfully", this.connection.getLastConnectedEndpoint());
146147
break;
147148
}

Diff for: db-client-java/src/main/java/com/eventstore/dbclient/ConnectionState.java

+13-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ class ConnectionState {
2222
private InetSocketAddress previous;
2323
private ManagedChannel currentChannel;
2424

25+
// Indicates whether the current channel has passed all the connection prerequisites needed before the user can use it.
26+
// A non-exhaustive list of those checks:
27+
// * If we managed to get a gossip seed from the channel
28+
// * If we managed to read the server features (failing with a not-found error is not fatal; it just means the node runs an older version)
29+
private boolean confirmedChannel;
30+
2531
ConnectionState(EventStoreDBClientSettings settings) {
2632
this.settings = settings;
2733

@@ -46,7 +52,11 @@ class ConnectionState {
4652
}
4753

4854
InetSocketAddress getLastConnectedEndpoint() {
49-
return this.previous;
55+
return this.confirmedChannel ? this.previous : null;
56+
}
57+
58+
void confirmChannel() {
59+
this.confirmedChannel = true;
5060
}
5161

5262
ManagedChannel getCurrentChannel() {
@@ -82,6 +92,7 @@ void connect(InetSocketAddress addr) {
8292
builder.keepAliveTime(settings.getKeepAliveInterval(), TimeUnit.MILLISECONDS);
8393

8494
this.currentChannel = builder.build();
95+
this.confirmedChannel = false;
8596
this.previous = addr;
8697
}
8798

@@ -108,5 +119,6 @@ public void shutdown() {
108119

109120
public void clear() {
110121
this.previous = null;
122+
this.confirmedChannel = false;
111123
}
112124
}

Diff for: db-client-java/src/main/java/com/eventstore/dbclient/EventStoreDBClientBase.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ public class EventStoreDBClientBase {
1616
EventStoreDBClientBase(EventStoreDBClientSettings settings) {
1717
Discovery discovery;
1818

19-
if (settings.getHosts().length == 1) {
19+
if (settings.getHosts().length == 1 && !settings.isDnsDiscover()) {
2020
discovery = new SingleNodeDiscovery(settings.getHosts()[0]);
2121
} else {
2222
discovery = new ClusterDiscovery(settings);

Diff for: db-client-java/src/test/java/com/eventstore/dbclient/ClientTracker.java

+14
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,20 @@ public synchronized EventStoreDBClient getDefaultClient(Database database) {
7474
continue;
7575
}
7676

77+
// On rare occasions, GHA can take much longer than usual to set up a cluster
78+
// through docker compose. When that happens, we create a fresh client if we have exhausted
79+
// all discovery attempts and the connection got closed.
80+
if (e.getCause() instanceof ConnectionShutdownException && (settings.isDnsDiscover() || settings.getHosts().length > 1)) {
81+
logger.debug("Seems we exhausted all discovery attempts. Unusual but maybe docker is slow");
82+
try {
83+
Thread.sleep(500);
84+
} catch (InterruptedException ex) {
85+
throw new RuntimeException(ex);
86+
}
87+
defaultClient = EventStoreDBClient.create(settings);
88+
continue;
89+
}
90+
7791
throw new RuntimeException(e);
7892
}
7993
}

Diff for: db-client-java/src/test/java/com/eventstore/dbclient/databases/ExternallyCreatedCluster.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ public ExternallyCreatedCluster(boolean secure) {
1515
public ConnectionSettingsBuilder defaultSettingsBuilder() {
1616
return EventStoreDBClientSettings
1717
.builder()
18+
.dnsDiscover(true)
1819
.defaultCredentials("admin", "changeit")
19-
.addHost("localhost", 2_111)
20-
.addHost("localhost", 2_112)
2120
.addHost("localhost", 2_113)
2221
.tls(secure)
2322
.tlsVerifyCert(false)

Diff for: db-client-java/src/test/java/com/eventstore/dbclient/streams/DeadlineTests.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import java.util.concurrent.ExecutionException;
1212

1313
public interface DeadlineTests extends ConnectionAware {
14-
@Test
14+
@RetryingTest(10)
1515
default void testDefaultDeadline() throws Throwable {
1616
EventStoreDBClient client = getDatabase().connectWith(opts ->
1717
opts.defaultDeadline(1)

Diff for: docker-compose.yml

+60-68
Original file line numberDiff line numberDiff line change
@@ -1,106 +1,98 @@
1-
version: '3.5'
1+
version: "3.5"
22

33
services:
44
volumes-provisioner:
5-
image: "hasnat/volumes-provisioner"
5+
image: hasnat/volumes-provisioner
66
environment:
77
PROVISION_DIRECTORIES: "1000:1000:0755:/tmp/certs"
88
volumes:
99
- "./certs:/tmp/certs"
10-
network_mode: "none"
10+
network_mode: none
1111

12-
setup:
12+
cert-gen:
1313
image: eventstore/es-gencert-cli:1.0.2
1414
entrypoint: bash
1515
user: "1000:1000"
1616
command: >
1717
-c "mkdir -p ./certs && cd /certs
1818
&& es-gencert-cli create-ca
19-
&& es-gencert-cli create-node -out ./node1 --dns-names node1.eventstore
20-
&& es-gencert-cli create-node -out ./node2 --dns-names node2.eventstore
21-
&& es-gencert-cli create-node -out ./node3 --dns-names node3.eventstore
19+
&& es-gencert-cli create-node -out ./node1 -ip-addresses 127.0.0.1,172.30.240.11 -dns-names localhost
20+
&& es-gencert-cli create-node -out ./node2 -ip-addresses 127.0.0.1,172.30.240.12 -dns-names localhost
21+
&& es-gencert-cli create-node -out ./node3 -ip-addresses 127.0.0.1,172.30.240.13 -dns-names localhost
2222
&& find . -type f -print0 | xargs -0 chmod 666"
23-
container_name: setup
2423
volumes:
25-
- ./certs:/certs
24+
- "./certs:/certs"
2625
depends_on:
2726
- volumes-provisioner
2827

29-
node1.eventstore: &template
28+
esdb-node1:
3029
image: ghcr.io/eventstore/eventstore:${CONTAINER_IMAGE_VERSION:-latest}
31-
container_name: node1.eventstore
3230
env_file:
3331
- vars.env
3432
environment:
35-
- EVENTSTORE_EXT_HOST_ADVERTISE_AS=node1.eventstore
36-
- EVENTSTORE_INT_HOST_ADVERTISE_AS=node1.eventstore
37-
- EVENTSTORE_GOSSIP_SEED=node2.eventstore:2113,node3.eventstore:2113
38-
- EVENTSTORE_CERTIFICATE_FILE=/certs/node1/node.crt
39-
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/certs/node1/node.key
33+
- EVENTSTORE_GOSSIP_SEED=172.30.240.12:2113,172.30.240.13:2113
34+
- EVENTSTORE_INT_IP=172.30.240.11
35+
- EVENTSTORE_CERTIFICATE_FILE=/etc/eventstore/certs/node1/node.crt
36+
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/etc/eventstore/certs/node1/node.key
4037
- EVENTSTORE_ADVERTISE_HTTP_PORT_TO_CLIENT_AS=2111
41-
- EVENTSTORE_ADVERTISE_TCP_PORT_TO_CLIENT_AS=1111
42-
healthcheck:
43-
test:
44-
[
45-
'CMD-SHELL',
46-
'curl --fail --insecure https://node1.eventstore:2113/health/live || exit 1',
47-
]
48-
interval: 5s
49-
timeout: 5s
50-
retries: 24
5138
ports:
52-
- 1111:1113
5339
- 2111:2113
40+
networks:
41+
clusternetwork:
42+
ipv4_address: 172.30.240.11
5443
volumes:
55-
- ./certs:/certs
44+
- ./certs:/etc/eventstore/certs
45+
restart: unless-stopped
5646
depends_on:
57-
- setup
58-
restart: always
47+
- cert-gen
5948

60-
node2.eventstore:
61-
<<: *template
62-
container_name: node2.eventstore
49+
esdb-node2:
50+
image: ghcr.io/eventstore/eventstore:${CONTAINER_IMAGE_VERSION:-latest}
51+
env_file:
52+
- vars.env
6353
environment:
64-
- EVENTSTORE_EXT_HOST_ADVERTISE_AS=node2.eventstore
65-
- EVENTSTORE_INT_HOST_ADVERTISE_AS=node2.eventstore
66-
- EVENTSTORE_GOSSIP_SEED=node1.eventstore:2113,node3.eventstore:2113
67-
- EVENTSTORE_CERTIFICATE_FILE=/certs/node2/node.crt
68-
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/certs/node2/node.key
54+
- EVENTSTORE_GOSSIP_SEED=172.30.240.11:2113,172.30.240.13:2113
55+
- EVENTSTORE_INT_IP=172.30.240.12
56+
- EVENTSTORE_CERTIFICATE_FILE=/etc/eventstore/certs/node2/node.crt
57+
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/etc/eventstore/certs/node2/node.key
6958
- EVENTSTORE_ADVERTISE_HTTP_PORT_TO_CLIENT_AS=2112
70-
- EVENTSTORE_ADVERTISE_TCP_PORT_TO_CLIENT_AS=1112
71-
healthcheck:
72-
test:
73-
[
74-
'CMD-SHELL',
75-
'curl --fail --insecure https://node2.eventstore:2113/health/live || exit 1',
76-
]
77-
interval: 5s
78-
timeout: 5s
79-
retries: 24
8059
ports:
81-
- 1112:1113
8260
- 2112:2113
61+
networks:
62+
clusternetwork:
63+
ipv4_address: 172.30.240.12
64+
volumes:
65+
- ./certs:/etc/eventstore/certs
66+
restart: unless-stopped
67+
depends_on:
68+
- cert-gen
8369

84-
node3.eventstore:
85-
<<: *template
86-
container_name: node3.eventstore
70+
esdb-node3:
71+
image: ghcr.io/eventstore/eventstore:${CONTAINER_IMAGE_VERSION:-latest}
72+
env_file:
73+
- vars.env
8774
environment:
88-
- EVENTSTORE_EXT_HOST_ADVERTISE_AS=node3.eventstore
89-
- EVENTSTORE_INT_HOST_ADVERTISE_AS=node3.eventstore
90-
- EVENTSTORE_GOSSIP_SEED=node1.eventstore:2113,node2.eventstore:2113
91-
- EVENTSTORE_CERTIFICATE_FILE=/certs/node3/node.crt
92-
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/certs/node3/node.key
75+
- EVENTSTORE_GOSSIP_SEED=172.30.240.11:2113,172.30.240.12:2113
76+
- EVENTSTORE_INT_IP=172.30.240.13
77+
- EVENTSTORE_CERTIFICATE_FILE=/etc/eventstore/certs/node3/node.crt
78+
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/etc/eventstore/certs/node3/node.key
9379
- EVENTSTORE_ADVERTISE_HTTP_PORT_TO_CLIENT_AS=2113
94-
- EVENTSTORE_ADVERTISE_TCP_PORT_TO_CLIENT_AS=1113
95-
healthcheck:
96-
test:
97-
[
98-
'CMD-SHELL',
99-
'curl --fail --insecure https://node3.eventstore:2113/health/live || exit 1',
100-
]
101-
interval: 5s
102-
timeout: 5s
103-
retries: 24
10480
ports:
105-
- 1113:1113
10681
- 2113:2113
82+
networks:
83+
clusternetwork:
84+
ipv4_address: 172.30.240.13
85+
volumes:
86+
- ./certs:/etc/eventstore/certs
87+
restart: unless-stopped
88+
depends_on:
89+
- cert-gen
90+
91+
networks:
92+
clusternetwork:
93+
name: eventstoredb.local
94+
driver: bridge
95+
ipam:
96+
driver: default
97+
config:
98+
- subnet: 172.30.240.0/24

Diff for: vars.env

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
EVENTSTORE_CLUSTER_SIZE=3
22
EVENTSTORE_RUN_PROJECTIONS=All
3-
EVENTSTORE_TRUSTED_ROOT_CERTIFICATES_PATH=/certs/ca
3+
EVENTSTORE_INT_TCP_PORT=1112
4+
EVENTSTORE_HTTP_PORT=2113
5+
EVENTSTORE_TRUSTED_ROOT_CERTIFICATES_PATH=/etc/eventstore/certs/ca
46
EVENTSTORE_DISCOVER_VIA_DNS=false
5-
EVENTSTORE_ENABLE_EXTERNAL_TCP=true
67
EVENTSTORE_ENABLE_ATOM_PUB_OVER_HTTP=true
7-
EVENTSTORE_ADVERTISE_HOST_TO_CLIENT_AS=localhost
8+
EVENTSTORE_ADVERTISE_HOST_TO_CLIENT_AS=localhost

0 commit comments

Comments
 (0)