From fd34eefc5ba0d54703c7139bfb84e493f1c3c737 Mon Sep 17 00:00:00 2001 From: Alex Le Date: Thu, 30 Apr 2026 09:02:48 -0700 Subject: [PATCH 1/2] add: high-availability-doc Signed-off-by: Alex Le --- astro.config.mjs | 1 + src/content/docs/how-to/high-availability.mdx | 220 ++++++++++++++++++ 2 files changed, 221 insertions(+) create mode 100644 src/content/docs/how-to/high-availability.mdx diff --git a/astro.config.mjs b/astro.config.mjs index aea076ae..d5c075d5 100755 --- a/astro.config.mjs +++ b/astro.config.mjs @@ -147,6 +147,7 @@ export default defineConfig({ "how-to/installation", "how-to/client-initialization", "how-to/connection-management", + "how-to/high-availability", "how-to/publish-and-subscribe-messages", "how-to/send-batch-commands", "how-to/synchronous-connection", diff --git a/src/content/docs/how-to/high-availability.mdx b/src/content/docs/how-to/high-availability.mdx new file mode 100644 index 00000000..288e93f8 --- /dev/null +++ b/src/content/docs/how-to/high-availability.mdx @@ -0,0 +1,220 @@ +--- +title: High Availability Configurations +description: Learn how to configure Valkey GLIDE for High Availability using cluster mode, automatic failover, read strategies, and reconnection tuning. +--- + +import { Tabs, TabItem, Aside } from '@astrojs/starlight/components'; + +Valkey GLIDE automatically supports high-availability features out of the box. Only minimal configurations are needed. + +This guide will go over what's supported and what's needed to configure for high availability. + +## Available HA Features + +GLIDE comes with many HA features out of the box. This includes: +- Automatic topology updates. +- Connection timeout and exponential backoff for reconnecting. +- Command routing. +- Read strategies (Requires changing to read from replicas). 
+ +#### Example Configuration + + + ```python + from glide import ( + GlideClusterClient, + GlideClusterClientConfiguration, + NodeAddress, + BackoffStrategy, + ReadFrom + ) + + config = GlideClusterClientConfiguration( + addresses=[ + NodeAddress("node1.example.com", 6379), + NodeAddress("node2.example.com", 6379), + NodeAddress("node3.example.com", 6379), + ], + # Read from replicas in the same AZ to reduce latency + read_from=ReadFrom.AZ_AFFINITY, + client_az="us-east-1a", + # Allow more time for commands during failover + request_timeout=1000, + # Reconnect with exponential backoff + reconnect_strategy=BackoffStrategy( + num_of_retries=5, + factor=2, + exponent_base=2, + ), + ) + + client = await GlideClusterClient.create(config) + ``` + + + + ```java + import glide.api.GlideClusterClient; + import glide.api.models.configuration.BackoffStrategy; + import glide.api.models.configuration.GlideClusterClientConfiguration; + import glide.api.models.configuration.NodeAddress; + import glide.api.models.configuration.ReadFrom; + + GlideClusterClientConfiguration config = GlideClusterClientConfiguration.builder() + .address(NodeAddress.builder().host("node1.example.com").port(6379).build()) + .address(NodeAddress.builder().host("node2.example.com").port(6379).build()) + .address(NodeAddress.builder().host("node3.example.com").port(6379).build()) + // Read from replicas in the same AZ to reduce latency + .readFrom(ReadFrom.AZ_AFFINITY) + .clientAZ("us-east-1a") + // Allow more time for commands during failover + .requestTimeout(1000) + // Reconnect with exponential backoff + .reconnectStrategy(BackoffStrategy.builder() + .numOfRetries(5) + .factor(2) + .exponentBase(2) + .build()) + .build(); + + GlideClusterClient client = GlideClusterClient.createClient(config).get(); + ``` + + + + ```typescript + import {GlideClusterClient} from "@valkey/valkey-glide"; + + const client = await GlideClusterClient.createClient({ + addresses: [ + {host: "node1.example.com", port: 6379}, + 
{host: "node2.example.com", port: 6379}, + {host: "node3.example.com", port: 6379}, + ], + // Read from replicas in the same AZ to reduce latency + readFrom: "AZAffinity", + clientAz: "us-east-1a", + // Allow more time for commands during failover + requestTimeout: 1000, + // Reconnect with exponential backoff + reconnectStrategy: { + numOfRetries: 5, + factor: 2, + exponentBase: 2, + }, + }); + ``` + + + + ```go + import ( + glide "github.com/valkey-io/valkey-glide/go/v2" + "github.com/valkey-io/valkey-glide/go/v2/config" + ) + + func CreateHAClient() (*glide.ClusterClient, error) { + cfg := config.NewClusterClientConfiguration(). + WithAddress(&config.NodeAddress{Host: "node1.example.com", Port: 6379}). + WithAddress(&config.NodeAddress{Host: "node2.example.com", Port: 6379}). + WithAddress(&config.NodeAddress{Host: "node3.example.com", Port: 6379}). + // Read from replicas in the same AZ to reduce latency + WithReadFrom(config.AzAffinity). + WithClientAZ("us-east-1a"). + // Allow more time for commands during failover + WithRequestTimeout(1000). 
+ // Reconnect with exponential backoff + WithReconnectStrategy(config.BackoffStrategy{ + NumOfRetries: 5, + Factor: 2, + ExponentBase: 2, + }) + + return glide.NewClusterClient(cfg) + } + ``` + + + + ```csharp + using Valkey.Glide; + using static Valkey.Glide.ConnectionConfiguration; + + var config = new ClusterClientConfigurationBuilder() + .WithAddress("node1.example.com", 6379) + .WithAddress("node2.example.com", 6379) + .WithAddress("node3.example.com", 6379) + // Read from replicas in the same AZ to reduce latency + .WithReadFrom(new ReadFrom(ReadFromStrategy.AzAffinity, "us-east-1a")) + // Allow more time for commands during failover + .WithRequestTimeout(TimeSpan.FromMilliseconds(1000)) + // Reconnect with exponential backoff + .WithConnectionRetryStrategy( + numberOfRetries: 5, + factor: 2, + exponentBase: 2) + .Build(); + + await using var client = await GlideClusterClient.CreateClient(config); + ``` + + + + ```php + $client = new ValkeyGlideCluster( + addresses: [ + ['host' => 'node1.example.com', 'port' => 6379], + ['host' => 'node2.example.com', 'port' => 6379], + ['host' => 'node3.example.com', 'port' => 6379], + ], + // Read from replicas in the same AZ to reduce latency + read_from: ValkeyGlide::READ_FROM_AZ_AFFINITY, + client_az: 'us-east-1a', + // Allow more time for commands during failover + request_timeout: 1000, + // Reconnect with exponential backoff + reconnect_strategy: [ + 'num_of_retries' => 5, + 'factor' => 2, + 'exponent_base' => 2, + ], + ); + ``` + + + +### Topology Updates + +GLIDE automatically refreshes its view of the cluster when it receives `MOVED` or `ASK` redirections from the server. +No configuration is needed. When a shard's primary changes due to failover, GLIDE detects the change on the next command and updates its routing table. + +### Request Timeout + +By default, GLIDE has a request timeout of 250ms. This controls how long GLIDE waits for a command to complete, including any reconnection attempts. 
+ +During failover, commands may take longer to complete due to retries. Increasing the timeout (e.g., to 1000ms) reduces the chance of false timeouts during brief disruptions. + +### Reconnect Strategy + +By default, GLIDE uses exponential backoff reconnect strategy. This controls how GLIDE attempts to reconnect to nodes. + +For production, configure the number of retries and backoff parameters to match your failover timing. +For more on configuring reconnect strategies, see our [guide](/how-to/connections/timeouts-and-reconnect-strategy/). + +### Read Strategy + +Configuring clients to read from replicas is important for high availability. + +By default, GLIDE clients read and write from the primary node. Changing this to read from replicas provides +two benefits: reads can continue while a primary is down and it distributes load away from primaries. + +For more on configuring read strategies, see our [guide](/how-to/connections/read-strategy/). + +## What Happens During Failover + +When a primary node fails, Valkey promotes a replica to primary. GLIDE will detects this and update its topology mapping accordingly. + +During the failover window (typically a few seconds), commands targeting the affected shard may fail with timeout errors. Commands targeting other shards are unaffected. + +Your application should handle these transient errors with retry logic. GLIDE's [reconnect strategy](/how-to/connections/timeouts-and-reconnect-strategy/) handles reconnection at the connection level, +but application-level retries may still be needed for commands that fail during the transition. 
From fa5e4eae7153aafac74aaa9b081599886c98cb97 Mon Sep 17 00:00:00 2001 From: Alex Le Date: Fri, 22 May 2026 10:14:11 -0700 Subject: [PATCH 2/2] moved ha to concept Signed-off-by: Alex Le --- astro.config.mjs | 1 - .../client-features/high-availability.mdx | 42 ++++ src/content/docs/how-to/high-availability.mdx | 220 ------------------ 3 files changed, 42 insertions(+), 221 deletions(-) create mode 100644 src/content/docs/concepts/client-features/high-availability.mdx delete mode 100644 src/content/docs/how-to/high-availability.mdx diff --git a/astro.config.mjs b/astro.config.mjs index d5c075d5..aea076ae 100755 --- a/astro.config.mjs +++ b/astro.config.mjs @@ -147,7 +147,6 @@ export default defineConfig({ "how-to/installation", "how-to/client-initialization", "how-to/connection-management", - "how-to/high-availability", "how-to/publish-and-subscribe-messages", "how-to/send-batch-commands", "how-to/synchronous-connection", diff --git a/src/content/docs/concepts/client-features/high-availability.mdx b/src/content/docs/concepts/client-features/high-availability.mdx new file mode 100644 index 00000000..aef8f77f --- /dev/null +++ b/src/content/docs/concepts/client-features/high-availability.mdx @@ -0,0 +1,42 @@ +--- +title: High Availability +description: Learn how Valkey GLIDE supports high-availability features. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Valkey GLIDE supports high-availability features out of the box. Only minimal configuration is needed. + +## Available HA Features + +- Automatic topology updates. +- Connection timeout and exponential backoff for reconnecting. +- Command routing. +- Read strategies (requires configuring the client to read from replicas). + +### Automatic Topology Updates + +GLIDE maintains a map of the cluster and automatically updates it when changes occur. For example, if a primary node fails and a replica is promoted, GLIDE detects this change and updates its topology mapping. 
+No additional configuration is required. + +### Request Timeout + +By default, GLIDE has a request timeout of 250ms. This controls how long GLIDE waits for a command to complete, including any reconnection attempts. + +During failover, commands may take longer to complete due to retries. Increasing the timeout (e.g., to 1000ms) reduces the chance of false timeouts during brief disruptions. + +### Reconnect Strategy + +By default, GLIDE uses an exponential backoff reconnect strategy. This controls how GLIDE attempts to reconnect to nodes. + +For production, configure the number of retries and backoff parameters to match your failover timing. +For more on configuring reconnect strategies, see our [guide](/how-to/connections/timeouts-and-reconnect-strategy/). + +### Read Strategy + +Configuring clients to read from replicas is important for high availability. + +By default, GLIDE clients read and write from the primary node. Changing this to read from replicas provides +two benefits: reads can continue while a primary is down, and read load is distributed away from primaries. + +For more on configuring read strategies, see our [guide](/how-to/connections/read-strategy/). diff --git a/src/content/docs/how-to/high-availability.mdx b/src/content/docs/how-to/high-availability.mdx deleted file mode 100644 index 288e93f8..00000000 --- a/src/content/docs/how-to/high-availability.mdx +++ /dev/null @@ -1,220 +0,0 @@ ---- -title: High Availability Configurations -description: Learn how to configure Valkey GLIDE for High Availability using cluster mode, automatic failover, read strategies, and reconnection tuning. ---- - -import { Tabs, TabItem, Aside } from '@astrojs/starlight/components'; - -Valkey GLIDE automatically supports high-availability features out of the box. Only minimal configurations are needed. - -This guide will go over what's supported and what's needed to configure for high availability. - -## Available HA Features - -GLIDE comes with many HA features out of the box. 
This includes: -- Automatic topology updates. -- Connection timeout and exponential backoff for reconnecting. -- Command routing. -- Read strategies (Requires changing to read from replicas). - -#### Example Configuration - - - ```python - from glide import ( - GlideClusterClient, - GlideClusterClientConfiguration, - NodeAddress, - BackoffStrategy, - ReadFrom - ) - - config = GlideClusterClientConfiguration( - addresses=[ - NodeAddress("node1.example.com", 6379), - NodeAddress("node2.example.com", 6379), - NodeAddress("node3.example.com", 6379), - ], - # Read from replicas in the same AZ to reduce latency - read_from=ReadFrom.AZ_AFFINITY, - client_az="us-east-1a", - # Allow more time for commands during failover - request_timeout=1000, - # Reconnect with exponential backoff - reconnect_strategy=BackoffStrategy( - num_of_retries=5, - factor=2, - exponent_base=2, - ), - ) - - client = await GlideClusterClient.create(config) - ``` - - - - ```java - import glide.api.GlideClusterClient; - import glide.api.models.configuration.BackoffStrategy; - import glide.api.models.configuration.GlideClusterClientConfiguration; - import glide.api.models.configuration.NodeAddress; - import glide.api.models.configuration.ReadFrom; - - GlideClusterClientConfiguration config = GlideClusterClientConfiguration.builder() - .address(NodeAddress.builder().host("node1.example.com").port(6379).build()) - .address(NodeAddress.builder().host("node2.example.com").port(6379).build()) - .address(NodeAddress.builder().host("node3.example.com").port(6379).build()) - // Read from replicas in the same AZ to reduce latency - .readFrom(ReadFrom.AZ_AFFINITY) - .clientAZ("us-east-1a") - // Allow more time for commands during failover - .requestTimeout(1000) - // Reconnect with exponential backoff - .reconnectStrategy(BackoffStrategy.builder() - .numOfRetries(5) - .factor(2) - .exponentBase(2) - .build()) - .build(); - - GlideClusterClient client = GlideClusterClient.createClient(config).get(); - ``` - - - - 
```typescript - import {GlideClusterClient} from "@valkey/valkey-glide"; - - const client = await GlideClusterClient.createClient({ - addresses: [ - {host: "node1.example.com", port: 6379}, - {host: "node2.example.com", port: 6379}, - {host: "node3.example.com", port: 6379}, - ], - // Read from replicas in the same AZ to reduce latency - readFrom: "AZAffinity", - clientAz: "us-east-1a", - // Allow more time for commands during failover - requestTimeout: 1000, - // Reconnect with exponential backoff - reconnectStrategy: { - numOfRetries: 5, - factor: 2, - exponentBase: 2, - }, - }); - ``` - - - - ```go - import ( - glide "github.com/valkey-io/valkey-glide/go/v2" - "github.com/valkey-io/valkey-glide/go/v2/config" - ) - - func CreateHAClient() (*glide.ClusterClient, error) { - cfg := config.NewClusterClientConfiguration(). - WithAddress(&config.NodeAddress{Host: "node1.example.com", Port: 6379}). - WithAddress(&config.NodeAddress{Host: "node2.example.com", Port: 6379}). - WithAddress(&config.NodeAddress{Host: "node3.example.com", Port: 6379}). - // Read from replicas in the same AZ to reduce latency - WithReadFrom(config.AzAffinity). - WithClientAZ("us-east-1a"). - // Allow more time for commands during failover - WithRequestTimeout(1000). 
- // Reconnect with exponential backoff - WithReconnectStrategy(config.BackoffStrategy{ - NumOfRetries: 5, - Factor: 2, - ExponentBase: 2, - }) - - return glide.NewClusterClient(cfg) - } - ``` - - - - ```csharp - using Valkey.Glide; - using static Valkey.Glide.ConnectionConfiguration; - - var config = new ClusterClientConfigurationBuilder() - .WithAddress("node1.example.com", 6379) - .WithAddress("node2.example.com", 6379) - .WithAddress("node3.example.com", 6379) - // Read from replicas in the same AZ to reduce latency - .WithReadFrom(new ReadFrom(ReadFromStrategy.AzAffinity, "us-east-1a")) - // Allow more time for commands during failover - .WithRequestTimeout(TimeSpan.FromMilliseconds(1000)) - // Reconnect with exponential backoff - .WithConnectionRetryStrategy( - numberOfRetries: 5, - factor: 2, - exponentBase: 2) - .Build(); - - await using var client = await GlideClusterClient.CreateClient(config); - ``` - - - - ```php - $client = new ValkeyGlideCluster( - addresses: [ - ['host' => 'node1.example.com', 'port' => 6379], - ['host' => 'node2.example.com', 'port' => 6379], - ['host' => 'node3.example.com', 'port' => 6379], - ], - // Read from replicas in the same AZ to reduce latency - read_from: ValkeyGlide::READ_FROM_AZ_AFFINITY, - client_az: 'us-east-1a', - // Allow more time for commands during failover - request_timeout: 1000, - // Reconnect with exponential backoff - reconnect_strategy: [ - 'num_of_retries' => 5, - 'factor' => 2, - 'exponent_base' => 2, - ], - ); - ``` - - - -### Topology Updates - -GLIDE automatically refreshes its view of the cluster when it receives `MOVED` or `ASK` redirections from the server. -No configuration is needed. When a shard's primary changes due to failover, GLIDE detects the change on the next command and updates its routing table. - -### Request Timeout - -By default, GLIDE has a request timeout of 250ms. This controls how long GLIDE waits for a command to complete, including any reconnection attempts. 
- -During failover, commands may take longer to complete due to retries. Increasing the timeout (e.g., to 1000ms) reduces the chance of false timeouts during brief disruptions. - -### Reconnect Strategy - -By default, GLIDE uses exponential backoff reconnect strategy. This controls how GLIDE attempts to reconnect to nodes. - -For production, configure the number of retries and backoff parameters to match your failover timing. -For more on configuring reconnect strategies, see our [guide](/how-to/connections/timeouts-and-reconnect-strategy/). - -### Read Strategy - -Configuring clients to read from replicas is important for high availability. - -By default, GLIDE clients read and write from the primary node. Changing this to read from replicas provides -two benefits: reads can continue while a primary is down and it distributes load away from primaries. - -For more on configuring read strategies, see our [guide](/how-to/connections/read-strategy/). - -## What Happens During Failover - -When a primary node fails, Valkey promotes a replica to primary. GLIDE will detects this and update its topology mapping accordingly. - -During the failover window (typically a few seconds), commands targeting the affected shard may fail with timeout errors. Commands targeting other shards are unaffected. - -Your application should handle these transient errors with retry logic. GLIDE's [reconnect strategy](/how-to/connections/timeouts-and-reconnect-strategy/) handles reconnection at the connection level, -but application-level retries may still be needed for commands that fail during the transition.