Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
25b2f4b
gateway: Add wavekv backend
kvinwang Nov 30, 2025
94cc986
gateway: Rename run_in_dstack to danger_disable_attestation
kvinwang Nov 30, 2025
8d9de10
gw: Add peer urls config
kvinwang Nov 30, 2025
f4b1b4e
gw: Sync data from wavekv to state
kvinwang Dec 1, 2025
2ffc448
gw: Sync certificates
kvinwang Dec 1, 2025
34d678a
gw: Recycle instances according to kvdb last seen
kvinwang Dec 1, 2025
f9c3236
gw: Fix admin rpc error in non dstack agent env
kvinwang Dec 1, 2025
1bdc5a1
gw: Add cluster.sh
kvinwang Dec 1, 2025
0c40446
gw: Use standalone uuid instead of wg pubkey
kvinwang Dec 1, 2025
b397d1f
gw: Add uuid on par of id
kvinwang Dec 1, 2025
6e8d118
gw: Add all nodes info in RegisterCvmResponse
kvinwang Dec 1, 2025
26a97ce
gw: Multiple line last seen
kvinwang Dec 1, 2025
7039823
gw: Rename to insecure config fields
kvinwang Dec 1, 2025
d1c5736
gw: Refactor timeout in config
kvinwang Dec 1, 2025
90674aa
gateway: Fix panic when dropping workers_rt
kvinwang Dec 1, 2025
7ffcfed
gw: gz compress sync message
kvinwang Dec 2, 2025
6c25ea9
gw: Use new rocket address syntax
kvinwang Dec 2, 2025
00def34
gw: Refactor RPC and add bootnode support
kvinwang Dec 2, 2025
1369757
gw: Rename test script
kvinwang Dec 2, 2025
f81f11f
gw: Rename wavekv_data_dir to data_dir
kvinwang Dec 2, 2025
f99b5d4
gw: Drop legacy state support
kvinwang Dec 2, 2025
65624a1
gw: Fix config files
kvinwang Dec 2, 2025
d9f5d20
gw: Add periodic persistence
kvinwang Dec 2, 2025
bcc8ee5
gw: Use msgpack instead of bincode
kvinwang Dec 4, 2025
12b0ce9
gw: Refactor HttpSyncNetwork structure
kvinwang Dec 4, 2025
197d4d4
gw: Use wavekv from crates.io
kvinwang Dec 5, 2025
6c48d12
gw: Sync handshakes to kv
kvinwang Dec 8, 2025
4654b0a
gw: Show more info on the page
kvinwang Dec 8, 2025
72eef9a
gw: Remove last_seen of instance
kvinwang Dec 8, 2025
0797969
gw: Only returns active gw nodes from register cvm
kvinwang Dec 8, 2025
29eefec
gw: Add two more test cases
kvinwang Dec 8, 2025
886b238
cargo fmt
kvinwang Dec 8, 2025
d50bc91
gw: Add SPDX header in scripts
kvinwang Dec 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 88 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ sodiumbox = { path = "sodiumbox" }
serde-duration = { path = "serde-duration" }
dstack-mr = { path = "dstack-mr" }
size-parser = { path = "size-parser" }
wavekv = "1.0.0"

# Core dependencies
anyhow = { version = "1.0.97", default-features = false }
Expand All @@ -105,6 +106,7 @@ sd-notify = "0.4.5"
jemallocator = "0.5.4"

# Serialization/Parsing
flate2 = "1.0"
borsh = { version = "1.5.7", default-features = false, features = ["derive"] }
bon = { version = "3.4.0", default-features = false }
base64 = "0.22.1"
Expand All @@ -118,6 +120,7 @@ scale = { version = "3.7.4", package = "parity-scale-codec", features = [
] }
serde = { version = "1.0.219", features = ["derive"], default-features = false }
serde-human-bytes = "0.1.0"
rmp-serde = "1.3.0"
serde_json = { version = "1.0.140", default-features = false }
serde_ini = "0.2.0"
toml = "0.8.20"
Expand All @@ -137,6 +140,11 @@ hyper-util = { version = "0.1.10", features = [
"client-legacy",
"http1",
] }
hyper-rustls = { version = "0.27", default-features = false, features = [
"ring",
"http1",
"tls12",
] }
hyperlocal = "0.9.1"
ipnet = { version = "2.11.0", features = ["serde"] }
reqwest = { version = "0.12.14", default-features = false, features = [
Expand Down
12 changes: 10 additions & 2 deletions gateway/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ edition.workspace = true
license.workspace = true

[dependencies]
rocket = { workspace = true, features = ["mtls"] }
rocket = { workspace = true, features = ["mtls", "json"] }
tracing.workspace = true
tracing-subscriber.workspace = true
anyhow.workspace = true
Expand Down Expand Up @@ -48,11 +48,19 @@ dstack-types.workspace = true
serde-duration.workspace = true
reqwest = { workspace = true, features = ["json"] }
hyper = { workspace = true, features = ["server", "http1"] }
hyper-util = { version = "0.1", features = ["tokio"] }
hyper-util = { workspace = true, features = ["tokio"] }
hyper-rustls.workspace = true
http-body-util.workspace = true
x509-parser.workspace = true
jemallocator.workspace = true
wavekv.workspace = true
flate2.workspace = true
uuid = { workspace = true, features = ["v4"] }
rmp-serde.workspace = true

[target.'cfg(unix)'.dependencies]
nix = { workspace = true, features = ["resource"] }

[dev-dependencies]
insta.workspace = true
tempfile.workspace = true
20 changes: 13 additions & 7 deletions gateway/dstack-app/builder/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@ validate_env "$CF_API_TOKEN"
validate_env "$CF_ZONE_ID"
validate_env "$SRV_DOMAIN"
validate_env "$WG_ENDPOINT"
validate_env "$NODE_ID"

# Validate $NODE_ID: must be a non-negative integer (digits only)
if [[ ! "$NODE_ID" =~ ^[0-9]+$ ]]; then
echo "Invalid NODE_ID: $NODE_ID"
exit 1
fi

# Validate $SUBNET_INDEX, valid range is 0-15
if [[ ! "$SUBNET_INDEX" =~ ^[0-9]+$ ]] || [ "$SUBNET_INDEX" -lt 0 ] || [ "$SUBNET_INDEX" -gt 15 ]; then
Expand Down Expand Up @@ -80,8 +87,7 @@ echo "RPC_DOMAIN: $RPC_DOMAIN"
cat >$CONFIG_PATH <<EOF
keep_alive = 10
log_level = "info"
address = "0.0.0.0"
port = 8000
address = "0.0.0.0:8000"

[tls]
key = "$CERTS_DIR/gateway-rpc.key"
Expand All @@ -92,21 +98,21 @@ ca_certs = "$CERTS_DIR/gateway-ca.cert"
mandatory = false

[core]
state_path = "$DATA_DIR/gateway-state.json"
set_ulimit = true
rpc_domain = "$RPC_DOMAIN"
run_in_dstack = true

[core.sync]
enabled = $SYNC_ENABLED
interval = "30s"
node_id = $NODE_ID
interval = "1m"
timeout = "2m"
my_url = "$MY_URL"
bootnode = "$BOOTNODE_URL"
data_dir = "$DATA_DIR"

[core.admin]
enabled = true
address = "0.0.0.0"
port = 8001
address = "0.0.0.0:8001"

[core.certbot]
enabled = true
Expand Down
8 changes: 8 additions & 0 deletions gateway/dstack-app/deploy-to-vmm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ else
# Public IP address
PUBLIC_IP=$(curl -s4 ifconfig.me)

# Node ID for this gateway instance.
# Must be unique across all gateway instances in the network.
# Must be a 32-bit unsigned integer (0-4294967295).
# Must be non-zero when deploying multiple gateways (1-4294967295).
NODE_ID=1

# The dstack-gateway application ID. Register the app in DstackKms first to get the app ID.
# GATEWAY_APP_ID=31884c4b7775affe4c99735f6c2aff7d7bc6cfcd

Expand Down Expand Up @@ -103,6 +109,7 @@ required_env_vars=(
"GATEWAY_APP_ID"
"MY_URL"
"APP_LAUNCH_TOKEN"
"NODE_ID"
# "BOOTNODE_URL"
)

Expand Down Expand Up @@ -143,6 +150,7 @@ BOOTNODE_URL=$BOOTNODE_URL
SUBNET_INDEX=$SUBNET_INDEX
APP_LAUNCH_TOKEN=$APP_LAUNCH_TOKEN
RPC_DOMAIN=$RPC_DOMAIN
NODE_ID=$NODE_ID
EOF

if [ -n "$APP_COMPOSE_FILE" ]; then
Expand Down
31 changes: 22 additions & 9 deletions gateway/gateway.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,13 @@ ident = "dstack Gateway"
temp_dir = "/tmp"
keep_alive = 10
log_level = "info"
port = 8010
address = "127.0.0.1:8010"

[core]
kms_url = ""
state_path = "./gateway-state.json"
# Automatically set the soft ulimit to the hard ulimit
set_ulimit = true
rpc_domain = ""
run_in_dstack = true

[core.auth]
enabled = false
Expand All @@ -25,7 +23,12 @@ timeout = "5s"

[core.admin]
enabled = false
port = 8011
address = "127.0.0.1:8011"

[core.debug]
insecure_enable_debug_rpc = false
insecure_skip_attestation = false
address = "127.0.0.1:8012"

[core.certbot]
enabled = false
Expand Down Expand Up @@ -97,9 +100,19 @@ node_timeout = "10m"

[core.sync]
enabled = false
interval = "30s"
broadcast_interval = "10m"
timeout = "2s"
# WaveKV node ID for this gateway (must be unique across cluster)
node_id = 0
my_url = "https://localhost:8011"
# The url of the bootnode used to join the network
bootnode = "https://localhost:8011"
interval = "1m"
timeout = "30s"
# The URL of the bootnode used to fetch initial peer list when joining the network.
# Leave empty if this is the first node or peers are managed via Admin.SetNodeInfo RPC.
bootnode = ""
# Data directory for WaveKV persistence (WAL and snapshots)
data_dir = "/dstack-gateway/data"
# Interval for periodic persistence of WaveKV data (e.g., "5s", "1m", "1h")
persist_interval = "5m"
# Enable periodic sync of instance connections to KV store
sync_connections_enabled = true
# Interval for syncing instance connections to KV store
sync_connections_interval = "30s"
Loading
Loading