Skip to content

Commit 85b3c8b

Browse files
committed
XRP. Added HA setup tests and modified node dashboard
1 parent e4c0ef5 commit 85b3c8b

File tree

6 files changed

+362
-62
lines changed

6 files changed

+362
-62
lines changed

lib/xrp/README.md

+8-8
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
| Contributed by |
44
|:--------------------------------:|
5-
| Pedro Aceves <br/>acevespa@amazon.com |
5+
| [Pedro Aceves](https://github.com/acevesp)|
66

77
XRP node deployment on AWS. All nodes are configured as ["Stock Servers"](https://xrpl.org/docs/infrastructure/configuration/server-modes/run-rippled-as-a-stock-server)
88

@@ -27,7 +27,7 @@ XRP node deployment on AWS. All nodes are configure as ["Stock Servers"](https:/
2727
## Well-Architected
2828

2929
<details>
30-
<summary>Review the for pros and cons of this solution.</summary>
30+
<summary>Review pros and cons of this solution.</summary>
3131

3232
### Well-Architected Checklist
3333

@@ -50,7 +50,7 @@ This is the Well-Architected checklist for XRP nodes implementation of the AWS B
5050
| | Resource monitoring | How are workload resources monitored? | Resources are being monitored using Amazon CloudWatch dashboards. Amazon CloudWatch custom metrics are being pushed via CloudWatch Agent. |
5151
| Performance efficiency | Compute selection | How is compute solution selected? | Compute solution is selected based on best price-performance. |
5252
| | Storage selection | How is storage solution selected? | Storage solution is selected based on best price-performance. |
53-
| Operational excellence | Workload health | How is health of workload determined? | Health of workload is determined via AWS Application Load Balancer Target Group Health Checks, on port 8545. |
53+
| Operational excellence | Workload health | How is health of workload determined? | Health of workload is determined via AWS Application Load Balancer Target Group Health Checks, on port 6005. |
5454
| Sustainability | Hardware & services | Select most efficient hardware for your workload | Amazon EC2 R7a instances support the Sustainability Pillar of the AWS Well-Architected Framework by offering memory optimization that enables more efficient resource utilization, potentially reducing overall energy consumption and hardware requirements for data-intensive workloads. |
5555

5656
</details>
@@ -59,7 +59,7 @@ This is the Well-Architected checklist for XRP nodes implementation of the AWS B
5959

6060
### Open AWS CloudShell
6161

62-
To begin, ensure you login to your AWS account with permissions to create and modify resources in IAM, EC2, EBS, VPC, S3, KMS, and Secrets Manager.
62+
To begin, ensure you log in to your AWS account with permissions to create and modify resources in IAM, EC2, EBS, VPC, S3, and KMS.
6363

6464
From the AWS Management Console, open the [AWS CloudShell](https://docs.aws.amazon.com/cloudshell/latest/userguide/welcome.html), a web-based shell environment. If unfamiliar, review the [2-minute YouTube video](https://youtu.be/fz4rbjRaiQM) for an overview and check out [CloudShell with VPC environment](https://docs.aws.amazon.com/cloudshell/latest/userguide/creating-vpc-environment.html), which we'll use to test the nodes' API from the internal IP address space.
6565

@@ -93,7 +93,7 @@ cd lib/xrp
9393
cp ./sample-configs/.env-sample-testnet .env
9494
nano .env
9595
```
96-
> **NOTE:** *You can find more examples inside `sample-configs` *
96+
> **NOTE:** *You can find more examples inside `sample-configs`*
9797
9898

9999
4. Deploy common components such as IAM role:
@@ -194,13 +194,13 @@ pwd
194194
# Make sure you are in aws-blockchain-node-runners/lib/xrp
195195

196196
# Destroy HA Nodes
197-
cdk destroy XRP-ha-nodes
197+
npx cdk destroy XRP-ha-nodes
198198

199199
# Destroy Single Node
200-
cdk destroy XRP-single-node
200+
npx cdk destroy XRP-single-node
201201

202202
# Delete all common components like IAM role and Security Group
203-
cdk destroy XRP-common
203+
npx cdk destroy XRP-common
204204
```
205205

206206
### FAQ

lib/xrp/lib/assets/user-data/check_xrp_sequence.sh

+42-5
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ set -euo pipefail
2626
MAX_RETRIES=3
2727
RETRY_DELAY=5
2828
NAMESPACE="CWAgent"
29-
METRIC_NAME="XRP_Sequence"
29+
CURRENT_METRIC_NAME="XRP_Current_Sequence"
30+
DELTA_METRIC_NAME="XRP_Delta_Sequence"
3031
LOCKFILE="/tmp/check_xrp_sequence.lock"
3132
LOCK_FD=200
3233

@@ -115,6 +116,29 @@ get_current_sequence() {
115116
local retry_count=0
116117
local seq
117118

119+
while [[ ${retry_count} -lt ${MAX_RETRIES} ]]; do
120+
if seq=$(curl -s -f -H 'Content-Type: application/json' \
121+
-d '{"method":"ledger_current","params":[{}]}' \
122+
http://localhost:5005/ | \
123+
jq -e '.result.ledger_current_index // 0'); then
124+
if [[ "${seq}" != "0" ]]; then
125+
echo "${seq}"
126+
return 0
127+
fi
128+
fi
129+
log_warning "Failed to get sequence, attempt $((retry_count + 1))/${MAX_RETRIES}"
130+
retry_count=$((retry_count + 1))
131+
sleep ${RETRY_DELAY}
132+
done
133+
134+
log_error "Failed to get current sequence after ${MAX_RETRIES} attempts"
135+
return 1
136+
}
137+
138+
get_validated_sequence() {
139+
local retry_count=0
140+
local seq
141+
118142
while [[ ${retry_count} -lt ${MAX_RETRIES} ]]; do
119143
if seq=$(curl -s -f -H 'Content-Type: application/json' \
120144
-d '{"method":"server_info","params":[{}]}' \
@@ -137,12 +161,13 @@ get_current_sequence() {
137161
# Function to send metric to CloudWatch with retries
138162
send_to_cloudwatch() {
139163
local sequence=$1
164+
local metric_name=$2
140165
local retry_count=0
141166

142167
while [[ ${retry_count} -lt ${MAX_RETRIES} ]]; do
143168
if aws cloudwatch put-metric-data \
144169
--namespace "${NAMESPACE}" \
145-
--metric-name "${METRIC_NAME}" \
170+
--metric-name "${metric_name}" \
146171
--value "${sequence}" \
147172
--region "${REGION}" \
148173
--dimensions "InstanceId=${INSTANCE_ID}" \
@@ -192,14 +217,26 @@ main() {
192217
fi
193218

194219
# Get current sequence
195-
if ! sequence=$(get_current_sequence); then
220+
if ! current_sequence=$(get_current_sequence); then
196221
return 1
197222
fi
198223

199-
log_info "Retrieved sequence: ${sequence}"
224+
# Get validated sequence
225+
if ! validated_sequence=$(get_validated_sequence); then
226+
return 1
227+
fi
228+
229+
log_info "Retrieved current sequence: ${current_sequence}"
230+
log_info "Retrieved validated sequence: ${validated_sequence}"
231+
232+
# Send to CloudWatch
233+
if ! send_to_cloudwatch "${current_sequence}" "${CURRENT_METRIC_NAME}"; then
234+
return 1
235+
fi
200236

201237
# Compute the current-minus-validated sequence delta and send it to CloudWatch
202-
if ! send_to_cloudwatch "${sequence}"; then
238+
delta_sequence=$((current_sequence - validated_sequence))
239+
if ! send_to_cloudwatch "${delta_sequence}" "${DELTA_METRIC_NAME}"; then
203240
return 1
204241
fi
205242

+66-47
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
export const SingleNodeCWDashboardJSON = {
22
"widgets": [
33
{
4-
"height": 6,
4+
"height": 4,
55
"width": 8,
66
"y": 0,
77
"x": 0,
@@ -18,23 +18,23 @@ export const SingleNodeCWDashboardJSON = {
1818
},
1919
"region": "${REGION}",
2020
"metrics": [
21-
[ "AWS/EC2", "CPUUtilization","InstanceId", "${INSTANCE_ID}", {"label": "${INSTANCE_ID}-${INSTANCE_NAME}" } ]
21+
[ "AWS/EC2", "CPUUtilization", "InstanceId", "${INSTANCE_ID}", { "label": "${INSTANCE_ID}-${INSTANCE_NAME}" } ]
2222
],
2323
"title": "CPU utilization (%)"
2424
}
2525
},
2626
{
27-
"height": 6,
27+
"height": 4,
2828
"width": 8,
2929
"y": 0,
3030
"x": 8,
3131
"type": "metric",
3232
"properties": {
3333
"metrics": [
3434
[ { "expression": "m7/PERIOD(m7)", "label": "Read", "id": "e7" } ],
35-
[ "CWAgent", "diskio_reads","InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m7", "visible": false, "stat": "Sum", "period": 60 } ],
35+
[ "CWAgent", "diskio_reads", "InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m7", "visible": false, "stat": "Sum", "period": 60 } ],
3636
[ { "expression": "m8/PERIOD(m8)", "label": "Write", "id": "e8" } ],
37-
[ "CWAgent", "diskio_writes","InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m8", "visible": false, "stat": "Sum", "period": 60 } ]
37+
[ "CWAgent", "diskio_writes", "InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m8", "visible": false, "stat": "Sum", "period": 60 } ]
3838
],
3939
"view": "timeSeries",
4040
"stacked": false,
@@ -45,31 +45,32 @@ export const SingleNodeCWDashboardJSON = {
4545
}
4646
},
4747
{
48-
"height": 6,
48+
"height": 4,
4949
"width": 8,
5050
"y": 0,
5151
"x": 16,
5252
"type": "metric",
5353
"properties": {
54+
"metrics": [
55+
[ "CWAgent", "XRP_Current_Sequence", "InstanceId", "${INSTANCE_ID}", { "label": "${INSTANCE_ID}-${INSTANCE_NAME}", "region": "${REGION}" } ]
56+
],
5457
"sparkline": false,
55-
"view": "singleValue",
58+
"view": "timeSeries",
5659
"region": "${REGION}",
5760
"stacked": false,
5861
"singleValueFullPrecision": true,
5962
"liveData": true,
6063
"setPeriodToTimeRange": false,
6164
"trend": true,
62-
"metrics": [
63-
[ "CWAgent", "XRP_Sequence","InstanceId", "${INSTANCE_ID}", {"label": "${INSTANCE_ID}-${INSTANCE_NAME}" } ]
64-
],
65-
"title": "XRP Sequence"
65+
"title": "XRP Current Sequence",
66+
"period": 300
6667
}
6768
},
6869
{
69-
"height": 6,
70+
"height": 4,
7071
"width": 8,
71-
"y": 6,
72-
"x": 0,
72+
"y": 12,
73+
"x": 16,
7374
"type": "metric",
7475
"properties": {
7576
"view": "timeSeries",
@@ -83,16 +84,16 @@ export const SingleNodeCWDashboardJSON = {
8384
},
8485
"region": "${REGION}",
8586
"metrics": [
86-
[ "AWS/EC2", "NetworkIn","InstanceId", "${INSTANCE_ID}", {"label": "${INSTANCE_ID}-${INSTANCE_NAME}" } ]
87+
[ "AWS/EC2", "NetworkIn", "InstanceId", "${INSTANCE_ID}", { "label": "${INSTANCE_ID}-${INSTANCE_NAME}" } ]
8788
],
8889
"title": "Network in (bytes)"
8990
}
9091
},
9192
{
92-
"height": 6,
93+
"height": 4,
9394
"width": 8,
94-
"y": 6,
95-
"x": 8,
95+
"y": 4,
96+
"x": 0,
9697
"type": "metric",
9798
"properties": {
9899
"view": "timeSeries",
@@ -101,16 +102,16 @@ export const SingleNodeCWDashboardJSON = {
101102
"stat": "Average",
102103
"period": 300,
103104
"metrics": [
104-
[ "CWAgent", "cpu_usage_iowait","InstanceId", "${INSTANCE_ID}", {"label": "${INSTANCE_ID}-${INSTANCE_NAME}" } ]
105+
[ "CWAgent", "cpu_usage_iowait", "InstanceId", "${INSTANCE_ID}", { "label": "${INSTANCE_ID}-${INSTANCE_NAME}" } ]
105106
],
106107
"title": "CPU Usage IO wait (%)"
107108
}
108109
},
109110
{
110-
"height": 6,
111+
"height": 4,
111112
"width": 8,
112-
"y": 6,
113-
"x": 16,
113+
"y": 4,
114+
"x": 8,
114115
"type": "metric",
115116
"properties": {
116117
"view": "timeSeries",
@@ -125,20 +126,20 @@ export const SingleNodeCWDashboardJSON = {
125126
"region": "${REGION}",
126127
"metrics": [
127128
[ { "expression": "IF(m7_2 !=0, (m7_1 / m7_2), 0)", "label": "Read", "id": "e7" } ],
128-
[ "CWAgent", "diskio_read_time","InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m7_1", "visible": false, "stat": "Sum", "period": 60 } ],
129-
[ "CWAgent", "diskio_reads","InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m7_2", "visible": false, "stat": "Sum", "period": 60 } ],
129+
[ "CWAgent", "diskio_read_time", "InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m7_1", "visible": false, "stat": "Sum", "period": 60 } ],
130+
[ "CWAgent", "diskio_reads", "InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m7_2", "visible": false, "stat": "Sum", "period": 60 } ],
130131
[ { "expression": "IF(m7_4 !=0, (m7_3 / m7_4), 0)", "label": "Write", "id": "e8" } ],
131-
[ "CWAgent", "diskio_write_time","InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m7_3", "visible": false, "stat": "Sum", "period": 60 } ],
132-
[ "CWAgent", "diskio_writes","InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m7_4", "visible": false, "stat": "Sum", "period": 60 } ]
132+
[ "CWAgent", "diskio_write_time", "InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m7_3", "visible": false, "stat": "Sum", "period": 60 } ],
133+
[ "CWAgent", "diskio_writes", "InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m7_4", "visible": false, "stat": "Sum", "period": 60 } ]
133134
],
134135
"title": "nvme1n1 Volume Read/Write latency (ms/op)"
135136
}
136137
},
137138
{
138-
"height": 6,
139+
"height": 4,
139140
"width": 8,
140-
"y": 12,
141-
"x": 0,
141+
"y": 8,
142+
"x": 16,
142143
"type": "metric",
143144
"properties": {
144145
"view": "timeSeries",
@@ -152,16 +153,16 @@ export const SingleNodeCWDashboardJSON = {
152153
},
153154
"region": "${REGION}",
154155
"metrics": [
155-
[ "AWS/EC2", "NetworkOut","InstanceId", "${INSTANCE_ID}", {"label": "${INSTANCE_ID}-${INSTANCE_NAME}" } ]
156+
[ "AWS/EC2", "NetworkOut", "InstanceId", "${INSTANCE_ID}", { "label": "${INSTANCE_ID}-${INSTANCE_NAME}" } ]
156157
],
157158
"title": "Network out (bytes)"
158159
}
159160
},
160161
{
161-
"height": 6,
162+
"height": 4,
162163
"width": 8,
163-
"y": 12,
164-
"x": 8,
164+
"y": 8,
165+
"x": 0,
165166
"type": "metric",
166167
"properties": {
167168
"view": "timeSeries",
@@ -170,23 +171,23 @@ export const SingleNodeCWDashboardJSON = {
170171
"stat": "Average",
171172
"period": 300,
172173
"metrics": [
173-
[ "CWAgent", "mem_used_percent","InstanceId", "${INSTANCE_ID}", {"label": "${INSTANCE_ID}-${INSTANCE_NAME}" } ]
174+
[ "CWAgent", "mem_used_percent", "InstanceId", "${INSTANCE_ID}", { "label": "${INSTANCE_ID}-${INSTANCE_NAME}" } ]
174175
],
175176
"title": "Mem Used (%)"
176177
}
177178
},
178179
{
179-
"height": 6,
180+
"height": 4,
180181
"width": 8,
181-
"y": 12,
182-
"x": 16,
182+
"y": 8,
183+
"x": 8,
183184
"type": "metric",
184185
"properties": {
185186
"metrics": [
186-
[ { "expression": "m2/PERIOD(m2)", "label": "Read", "id": "e2", "period": 60, "region": "us-east-1" } ],
187-
[ "CWAgent", "diskio_read_bytes","InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m2", "stat": "Sum", "visible": false, "period": 60 } ],
188-
[ { "expression": "m3/PERIOD(m3)", "label": "Write", "id": "e3", "period": 60, "region": "us-east-1" } ],
189-
[ "CWAgent", "diskio_write_bytes","InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m3", "stat": "Sum", "visible": false, "period": 60 } ]
187+
[ { "expression": "m2/PERIOD(m2)", "label": "Read", "id": "e2", "period": 60, "region": "${REGION}" } ],
188+
[ "CWAgent", "diskio_read_bytes", "InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m2", "stat": "Sum", "visible": false, "period": 60 } ],
189+
[ { "expression": "m3/PERIOD(m3)", "label": "Write", "id": "e3", "period": 60, "region": "${REGION}" } ],
190+
[ "CWAgent", "diskio_write_bytes", "InstanceId", "${INSTANCE_ID}", "name", "nvme1n1", { "id": "m3", "stat": "Sum", "visible": false, "period": 60 } ]
190191
],
191192
"view": "timeSeries",
192193
"stacked": false,
@@ -197,22 +198,40 @@ export const SingleNodeCWDashboardJSON = {
197198
}
198199
},
199200
{
200-
"height": 6,
201+
"height": 4,
201202
"width": 8,
202-
"y": 18,
203-
"x": 0,
203+
"y": 12,
204+
"x": 8,
204205
"type": "metric",
205206
"properties": {
206207
"metrics": [
207-
[ "CWAgent", "disk_used_percent","InstanceId", "${INSTANCE_ID}", "device", "nvme1n1", "path", "/var/lib/rippled", "fstype", "xfs", { "region": "${REGION}", "label": "/var/lib/rippled" } ]
208+
[ "CWAgent", "disk_used_percent", "InstanceId", "${INSTANCE_ID}", "device", "nvme1n1", "path", "/var/lib/rippled", "fstype", "xfs", { "region": "${REGION}", "label": "/var/lib/rippled" } ]
208209
],
209210
"sparkline": true,
210211
"view": "singleValue",
211212
"region": "${REGION}",
212213
"title": "nvme1n1 Disk Used (%)",
213214
"period": 60,
214-
"stat": "Average"
215+
"stat": "Maximum"
216+
}
217+
},
218+
{
219+
"type": "metric",
220+
"x": 16,
221+
"y": 4,
222+
"width": 8,
223+
"height": 4,
224+
"properties": {
225+
"metrics": [
226+
[ "CWAgent", "XRP_Delta_Sequence", "InstanceId", "${INSTANCE_ID}", { "region": "${REGION}", "label": "XRP Current - Validated Sequence" } ]
227+
],
228+
"view": "timeSeries",
229+
"stacked": false,
230+
"region": "${REGION}",
231+
"period": 300,
232+
"stat": "Maximum",
233+
"title": "XRP Current - Validated Sequence"
215234
}
216235
}
217236
]
218-
}
237+
}

0 commit comments

Comments
 (0)