
Commit 23e7e00

feat(RHIDP-9255): Add benchmark scenario for Orchestrator plugin (#228)
* feat(RHIDP-9255): Add benchmark scenario for Orchestrator plugin
  Signed-off-by: Pavel Macík <[email protected]>

* feat(scalability): Add option to always clean up before each iteration
  Signed-off-by: Pavel Macík <[email protected]>

---------

Signed-off-by: Pavel Macík <[email protected]>
1 parent 72c0334 commit 23e7e00

24 files changed (+1334 lines, -770 lines)

Makefile

Lines changed: 1 addition & 1 deletion
@@ -150,7 +150,7 @@ deploy-locust: namespace
 	else \
 		echo "Helm release \"$(LOCUST_OPERATOR)\" already exists"; \
 	fi
-	kubectl wait --timeout=180s --namespace $(LOCUST_NAMESPACE) --for=condition=ready $$(kubectl get --namespace $(LOCUST_NAMESPACE) pod -o name)
+	kubectl wait --timeout=180s --namespace $(LOCUST_NAMESPACE) --for=condition=ready $$(kubectl get --namespace $(LOCUST_NAMESPACE) pod -l app.kubernetes.io/name=locust-k8s-operator -o name)
 
 ## Uninstall locust operator helm chart
 .PHONY: undeploy-locust
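The wait now targets only the operator pod via a label selector instead of every pod in the Locust namespace, so unrelated pods are less likely to block the deploy. A quick way to preview what the selector matches before the wait runs (a sketch, assuming LOCUST_NAMESPACE is exported in the shell with the same value the Makefile uses, and that the locust-k8s-operator chart applies this label):

    kubectl get pod --namespace "$LOCUST_NAMESPACE" -l app.kubernetes.io/name=locust-k8s-operator -o name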

ci-scripts/collect-results.sh

Lines changed: 63 additions & 33 deletions
@@ -22,6 +22,8 @@ RHDH_NAMESPACE=${RHDH_NAMESPACE:-rhdh-performance}
 ENABLE_PROFILING="${ENABLE_PROFILING:-false}"
 RHDH_INSTALL_METHOD="${RHDH_INSTALL_METHOD:-helm}"
 LOCUST_NAMESPACE="${LOCUST_NAMESPACE:-locust-operator}"
+RHDH_METRIC="${RHDH_METRIC:-true}"
+PSQL_EXPORT="${PSQL_EXPORT:-false}"
 
 cli="oc"
 clin="$cli -n $RHDH_NAMESPACE"
@@ -36,7 +38,7 @@ gather_pod_logs() {
     for pod in $pods; do
         echo "$pod"
         containers=$($cli -n "$namespace" get pod "$pod" -o json | jq -r '.spec.containers[].name')
-        if $cli -n "$namespace" get pod "$pod" -o json | jq -e '.spec.initContainers? // empty' > /dev/null; then
+        if $cli -n "$namespace" get pod "$pod" -o json | jq -e '.spec.initContainers? // empty' >/dev/null; then
             init_containers=$($cli -n "$namespace" get pod "$pod" -o json | jq -r '.spec.initContainers[].name // empty')
         else
             init_containers=""
@@ -65,6 +67,11 @@ for label in app.kubernetes.io/name=developer-hub app.kubernetes.io/name=postgre
 done
 gather_pod_logs "${ARTIFACT_DIR}/rhdh-logs" "$pods" "$RHDH_NAMESPACE"
 
+if [ "$ENABLE_ORCHESTRATOR" == "true" ]; then
+    pods=$($clin get pods -l app.kubernetes.io/component=serverless-workflow -o jsonpath='{.items[*].metadata.name}')
+    gather_pod_logs "${ARTIFACT_DIR}/workflow-logs" "$pods" "$RHDH_NAMESPACE"
+fi
+
 monitoring_collection_data=$ARTIFACT_DIR/benchmark.json
 monitoring_collection_log=$ARTIFACT_DIR/monitoring-collection.log
 monitoring_collection_dir=$ARTIFACT_DIR/monitoring-collection-raw-data-dir
@@ -112,6 +119,7 @@ try_gather_file "${TMP_DIR}/rbac-config.yaml"
 try_gather_file "${TMP_DIR}/locust-k8s-operator.values.yaml"
 try_gather_file load-test.log
 try_gather_file postgresql.log
+try_gather_dir "${TMP_DIR}/workflows"
 
 # Metrics
 PYTHON_VENV_DIR=.venv
@@ -140,6 +148,23 @@ timestamp_diff() {
     python3 -c "from datetime import datetime; st = datetime.strptime('$started', '%Y-%m-%d %H:%M:%S.%f%z'); et = datetime.strptime('$ended', '%Y-%m-%d %H:%M:%S.%f%z'); diff = et - st; print(f'{diff.total_seconds():.9f}')"
 }
 
+metrics_config_dir="${ARTIFACT_DIR}/metrics-config"
+mkdir -p "$metrics_config_dir"
+
+collect_additional_metrics() {
+    echo "$(date -u -Ins) Collecting metrics from $1"
+    status_data.py \
+        --status-data-file "$monitoring_collection_data" \
+        --additional "$1" \
+        --monitoring-start "$mstart" \
+        --monitoring-end "$mend" \
+        --monitoring-raw-data-dir "$monitoring_collection_dir" \
+        --prometheus-host "https://$mhost" \
+        --prometheus-port 443 \
+        --prometheus-token "$($cli whoami -t)" \
+        -d >>"$monitoring_collection_log" 2>&1
+}
+
 # populate phase
 if [ "$PRE_LOAD_DB" == "true" ]; then
     start_ts="$(cat "${ARTIFACT_DIR}/populate-before")"
@@ -163,7 +188,7 @@ if [ "$PRE_LOAD_DB" == "true" ]; then
     populate_catalog_started=$(cat "${ARTIFACT_DIR}/populate-catalog-before")
    populate_catalog_ended=$(cat "${ARTIFACT_DIR}/populate-catalog-after")
     populate_catalog_duration="$(timestamp_diff "$populate_catalog_started" "$populate_catalog_ended")"
-
+    echo "$(date -u -Ins) Collecting Populate phase metrics"
     status_data.py \
         --status-data-file "$monitoring_collection_data" \
         --set \
@@ -180,16 +205,20 @@ if [ "$PRE_LOAD_DB" == "true" ]; then
         measurements.timings.populate_catalog.ended="$populate_catalog_ended" \
         measurements.timings.populate_catalog.duration="$populate_catalog_duration" \
         -d >"$monitoring_collection_log" 2>&1
-    status_data.py \
-        --status-data-file "$monitoring_collection_data" \
-        --additional config/cluster_read_config.populate.yaml \
-        --monitoring-start "$mstart" \
-        --monitoring-end "$mend" \
-        --monitoring-raw-data-dir "$monitoring_collection_dir" \
-        --prometheus-host "https://$mhost" \
-        --prometheus-port 443 \
-        --prometheus-token "$($cli whoami -t)" \
-        -d >>"$monitoring_collection_log" 2>&1
+    envsubst <config/cluster_read_config.populate.yaml >"${metrics_config_dir}/cluster_read_config.populate.yaml"
+    collect_additional_metrics "${metrics_config_dir}/cluster_read_config.populate.yaml"
+    if [ "$PSQL_EXPORT" == "true" ]; then
+        echo "$(date -u -Ins) Collecting Postgresql specific metrics (populate)"
+        envsubst <config/cluster_read_config.populate.postgresql.yaml >"${metrics_config_dir}/cluster_read_config.populate.postgresql.yaml"
+        collect_additional_metrics "${metrics_config_dir}/cluster_read_config.populate.postgresql.yaml" "$monitoring_collection_data"
+    fi
+    #NodeJS specific metrics
+    if [ "$RHDH_METRIC" == "true" ]; then
+        echo "$(date -u -Ins) Collecting NodeJS specific metrics (populate)"
+        envsubst <config/cluster_read_config.populate.nodejs.yaml >"${metrics_config_dir}/cluster_read_config.populate.nodejs.yaml"
+        collect_additional_metrics "${metrics_config_dir}/cluster_read_config.populate.nodejs.yaml"
+    fi
+
 fi
 # test phase
 start_ts="$(cat "${ARTIFACT_DIR}/benchmark-before")"
@@ -201,6 +230,7 @@ mhost=$(kubectl -n openshift-monitoring get route -l app.kubernetes.io/name=than
 mversion=$(sed -n 's/^__version__ = "\(.*\)"/\1/p' "scenarios/$(cat "${ARTIFACT_DIR}/benchmark-scenario").py")
 benchmark_started=$(cat "${ARTIFACT_DIR}/benchmark-before")
 benchmark_ended=$(cat "${ARTIFACT_DIR}/benchmark-after")
+echo "$(date -u -Ins) Collecting Test phase metrics"
 status_data.py \
     --status-data-file "$monitoring_collection_data" \
     --set \
@@ -211,28 +241,26 @@ status_data.py \
     metadata.scenario.name="$(cat "${ARTIFACT_DIR}/benchmark-scenario")" \
     metadata.scenario.version="$mversion" \
     -d >"$monitoring_collection_log" 2>&1
-status_data.py \
-    --status-data-file "$monitoring_collection_data" \
-    --additional config/cluster_read_config.test.yaml \
-    --monitoring-start "$mstart" \
-    --monitoring-end "$mend" \
-    --monitoring-raw-data-dir "$monitoring_collection_dir" \
-    --prometheus-host "https://$mhost" \
-    --prometheus-port 443 \
-    --prometheus-token "$($cli whoami -t)" \
-    -d >>"$monitoring_collection_log" 2>&1
+envsubst <config/cluster_read_config.test.yaml >"${metrics_config_dir}/cluster_read_config.test.yaml"
+collect_additional_metrics "${metrics_config_dir}/cluster_read_config.test.yaml"
 #Scenario specific metrics
-if [ -f "scenarios/$(cat "${ARTIFACT_DIR}/benchmark-scenario").metrics.yaml" ]; then
-    status_data.py \
-        --status-data-file "$monitoring_collection_data" \
-        --additional "scenarios/$(cat "${ARTIFACT_DIR}/benchmark-scenario").metrics.yaml" \
-        --monitoring-start "$mstart" \
-        --monitoring-end "$mend" \
-        --monitoring-raw-data-dir "$monitoring_collection_dir" \
-        --prometheus-host "https://$mhost" \
-        --prometheus-port 443 \
-        --prometheus-token "$($cli whoami -t)" \
-        -d >>"$monitoring_collection_log" 2>&1
+echo "$(date -u -Ins) Collecting Scenario specific metrics"
+benchmark_scenario=$(cat "${ARTIFACT_DIR}/benchmark-scenario")
+if [ -f "scenarios/$benchmark_scenario.metrics.yaml" ]; then
+    envsubst <"scenarios/$benchmark_scenario.metrics.yaml" >"${metrics_config_dir}/$benchmark_scenario.metrics.yaml"
+    collect_additional_metrics "${metrics_config_dir}/$benchmark_scenario.metrics.yaml"
+fi
+#Postgresql specific metrics
+if [ "$PSQL_EXPORT" == "true" ]; then
+    echo "$(date -u -Ins) Collecting Postgresql specific metrics (test)"
+    envsubst <config/cluster_read_config.test.postgresql.yaml >"${metrics_config_dir}/cluster_read_config.test.postgresql.yaml"
+    collect_additional_metrics "${metrics_config_dir}/cluster_read_config.test.postgresql.yaml"
+fi
+#NodeJS specific metrics
+if [ "$RHDH_METRIC" == "true" ]; then
+    echo "$(date -u -Ins) Collecting NodeJS specific metrics (test)"
+    envsubst <config/cluster_read_config.test.nodejs.yaml >"${metrics_config_dir}/cluster_read_config.test.nodejs.yaml"
+    collect_additional_metrics "${metrics_config_dir}/cluster_read_config.test.nodejs.yaml"
 fi
 set +u
 deactivate
@@ -254,8 +282,10 @@ if [ "$RHDH_INSTALL_METHOD" == "helm" ] && ${ENABLE_PROFILING}; then
     $clin exec "$pod" -c backstage-backend -- /bin/bash -c 'find /opt/app-root/src -name "*.heapsnapshot" -exec base64 -w0 {} \;' | base64 -d >"$memory_profile_file"
 fi
 
+echo "$(date -u -Ins) Generating summary CSV"
 ./ci-scripts/runs-to-csv.sh "$ARTIFACT_DIR" >"$ARTIFACT_DIR/summary.csv"
 
+echo "$(date -u -Ins) Collecting error reports"
 # Error report
 find "$ARTIFACT_DIR" -name load-test.log -print0 | sort -V | while IFS= read -r file; do
     if grep "Error report" "$file" >/dev/null; then
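The repeated status_data.py invocations are consolidated into a single collect_additional_metrics helper, and every metrics config is first rendered through envsubst into ${ARTIFACT_DIR}/metrics-config. Two new toggles gate the extra metric groups: RHDH_METRIC (default true) for the NodeJS metrics and PSQL_EXPORT (default false) for the PostgreSQL metrics. A sketch of switching them on for a manual run of the collection script; the exact invocation in CI, and variables such as ARTIFACT_DIR, come from the surrounding pipeline:

    export PSQL_EXPORT=true   # also collect the PostgreSQL-specific metric group
    export RHDH_METRIC=true   # keep the NodeJS metric group (this is the default)
    ./ci-scripts/collect-results.sh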

ci-scripts/rhdh-setup/create_resource.sh

Lines changed: 3 additions & 3 deletions
@@ -395,12 +395,12 @@ create_groups() {
     if [[ "$RBAC_POLICY" == "$RBAC_POLICY_NESTED_GROUPS" ]]; then
         N="${RBAC_POLICY_SIZE:-$GROUP_COUNT}"
         [ "$N" -gt "$GROUP_COUNT" ] && N="$GROUP_COUNT"
-        seq 1 "$N" | xargs -n1 -P1 -I{} bash -lc "create_group \"\$1\"" _ {}
+        seq 1 "$N" | xargs -P1 -I{} bash -lc "create_group \"\$1\"" _ {}
         if [ "$GROUP_COUNT" -gt "$N" ]; then
-            seq $((N+1)) "$GROUP_COUNT" | xargs -n1 -P"${POPULATION_CONCURRENCY}" -I{} bash -lc "create_group \"\$1\"" _ {}
+            seq $((N+1)) "$GROUP_COUNT" | xargs -P"${POPULATION_CONCURRENCY}" -I{} bash -lc "create_group \"\$1\"" _ {}
         fi
     else
-        seq 1 "$GROUP_COUNT" | xargs -n1 -P"${POPULATION_CONCURRENCY}" -I{} bash -lc "create_group \"\$1\"" _ {}
+        seq 1 "$GROUP_COUNT" | xargs -P"${POPULATION_CONCURRENCY}" -I{} bash -lc "create_group \"\$1\"" _ {}
     fi
 }
 
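Dropping -n1 is a cleanup: with -I{} each input item already gets its own command invocation, and GNU xargs treats -n and -I as mutually exclusive, warning about and ignoring the -n value. A minimal illustration of the retained form, with a hypothetical echo payload standing in for the real create_group call:

    seq 1 3 | xargs -P2 -I{} bash -c 'echo "creating group {}"'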

ci-scripts/rhdh-setup/deploy.sh

Lines changed: 20 additions & 23 deletions
@@ -221,6 +221,10 @@ install() {
         log_error "Installation failed!!!"
         return "$exit_code"
     fi
+
+    if ${ENABLE_ORCHESTRATOR}; then
+        install_workflows
+    fi
     psql_debug
 }
 
@@ -380,6 +384,16 @@ backstage_install() {
     done
 }
 
+install_workflows() {
+    log_info "Installing Orchestrator workflows"
+    mkdir -p "$TMP_DIR/workflows"
+    find template/workflows/basic -type f -print0 | while IFS= read -r -d '' i; do
+        # shellcheck disable=SC2094
+        envsubst <"$i" >"$TMP_DIR/workflows/$(basename "$i")"
+        $clin apply -f "$TMP_DIR/workflows/$(basename "$i")"
+    done
+}
+
 # shellcheck disable=SC2016,SC1004
 install_rhdh_with_helm() {
     chart_values=template/backstage/helm/chart-values.yaml
@@ -617,28 +631,8 @@ setup_monitoring() {
         oc -n openshift-user-workload-monitoring rollout status statefulset/prometheus-user-workload -w
     fi
 
-    log_info "Setup monitoring"
-    cat <<EOF | kubectl -n locust-operator apply -f -
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
-  labels:
-    app: locust-operator
-  annotations:
-    networkoperator.openshift.io/ignore-errors: ""
-  name: locust-operator-monitor
-  namespace: locust-operator
-spec:
-  endpoints:
-    - interval: 10s
-      port: prometheus-metrics
-      honorLabels: true
-  jobLabel: app
-  namespaceSelector:
-    matchNames:
-      - locust-operator
-  selector: {}
-EOF
+    log_info "Setting up Locust monitoring"
+    envsubst <template/locust-metrics/locust-service-monitor.yaml | kubectl -n "${LOCUST_NAMESPACE}" apply -f -
 }
 
 delete() {
@@ -748,7 +742,7 @@ delete_orchestrator_infra() {
     $cli delete ns knative-serving-ingress --ignore-not-found=true --wait
 }
 
-while getopts "oi:mrd" flag; do
+while getopts "oi:mrdw" flag; do
     case "${flag}" in
     o)
         export INSTALL_METHOD=olm
@@ -764,6 +758,9 @@ while getopts "oi:mrd" flag; do
         AUTH_PROVIDER="$OPTARG"
         install
         ;;
+    w)
+        install_workflows
+        ;;
     m)
         setup_monitoring
         ;;
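install() now installs the Orchestrator workflows when ENABLE_ORCHESTRATOR is set: the manifests under template/workflows/basic are rendered with envsubst into $TMP_DIR/workflows and applied to the RHDH namespace. The new -w getopts flag exposes the same step on its own, and the inline ServiceMonitor heredoc moves into a template rendered per LOCUST_NAMESPACE. A sketch of re-applying only the workflows against an existing deployment, assuming the usual environment (e.g. RHDH_NAMESPACE, TMP_DIR) is set as for a normal deploy:

    ./ci-scripts/rhdh-setup/deploy.sh -w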

ci-scripts/rhdh-setup/template/backstage/app-config.yaml

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ catalog:
       groupQuerySize: 1000
       schedule:
         frequency:
-          minutes: 30
+          hours: 2
         timeout:
           minutes: 1
         initialDelay:

ci-scripts/rhdh-setup/template/backstage/helm/chart-values.yaml

Lines changed: 10 additions & 10 deletions
@@ -25,16 +25,16 @@ orchestrator:
 upstream:
   backstage:
     appConfig:
-    database:
-      connection:
-        password: "${POSTGRESQL_ADMIN_PASSWORD}"
-        user: postgres
-    auth:
-      externalAccess:
-        - type: legacy
-          options:
-            secret: ${BACKEND_SECRET}
-            subject: legacy-catalog
+      database:
+        connection:
+          password: "${POSTGRESQL_ADMIN_PASSWORD}"
+          user: postgres
+      auth:
+        externalAccess:
+          - type: legacy
+            options:
+              secret: ${BACKEND_SECRET}
+              subject: legacy-catalog
     containerPorts:
       backend: 7007
     extraAppConfig:
Lines changed: 3 additions & 10 deletions
@@ -1,11 +1,4 @@
-p, role:default/a, orchestrator.workflow.read, read, allow
-p, role:default/a, orchestrator.workflow.execute, create, allow
 p, role:default/a, orchestrator.workflow, read, allow
-p, role:default/a, orchestrator.workflow, use, allow
-p, role:default/a, orchestrator.workflow.instance.read, read, allow
-p, role:default/a, orchestrator.workflow.instance.abort, delete, allow
-p, role:default/a, orchestrator.workflow.instances.read, read, allow
-p, role:default/a, scaffolder.action.execute, use, allow
-p, role:default/a, scaffolder.task.create, create, allow
-p, role:default/a, scaffolder.task.read, read, allow
-p, role:default/a, scaffolder.task.cancel, delete, allow
+p, role:default/a, orchestrator.workflow.use, update, allow
+p, role:default/a, orchestrator.workflowAdminView, read, allow
+p, role:default/a, orchestrator.instanceAdminView, read, allow
Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  annotations:
+    networkoperator.openshift.io/ignore-errors: ''
+  labels:
+    app: locust-operator
+  name: ${LOCUST_NAMESPACE}-monitor
+  namespace: ${LOCUST_NAMESPACE}
+spec:
+  endpoints:
+    - honorLabels: true
+      interval: 10s
+      port: prometheus-metrics
+  jobLabel: app
+  namespaceSelector:
+    matchNames:
+      - ${LOCUST_NAMESPACE}
+  selector: {}
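This new ServiceMonitor parameterizes the name and namespace with ${LOCUST_NAMESPACE}; it is presumably the template/locust-metrics/locust-service-monitor.yaml file that setup_monitoring() in deploy.sh now renders, although the file path is not shown in this view. The apply step, as it appears in the deploy.sh hunk above:

    export LOCUST_NAMESPACE=locust-operator   # default used elsewhere in these scripts
    envsubst <template/locust-metrics/locust-service-monitor.yaml | kubectl -n "${LOCUST_NAMESPACE}" apply -f -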
Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+apiVersion: v1
+data:
+  application.properties: |2
+
+    # This property is used to select the log level, which controls the amount
+    # of information logged on HTTP requests based on the severity of the events.
+    # Possible values: OFF, FATAL, ERROR, WARN, INFO, DEBUG, ALL.
+    # and see https://quarkus.io/guides/logging for documentation
+    quarkus.log.category."org.apache.http".level=INFO
+    quarkus.log.level=INFO
+
+    # enable persistence
+    kie.flyway.enabled = true
+kind: ConfigMap
+metadata:
+  creationTimestamp: null
+  labels:
+    app: basic
+    app.kubernetes.io/component: serverless-workflow
+    app.kubernetes.io/managed-by: sonataflow-operator
+    app.kubernetes.io/name: basic
+    sonataflow.org/workflow-app: basic
+    sonataflow.org/workflow-namespace: ""
+  name: basic-props
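The labels on this workflow ConfigMap line up with the app.kubernetes.io/component=serverless-workflow selector that collect-results.sh now uses to gather workflow pod logs. A quick check that the workflow pods carry that label (a sketch, assuming the SonataFlow operator propagates the same labels to the pods and RHDH_NAMESPACE is set as in the scripts above):

    oc -n "$RHDH_NAMESPACE" get pods -l app.kubernetes.io/component=serverless-workflow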
