Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automatically start e2e testing #105

Merged
merged 2 commits into from
Jul 26, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions hack/lib/util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ function ssh-config {
--ssh-flag="-o LogLevel=quiet" \
--ssh-flag="-o ConnectTimeout=30" \
--project "${PROJECT}" \
--zone="${zone}" \
--zone "${zone}" \
--command "${cmd}" \
--quiet
--quiet &
}
28 changes: 26 additions & 2 deletions hack/test-config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
GRS_ROOT=$(dirname "${BASH_SOURCE[0]}")/..

SIM_NUM=${SIM_NUM:-2}
CLIENT_NUM=${CLIENT_NUM:-4}
CLIENT_NUM=${CLIENT_NUM:-2}
SERVER_NUM=${SERVER_NUM:-1}
ZONE=${GRS_GCE_ZONE:-"us-central1-a"}
REGION=${ZONE%-*}
Expand Down Expand Up @@ -56,4 +56,28 @@ SERVER_LOG_DIR=${SERVER_LOG_DIR:-"${DIR_ROOT}/logs"}
CLIENT_LOG_DIR=${CLIENT_LOG_DIR:-"${DIR_ROOT}/logs"}
DES_LOG_DIR=${DES_LOG_DIR:-"${DIR_ROOT}/grs/logs/${SERVER_NUM}se${SIM_NUM}si${CLIENT_NUM}cl"}
DES_LOG_INSTANCE=${DES_LOG_INSTANCE:-"sonyadev4"}
DES_LOG_INSTANCE_ZONE=${DES_LOG_INSTANCE_ZONE:-"us-central1-a"}
DES_LOG_INSTANCE_ZONE=${DES_LOG_INSTANCE_ZONE:-"us-central1-a"}


# Parameters consumed by hack/test-rune2e.sh when it starts the resource
# management service, the simulators and the schedulers. Every value can be
# overridden from the environment.

# Verbosity levels; each is passed to its component as -v=<level>.
SIM_LOG_LEVEL=${SIM_LOG_LEVEL:-3}
SERVER_LOG_LEVEL=${SERVER_LOG_LEVEL:-3}
CLIENT_LOG_LEVEL=${CLIENT_LOG_LEVEL:-3}
# Directory on each remote instance that the start command cd's into before
# invoking `go run`.
# NOTE(review): the defaults point at one user's home directory; override
# them when the code is checked out elsewhere on the instances.
SIM_CODE_ROOT=${SIM_CODE_ROOT:-"/home/sonyali/go/src/global-resource-service"}
SERVER_CODE_ROOT=${SERVER_CODE_ROOT:-"/home/sonyali/go/src/global-resource-service"}
CLIENT_CODE_ROOT=${CLIENT_CODE_ROOT:-"/home/sonyali/go/src/global-resource-service"}
# Extra command-line arguments appended verbatim to each component's command.
SERVICE_EXTRA_ARGS=${SERVICE_EXTRA_ARGS:-}
SIM_EXTRA_ARGS=${SIM_EXTRA_ARGS:-}
CLIENT_EXTRA_ARGS=${CLIENT_EXTRA_ARGS:-}
# Passed to the simulator as --master_port and appended to each simulator IP
# when test-setup.sh builds RESOURCE_URLS.
SIM_PORT=${SIM_PORT:-"9119"}

# Comma-separated region names, one per simulator; test-rune2e.sh refuses to
# start the simulators unless the number of entries equals SIM_NUM.
SIM_REGIONS=${SIM_REGIONS:-"Beijing,Shanghai"}
# Passed to each simulator as --rp_num and --nodes_per_rp respectively.
SIM_RP_NUM=${SIM_RP_NUM:-"10"}
NODES_PER_RP=${NODES_PER_RP:-"20000"}

# Passed to each scheduler as --request_machines and --limit respectively.
SCHEDULER_REQUEST_MACHINE=${SCHEDULER_REQUEST_MACHINE:-"25000"}
SCHEDULER_REQUEST_LIMIT=${SCHEDULER_REQUEST_LIMIT:-"26000"}
# Total number of scheduler processes; test-rune2e.sh splits them across the
# client machines.
SCHEDULER_NUM=${SCHEDULER_NUM:-"16"}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason to default to 16? Also, is 14 scheduler instances per machine too many?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By default we have 2 simulators and 400k nodes in total, and SCHEDULER_REQUEST_MACHINE is 25000, so the default is set to 16 schedulers to cover all nodes (16 × 25,000 = 400,000).
For this setup, the number of schedulers per machine is assigned dynamically based on the total SCHEDULER_NUM and the number of client machines.
So far, our testing with 3 client machines and 40 schedulers has been working well.


#### When "true", test-setup.sh runs hack/test-rune2e.sh once provisioning is
#### done, which starts the resource management service, the simulators and
#### the schedulers automatically. Any other value only prints the arguments
#### needed to start them by hand.
AUTORUN_E2E=${AUTORUN_E2E:-true}
2 changes: 2 additions & 0 deletions hack/test-logcollect.sh
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ fi

echo "Copying logs to destination instance."
copy-logs "${DES_LOG_INSTANCE}" "${DES_LOG_INSTANCE_ZONE}" "${DESTINATION}" "${DES_LOG_DIR}"
echo "Removing local copy."
sudo rm -r "${DESTINATION}"



175 changes: 175 additions & 0 deletions hack/test-rune2e.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
#!/usr/bin/env bash

### Only gcloud is supported.
### Please ensure gcloud is installed before running this script.
GRS_ROOT=$(dirname "${BASH_SOURCE[0]}")/..

source "${GRS_ROOT}/hack/test-config.sh"
source "${GRS_ROOT}/hack/lib/util.sh"

#######################################
# Start the resource management service on a remote instance.
# Assembles a `go run` command line and hands it to ssh-config.
# Globals:
#   SERVER_CODE_ROOT (read) - directory the remote command cd's into
#   SERVER_LOG_DIR   (read) - remote directory created for <name>.log
#   SERVICE_ZONE     (read) - fallback zone when $3 is empty
# Arguments:
#   $1 - instance name; also passed to the service as --master_ip
#   $2 - value passed as --resource_urls
#   $3 - zone of the instance, forwarded to ssh-config
#   $4 - optional verbosity; adds -v=<level> when non-empty
#   $5 - optional extra arguments appended verbatim
# Outputs:
#   a progress line on stdout
#######################################
function start-service {
  local name="$1"
  local urls="$2"
  local zone="$3"
  local log_level="${4:-}"
  local extra_args="${5:-}"
  # Keep the working variables local so they cannot clobber the caller's.
  local cmd args log_file

  echo "Starting resource management service on ${name}"

  args=" --master_ip=${name}"
  args+=" --resource_urls=${urls}"
  # Quoted test: an empty level means "no -v flag" instead of a `[` error.
  if [ -n "${log_level}" ]; then
    args+=" -v=${log_level}"
  fi
  args+=" ${extra_args}"

  log_file="${name}.log"
  cmd="cd ${SERVER_CODE_ROOT}"
  cmd+=" && mkdir -p ${SERVER_LOG_DIR}"
  cmd+=" && /usr/local/go/bin/go run resource-management/cmds/service-api/service-api.go ${args} > ${SERVER_LOG_DIR}/${log_file} 2>&1 "

  # Honour the zone argument; fall back to SERVICE_ZONE only when it is empty.
  ssh-config "${cmd}" "${name}" "${zone:-${SERVICE_ZONE:-}}"
}


#######################################
# Start a resource region manager simulator on a remote instance.
# Assembles a `go run` command line and hands it to ssh-config.
# Globals:
#   SIM_CODE_ROOT (read) - directory the remote command cd's into
#   SIM_LOG_DIR   (read) - remote directory that receives <name>.log
# Arguments:
#   $1 - instance name
#   $2 - value passed as --region_name
#   $3 - value passed as --rp_num
#   $4 - value passed as --nodes_per_rp
#   $5 - value passed as --master_port
#   $6 - zone of the instance, forwarded to ssh-config
#   $7 - optional verbosity; adds -v=<level> when non-empty
#   $8 - optional extra arguments appended verbatim
# Outputs:
#   a progress line on stdout
#######################################
function start-simulator {
  local name="$1"
  local region_name="$2"
  local rp_num="$3"
  local nodes_per_rp="$4"
  local sim_port="$5"
  local zone="$6"
  local log_level="${7:-}"
  local extra_args="${8:-}"
  # Keep the working variables local so they cannot clobber the caller's.
  local cmd args log_file

  echo "Starting simulator on ${name}"

  args=" --region_name=${region_name}"
  args+=" --rp_num=${rp_num}"
  args+=" --nodes_per_rp=${nodes_per_rp}"
  args+=" --master_port=${sim_port}"
  # Quoted test: an empty level means "no -v flag" instead of a `[` error.
  if [ -n "${log_level}" ]; then
    args+=" -v=${log_level}"
  fi
  args+=" ${extra_args}"

  log_file="${name}.log"
  cmd="cd ${SIM_CODE_ROOT}"
  cmd+=" && mkdir -p ${SIM_LOG_DIR}"
  cmd+=" && /usr/local/go/bin/go run resource-management/test/resourceRegionMgrSimulator/main.go ${args} > ${SIM_LOG_DIR}/${log_file} 2>&1 "

  ssh-config "${cmd}" "${name}" "${zone}"
}

#######################################
# Start one or more scheduler (e2e client) processes on a remote instance.
# Each process is launched through its own ssh-config call, one second
# apart, and writes to its own numbered log file.
# Globals:
#   CLIENT_CODE_ROOT (read) - directory the remote command cd's into
#   CLIENT_LOG_DIR   (read) - remote directory that receives <name>.log.<i>
# Arguments:
#   $1 - instance name
#   $2 - zone of the instance, forwarded to ssh-config
#   $3 - number of scheduler processes to start on this instance
#   $4 - value passed as --service_url
#   $5 - value passed as --request_machines
#   $6 - value passed as --limit
#   $7 - optional verbosity; adds -v=<level> when non-empty
#   $8 - optional extra arguments appended verbatim
# Outputs:
#   a progress line on stdout
#######################################
function start-scheduler {
  local name="$1"
  local zone="$2"
  local service_num="$3"
  local service_url="$4"
  local request_machines="$5"
  local limit="$6"
  local log_level="${7:-}"
  local extra_args="${8:-}"
  # Keep the working variables local so they cannot clobber the caller's.
  local cmd args log_file gocmd sshcmd i

  echo "Starting ${service_num} schedluer service on ${name}"

  cmd="cd ${CLIENT_CODE_ROOT}"
  cmd+=" && mkdir -p ${CLIENT_LOG_DIR}"

  args=" --service_url=${service_url}"
  args+=" --request_machines=${request_machines}"
  args+=" --limit=${limit}"
  args+=" --action=watch"
  args+=" --repeats=1"
  # Quoted test: an empty level means "no -v flag" instead of a `[` error.
  if [ -n "${log_level}" ]; then
    args+=" -v=${log_level}"
  fi
  args+=" ${extra_args}"

  log_file="${name}.log"
  for (( i = 0; i < service_num; i++ )); do
    sleep 1
    # Write into CLIENT_LOG_DIR, the directory created above; the previous
    # redirect targeted SIM_LOG_DIR, which this command never creates.
    gocmd=" && /usr/local/go/bin/go run resource-management/test/e2e/singleClientTest.go ${args} > ${CLIENT_LOG_DIR}/${log_file}.${i} 2>&1 &"
    sshcmd="${cmd}${gocmd}"
    ssh-config "${sshcmd}" "${name}" "${zone}"
  done
}

###############
# main function
###############

# Split the comma-separated zone and region lists into bash arrays.
# NOTE(review): SIM_ZONE and CLIENT_ZONE are not assigned anywhere in the
# visible part of this change — presumably they come from test-config.sh;
# confirm.
#IFS=','; INSTANCE_SERVER_ZONE=($SERVER_ZONE); unset IFS;
IFS=','; INSTANCE_SIM_ZONE=($SIM_ZONE); unset IFS;
IFS=','; INSTANCE_CLIENT_ZONE=($CLIENT_ZONE); unset IFS;
IFS=','; SIM_REGION_LIST=($SIM_REGIONS); unset IFS;

### TODO:
### All components are started with `go run` for now;
### building binaries and starting from them will be added later.

## Only a single resource management server is started.
## MASTER_IP, RESOURCE_URLS and SERVICE_ZONE are exported by test-setup.sh
## before it invokes this script.
if [ ${SERVER_NUM} -gt 0 ]; then
if [[ "${MASTER_IP}" != "" && "${RESOURCE_URLS}" != "" ]]; then
start-service "${MASTER_IP}" "${RESOURCE_URLS}" "${SERVICE_ZONE}" "${SERVER_LOG_LEVEL}" "${SERVICE_EXTRA_ARGS}"
else
echo "Failed to start service, Please ensure MASTER_IP: ${MASTER_IP} and RESOURCE_URLS: ${RESOURCE_URLS} set correctly"
fi
fi

# This wait runs unconditionally, even when no service was started above.
echo "Waiting 10 seconds to get resource management service running"
sleep 10

# Start one simulator per entry in SIM_REGIONS; the region list must contain
# exactly SIM_NUM entries or nothing is started.
if [ ${SIM_NUM} -gt 0 ]; then
if [[ "${#SIM_REGION_LIST[@]}" == "${SIM_NUM}" ]]; then
if [ ${#INSTANCE_SIM_ZONE[@]} == 1 ]; then
# Single zone: the simulators live in the managed instance group
# "<SIM_INSTANCE_PREFIX>-<zone>-mig"; list its instances and assign regions
# to them in listing order.
instance_names=()
instance_names=($(gcloud compute instance-groups managed list-instances \
"${SIM_INSTANCE_PREFIX}-${INSTANCE_SIM_ZONE[0]}-mig" --zone "${INSTANCE_SIM_ZONE[0]}" --project "${PROJECT}" \
--format='value(instance)'))

index=0
for name in "${instance_names[@]}"; do
start-simulator "${name}" "${SIM_REGION_LIST[$index]}" "${SIM_RP_NUM}" "${NODES_PER_RP}" "${SIM_PORT}" "${INSTANCE_SIM_ZONE[0]}" "${SIM_LOG_LEVEL}" "${SIM_EXTRA_ARGS}"
index=$(($index + 1))
done
else
# Multiple zones: one standalone instance per zone, named
# "<SIM_INSTANCE_PREFIX>-<zone>-<index>".
index=0
for zone in "${INSTANCE_SIM_ZONE[@]}"; do
start-simulator "${SIM_INSTANCE_PREFIX}-${zone}-${index}" "${SIM_REGION_LIST[$index]}" "${SIM_RP_NUM}" "${NODES_PER_RP}" "${SIM_PORT}" "${zone}" "${SIM_LOG_LEVEL}" "${SIM_EXTRA_ARGS}"
index=$(($index + 1))
done

fi
else
echo "Failed to start simulator service, Please ensure SIM_REGIONS: ${SIM_REGIONS} has same number with SIM_NUM: ${SIM_NUM}"
fi
fi

echo "Waiting 60 seconds to get simulator running"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That seems too long for the wait... On my local dev machine, a few seconds is more than enough to have the simulator up and running.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changed to 10 seconds

# Give the simulators time to come up before any scheduler connects.
sleep 60

# Start SCHEDULER_NUM scheduler processes spread over the client machines.
# SERVICE_URL is exported by test-setup.sh before it invokes this script.
#
# Distribution: every machine except the one at index CLIENT_NUM-1 gets
# SCHEDULER_NUM / CLIENT_NUM + 1 processes; that last machine gets whatever
# remains of SCHEDULER_NUM.
# NOTE(review): because of the "+ 1" the split is uneven even when
# SCHEDULER_NUM divides evenly (16 over 2 machines gives 9 and 7), and when
# the earlier machines already exceed SCHEDULER_NUM the remainder goes
# negative, so the last machine starts nothing and the total overshoots
# (2 over 4 machines gives 1, 1, 1, -1). Consider ceiling division — confirm
# the intended behavior.
if [ ${CLIENT_NUM} -gt 0 ]; then
if [[ "${SERVICE_URL}" != "" ]]; then
if [ ${#INSTANCE_CLIENT_ZONE[@]} == 1 ]; then
# Single zone: the clients live in the managed instance group
# "<CLIENT_INSTANCE_PREFIX>-<zone>-mig"; list its instances.
instance_names=()
instance_names=($(gcloud compute instance-groups managed list-instances \
"${CLIENT_INSTANCE_PREFIX}-${INSTANCE_CLIENT_ZONE[0]}-mig" --zone "${INSTANCE_CLIENT_ZONE[0]}" --project "${PROJECT}" \
--format='value(instance)'))

index=0
service_num=$((${SCHEDULER_NUM} / ${CLIENT_NUM} + 1))
for name in "${instance_names[@]}"; do
# Last machine: start only the schedulers not yet assigned.
if [ $index == $((${CLIENT_NUM} - 1)) ]; then
done_num=$((${service_num} * ${index} ))
service_num=$((${SCHEDULER_NUM} - ${done_num}))
fi
start-scheduler "${name}" "${INSTANCE_CLIENT_ZONE[0]}" "${service_num}" "${SERVICE_URL}" "${SCHEDULER_REQUEST_MACHINE}" "${SCHEDULER_REQUEST_LIMIT}" "${CLIENT_LOG_LEVEL}" "${CLIENT_EXTRA_ARGS}"
index=$(($index + 1))
done
else
# Multiple zones: one standalone instance per zone, named
# "<CLIENT_INSTANCE_PREFIX>-<zone>-<index>".
index=0
service_num=$((${SCHEDULER_NUM} / ${CLIENT_NUM} + 1))
for zone in "${INSTANCE_CLIENT_ZONE[@]}"; do
# Last machine: start only the schedulers not yet assigned.
if [ $index == $((${CLIENT_NUM} - 1)) ]; then
done_num=$((${service_num} * ${index} ))
service_num=$((${SCHEDULER_NUM} - ${done_num}))
fi
start-scheduler "${CLIENT_INSTANCE_PREFIX}-${zone}-${index}" "${zone}" "${service_num}" "${SERVICE_URL}" "${SCHEDULER_REQUEST_MACHINE}" "${SCHEDULER_REQUEST_LIMIT}" "${CLIENT_LOG_LEVEL}" "${CLIENT_EXTRA_ARGS}"
index=$(($index + 1))
done

fi
else
echo "Failed to start scheduler service, Please ensure SERVICE_URL: ${SERVICE_URL} is correct"
fi
fi
65 changes: 62 additions & 3 deletions hack/test-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,38 @@ function get-mig-ips {
echo "${mig_ips}"
}

#######################################
# Print "<ip>:<SIM_PORT>," for every instance of a managed instance group,
# concatenated into one line (note the trailing comma).
# Globals:
#   PROJECT  (read) - gcloud project that owns the group
#   SIM_PORT (read) - port appended to every IP
# Arguments:
#   $1 - managed instance group name
#   $2 - zone of the group
# Outputs:
#   the comma-terminated URL list on stdout
#######################################
function get-mig-urls {
  local group_name="$1"
  local zone="$2"
  # Locals so that callers' instance_names/name/mig_urls are not overwritten.
  local -a instance_names=()
  local name
  local mig_urls=""

  # Word splitting is intentional: gcloud prints one instance name per token.
  # shellcheck disable=SC2207
  instance_names=($(gcloud compute instance-groups managed list-instances \
    "${group_name}" --zone "${zone}" --project "${PROJECT}" \
    --format='value(instance)'))

  for name in "${instance_names[@]}"; do
    mig_urls+="$(get-instance-ip "${name}" "${zone}"):${SIM_PORT},"
  done
  echo "${mig_urls}"
}

#######################################
# Print the names of all instances of a managed instance group as
# "<name>," concatenated into one line (note the trailing comma).
# Globals:
#   PROJECT (read) - gcloud project that owns the group
# Arguments:
#   $1 - managed instance group name
#   $2 - zone of the group
# Outputs:
#   the comma-terminated name list on stdout
#######################################
function get-mig-names {
  local group_name="$1"
  local zone="$2"
  # Locals so that callers' instance_names/name/mig_names are not overwritten.
  local -a instance_names=()
  local name
  local mig_names=""

  # Word splitting is intentional: gcloud prints one instance name per token.
  # shellcheck disable=SC2207
  instance_names=($(gcloud compute instance-groups managed list-instances \
    "${group_name}" --zone "${zone}" --project "${PROJECT}" \
    --format='value(instance)'))

  for name in "${instance_names[@]}"; do
    mig_names+="${name},"
  done
  echo "${mig_names}"
}

###############
# main function
###############
Expand Down Expand Up @@ -274,19 +306,26 @@ fi
echo "Waiting 60 seconds to get all resource started"
sleep 60

RESOURCE_URLS=""
MASTER_IP=""
SERVICE_URL=""
SERVER_ZONE=""
if [ ${SIM_NUM} -gt 0 ]; then
SIM_IPS=""
RESOURCE_URLS=""
if [ ${#INSTANCE_SIM_ZONE[@]} == 1 ]; then
SIM_IPS="$(get-mig-ips ${SIM_INSTANCE_PREFIX}-${INSTANCE_SIM_ZONE[0]}-mig ${INSTANCE_SIM_ZONE[0]})"
RESOURCE_URLS="$(get-mig-urls ${SIM_INSTANCE_PREFIX}-${INSTANCE_SIM_ZONE[0]}-mig ${INSTANCE_SIM_ZONE[0]})"
else
index=0
for zone in "${INSTANCE_SIM_ZONE[@]}"; do
SIM_IPS+="$(get-instance-ip ${SIM_INSTANCE_PREFIX}-${zone}-${index} ${zone}),"
RESOURCE_URLS+="$(get-instance-ip ${SIM_INSTANCE_PREFIX}-${zone}-${index} ${zone}):${SIM_PORT},"
index=$((index + 1))
done

fi
echo "Simulators started at ip addresss: ${SIM_IPS}"
echo "Simulators started at ip addresss: ${SIM_IPS%,}"
fi

if [ ${CLIENT_NUM} -gt 0 ]; then
Expand All @@ -301,24 +340,44 @@ if [ ${CLIENT_NUM} -gt 0 ]; then
done

fi
echo "Client schedulers started at ip addresss: ${CLIENT_IPS}"
echo "Client schedulers started at ip addresss: ${CLIENT_IPS%,}"
fi

if [ ${SERVER_NUM} -gt 0 ]; then
SERVER_IPS=""
SERVER_NAMES=""
SERVICE_ZONE="${INSTANCE_SERVER_ZONE[0]}"
if [ ${#INSTANCE_SERVER_ZONE[@]} == 1 ]; then
start-mig-redis "${SERVER_INSTANCE_PREFIX}-${INSTANCE_SERVER_ZONE[0]}-mig" "${INSTANCE_SERVER_ZONE[0]}"
SERVER_IPS="$(get-mig-ips ${SERVER_INSTANCE_PREFIX}-${INSTANCE_SERVER_ZONE[0]}-mig ${INSTANCE_SERVER_ZONE[0]})"
SERVER_NAMES="$(get-mig-names ${SERVER_INSTANCE_PREFIX}-${INSTANCE_SERVER_ZONE[0]}-mig ${INSTANCE_SERVER_ZONE[0]})"
else
index=0
for zone in "${INSTANCE_SERVER_ZONE[@]}"; do
start-instance-redis "${SERVER_INSTANCE_PREFIX}-${zone}-${index}" "${zone}"
SERVER_IPS+="$(get-instance-ip ${SERVER_INSTANCE_PREFIX}-${zone}-${index} ${zone}),"
SERVER_NAMES+="${SERVER_INSTANCE_PREFIX}-${zone}-${index},"
index=$((index + 1))
done

fi
echo "Servers started at ip addresss: ${SERVER_IPS}"
echo "Servers started at ip addresss: ${SERVER_IPS%,}"
fi

####Most cloud doesn't support binding to public IP, so using machine name to listen and bind service for now
export MASTER_IP="${SERVER_NAMES%%,*}"
export SERVICE_URL="${SERVER_IPS%%,*}:8080"
export RESOURCE_URLS="${RESOURCE_URLS%,}"
export SERVICE_ZONE # the zone on resosurce management serevice instance

echo "Done to create and start all required resouce"

if [ "${AUTORUN_E2E}" == "true" ]; then
#Starting e2e testing
"${GRS_ROOT}/hack/test-rune2e.sh"
else
echo "You can start service using args: --master_ip=${MASTER_IP} --resource_urls=${RESOURCE_URLS}"
echo "You can start scheduler using args: --service_url=${SERVICE_URL}"
fi


9 changes: 6 additions & 3 deletions hack/test-teardown.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,10 @@ if [[ "${SIM_AUTO_DELETE}" == "true" && ${SIM_NUM} -gt 0 ]]; then
delete-vm-instance "${SIM_INSTANCE_PREFIX}-${zone}-${index}" "${zone}" &
index=$(($index + 1))
done
sleep 60
fi
if [ "${SIMIMAGE_AUTO_DELETE}" == "true" ]; then
#waiting 60 seconds to get all instances deleted before delete images
sleep 60
delete-image "${SIM_IMAGE_NAME}"
delete-machine-image "${SIM_IMAGE_NAME}"
fi
Expand All @@ -102,9 +103,10 @@ if [[ "${CLIENT_AUTO_DELETE}" == "true" && ${CLIENT_NUM} -gt 0 ]]; then
delete-vm-instance "${CLIENT_INSTANCE_PREFIX}-${zone}-${index}" "${zone}" &
index=$(($index + 1))
done
sleep 60
fi
if [ "${CLIENTIMAGE_AUTO_DELETE}" == "true" ]; then
#waiting 60 seconds to get all instances deleted before delete images
sleep 60
delete-image "${CLIENT_IMAGE_NAME}"
delete-machine-image "${CLIENT_IMAGE_NAME}"
fi
Expand All @@ -122,9 +124,10 @@ if [[ "${SERVER_AUTO_DELETE}" == "true" && ${SERVER_NUM} -gt 0 ]]; then
delete-vm-instance "${SERVER_INSTANCE_PREFIX}-${zone}-${index}" "${zone}" &
index=$(($index + 1))
done
sleep 60
fi
if [ "${SERVERIMAGE_AUTO_DELETE}" == "true" ]; then
#waiting 60 seconds to get all instances deleted before delete images
sleep 60
delete-image "${SERVER_IMAGE_NAME}"
delete-machine-image "${SERVER_IMAGE_NAME}"
fi
Expand Down