Skip to content

Commit

Permalink
Migration job (kserve#1834)
Browse files Browse the repository at this point in the history
* Adding migration job for isvc migration from kfserving

* remove revision clean up step and ignore webapp from validation

* verify inferenceservice status before and after migration
  • Loading branch information
Suresh-Nakkeran authored Oct 6, 2021
1 parent 582c70d commit 57249a0
Show file tree
Hide file tree
Showing 3 changed files with 373 additions and 0 deletions.
14 changes: 14 additions & 0 deletions hack/kserve_migration/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Image for the one-shot KFServing -> KServe migration job.
# NOTE(review): alpine:latest is unpinned; consider pinning a release tag for
# reproducible builds.
FROM alpine:latest

# Tools used by the downloads below and by kserve_migration.sh (bash, jq).
RUN apk add --no-cache wget bash curl jq

# Install the current stable kubectl.
# -f makes curl fail on HTTP errors instead of saving an error page as the binary.
RUN curl -fLO "https://storage.googleapis.com/kubernetes-release/release/$(curl -fs https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl" && \
    chmod +x ./kubectl && \
    mv ./kubectl /usr/local/bin/kubectl

# yq is pinned to 3.4.1; the migration script uses the v3 `yq d -i` syntax.
RUN wget https://github.com/mikefarah/yq/releases/download/3.4.1/yq_linux_amd64 -O /usr/bin/yq && \
    chmod +x /usr/bin/yq

COPY kserve_migration.sh /kserve_migration.sh
# Do not rely on the exec bit of the file in the build context.
RUN chmod +x /kserve_migration.sh

# Absolute path so the entrypoint does not depend on the image's WORKDIR.
ENTRYPOINT ["/kserve_migration.sh"]
228 changes: 228 additions & 0 deletions hack/kserve_migration/kserve_migration.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
#!/bin/bash
# Usage: kserve_migration.sh
#
# Moves inference services from KFServing (serving.kubeflow.org) to KServe
# (serving.kserve.io).
#
# Environment:
#   REMOVE_KFSERVING  set to "false" to keep the KFServing installation after
#                     the migration; any other value (or unset) removes it.

# -e: stop at the first failing command.
# -E: ERR traps are inherited by functions, command substitutions and subshells.
set -eE

# KServe release and scratch-directory layout, exported to child processes.
KSERVE_VERSION=v0.7.0-rc0
CONFIG_DIR="config"
ISVC_CONFIG_DIR="${CONFIG_DIR}/isvc"
KSVC_CONFIG_DIR="${CONFIG_DIR}/ksvc"
export KSERVE_VERSION CONFIG_DIR ISVC_CONFIG_DIR KSVC_CONFIG_DIR

# Default: remove KFServing once the migration has finished.
CLEAN_KFSERVING="true"

# custom logger
#
# Prints "<UTC date time> [<level>] <message>" to stdout.
# Arguments:
#   $1 - log level label (e.g. INFO, ERROR)
#   $2 - message text
log() {
  # Locals keep the helper from overwriting same-named variables of the caller.
  local level=$1
  local msg=$2
  local ts
  ts=$(date -u +'%F %T')
  echo "${ts} [${level}] $msg"
}

# Validates that the controller manager is installed in the given namespace:
# both controller-manager services must be listed and at least one pod name
# must contain "<prefix>-controller-manager".
#
# Arguments:
#   $1 - namespace to inspect; "kfserving-system" selects the "kfserving"
#        name prefix, every other namespace uses "kserve".
# Exits the script with status 1 (after logging an ERROR) when a service or
# the pod is missing, including when the namespace lists nothing at all.
isControllerRunning() {
  local namespace=$1
  local prefix="kserve"
  local svc_names po_names svc_name po_name

  if [[ "${namespace}" == "kfserving-system" ]]; then
    prefix="kfserving"
  fi

  svc_names=$(kubectl get svc -n "${namespace}" -o jsonpath='{.items[*].metadata.name}')
  for svc_name in "${prefix}-controller-manager-metrics-service" \
    "${prefix}-controller-manager-service"; do
    # Substring match on the space-separated name list. Unlike the
    # ${list##*name*} idiom, this also fails when the list is empty.
    if [[ "${svc_names}" != *"${svc_name}"* ]]; then
      log ERROR "${prefix} controller services are not installed completely."
      exit 1
    fi
  done

  po_names=$(kubectl get po -n "${namespace}" -o jsonpath='{.items[*].metadata.name}')
  for po_name in "${prefix}-controller-manager"; do
    # Pod names carry a suffix, so only containment is checked.
    if [[ "${po_names}" != *"${po_name}"* ]]; then
      log ERROR "${prefix} controller services are not installed completely."
      exit 1
    fi
  done
}

# Honour the user's preference: only the exact value "false" in
# REMOVE_KFSERVING disables the KFServing clean-up.
case "${REMOVE_KFSERVING}" in
  false)
    CLEAN_KFSERVING="false"
    ;;
esac

# Pre-flight: both controllers must be installed before anything is changed.
# Each entry is "<label used in the log line>:<namespace to inspect>".
for controller_target in "kfserving:kfserving-system" "kserve:kserve"; do
  log INFO "checking whether ${controller_target%%:*} is running or not"
  isControllerRunning "${controller_target#*:}"
done

# # Deploy kserve
# log INFO "deploying kserve"
# cd ..
# KSERVE_CONFIG=kserve.yaml
# for i in 1 2 3 4 5 ; do kubectl apply -f install/${KSERVE_VERSION}/${KSERVE_CONFIG} && break || sleep 15; done
# kubectl wait --for=condition=ready --timeout=120s po --all -n kserve
# isControllerRunning kserve
# cd hack
# log INFO "kserve deployment completed"

# Get inference services config
#
# Produces:
#   isvc_ns / isvc_names   parallel arrays (namespace, name) of every KFServing
#                          inference service in the cluster
#   isvc_count             number of inference services found
#   kfserving_isvc_status  name -> status of the "Ready" condition before the
#                          migration ("null" when the service reports none)
#   ${ISVC_CONFIG_DIR}/<name>.yaml  exported manifest of each service
# NOTE(review): files and map entries are keyed by name only, so two services
# with the same name in different namespaces overwrite each other.
log INFO "getting inference services config"
# jsonpath prints "<ns1> <ns2> ...,<name1> <name2> ..." on a single line.
inference_services=$(kubectl get inferenceservice.serving.kubeflow.org -A -o jsonpath='{.items[*].metadata.namespace},{.items[*].metadata.name}')
declare -a isvc_names
declare -a isvc_ns
declare -A kfserving_isvc_status
if [[ -n "${inference_services}" ]]; then
  mkdir -p "${ISVC_CONFIG_DIR}"
  # Split on the comma first, then let default word splitting break each half
  # into its space-separated items (intentionally unquoted).
  IFS=','; isvc_split=(${inference_services}); unset IFS
  isvc_ns=(${isvc_split[0]})
  isvc_names=(${isvc_split[1]})
fi
isvc_count=${#isvc_names[@]}
for (( i=0; i<isvc_count; i++ )); do
  kubectl get inferenceservice.serving.kubeflow.org "${isvc_names[$i]}" -n "${isvc_ns[$i]}" -o yaml > "${ISVC_CONFIG_DIR}/${isvc_names[$i]}.yaml"
  # "// []" keeps jq from failing (and errexit from aborting the script) when
  # a service has no status conditions yet; such a service records "null".
  kfserving_isvc_status[${isvc_names[$i]}]=$(kubectl get inferenceservice.serving.kubeflow.org "${isvc_names[$i]}" -n "${isvc_ns[$i]}" -o json | jq --raw-output '(.status.conditions // []) | map(select(.type == "Ready"))[0].status')
done

# Collect the namespace and name of every knative service in the cluster into
# the parallel arrays ksvc_ns / ksvc_names; ksvc_count holds their length.
log INFO "getting knative services"
# jsonpath prints "<ns1> <ns2> ...,<name1> <name2> ..." on a single line.
knative_services=$(kubectl get ksvc -A -o jsonpath='{.items[*].metadata.namespace},{.items[*].metadata.name}')
declare -a ksvc_names
declare -a ksvc_ns
if [ -n "$knative_services" ]; then
  mkdir -p ${KSVC_CONFIG_DIR}
  # First split on the comma, then rely on default word splitting to break
  # each half into its space-separated items.
  IFS=','
  ksvc_split=(${knative_services})
  unset IFS
  ksvc_ns=(${ksvc_split[0]})
  ksvc_names=(${ksvc_split[1]})
fi
ksvc_count=${#ksvc_names[@]}

# Stop the KFServing controller and re-create every exported inference service
# under the KServe API group. Runs in a subshell so the ERR trap below only
# covers this phase.
(
  # Stop kfserving controller
  log INFO "stopping kfserving controller"
  kubectl scale --replicas=0 statefulset.apps kfserving-controller-manager -n kfserving-system
  # presumably gives the controller pod time to terminate — TODO confirm
  sleep 30

  # If anything below fails, bring the KFServing controller back before the
  # script aborts.
  trap 'kubectl scale --replicas=1 statefulset.apps kfserving-controller-manager -n kfserving-system' ERR

  # Deploy inference services on kserve
  log INFO "deploying inference services on kserve"
  for (( i=0; i<isvc_count; i++ )); do
    isvc_file="${ISVC_CONFIG_DIR}/${isvc_names[$i]}.yaml"

    # Drop server-populated metadata and the status so the manifest can be
    # applied as a new object.
    for field in \
      'metadata.annotations[kubectl.kubernetes.io/last-applied-configuration]' \
      'metadata.creationTimestamp' \
      'metadata.finalizers' \
      'metadata.generation' \
      'metadata.resourceVersion' \
      'metadata.uid' \
      'metadata.managedFields' \
      'status'; do
      yq d -i "${isvc_file}" "${field}"
    done

    # Rewrite the API group. The dot is escaped so only the literal
    # "kubeflow.org" is replaced.
    sed -i -- 's/kubeflow\.org/kserve.io/g' "${isvc_file}"
    kubectl apply -f "${isvc_file}"
  done
)
# presumably waits for the new inference services to be reconciled — TODO confirm
sleep 300

# Detach knative services from their KFServing inference service.
# ksvc_isvc_map remembers, per knative service name, the name of the inference
# service that owned it.
log INFO "removing owner references from knative services"
declare -A ksvc_isvc_map
remove_owner_refs_patch='[{"op": "remove", "path": "/metadata/ownerReferences"}]'
for (( i=0; i<ksvc_count; i++ )); do
  current_ksvc=${ksvc_names[$i]}
  current_ksvc_ns=${ksvc_ns[$i]}
  # Only the first owner reference is inspected.
  ksvc_api_version=$(kubectl get ksvc "${current_ksvc}" -n "${current_ksvc_ns}" -o json | jq --raw-output '.metadata.ownerReferences[0].apiVersion')
  if [ "$ksvc_api_version" != "serving.kubeflow.org/v1beta1" ]; then
    continue
  fi
  ksvc_isvc_map[${current_ksvc}]=$(kubectl get ksvc "${current_ksvc}" -n "${current_ksvc_ns}" -o json | jq --raw-output '.metadata.ownerReferences[0].name')
  kubectl patch ksvc "${current_ksvc}" -n "${current_ksvc_ns}" --type json -p="${remove_owner_refs_patch}"
done

# Detach virtual services from their KFServing inference service. The virtual
# service is looked up under the inference service's own name and namespace.
log INFO "removing owner references from virtual services"
vsvc_remove_patch='[{"op": "remove", "path": "/metadata/ownerReferences"}]'
for (( i=0; i<isvc_count; i++ )); do
  vsvc_name=${isvc_names[$i]}
  vsvc_namespace=${isvc_ns[$i]}
  # Only the first owner reference is inspected.
  vsvc_api_version=$(kubectl get virtualservices "${vsvc_name}" -n "${vsvc_namespace}" -o json | jq --raw-output '.metadata.ownerReferences[0].apiVersion')
  if [ "$vsvc_api_version" != "serving.kubeflow.org/v1beta1" ]; then
    continue
  fi
  kubectl patch virtualservices "${vsvc_name}" -n "${vsvc_namespace}" --type json -p="${vsvc_remove_patch}"
done
sleep 5

# Record the uid of every re-created KServe inference service in infr_uid_map
# (inference service name -> uid).
log INFO "extracting inference service uids"
declare -A infr_uid_map
for (( i=0; i<isvc_count; i++ )); do
  migrated_isvc=${isvc_names[$i]}
  migrated_isvc_json=$(kubectl get inferenceservice.serving.kserve.io "${migrated_isvc}" -n "${isvc_ns[$i]}" -o json)
  infr_uid_map[${migrated_isvc}]=$(printf '%s\n' "${migrated_isvc_json}" | jq --raw-output '.metadata.uid')
done

# Update knative services with new owner reference
#
# Points each knative service that used to belong to a KFServing inference
# service at the KServe inference service of the same name.
log INFO "updating knative services with new owner reference"
for (( i=0; i<ksvc_count; i++ )); do
  ksvc_name=${ksvc_names[$i]}

  # ksvc_isvc_map only has entries for services whose KFServing owner reference
  # was removed. Knative services that never belonged to an inference service
  # must be left alone: patching them with an empty owner name/uid would not
  # give them a valid owner, and a rejected patch aborts the migration.
  isvc_name=${ksvc_isvc_map[${ksvc_name}]:-}
  if [[ -z "${isvc_name}" ]]; then
    continue
  fi

  owner_ref_count=$(kubectl get ksvc "${ksvc_name}" -n "${ksvc_ns[$i]}" -o json | jq --raw-output '.metadata.ownerReferences | length')
  if [[ "${owner_ref_count}" == "0" ]]; then
    isvc_uid=${infr_uid_map[${isvc_name}]:-}
    kubectl patch ksvc "${ksvc_name}" -n "${ksvc_ns[$i]}" --type='json' -p='[{"op": "add", "path": "/metadata/ownerReferences", "value": [{"apiVersion": "serving.kserve.io/v1beta1","blockOwnerDeletion": true,"controller": true,"kind": "InferenceService","name": "'"${isvc_name}"'","uid": "'"${isvc_uid}"'"}] }]'
  fi
done

# Update virtual services with new owner reference
#
# Points each virtual service that currently has no owner at the KServe
# inference service of the same name.
log INFO "updating virtual services with new owner reference"
for (( i=0; i<isvc_count; i++ )); do
  owner_ref_count=$(kubectl get virtualservices "${isvc_names[$i]}" -n "${isvc_ns[$i]}" -o json | jq --raw-output '.metadata.ownerReferences | length')
  # String comparison: an empty count (virtual service lookup returned
  # nothing) simply skips the patch instead of raising a `[` syntax error.
  if [[ "${owner_ref_count}" == "0" ]]; then
    isvc_uid=${infr_uid_map[${isvc_names[$i]}]:-}
    kubectl patch virtualservices "${isvc_names[$i]}" -n "${isvc_ns[$i]}" --type='json' -p='[{"op": "add", "path": "/metadata/ownerReferences", "value": [{"apiVersion": "serving.kserve.io/v1beta1","blockOwnerDeletion": true,"controller": true,"kind": "InferenceService","name": "'"${isvc_names[$i]}"'","uid": "'"${isvc_uid}"'"}] }]'
  fi
done
sleep 5

# Every inference service that was Ready on KFServing must become Ready on
# KServe; services that were not Ready before the migration are not checked.
log INFO "verifying inference services are migrated and ready"
for (( i=0; i<isvc_count; i++ )); do
  status_before_migration=${kfserving_isvc_status[${isvc_names[$i]}]}
  if [ "${status_before_migration}" != "True" ]; then
    continue
  fi
  # Subshell: the ERR trap (which logs the failing service) stays local to
  # this single wait; the subshell's failure still stops the script via errexit.
  (
    trap 'log ERROR "inference service ${isvc_names[$i]} did not migrate properly. migration job exits with code 1."' ERR
    kubectl wait --for=condition=ready --timeout=10s "inferenceservice.serving.kserve.io/${isvc_names[$i]}" -n "${isvc_ns[$i]}"
  )
done

# Bring the KFServing controller back up for the clean-up phase.
log INFO "starting kfserving controller for clean up"
kubectl scale --replicas=1 statefulset.apps kfserving-controller-manager -n kfserving-system

# Remove the original (serving.kubeflow.org) inference services one by one.
log INFO "deleting inference services on kfserving"
for (( i=0; i<isvc_count; i++ )); do
  old_isvc=${isvc_names[$i]}
  old_isvc_ns=${isvc_ns[$i]}
  kubectl delete inferenceservice.serving.kubeflow.org "${old_isvc}" -n "${old_isvc_ns}"
done

# Clean up kfserving
#
# Removes the KFServing installation unless the user opted out
# (CLEAN_KFSERVING != "true"). Every delete uses --ignore-not-found: some of
# these resources may not exist on a given cluster, and a NotFound error would
# otherwise abort the script under errexit before the clean-up is complete.
if [[ "${CLEAN_KFSERVING}" == "true" ]]; then
  log INFO "deleting kfserving namespace"
  kubectl delete ns kfserving-system --ignore-not-found

  log INFO "deleting kfserving cluster role and cluster role binding"
  kubectl delete ClusterRoleBinding kfserving-manager-rolebinding --ignore-not-found
  kubectl delete ClusterRoleBinding kfserving-models-web-app-binding --ignore-not-found
  kubectl delete ClusterRoleBinding kfserving-proxy-rolebinding --ignore-not-found

  log INFO "deleting kfserving webhook configuration and crd"
  kubectl delete CustomResourceDefinition inferenceservices.serving.kubeflow.org --ignore-not-found
  kubectl delete CustomResourceDefinition trainedmodels.serving.kubeflow.org --ignore-not-found

  kubectl delete MutatingWebhookConfiguration inferenceservice.serving.kubeflow.org --ignore-not-found
  kubectl delete ValidatingWebhookConfiguration inferenceservice.serving.kubeflow.org --ignore-not-found
  kubectl delete ValidatingWebhookConfiguration trainedmodel.serving.kubeflow.org --ignore-not-found
fi

# Discard the exported manifests; they are only needed during the migration.
rm -r -f ${CONFIG_DIR}

log INFO "kserve migration completed successfully"
exit 0
131 changes: 131 additions & 0 deletions hack/kserve_migration/kserve_migration_job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# Service account the migration job's pod runs as.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cluster-migration-svcaccount
  namespace: kserve
---
# Cluster-wide permissions granted to the migration job.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: cluster-migration-role
rules:
  # NOTE(review): full access to every core-group resource. The migration
  # script appears to need only services/pods (get, list) and namespaces
  # (delete) from this group — confirm before narrowing.
  - apiGroups: [""]
    resources: ["*"]
    verbs: ["*"]
  # Read access to workloads.
  - apiGroups:
      - apps
    resources:
      - statefulsets
      - deployments
    verbs:
      - get
      - list
  # Scaling of stateful sets (the script scales kfserving-controller-manager).
  - apiGroups:
      - apps
    resources:
      - statefulsets/scale
    verbs:
      - patch
  # Istio virtual services: owner references are read and patched.
  - apiGroups:
      - networking.istio.io
    resources:
      - virtualservices
    verbs:
      - get
      - list
      - patch
      - update
      - watch
  # Knative services: owner references are read and patched.
  - apiGroups:
      - serving.knative.dev
    resources:
      - services
    verbs:
      - get
      - list
      - patch
      - update
      - watch
  # NOTE(review): the migration script does not delete revisions; possibly a
  # leftover from the removed revision clean-up step — confirm.
  - apiGroups:
      - serving.knative.dev
    resources:
      - revisions
    verbs:
      - delete
  # KFServing inference services: exported, then deleted after the migration.
  - apiGroups:
      - serving.kubeflow.org
    resources:
      - inferenceservices
      - inferenceservices/finalizers
    verbs:
      - delete
      - get
      - list
      - patch
      - update
      - watch
  # KServe inference services: created from the exported manifests and watched
  # until ready.
  - apiGroups:
      - serving.kserve.io
    resources:
      - inferenceservices
      - inferenceservices/finalizers
    verbs:
      - create
      - get
      - list
      - patch
      - update
      - watch
  # Clean-up of the KFServing cluster role bindings.
  - apiGroups:
      - rbac.authorization.k8s.io
    resources:
      - clusterrolebindings
    verbs:
      - delete
  # Clean-up of the KFServing custom resource definitions.
  - apiGroups:
      - apiextensions.k8s.io
    resources:
      - customresourcedefinitions
    verbs:
      - delete
  # Clean-up of the KFServing webhook configurations.
  - apiGroups:
      - admissionregistration.k8s.io
    resources:
      - mutatingwebhookconfigurations
      - validatingwebhookconfigurations
    verbs:
      - delete
---
# Grants cluster-migration-role to the migration job's service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: cluster-migration-rolebinding
subjects:
  - kind: ServiceAccount
    name: cluster-migration-svcaccount
    # Service account subjects belong to the core ("") API group.
    apiGroup: ""
    namespace: kserve
roleRef:
  kind: ClusterRole
  name: cluster-migration-role
  # Spell out the RBAC API group instead of leaving it empty.
  apiGroup: rbac.authorization.k8s.io
---
# One-shot job that runs the KFServing -> KServe migration image.
apiVersion: batch/v1
kind: Job
metadata:
  name: kserve-migration
  namespace: kserve
spec:
  # Together with restartPolicy: Never below, a failed run is not retried.
  backoffLimit: 0
  template:
    metadata:
      name: kserve-migration
    spec:
      serviceAccountName: cluster-migration-svcaccount
      containers:
        - name: kserve-migration
          image: kserve/kserve-migration:latest
          env:
            # Read by the migration script: "false" keeps the KFServing
            # installation, any other value removes it after the migration.
            - name: REMOVE_KFSERVING
              value: "true"
      restartPolicy: Never

0 comments on commit 57249a0

Please sign in to comment.