-
Notifications
You must be signed in to change notification settings - Fork 28
feat: Add kubectl plugin support and enhance EKS aperf script #323
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,11 +12,9 @@ NAMESPACE="default" | |
| APERF_OPTIONS="" | ||
| NODE_NAME="" | ||
| APERF_IMAGE="" | ||
| REPORT_NAME="aperf_record" | ||
| OPEN_BROWSER=true | ||
| SHOW_HELP=false | ||
| CPU_REQUEST="1.0" | ||
| MEMORY_REQUEST="1Gi" | ||
| CPU_LIMIT="4.0" | ||
| MEMORY_LIMIT="4Gi" | ||
|
|
||
| # Define color and formatting codes | ||
| BOLD="\033[1m" | ||
|
|
@@ -34,10 +32,8 @@ while [ $# -gt 0 ]; do | |
| --namespace) dest="NAMESPACE";; | ||
| --aperf_options) dest="APERF_OPTIONS";; | ||
| --aperf_image) dest="APERF_IMAGE";; | ||
| --cpu-request) dest="CPU_REQUEST";; | ||
| --memory-request) dest="MEMORY_REQUEST";; | ||
| --cpu-limit) dest="CPU_LIMIT";; | ||
| --memory-limit) dest="MEMORY_LIMIT";; | ||
| --report-name) dest="REPORT_NAME";; | ||
| --open-browser) dest="OPEN_BROWSER";; | ||
| --help) | ||
| SHOW_HELP=true | ||
| shift | ||
|
|
@@ -68,10 +64,8 @@ if [ "$SHOW_HELP" = true ]; then | |
| echo " --node Required. The name of the Kubernetes node to run aperf on" | ||
| echo " --namespace Optional. The Kubernetes namespace (default: '${NAMESPACE}')" | ||
| echo " --aperf_options Optional. Options to pass to aperf (default: '${APERF_OPTIONS}')" | ||
| echo " --cpu-request Optional. CPU request (default: '${CPU_REQUEST}')" | ||
| echo " --memory-request Optional. Memory request (default: '${MEMORY_REQUEST}')" | ||
| echo " --cpu-limit Optional. CPU limit (default: '${CPU_LIMIT}')" | ||
| echo " --memory-limit Optional. Memory limit (default: '${MEMORY_LIMIT}')" | ||
| echo " --report-name Optional. Name for aperf record/report (default: '${REPORT_NAME}')" | ||
| echo " --open-browser Optional. Open report in browser (default: ${OPEN_BROWSER})" | ||
| echo " --help Show this help message" | ||
| exit 0 | ||
| fi | ||
|
|
@@ -93,6 +87,43 @@ fi | |
|
|
||
| POD_NAME="aperf-pod-${NODE_NAME//[.]/-}" | ||
|
|
||
| # Get node taints and generate tolerations | ||
| echo -e "${BOLD}Checking node taints...${NC}" | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This goes to a new line; can we have the result on a single line? |
||
| TAINTS=$(kubectl get node ${NODE_NAME} -o jsonpath='{.spec.taints[*]}' 2>/dev/null) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We probably need to add a check that the node exists before checking its taints; otherwise, if the user enters a wrong node name, it fails without a clear reason. What do you think? |
||
|
|
||
| TOLERATIONS="" | ||
| if [ -n "$TAINTS" ]; then | ||
| echo -e " ${YELLOW}Node has taints, adding tolerations to pod spec${NC}" | ||
|
|
||
| # Parse taints and create tolerations YAML | ||
| TOLERATIONS=" tolerations:" | ||
|
|
||
| # Get taints as JSON array and process each one | ||
| TAINT_COUNT=$(kubectl get node ${NODE_NAME} -o json | jq -r '.spec.taints | length' 2>/dev/null || echo "0") | ||
|
|
||
| for ((i=0; i<$TAINT_COUNT; i++)); do | ||
| KEY=$(kubectl get node ${NODE_NAME} -o json | jq -r ".spec.taints[$i].key" 2>/dev/null) | ||
| VALUE=$(kubectl get node ${NODE_NAME} -o json | jq -r ".spec.taints[$i].value" 2>/dev/null) | ||
| EFFECT=$(kubectl get node ${NODE_NAME} -o json | jq -r ".spec.taints[$i].effect" 2>/dev/null) | ||
|
|
||
| echo -e " Taint: ${KEY}=${VALUE}:${EFFECT}" | ||
|
|
||
| TOLERATIONS="${TOLERATIONS} | ||
| - key: \"${KEY}\"" | ||
|
|
||
| if [ "$VALUE" != "null" ] && [ -n "$VALUE" ]; then | ||
| TOLERATIONS="${TOLERATIONS} | ||
| value: \"${VALUE}\"" | ||
| fi | ||
|
|
||
| TOLERATIONS="${TOLERATIONS} | ||
| effect: \"${EFFECT}\" | ||
| operator: \"Equal\"" | ||
| done | ||
| else | ||
| echo -e " ${GREEN}No taints found on node${NC}" | ||
| fi | ||
|
|
||
| # Create pod YAML as a variable | ||
| POD_YAML=$(cat << EOF | ||
| apiVersion: v1 | ||
|
|
@@ -104,6 +135,7 @@ metadata: | |
| spec: | ||
| nodeSelector: | ||
| kubernetes.io/hostname: "${NODE_NAME}" | ||
| ${TOLERATIONS} | ||
| containers: | ||
| - name: aperf-runner | ||
| image: ${APERF_IMAGE} | ||
|
|
@@ -115,25 +147,17 @@ spec: | |
| set -e | ||
|
|
||
| echo -e "\nStarting Aperf recording execution..." | ||
| echo "Run: /usr/bin/aperf record -r aperf_record ${APERF_OPTIONS}" | ||
| sudo /usr/bin/aperf record -r aperf_record ${APERF_OPTIONS} | ||
| echo "Run: /usr/bin/aperf record -r ${REPORT_NAME} ${APERF_OPTIONS}" | ||
| sudo /usr/bin/aperf record -r ${REPORT_NAME} ${APERF_OPTIONS} | ||
| echo "APerf record completed" | ||
|
|
||
| echo -e "\nStarting Aperf report generation..." | ||
| echo "Run: /usr/bin/aperf report -r aperf_record -n aperf_report" | ||
| sudo /usr/bin/aperf report -r aperf_record -n aperf_report | ||
| echo "Run: /usr/bin/aperf report -r ${REPORT_NAME} -n ${REPORT_NAME}_report" | ||
| sudo /usr/bin/aperf report -r ${REPORT_NAME} -n ${REPORT_NAME}_report | ||
| echo "APerf report generation completed" | ||
|
|
||
| echo -e "\nWaiting for files to be copied..." | ||
| sleep 7200 | ||
|
|
||
| resources: | ||
| requests: | ||
| memory: "${MEMORY_REQUEST}" | ||
| cpu: "${CPU_REQUEST}" | ||
| limits: | ||
| memory: "${MEMORY_LIMIT}" | ||
| cpu: "${CPU_LIMIT}" | ||
| volumeMounts: | ||
| - mountPath: /boot | ||
| name: boot-volume | ||
|
|
@@ -174,10 +198,13 @@ fi | |
|
|
||
| # Show resource usage for pods on this node | ||
| echo -e "${BOLD}Resource usage for pods on ${NODE_NAME}:${NC}" | ||
| rm /tmp/allpods.out 2> /dev/null; \ | ||
| kubectl top pods --all-namespaces > /tmp/allpods.out && \ | ||
| head -n 1 /tmp/allpods.out && \ | ||
| grep "$(kubectl get pods --all-namespaces --field-selector spec.nodeName=${NODE_NAME} -o jsonpath='{range .items[*]}{.metadata.name}{" "}{end}' | sed 's/[[:space:]]*$//' | sed 's/[[:space:]]/\\|/g')" /tmp/allpods.out --color=never | ||
| if kubectl top pods --all-namespaces > /tmp/allpods.out 2>/dev/null; then | ||
| head -n 1 /tmp/allpods.out | ||
| grep "$(kubectl get pods --all-namespaces --field-selector spec.nodeName=${NODE_NAME} -o jsonpath='{range .items[*]}{.metadata.name}{" "}{end}' | sed 's/[[:space:]]*$//' | sed 's/[[:space:]]/\\|/g')" /tmp/allpods.out --color=never || echo " No pods found on this node" | ||
| rm /tmp/allpods.out 2>/dev/null || true | ||
| else | ||
| echo " ${YELLOW}Note: kubectl top not available (metrics-server may not be installed)${NC}" | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The formatting is wrong here; also, can we have it on a single line? |
||
| fi | ||
|
|
||
| # Create APerf pod | ||
| echo -e "\n${BOLD}Created pod configuration for node:${NC} ${NODE_NAME}${NC}" | ||
|
|
@@ -215,13 +242,51 @@ done | |
| kill $LOGS_PID 2>/dev/null || true | ||
|
|
||
| # Copy files from pod to local directory | ||
| LOCAL_FILE="aperf_report_${POD_STARTTIME}.tar.gz" | ||
| LOCAL_FILE="${REPORT_NAME}_${POD_STARTTIME}.tar.gz" | ||
| EXTRACT_DIR="${REPORT_NAME}_${POD_STARTTIME}" | ||
| echo -e "${NC}${BOLD}Aperf completed. Copying files from pod ${POD_NAME}...${NC}" | ||
| kubectl cp ${NAMESPACE}/${POD_NAME}:aperf_report.tar.gz ${LOCAL_FILE} | ||
| kubectl cp ${NAMESPACE}/${POD_NAME}:${REPORT_NAME}_report.tar.gz ${LOCAL_FILE} | ||
|
|
||
| # Delete the pod after copying files | ||
| echo -ne "${BOLD}Deleting pod to clean up resources...${NC} " | ||
| kubectl delete pod ${POD_NAME} -n ${NAMESPACE} | ||
|
|
||
| echo -e "${BOLD}${GREEN}Files copied to${NC} ${BLUE}${LOCAL_FILE}${NC}" | ||
|
|
||
| # Extract the tar.gz file | ||
| echo -e "${BOLD}Extracting report files...${NC}" | ||
| mkdir -p "${EXTRACT_DIR}" | ||
| tar -xzf "${LOCAL_FILE}" -C "${EXTRACT_DIR}" | ||
| echo -e " ${GREEN}Extracted to${NC} ${BLUE}${EXTRACT_DIR}/${NC}" | ||
|
|
||
| # Open index.html in browser if enabled | ||
| if [ "$OPEN_BROWSER" = true ]; then | ||
| INDEX_FILE="${EXTRACT_DIR}/${REPORT_NAME}_report/index.html" | ||
|
|
||
| if [ -f "$INDEX_FILE" ]; then | ||
| echo -e "${BOLD}Opening report in browser...${NC}" | ||
|
|
||
| # Detect OS and open browser accordingly | ||
| if [[ "$OSTYPE" == "darwin"* ]]; then | ||
| # macOS | ||
| open "$INDEX_FILE" | ||
| elif [[ "$OSTYPE" == "linux-gnu"* ]]; then | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Windows support? |
||
| # Linux | ||
| if command -v xdg-open &> /dev/null; then | ||
| xdg-open "$INDEX_FILE" | ||
| elif command -v sensible-browser &> /dev/null; then | ||
| sensible-browser "$INDEX_FILE" | ||
| else | ||
| echo -e " ${YELLOW}Could not detect browser command. Please open manually:${NC} ${BLUE}${INDEX_FILE}${NC}" | ||
| fi | ||
| else | ||
| echo -e " ${YELLOW}Unsupported OS. Please open manually:${NC} ${BLUE}${INDEX_FILE}${NC}" | ||
| fi | ||
| else | ||
| echo -e " ${YELLOW}Warning: index.html not found at ${INDEX_FILE}${NC}" | ||
| echo -e " ${YELLOW}Extracted contents:${NC}" | ||
| ls -la "${EXTRACT_DIR}/" | ||
| fi | ||
| fi | ||
|
|
||
| echo -e "${BOLD}${GREEN}Done!${NC}" | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any reason why you removed these options?