diff --git a/.github/workflows/script-cleanup.yml b/.github/workflows/script-cleanup.yml new file mode 100644 index 000000000..bacbcf9ff --- /dev/null +++ b/.github/workflows/script-cleanup.yml @@ -0,0 +1,54 @@ +name: Daily RG Cleanup + +on: + schedule: + # runs every day at 11:11 UTC + - cron: '11 11 * * *' + workflow_dispatch: + +permissions: + contents: write + id-token: write + issues: write + +jobs: + cleanup: + runs-on: ubuntu-latest + steps: + - name: 'Az CLI login' + uses: azure/login@v1 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + + - name: Install jq + run: sudo apt-get update && sudo apt-get install -y jq + + - name: Cleanup Resource Groups + run: | + echo "Fetching all resource groups in the subscription..." + rgs_json=$(az group list --output json) + + # don't exit on errors in this block + set +e + + echo "Attempting to delete all resource groups except 'exec-docs-ai' and those containing 'mcp'..." + echo "$rgs_json" | + jq -r '.[] | select(.name != "exec-docs-ai" and (.name | contains("mcp") | not)) | .name' | + while read -r rg_name; do + if [[ -z "$rg_name" ]]; then + echo "Skipping empty resource group name." + continue + fi + echo -n "Deleting $rg_name… " + az group delete \ + --name "$rg_name" \ + --yes \ + --no-wait \ + && echo "OK" \ + || echo "⚠️ Skipped (deny-assignment or other error)" + done + + # restore "exit on error" if you need it later + set -e diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml index 8339803ce..230bb15dc 100644 --- a/.github/workflows/sync.yml +++ b/.github/workflows/sync.yml @@ -1,8 +1,8 @@ name: Sync Markdown Files on: - schedule: - - cron: '0 15 * * *' + # schedule: + # - cron: '0 15 * * *' workflow_dispatch: permissions: diff --git a/.gitignore b/.gitignore index 791b4ebf6..957a66fa5 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ _themes*/ _repo.*/ .openpublishing.buildcore.ps1 -ie.log \ No newline at end of file +ie.log +report.json \ No newline at end of file diff --git a/LICENSE-CODE b/LICENSE-CODE deleted file mode 100644 index b17b032a4..000000000 --- a/LICENSE-CODE +++ /dev/null @@ -1,17 +0,0 @@ -The MIT License (MIT) -Copyright (c) Microsoft Corporation - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and -associated documentation files (the "Software"), to deal in the Software without restriction, -including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, -and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT -NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/README.md index 303e9df81..3d356efbd 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ These experiences utilize [Innovation Engine](https://github.com/Azure/Innovatio ## Table of Contents +- [Selecting Documentation for Exec Docs](#selecting-documentation-for-exec-docs) - [How to Write an Exec Doc](#how-to-write-an-exec-doc) - [Training Resources (Optional)](#training-resources-optional) - [Setup](#setup) @@ -18,6 +19,81 @@ These experiences utilize [Innovation Engine](https://github.com/Azure/Innovatio - [Frequently Asked Questions (FAQs)](#frequently-asked-questions-faqs) - [Contact Information for Exec Docs](#contact-information-for-exec-docs) + +## Selecting Documentation for Exec Docs + +Not all documentation is suitable for conversion to Exec Docs. Use these filters to determine whether a document can be effectively converted: + +1. **Command Execution Limitations** + - **Supported scenarios:** + - Any command that can run in a BASH terminal (e.g. azurecli, azure-cli-interactive, azurecli-interactive commands) + + - **Not supported currently:** + - PowerShell scripts + - GUI-based instructions + - Commands requiring `sudo` privileges + - Direct code blocks of languages that aren't bash/shell commands + + **Example of supported command:** + ```markdown + ```bash + export REGION="eastus" + export RESOURCE_GROUP="myResourceGroup" + az group create --name $RESOURCE_GROUP --location $REGION + ``` + ``` + + **Example of unsupported command (SQL query below won't work):** + ```markdown + ```sql + INSERT INTO myTable (name, value) VALUES ('test', 123); + ``` + ``` + + **Example of supported command (SQL query below will work):** + ```markdown + ```bash + export DATABASE_NAME="mydb" + export TABLE_NAME="myTable" + psql -d $DATABASE_NAME -c "INSERT INTO $TABLE_NAME (name, value) VALUES ('test', 123);" + ``` + ``` + >**Note:** You can include code blocks of any type in your documentation for human readers, but only the supported types listed above will be executed by Innovation Engine. Other code block types will be displayed but ignored during execution. + + >**Note:** The key principle is simple: if you can run it in a BASH terminal as written, it will work with Exec Docs (although at this time `sudo` is not supported). Code blocks in other languages won't be executed directly but can be included for human readers. + +2.
**Azure Portal Custom Cloud Shell Constraints** + - **Supported scenarios:** + - Standard Azure resource operations (create, read, update, delete) + - Commands running within the user's subscription scope + - Standard service deployments (VMs, storage, networking) + + - **Not supported currently:** + - Commands requiring elevated Microsoft Graph API permissions + - Operations needing KeyVault special access + - Cross-subscription or tenant-level operations + - Commands requiring admin consent + + **Example of supported command:** + ```markdown + ```bash + export RESOURCE_GROUP="myResourceGroup" + export LOCATION="eastus" + az group create --name $RESOURCE_GROUP --location $LOCATION + ``` + ``` + + **Example of unsupported command:** + ```markdown + ```bash + export APP_NAME="myApp" + # This requires elevated Graph API permissions and would fail + az ad app create --display-name $APP_NAME --native-app + ``` + ``` + +This filter system ensures that you select documentation that can be effectively transformed into executable docs that provide value through automated deployment capabilities. Please reach out to the [Exec Docs Team](#contact-information-for-exec-docs) if you have any questions about the suitability of a document for conversion to an Exec Doc. + ## How to Write an Exec Doc Follow these steps in sequence to write an Exec Doc either by converting an existing Azure Doc i.e. building on top of the author's work or from scratch i.e. you are the author _(read the Notes in any step for more information)_: @@ -81,33 +158,14 @@ Check if all prerequisites below are met before writing the Exec Doc. ***If any │ └── my-script.yaml ``` -6. Code blocks are used to provide examples, commands, or other code snippets in Exec Docs. They are distinguished by a triple backtick (```) at the start and end of the block. - - Ensure that the Exec Doc contains at least 1 code block and every input code block's type in the Exec Doc is taken from this list: - - - bash - - azurecli - - azure-cli-interactive - - azurecli-interactive - - **Example:** - - ```bash - az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION - ``` - - >**Note:** This rule does not apply to output code blocks, which are used to display the results of commands, scripts, or other operations. These blocks help in illustrating what the expected output should look like. They include, but are not limited to, the following types: _output, json, yaml, console, text, and log._ - - >**Note:** While Innovation Engine can _parse_ a code block of any type, given its current features, it can only _execute_ code blocks of the types above. So, it is important to ensure that the code blocks in your Exec Doc are of the types above. - -7. Headings are used to organize content in a document. The number of hashes indicates the level of the heading. For example, a single hash (#) denotes an h1 heading, two hashes (##) denote an h2 heading, and so on. Innovation Engine uses headings to structure the content of an Exec Doc and to provide a clear outline of the document's contents. +6. Headings are used to organize content in a document. The number of hashes indicates the level of the heading. For example, a single hash (#) denotes an h1 heading, two hashes (##) denote an h2 heading, and so on. Innovation Engine uses headings to structure the content of an Exec Doc and to provide a clear outline of the document's contents. Ensure there is at least one h1 heading in the Exec Doc, denoted by a single hash (#) at the start of the line.
 **Example:** ```markdown - # Quickstart: Deploy an Azure Kubernetes Service (AKS) cluster using Azure CLI + # Quickstart: Deploy an Azure Kubernetes Service (AKS) cluster using Azure CLI ``` ### Writing Requirements @@ -299,15 +357,22 @@ Check if all prerequisites below are met before writing the Exec Doc. ***If any **Example:** - **Title:** _Making your Azure Doc executable!_ + **Title:** Making your Azure Doc executable! - _**Details:** Hello [author_name], \ - \ - I am submitting this PR to make the doc you have authored executable! Essentially, without any major content changes, this will make your doc accesssible, accurate, and actionable! And what is "this": Exec Docs (short for Executable Documentation) \ - \ - Exec Docs are documents that automate the deployment/maintenace of Azure resources using Azure CLI commands. This is a new initiative undertaken by the Azure Core Linux, Skilling, and Portal teams to simplify the evaluation and adoption of services for Linux on Azure customers. [Learn More Here!](https://github.com/MicrosoftDocs/executable-docs/blob/main/README.md)\ - \ - Once you get acquainted with Exec Docs, I would love to get your review on this doc. If you have any questions feel free to contact me or [Naman Parikh](mailto:namanparikh@microsoft.com)._ + **Details:** + + Hello [author_name], + + This PR will increase the value of the doc to customers by turning it into an Executable Document! An Executable Document improves the customer experience by making the content actionable in the Azure portal and by ensuring that it works, as documented, through automated testing. + + Exec Docs offer: + - Full integration into the Azure portal, where users can leverage them in hands-on learning experiences or low-effort "click to deploy" scenarios + - Increased reusability through adoption of consistent best practices and exportable scripts + - Higher quality as a result of automated testing + + Our experiments consistently show higher CSAT from customers working with Exec Docs, which in turn results in a higher conversion rate from experimentation to paid customers. [Learn More Here!](https://microsoft-my.sharepoint.com/:p:/p/namanparikh/EdxlQiyhGDhFmGcAUE9fejYB3r6ZzgLqWO3jZPK7fcnKgQ?e=CDWOet) + + Once you get acquainted with Exec Docs, I would love to get your review on this doc. If you have any questions feel free to contact me or [Naman Parikh](mailto:namanparikh@microsoft.com). - Assign the original Exec Doc author (if it is not you) as a reviewer to the PR. In most cases, this assignment should happen automatically and should include a reviewer from the Skilling team. - Add the comment ***#sign-off*** in the PR comments section once the Exec Doc is successfully reviewed. This will trigger the automated pipeline to merge the PR into the public repo. @@ -349,12 +414,12 @@ Check if all prerequisites below are met before writing the Exec Doc.
***If any **Deeplink Template:** ```markdown - [![Deploy to Azure](https://aka.ms/deploytoazurebutton)](https://ms.portal.azure.com/#view/Microsoft_Azure_CloudNative/SubscriptionSelectionPage.ReactView/isLearnMode~/true/referer/docs/tutorialKey/) + [![Deploy to Azure](https://aka.ms/deploytoazurebutton)](https://ms.portal.azure.com/#view/Microsoft_Azure_CloudNative/SubscriptionSelectionPage.ReactView/isLearnMode~/true/referer/docs/tutorialKey/) ``` **Deeplink for Example Exec Doc:** ```markdown - [![Deploy to Azure](https://aka.ms/deploytoazurebutton)](https://ms.portal.azure.com/#view/Microsoft_Azure_CloudNative/SubscriptionSelectionPage.ReactView/isLearnMode~/true/referer/docs/tutorialKey/azure-docs%2farticles%2faks%2fquick-kubernetes-deploy-cli.md) + [![Deploy to Azure](https://aka.ms/deploytoazurebutton)](https://ms.portal.azure.com/#view/Microsoft_Azure_CloudNative/SubscriptionSelectionPage.ReactView/isLearnMode~/true/referer/docs/tutorialKey/azure-docs%2farticles%2faks%2fquick-kubernetes-deploy-cli.md) ``` **Example of Button in Live Exec Doc:** @@ -456,11 +521,10 @@ For the first PR, if either the original doc author or someone from skilling is ## Contact Information for Exec Docs - PM for Exec Docs E2E Experience: [Naman Parikh](mailto:namanparikh@microsoft.com) -- PM for Exec Docs Portal Experience: [Varun Desai](mailto:varun.desai@microsoft.com) -- PM for Innovation Engine: [Mitchell Bifeld](mailto:mbifeld@microsoft.com) -- Skilling & Content Developer for Exec Docs: [Carol Smith](mailto:carols@microsoft.com) -- Devs for Exec Docs: [PJ Singh](mailto:pjsingh@microsoft.com), [Aria Amini](mailto:ariaamini@microsoft.com), [Abhishek Bhombore](mailto:abhishek.bhombore@microsoft.com) -- Devs for Innovation Engine: [Vincenzo Marcella](mailto:vmarcella@microsoft.com), [Rahul Gupta](mailto:guptar@microsoft.com) +- PM for Exec Docs Portal Experience: [Matt DePietro](mailto:mattdepietro@microsoft.com) +- PM for Innovation Engine: [Ross Gardler](mailto:rogardle@microsoft.com) +- MS Learn Point Person: [Carol Smith](mailto:carols@microsoft.com), [Rayo Flores](mailto:rayoflores@microsoft.com), [Chase Dovey](mailto:doveychase@microsoft.com) +- Devs for Exec Docs: [PJ Singh](mailto:pjsingh@microsoft.com), [Aria Amini](mailto:ariaamini@microsoft.com) ## Trademarks This project may contain trademarks or logos for projects, products, or diff --git a/azure-vote-nginx-ssl.yml b/azure-vote-nginx-ssl.yml deleted file mode 100644 index d03fd94b1..000000000 --- a/azure-vote-nginx-ssl.yml +++ /dev/null @@ -1,28 +0,0 @@ ---- -# INGRESS WITH SSL PROD -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: vote-ingress - namespace: default - annotations: - kubernetes.io/tls-acme: "true" - nginx.ingress.kubernetes.io/ssl-redirect: "true" - cert-manager.io/cluster-issuer: letsencrypt-prod -spec: - ingressClassName: nginx - tls: - - hosts: - - mydnslabel9730fc.westeurope.cloudapp.azure.com - secretName: azure-vote-nginx-secret - rules: - - host: mydnslabel9730fc.westeurope.cloudapp.azure.com - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: azure-vote-front - port: - number: 80 diff --git a/cloud-init.txt b/cloud-init.txt deleted file mode 100644 index 12bd08305..000000000 --- a/cloud-init.txt +++ /dev/null @@ -1,105 +0,0 @@ -#cloud-config -# Install, update, and upgrade packages -package_upgrade: true -package_update: true -package_reboot_if_require: true -# Install packages -packages: - - vim - - certbot - - python3-certbot-nginx - - bash-completion - - nginx - - mysql-client
- - php - - php-cli - - php-bcmath - - php-curl - - php-imagick - - php-intl - - php-json - - php-mbstring - - php-mysql - - php-gd - - php-xml - - php-xmlrpc - - php-zip - - php-fpm -write_files: - - owner: www-data:www-data - path: /etc/nginx/sites-available/default.conf - content: | - server { - listen 80 default_server; - listen [::]:80 default_server; - root /var/www/html; - server_name mydnslabel28fb9f.westeurope.cloudapp.azure.com; - } -write_files: - - owner: www-data:www-data - path: /etc/nginx/sites-available/mydnslabel28fb9f.westeurope.cloudapp.azure.com.conf - content: | - upstream php { - server unix:/run/php/php8.1-fpm.sock; - } - server { - listen 443 ssl http2; - listen [::]:443 ssl http2; - server_name mydnslabel28fb9f.westeurope.cloudapp.azure.com; - ssl_certificate /etc/letsencrypt/live/mydnslabel28fb9f.westeurope.cloudapp.azure.com/fullchain.pem; - ssl_certificate_key /etc/letsencrypt/live/mydnslabel28fb9f.westeurope.cloudapp.azure.com/privkey.pem; - root /var/www/mydnslabel28fb9f.westeurope.cloudapp.azure.com; - index index.php; - location / { - try_files $uri $uri/ /index.php?$args; - } - location ~ \.php$ { - include fastcgi_params; - fastcgi_intercept_errors on; - fastcgi_pass php; - fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name; - } - location ~* \.(js|css|png|jpg|jpeg|gif|ico)$ { - expires max; - log_not_found off; - } - location = /favicon.ico { - log_not_found off; - access_log off; - } - location = /robots.txt { - allow all; - log_not_found off; - access_log off; - } - } - server { - listen 80; - listen [::]:80; - server_name mydnslabel28fb9f.westeurope.cloudapp.azure.com; - return 301 https://mydnslabel28fb9f.westeurope.cloudapp.azure.com$request_uri; - } -runcmd: - - sed -i 's/;cgi.fix_pathinfo.*/cgi.fix_pathinfo = 1/' /etc/php/8.1/fpm/php.ini - - sed -i 's/^max_execution_time \= .*/max_execution_time \= 300/g' /etc/php/8.1/fpm/php.ini - - sed -i 's/^upload_max_filesize \= .*/upload_max_filesize \= 64M/g' /etc/php/8.1/fpm/php.ini - - sed -i 's/^post_max_size \= .*/post_max_size \= 64M/g' /etc/php/8.1/fpm/php.ini - - systemctl restart php8.1-fpm - - systemctl restart nginx - - certbot --nginx certonly --non-interactive --agree-tos -d mydnslabel28fb9f.westeurope.cloudapp.azure.com -m dummy@dummy.com --redirect - - ln -s /etc/nginx/sites-available/mydnslabel28fb9f.westeurope.cloudapp.azure.com.conf /etc/nginx/sites-enabled/ - - rm /etc/nginx/sites-enabled/default - - systemctl restart nginx - - curl --url https://raw.githubusercontent.com/wp-cli/builds/gh-pages/phar/wp-cli.phar --output /tmp/wp-cli.phar - - mv /tmp/wp-cli.phar /usr/local/bin/wp - - chmod +x /usr/local/bin/wp - - wp cli update - - mkdir -m 0755 -p /var/www/mydnslabel28fb9f.westeurope.cloudapp.azure.com - - chown -R azureadmin:www-data /var/www/mydnslabel28fb9f.westeurope.cloudapp.azure.com - - sudo -u azureadmin -i -- wp core download --path=/var/www/mydnslabel28fb9f.westeurope.cloudapp.azure.com - - sudo -u azureadmin -i -- wp config create --dbhost=mydb28fb9f.mysql.database.azure.com --dbname=wp001 --dbuser=dbadmin28fb9f --dbpass="OKISjTu6H7xixUjYxP3+521zeGuH75YxtTriR87fq28=" --path=/var/www/mydnslabel28fb9f.westeurope.cloudapp.azure.com - - sudo -u azureadmin -i -- wp core install --url=mydnslabel28fb9f.westeurope.cloudapp.azure.com --title="Azure hosted blog" --admin_user=wpcliadmin --admin_password="j19pzsPcHrLBBCTzAuAHtyYgWFuy1+6odxXO7HCFzWI=" --admin_email=6ab2c105-cbe9-4ecf-971b-20034854fbca --path=/var/www/mydnslabel28fb9f.westeurope.cloudapp.azure.com - - sudo -u 
azureadmin -i -- wp plugin update --all --path=/var/www/mydnslabel28fb9f.westeurope.cloudapp.azure.com - - chmod 600 /var/www/mydnslabel28fb9f.westeurope.cloudapp.azure.com/wp-config.php - - mkdir -p -m 0775 /var/www/mydnslabel28fb9f.westeurope.cloudapp.azure.com/wp-content/uploads - - chgrp www-data /var/www/mydnslabel28fb9f.westeurope.cloudapp.azure.com/wp-content/uploads diff --git a/cluster-issuer-prod.yml b/cluster-issuer-prod.yml deleted file mode 100644 index e49a9a8c9..000000000 --- a/cluster-issuer-prod.yml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: cert-manager.io/v1 -kind: ClusterIssuer -metadata: - name: letsencrypt-prod -spec: - acme: - # You must replace this email address with your own. - # Let's Encrypt will use this to contact you about expiring - # certificates, and issues related to your account. - email: namanparikh@microsoft.com - # ACME server URL for Let’s Encrypt’s prod environment. - # The staging environment will not issue trusted certificates but is - # used to ensure that the verification process is working properly - # before moving to production - server: https://acme-v02.api.letsencrypt.org/directory - # Secret resource used to store the account's private key. - privateKeySecretRef: - name: letsencrypt - # Enable the HTTP-01 challenge provider - # you prove ownership of a domain by ensuring that a particular - # file is present at the domain - solvers: - - http01: - ingress: - class: nginx - podTemplate: - spec: - nodeSelector: - "kubernetes.io/os": linux diff --git a/report.json b/report.json deleted file mode 100644 index 69001f6f5..000000000 --- a/report.json +++ /dev/null @@ -1,356 +0,0 @@ -{ - "name": "Quickstart: Deploy Inspektor Gadget in an Azure Kubernetes Service cluster", - "properties": { - "author": "josebl", - "description": "This tutorial shows how to deploy Inspektor Gadget in an AKS cluster", - "ms.author": "josebl", - "ms.custom": "innovation-engine", - "ms.date": "12/06/2023", - "ms.topic": "article", - "title": "Deploy Inspektor Gadget in an Azure Kubernetes Service cluster" - }, - "environmentVariables": { - "AKS_CLUSTER_NAME": "aks-cnpg-3cee3l", - "AKS_CLUSTER_VERSION": "1.29", - "AKS_MANAGED_IDENTITY_NAME": "mi-aks-cnpg-3cee3l", - "AKS_NODE_COUNT": "2", - "AKS_PRIMARY_CLUSTER_FED_CREDENTIAL_NAME": "pg-primary-fedcred1-cnpg-l1tsugyd", - "AKS_PRIMARY_CLUSTER_NAME": "aks-primary-cnpg-l1tsugyd", - "AKS_PRIMARY_CLUSTER_PG_DNSPREFIX": "a33a3d08c14", - "AKS_PRIMARY_MANAGED_RG_NAME": "rg-cnpg-primary-aksmanaged-l1tsugyd", - "AKS_UAMI_CLUSTER_IDENTITY_NAME": "mi-aks-cnpg-l1tsugyd", - "BARMAN_CONTAINER_NAME": "barman", - "CLUSTER_VERSION": "1.27", - "ENABLE_AZURE_PVC_UPDATES": "true", - "ERROR": "\u001b[31m", - "IP_ADDRESS": "52.233.203.69", - "KEYVAULT_NAME": "kv-cnpg-3cee3l", - "LOCAL_NAME": "cnpg", - "LOCATION": "eastus", - "MOTD_SHOWN": "update-motd", - "MY_AKS_CLUSTER_NAME": "myAKSClusterb60d78", - "MY_COMPUTER_VISION_NAME": "computervisiont6xygvc3", - "MY_CONTAINER_APP_ENV_NAME": "containerappenvt6xygvc3", - "MY_CONTAINER_APP_NAME": "containerappt6xygvc3", - "MY_DATABASE_NAME": "dbt6xygvc3", - "MY_DATABASE_PASSWORD": "dbpasst6xygvc3", - "MY_DATABASE_SERVER_NAME": "dbservert6xygvc3", - "MY_DATABASE_USERNAME": "dbusert6xygvc3", - "MY_DNS_LABEL": "mydnslabel3f8d9e", - "MY_RESOURCE_GROUP_NAME": "myResourceGroupb60d78", - "MY_STATIC_WEB_APP_NAME": "myStaticWebApp85f4f3", - "MY_STORAGE_ACCOUNT_NAME": "storaget6xygvc3", - "MY_USERNAME": "azureuser", - "MY_VM_IMAGE": "Canonical:0001-com-ubuntu-minimal-jammy:minimal-22_04-lts-gen2:latest", - 
"MY_VM_NAME": "myVMecb9fc", - "MyAction": "allow", - "MyAddressPrefix": "0.0.0.0/0", - "MyAddressPrefixes1": "10.0.0.0/8", - "MyAddressPrefixes2": "10.10.1.0/24", - "MyAddressPrefixes3": "10.20.1.0/24", - "MyAddressPrefixes4": "10.100.1.0/26", - "MyAddressPrefixes5": "10.30.1.0/24", - "MyAdminUsername": "d95734", - "MyApiserverVisibility": "Private", - "MyCollectionName1": "AROd95734", - "MyCollectionName2": "Dockerd95734", - "MyCustomData": "cloud_init_upgrade.txt", - "MyDearmor": "-o", - "MyDisablePrivateLinkServiceNetworkPolicies": "true", - "MyGenerateSshKeys": "export", - "MyImage": "Ubuntu2204", - "MyIngressVisibility": "Private", - "MyMasterSubnet": "-master", - "MyName": "NetworkWatcherAgentLinux2ef723", - "MyName1": "ubuntu-jumpd95734", - "MyName2": "aro-udrd95734", - "MyName3": "-masterd95734", - "MyName4": "-workerd95734", - "MyNextHopType": "VirtualAppliance", - "MyPriority1": "100", - "MyPriority2": "200", - "MyProtocols": "http=80", - "MyPublicIpAddress1": "jumphost-ip", - "MyPublicIpAddress2": "fw-ip", - "MyPublisher": "Microsoft.Azure.NetworkWatcher", - "MyPullSecret": "@pull-secret.txt", - "MyQuery1": "ipAddress", - "MyQuery2": "ipConfigurations[0].privateIPAddress", - "MyRemove": "routeTable", - "MyResourceGroup": "d95734", - "MyRouteTable": "aro-udr", - "MyRouteTableName": "aro-udrd95734", - "MyServiceEndpoints": "Microsoft.ContainerRegistry", - "MySku": "Standard", - "MySourceAddresses": "*", - "MyTargetFqdns1": "cert-api.access.redhat.com", - "MyTargetFqdns2": "*cloudflare.docker.com", - "MyVersion": "1.4", - "MyVmName": "myVM12ef723", - "MyVnetName": "d95734", - "MyWorkerSubnet": "-worker", - "NC": "\u001b(B\u001b[m", - "OUTPUT": "\u001b[32m", - "PG_NAMESPACE": "cnpg-database", - "PG_PRIMARY_CLUSTER_NAME": "pg-primary-cnpg-l1tsugyd", - "PG_PRIMARY_STORAGE_ACCOUNT_NAME": "hacnpgpsal1tsugyd", - "PG_STORAGE_BACKUP_CONTAINER_NAME": "backups", - "PG_SYSTEM_NAMESPACE": "cnpg-system", - "PRIMARY_CLUSTER_REGION": "westus3", - "RANDOM_ID": "b60d78", - "REGION": "eastus", - "RESOURCE_GROUP_NAME": "rg-cnpg-l1tsugyd", - "RGTAGS": "owner=cnpg", - "RG_NAME": "rg-cnpg-3cee3l", - "STORAGE_ACCOUNT_NAME": "storcnpg3cee3l", - "SUFFIX": "3cee3l", - "TAGS": "owner=user" - }, - "success": false, - "error": "failed to execute code block 0 on step 2.\nError: command exited with 'exit status 1' and the message 'WARNING: The behavior of this command has been altered by the following extension: aks-preview\nERROR: (SkuNotAvailable) Preflight validation check for resource(s) for container service myAKSClusterb60d78 in resource group MC_myResourceGroupb60d78_myAKSClusterb60d78_eastus failed. Message: The requested VM size for resource 'Following SKUs have failed for Capacity Restrictions: Standard_DS2_v2' is currently not available in location 'eastus'. Please try another size or deploy to a different location or different zone. See https://aka.ms/azureskunotavailable for details.. Details: \nCode: SkuNotAvailable\nMessage: Preflight validation check for resource(s) for container service myAKSClusterb60d78 in resource group MC_myResourceGroupb60d78_myAKSClusterb60d78_eastus failed. Message: The requested VM size for resource 'Following SKUs have failed for Capacity Restrictions: Standard_DS2_v2' is currently not available in location 'eastus'. Please try another size or deploy to a different location or different zone. See https://aka.ms/azureskunotavailable for details.. 
Details: \n'\nStdErr: WARNING: The behavior of this command has been altered by the following extension: aks-preview\nERROR: (SkuNotAvailable) Preflight validation check for resource(s) for container service myAKSClusterb60d78 in resource group MC_myResourceGroupb60d78_myAKSClusterb60d78_eastus failed. Message: The requested VM size for resource 'Following SKUs have failed for Capacity Restrictions: Standard_DS2_v2' is currently not available in location 'eastus'. Please try another size or deploy to a different location or different zone. See https://aka.ms/azureskunotavailable for details.. Details: \nCode: SkuNotAvailable\nMessage: Preflight validation check for resource(s) for container service myAKSClusterb60d78 in resource group MC_myResourceGroupb60d78_myAKSClusterb60d78_eastus failed. Message: The requested VM size for resource 'Following SKUs have failed for Capacity Restrictions: Standard_DS2_v2' is currently not available in location 'eastus'. Please try another size or deploy to a different location or different zone. See https://aka.ms/azureskunotavailable for details.. Details: \n", - "failedAtStep": -1, - "steps": [ - { - "codeBlock": { - "language": "bash", - "content": "if ! [ -x \"$(command -v kubectl)\" ]; then az aks install-cli; fi\n", - "header": "Connect to the cluster", - "description": "Install az aks CLI locally using the az aks install-cli command", - "resultBlock": { - "language": "", - "content": "", - "expectedSimilarityScore": 0, - "expectedRegexPattern": null - } - }, - "codeBlockNumber": 0, - "error": null, - "stdErr": "", - "stdOut": "", - "stepName": "Connect to the cluster", - "stepNumber": 3, - "success": false, - "similarityScore": 0 - }, - { - "codeBlock": { - "language": "bash", - "content": "IG_VERSION=$(curl -s https://api.github.com/repos/inspektor-gadget/inspektor-gadget/releases/latest | jq -r .tag_name)\nIG_ARCH=amd64\nmkdir -p $HOME/.local/bin\nexport PATH=$PATH:$HOME/.local/bin\ncurl -sL https://github.com/inspektor-gadget/inspektor-gadget/releases/download/${IG_VERSION}/kubectl-gadget-linux-${IG_ARCH}-${IG_VERSION}.tar.gz | tar -C $HOME/.local/bin -xzf - kubectl-gadget\n", - "header": "Installing the kubectl plugin: `gadget`", - "description": "[!NOTE]\nIf you want to install it using [`krew`](https://sigs.k8s.io/krew) or compile it from the source, please follow the official documentation: [installing kubectl gadget](https://github.com/inspektor-gadget/inspektor-gadget/blob/main/docs/install.md#installing-kubectl-gadget).", - "resultBlock": { - "language": "", - "content": "", - "expectedSimilarityScore": 0, - "expectedRegexPattern": null - } - }, - "codeBlockNumber": 0, - "error": null, - "stdErr": "", - "stdOut": "", - "stepName": "Installing the kubectl plugin: `gadget`", - "stepNumber": 4, - "success": false, - "similarityScore": 0 - }, - { - "codeBlock": { - "language": "bash", - "content": "kubectl gadget version\n", - "header": "Installing Inspektor Gadget in the cluster", - "description": "Now, let’s verify the installation by running the `version` command again:", - "resultBlock": { - "language": "text", - "content": "Client version: vX.Y.Z\nServer version: vX.Y.Z\n", - "expectedSimilarityScore": 0, - "expectedRegexPattern": "(?m)^Client version: v\\d+\\.\\d+\\.\\d+$\\n^Server version: v\\d+\\.\\d+\\.\\d+$" - } - }, - "codeBlockNumber": 1, - "error": null, - "stdErr": "", - "stdOut": "", - "stepName": "Installing Inspektor Gadget in the cluster", - "stepNumber": 5, - "success": false, - "similarityScore": 0 - }, - { - "codeBlock": 
{ - "language": "bash", - "content": "kubectl gadget help\n", - "header": "Installing Inspektor Gadget in the cluster", - "description": "You can now start running the gadgets:", - "resultBlock": { - "language": "", - "content": "", - "expectedSimilarityScore": 0, - "expectedRegexPattern": null - } - }, - "codeBlockNumber": 2, - "error": null, - "stdErr": "", - "stdOut": "", - "stepName": "Installing Inspektor Gadget in the cluster", - "stepNumber": 5, - "success": false, - "similarityScore": 0 - }, - { - "codeBlock": { - "language": "bash", - "content": "az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION\n", - "header": "Create a resource group", - "description": "A resource group is a container for related resources. All resources must be placed in a resource group. We will create one for this tutorial. The following command creates a resource group with the previously defined $MY_RESOURCE_GROUP_NAME and $REGION parameters.", - "resultBlock": { - "language": "JSON", - "content": "{\n \"id\": \"/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroup210\",\n \"location\": \"eastus\",\n \"managedBy\": null,\n \"name\": \"testResourceGroup\",\n \"properties\": {\n \"provisioningState\": \"Succeeded\"\n },\n \"tags\": null,\n \"type\": \"Microsoft.Resources/resourceGroups\"\n}\n", - "expectedSimilarityScore": 0.3, - "expectedRegexPattern": null - } - }, - "codeBlockNumber": 0, - "error": null, - "stdErr": "", - "stdOut": "{\n \"id\": \"/subscriptions/325e7c34-99fb-4190-aa87-1df746c67705/resourceGroups/myResourceGroupb60d78\",\n \"location\": \"eastus\",\n \"managedBy\": null,\n \"name\": \"myResourceGroupb60d78\",\n \"properties\": {\n \"provisioningState\": \"Succeeded\"\n },\n \"tags\": null,\n \"type\": \"Microsoft.Resources/resourceGroups\"\n}\n", - "stepName": "Create a resource group", - "stepNumber": 1, - "success": true, - "similarityScore": 0.7850672214487863 - }, - { - "codeBlock": { - "language": "bash", - "content": "az aks create \\\n --resource-group $MY_RESOURCE_GROUP_NAME \\\n --name $MY_AKS_CLUSTER_NAME \\\n --location $REGION \\\n --no-ssh-key\n", - "header": "Create AKS Cluster", - "description": "This will take a few minutes.", - "resultBlock": { - "language": "", - "content": "", - "expectedSimilarityScore": 0, - "expectedRegexPattern": null - } - }, - "codeBlockNumber": 0, - "error": {}, - "stdErr": "WARNING: The behavior of this command has been altered by the following extension: aks-preview\nERROR: (SkuNotAvailable) Preflight validation check for resource(s) for container service myAKSClusterb60d78 in resource group MC_myResourceGroupb60d78_myAKSClusterb60d78_eastus failed. Message: The requested VM size for resource 'Following SKUs have failed for Capacity Restrictions: Standard_DS2_v2' is currently not available in location 'eastus'. Please try another size or deploy to a different location or different zone. See https://aka.ms/azureskunotavailable for details.. Details: \nCode: SkuNotAvailable\nMessage: Preflight validation check for resource(s) for container service myAKSClusterb60d78 in resource group MC_myResourceGroupb60d78_myAKSClusterb60d78_eastus failed. Message: The requested VM size for resource 'Following SKUs have failed for Capacity Restrictions: Standard_DS2_v2' is currently not available in location 'eastus'. Please try another size or deploy to a different location or different zone. See https://aka.ms/azureskunotavailable for details.. 
Details: \n", - "stdOut": "", - "stepName": "Create AKS Cluster", - "stepNumber": 2, - "success": false, - "similarityScore": 0 - }, - { - "codeBlock": { - "language": "bash", - "content": "az aks get-credentials --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_AKS_CLUSTER_NAME --overwrite-existing\n", - "header": "Connect to the cluster", - "description": "[!WARNING]\nThis will overwrite any existing credentials with the same entry", - "resultBlock": { - "language": "", - "content": "", - "expectedSimilarityScore": 0, - "expectedRegexPattern": null - } - }, - "codeBlockNumber": 1, - "error": null, - "stdErr": "", - "stdOut": "", - "stepName": "Connect to the cluster", - "stepNumber": 3, - "success": false, - "similarityScore": 0 - }, - { - "codeBlock": { - "language": "bash", - "content": "kubectl get nodes\n", - "header": "Connect to the cluster", - "description": "Verify the connection to your cluster using the kubectl get command. This command returns a list of the cluster nodes.", - "resultBlock": { - "language": "", - "content": "", - "expectedSimilarityScore": 0, - "expectedRegexPattern": null - } - }, - "codeBlockNumber": 2, - "error": null, - "stdErr": "", - "stdOut": "", - "stepName": "Connect to the cluster", - "stepNumber": 3, - "success": false, - "similarityScore": 0 - }, - { - "codeBlock": { - "language": "bash", - "content": "kubectl gadget version\n", - "header": "Installing the kubectl plugin: `gadget`", - "description": "Now, let’s verify the installation by running the `version` command:", - "resultBlock": { - "language": "text", - "content": "Client version: vX.Y.Z\nServer version: not installed\n", - "expectedSimilarityScore": 0, - "expectedRegexPattern": "(?m)^Client version: v\\d+\\.\\d+\\.\\d+$\\n^Server version: not installed$" - } - }, - "codeBlockNumber": 1, - "error": null, - "stdErr": "", - "stdOut": "", - "stepName": "Installing the kubectl plugin: `gadget`", - "stepNumber": 4, - "success": false, - "similarityScore": 0 - }, - { - "codeBlock": { - "language": "bash", - "content": "kubectl gadget deploy\n", - "header": "Installing Inspektor Gadget in the cluster", - "description": "[!NOTE]\nSeveral options are available to customize the deployment: use a specific container image, deploy to specific nodes, and many others. 
To know all of them, please check the official documentation: [installing in the cluster](https://github.com/inspektor-gadget/inspektor-gadget/blob/main/docs/install.md#installing-in-the-cluster).", - "resultBlock": { - "language": "", - "content": "", - "expectedSimilarityScore": 0, - "expectedRegexPattern": null - } - }, - "codeBlockNumber": 0, - "error": null, - "stdErr": "", - "stdOut": "", - "stepName": "Installing Inspektor Gadget in the cluster", - "stepNumber": 5, - "success": false, - "similarityScore": 0 - }, - { - "codeBlock": { - "language": "bash", - "content": "export RANDOM_ID=\"$(openssl rand -hex 3)\"\nexport MY_RESOURCE_GROUP_NAME=\"myResourceGroup$RANDOM_ID\"\nexport REGION=\"eastus\"\nexport MY_AKS_CLUSTER_NAME=\"myAKSCluster$RANDOM_ID\"\n", - "header": "Define Environment Variables", - "description": "The First step in this tutorial is to define environment variables:", - "resultBlock": { - "language": "", - "content": "", - "expectedSimilarityScore": 0, - "expectedRegexPattern": null - } - }, - "codeBlockNumber": 0, - "error": null, - "stdErr": "", - "stdOut": "", - "stepName": "Define Environment Variables", - "stepNumber": 0, - "success": true, - "similarityScore": 1 - } - ] -} \ No newline at end of file diff --git a/scenarios/AIChatApp/ai-chat-app.md b/scenarios/AIChatApp/ai-chat-app.md new file mode 100644 index 000000000..bcd63bf38 --- /dev/null +++ b/scenarios/AIChatApp/ai-chat-app.md @@ -0,0 +1,308 @@ +--- +title: 'Tutorial: Implement RAG on Azure Cognitive Services with a Chat Interface' +description: Learn how to implement Retrieval-Augmented Generation (RAG) using Azure Cognitive Services, LangChain, ChromaDB, and Chainlit, and deploy it in Azure Container Apps. +ms.topic: tutorial +ms.date: 10/10/2023 +author: GitHubCopilot +ms.author: GitHubCopilot +ms.custom: innovation-engine +--- + +# Tutorial: Create a RAG Chat App using Azure AI Search with OpenAI in Python + +This tutorial guides you through the process of creating a Retrieval-Augmented Generation (RAG) Chat App using Azure AI Search with OpenAI in Python. + +## Prerequisites + +- An Azure account with an active subscription. +- Azure CLI installed on your local machine. +- Python 3.9 or higher installed on your local machine. +- Docker installed if you plan to containerize the application. + +## Step 1: Create Azure Resources + +1. **Set Environment Variables** + + ```bash + export RANDOM_SUFFIX=$(openssl rand -hex 3) + export RESOURCE_GROUP="myResourceGroup$RANDOM_SUFFIX" + export LOCATION="westus2" + ``` + +2. **Create a Resource Group** + + ```bash + az group create --name $RESOURCE_GROUP --location $LOCATION + ``` + + Results: + + + + ```JSON + { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxx", + "location": "westus2", + "managedBy": null, + "name": "myResourceGroupxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" + } + ``` + +3. 
**Create an Azure Cognitive Search Service** + + ```bash + export SEARCH_SERVICE_NAME="mySearchService$RANDOM_SUFFIX" + az search service create \ + --name $SEARCH_SERVICE_NAME \ + --resource-group $RESOURCE_GROUP \ + --location $LOCATION \ + --sku basic + ``` + + Results: + + + + ```JSON + { + "hostName": "mysearchservicexxx.search.windows.net", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Search/searchServices/mySearchServicexxx", + "location": "westus2", + "name": "mySearchServicexxx", + "properties": { + "status": "running", + "provisioningState": "succeeded", + "replicaCount": 1, + "partitionCount": 1, + "sku": { + "name": "basic" + } + }, + "type": "Microsoft.Search/searchServices" + } + ``` + +4. **Create an Azure OpenAI Service** + + ```bash + export OPENAI_SERVICE_NAME="myOpenAIService$RANDOM_SUFFIX" + az cognitiveservices account create \ + --name $OPENAI_SERVICE_NAME \ + --resource-group $RESOURCE_GROUP \ + --kind OpenAI \ + --sku S0 \ + --location $LOCATION \ + --custom-domain $OPENAI_SERVICE_NAME + ``` + +## Step 2: Prepare the Data and Index + +1. **Create a Sample Document** + + ```bash + mkdir rag-chat-app + cd rag-chat-app + echo "Azure Cognitive Search enhances the experience of users by indexing and retrieving relevant data." > documents.txt + ``` + +2. **Upload Documents to Azure Cognitive Search** + + ```bash + az search service update \ + --name $SEARCH_SERVICE_NAME \ + --resource-group $RESOURCE_GROUP \ + --set properties.corsOptions.allowedOrigins="*" + + export SEARCH_ADMIN_KEY=$(az search admin-key show --resource-group $RESOURCE_GROUP --service-name $SEARCH_SERVICE_NAME --query primaryKey --output tsv) + ``` + + Create a Python script `upload_docs.py`: + + ```python + import os + from azure.core.credentials import AzureKeyCredential + from azure.search.documents import SearchClient, SearchIndexClient + from azure.search.documents.indexes.models import SearchIndex, SimpleField, edm + + search_service_endpoint = f"https://{os.environ['SEARCH_SERVICE_NAME']}.search.windows.net" + admin_key = os.environ['SEARCH_ADMIN_KEY'] + + index_name = "documents" + + index_client = SearchIndexClient(search_service_endpoint, AzureKeyCredential(admin_key)) + + fields = [ + SimpleField(name="id", type=edm.String, key=True), + SimpleField(name="content", type=edm.String, searchable=True) + ] + + index = SearchIndex(name=index_name, fields=fields) + + index_client.create_or_update_index(index) + + search_client = SearchClient(search_service_endpoint, index_name, AzureKeyCredential(admin_key)) + + documents = [ + {"id": "1", "content": open("documents.txt").read()} + ] + + result = search_client.upload_documents(documents) + print(f"Uploaded documents: {result}") + ``` + + Run the script: + + ```bash + export SEARCH_SERVICE_NAME + export SEARCH_ADMIN_KEY + python3 upload_docs.py + ``` + +## Step 3: Build the RAG Chat App + +1. **Create a Virtual Environment** + + ```bash + python3 -m venv venv + source venv/bin/activate + ``` + +2. **Install Dependencies** + + Create a `requirements.txt` file: + + ```plaintext + azure-search-documents + openai + python-dotenv + flask + ``` + + Install the dependencies: + + ```bash + pip install -r requirements.txt + ``` + +3. 
**Create the `app.py` File** + + ```python + import os + from flask import Flask, request, jsonify + from azure.core.credentials import AzureKeyCredential + from azure.search.documents import SearchClient + import openai + + app = Flask(__name__) + + search_service_endpoint = f"https://{os.environ['SEARCH_SERVICE_NAME']}.search.windows.net" + index_name = "documents" + search_client = SearchClient(search_service_endpoint, index_name, AzureKeyCredential(os.environ['SEARCH_ADMIN_KEY'])) + + openai.api_type = "azure" + openai.api_base = f"https://{os.environ['OPENAI_SERVICE_NAME']}.openai.azure.com/" + openai.api_version = "2023-03-15-preview" + openai.api_key = os.environ["OPENAI_API_KEY"] + + @app.route('/chat', methods=['POST']) + def chat(): + user_question = request.json.get('question', '') + + results = search_client.search(user_question) + context = " ".join([doc['content'] for doc in results]) + + response = openai.Completion.create( + engine="text-davinci-003", + prompt=f"Answer the following question using the context below:\n\nContext: {context}\n\nQuestion: {user_question}\nAnswer:", + max_tokens=150 + ) + + answer = response.choices[0].text.strip() + return jsonify({'answer': answer}) + + if __name__ == '__main__': + app.run(host='0.0.0.0', port=5000) + ``` + +4. **Set Environment Variables** + + ```bash + export SEARCH_SERVICE_NAME=$SEARCH_SERVICE_NAME + export SEARCH_ADMIN_KEY=$SEARCH_ADMIN_KEY + export OPENAI_SERVICE_NAME=$OPENAI_SERVICE_NAME + export OPENAI_API_KEY="" + ``` + +## Step 4: Test the Application Locally + +Run the application: + +```bash +python3 app.py +``` + +Results: + + + +```log + * Serving Flask app 'app' + * Running on all addresses. + WARNING: This is a development server. Do not use it in a production deployment. + * Running on http://0.0.0.0:5000/ (Press CTRL+C to quit) +``` + +In another terminal, test the chat endpoint: + +```bash +curl -X POST http://localhost:5000/chat -H "Content-Type: application/json" -d '{"question": "What does Azure Cognitive Search do?"}' +``` + +Results: + + + +```JSON +{ + "answer": "Azure Cognitive Search indexes and retrieves relevant data to enhance user experiences." +} +``` + +## Step 5: (Optional) Containerize the Application + +1. **Create a `Dockerfile`** + + ```dockerfile + FROM python:3.9-slim + + WORKDIR /app + + COPY . /app + + RUN pip install --no-cache-dir -r requirements.txt + + EXPOSE 5000 + + CMD ["python", "app.py"] + ``` + +2. **Build the Docker Image** + + ```bash + docker build -t rag-chat-app . + ``` + +3. **Run the Docker Container** + + ```bash + docker run -p 5000:5000 rag-chat-app + ``` + +## Conclusion + +You have successfully created a RAG Chat App using Azure AI Search with OpenAI in Python. 
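+ +If you created these resources only for this walkthrough, you can remove them when you are done. This is a minimal cleanup sketch; it assumes the $RESOURCE_GROUP variable from Step 1 is still set in your shell, and deleting the resource group also removes the search service, the OpenAI resource, and everything else created inside it. + +```bash +# Delete the tutorial's resource group and all resources it contains +az group delete --name $RESOURCE_GROUP --yes --no-wait +```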
\ No newline at end of file diff --git a/scenarios/AIChatApp/app.py b/scenarios/AIChatApp/app.py new file mode 100644 index 000000000..066f1b913 --- /dev/null +++ b/scenarios/AIChatApp/app.py @@ -0,0 +1,37 @@ +import os +from langchain.document_loaders import TextLoader +from langchain.indexes import VectorstoreIndexCreator +from langchain.chains import ConversationalRetrievalChain +from langchain.embeddings import OpenAIEmbeddings +from langchain.llms import OpenAI +import chainlit as cl + +# Set Azure OpenAI API credentials +os.environ["OPENAI_API_TYPE"] = "azure" +os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") +os.environ["OPENAI_API_BASE"] = os.getenv("OPENAI_API_BASE") +os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview" + +# Load documents +loader = TextLoader('documents.txt') +documents = loader.load() + +# Create index +index = VectorstoreIndexCreator().from_loaders([loader]) + +# Create conversational retrieval chain +retriever = index.vectorstore.as_retriever() +qa_chain = ConversationalRetrievalChain.from_llm( + llm=OpenAI(temperature=0), + retriever=retriever +) + +# Initialize conversation history +history = [] + +@cl.on_message +async def main(message): + global history + result = qa_chain({"question": message, "chat_history": history}) + history.append((message, result['answer'])) + await cl.Message(content=result['answer']).send() \ No newline at end of file diff --git a/scenarios/AIChatApp/requirements.txt b/scenarios/AIChatApp/requirements.txt new file mode 100644 index 000000000..6bfe7c298 --- /dev/null +++ b/scenarios/AIChatApp/requirements.txt @@ -0,0 +1,5 @@ +langchain +chromadb +chainlit +openai +tiktoken \ No newline at end of file diff --git a/scenarios/AKSDNSLookupFailError/aksdns-lookup-fail-error.md b/scenarios/AKSDNSLookupFailError/aksdns-lookup-fail-error.md new file mode 100644 index 000000000..010a06192 --- /dev/null +++ b/scenarios/AKSDNSLookupFailError/aksdns-lookup-fail-error.md @@ -0,0 +1,91 @@ +--- +title: Troubleshoot the K8SAPIServerDNSLookupFailVMExtensionError error code (52) +description: Learn how to troubleshoot the K8SAPIServerDNSLookupFailVMExtensionError error (52) when you try to start or create and deploy an Azure Kubernetes Service (AKS) cluster. +ms.topic: article +ms.date: 06/14/2024 +author: MicrosoftDocsExec +ms.author: MicrosoftDocsExec +ms.custom: sap:Create, Upgrade, Scale and Delete operations (cluster or nodepool), innovation-engine +--- + +# Troubleshoot the K8SAPIServerDNSLookupFailVMExtensionError error code (52) + +This article discusses how to identify and resolve the `K8SAPIServerDNSLookupFailVMExtensionError` error (also known as error code ERR_K8S_API_SERVER_DNS_LOOKUP_FAIL, error number 52) that occurs when you try to start or create and deploy a Microsoft Azure Kubernetes Service (AKS) cluster. + +## Prerequisites + +- The [nslookup](/windows-server/administration/windows-commands/nslookup) DNS lookup tool for Windows nodes or the [dig](https://linuxize.com/post/how-to-use-dig-command-to-query-dns-in-linux/) tool for Linux nodes. + +- [Azure CLI](/cli/azure/install-azure-cli), version 2.0.59 or a later version. If Azure CLI is already installed, you can find the version number by running `az --version`. + +## Symptoms + +When you try to start or create an AKS cluster, you receive the following error message: + +> Agents are unable to resolve Kubernetes API server name. It's likely custom DNS server is not correctly configured, please see for more information. 
+> +> Details: Code="VMExtensionProvisioningError" +> +> Message="VM has reported a failure when processing extension 'vmssCSE'. +> +> Error message: "**Enable failed: failed to execute command: command terminated with exit status=52**\n[stdout]\n{ +> +> "ExitCode": "52", +> +> "Output": "Fri Oct 15 10:06:00 UTC 2021,aks- nodepool1-36696444-vmss000000\\nConnection to mcr.microsoft.com 443 port [tcp/https] + +## Cause + +The cluster nodes can't resolve the cluster's fully qualified domain name (FQDN) in Azure DNS. Run the following DNS lookup command on the failed cluster node to find DNS resolutions that are valid. + +| Node OS | Command | +| ------- | ------------------------- | +| Linux | `dig ` | +| Windows | `nslookup ` | + +## Solution + +On your DNS servers and firewall, make sure that nothing blocks the resolution to your cluster's FQDN. Your custom DNS server might be incorrectly configured if something is blocking even after you run the `nslookup` or `dig` command and apply any necessary fixes. For help to configure your custom DNS server, review the following articles: + +- [Create a private AKS cluster](/azure/aks/private-clusters) +- [Private Azure Kubernetes service with custom DNS server](https://github.com/Azure/terraform/tree/00d15e09c54f25fb6387330c36aa4366122c5aaa/quickstart/301-aks-private-cluster) +- [What is IP address 168.63.129.16?](/azure/virtual-network/what-is-ip-address-168-63-129-16) + +When you use a private cluster that has a custom DNS, a DNS zone is created. The DNS zone must be linked to the virtual network. This occurs after the cluster is created. Creating a private cluster that has a custom DNS fails during creation. However, you can restore the creation process to a "success" state by reconciling the cluster. To do this, run the [az resource update](/cli/azure/resource#az-resource-update) command in Azure CLI, as follows: + +Below, set your AKS cluster and resource group names, then run the update command to reconcile the cluster. The environment variables will make your resource names unique and are declared just before use. + +```azurecli-interactive +az resource update --resource-group $RESOURCE_GROUP_NAME \ + --name $CLUSTER_NAME \ + --namespace Microsoft.ContainerService \ + --resource-type ManagedClusters +``` + +Results: + + + +```output +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.ContainerService/ManagedClusters/myAksClusterxxx", + "location": "eastus", + "name": "myAksClusterxxx", + "properties": { + // ...other properties... + }, + "resourceGroup": "myResourceGroupxxx", + "type": "Microsoft.ContainerService/ManagedClusters" +} +``` + +Also verify that your DNS server is configured correctly for your private cluster, as described earlier. + +> [!NOTE] +> Conditional Forwarding doesn't support subdomains. 
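+ +As a quick check, you can look up the cluster's FQDN with the Azure CLI and try to resolve it from a machine that uses the same DNS servers as the cluster nodes. This is a minimal sketch: it assumes $RESOURCE_GROUP_NAME and $CLUSTER_NAME are set to your cluster's resource group and name, and it queries `privateFqdn`; for a public cluster, query `fqdn` instead. + +```bash +# Retrieve the cluster FQDN, then confirm it resolves through your DNS chain +CLUSTER_FQDN=$(az aks show --resource-group $RESOURCE_GROUP_NAME --name $CLUSTER_NAME --query privateFqdn --output tsv) +dig +short "$CLUSTER_FQDN" +``` + +If `dig` returns no address, the DNS server used by the virtual network can't resolve the cluster's DNS zone, which matches the failure described in the Symptoms section.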
+ +## More information + +- [General troubleshooting of AKS cluster creation issues](troubleshoot-aks-cluster-creation-issues.md) + +[!INCLUDE [Azure Help Support](../../../includes/azure-help-support.md)] diff --git a/scenarios/AksOpenAiTerraform/.gitignore b/scenarios/AksOpenAiTerraform/.gitignore new file mode 100644 index 000000000..1b2971f39 --- /dev/null +++ b/scenarios/AksOpenAiTerraform/.gitignore @@ -0,0 +1,40 @@ +# Local .terraform directories +**/.terraform/* + +# .tfstate files +*.tfstate +*.tfstate.* + +# Crash log files +crash.log +crash.*.log + +# Exclude all .tfvars files, which are likely to contain sensitive data, such as +# password, private keys, and other secrets. These should not be part of version +# control as they are data points which are potentially sensitive and subject +# to change depending on the environment. +*.tfvars +*.tfvars.json + +# Ignore override files as they are usually used to override resources locally and so +# are not checked in +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Ignore transient lock info files created by terraform apply +.terraform.tfstate.lock.info + +# Include override files you do wish to add to version control using negated pattern +# !example_override.tf + +# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan +# example: *tfplan* + +# Ignore CLI configuration files +.terraformrc +terraform.rc + +.venv +.vscode \ No newline at end of file diff --git a/scenarios/AksOpenAiTerraform/README.md b/scenarios/AksOpenAiTerraform/README.md new file mode 100644 index 000000000..3977ca25a --- /dev/null +++ b/scenarios/AksOpenAiTerraform/README.md @@ -0,0 +1,73 @@ +--- +title: Deploy and run an Azure OpenAI ChatGPT application on AKS via Terraform +description: This article shows how to deploy an AKS cluster and Azure OpenAI Service via Terraform and how to deploy a ChatGPT-like application in Python. +ms.topic: quickstart +ms.date: 09/06/2024 +author: aamini7 +ms.author: ariaamini +ms.custom: innovation-engine, linux-related-content +--- + +## Provision Resources with Terraform (~5 minutes) +Run terraform to provision all the Azure resources required to setup your new OpenAI website. +```bash +# Terraform parses TF_VAR_* as vars (Ex: TF_VAR_name -> name) +export TF_VAR_location=$REGION +export TF_VAR_kubernetes_version="1.30.9" +export TF_VAR_model_name="gpt-4o-mini" +export TF_VAR_model_version="2024-07-18" +# Terraform consumes sub id as $ARM_SUBSCRIPTION_ID +export ARM_SUBSCRIPTION_ID=$SUBSCRIPTION_ID +# Run Terraform +terraform -chdir=terraform init +terraform -chdir=terraform apply -auto-approve +``` + +## Login to Cluster +In order to use the kubectl to run commands on the newly created cluster, you must first login. 
+```bash +RESOURCE_GROUP=$(terraform -chdir=terraform output -raw resource_group_name) +AKS_CLUSTER_NAME=$(terraform -chdir=terraform output -raw aks_cluster_name) +az aks get-credentials --admin --name $AKS_CLUSTER_NAME --resource-group $RESOURCE_GROUP --subscription $SUBSCRIPTION_ID +``` + +# Install Helm Charts +Install nginx and cert-manager through Helm +```bash +helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx +helm repo add jetstack https://charts.jetstack.io +helm repo update + +STATIC_IP=$(terraform -chdir=terraform output -raw static_ip) +DNS_LABEL=$(terraform -chdir=terraform output -raw dns_label) +helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx \ + --set controller.replicaCount=2 \ + --set controller.nodeSelector."kubernetes\.io/os"=linux \ + --set defaultBackend.nodeSelector."kubernetes\.io/os"=linux \ + --set controller.service.annotations."service\.beta\.kubernetes\.io/azure-dns-label-name"=$DNS_LABEL \ + --set controller.service.loadBalancerIP=$STATIC_IP \ + --set controller.service.annotations."service\.beta\.kubernetes\.io/azure-load-balancer-health-probe-request-path"=/healthz +helm upgrade --install cert-manager jetstack/cert-manager \ + --set crds.enabled=true \ + --set nodeSelector."kubernetes\.io/os"=linux +``` + +## Deploy +Apply/Deploy Manifest File +```bash +export IMAGE="aamini8/magic8ball:latest" +# (Uncomment below to manually build docker image yourself instead of using pre-built image.) +# docker build -t ./magic8ball --push + +export HOSTNAME=$(terraform -chdir=terraform output -raw hostname) +export WORKLOAD_IDENTITY_CLIENT_ID=$(terraform -chdir=terraform output -raw workload_identity_client_id) +export AZURE_OPENAI_DEPLOYMENT=$(terraform -chdir=terraform output -raw openai_deployment) +export AZURE_OPENAI_ENDPOINT=$(terraform -chdir=terraform output -raw openai_endpoint) +envsubst < quickstart-app.yml | kubectl apply -f - +``` + +## Wait for host to be ready +```bash +kubectl wait --for=condition=Ready --timeout=5m certificate/tls-secret +echo "Visit: https://$HOSTNAME" +``` \ No newline at end of file diff --git a/scenarios/AksOpenAiTerraform/magic8ball/Dockerfile b/scenarios/AksOpenAiTerraform/magic8ball/Dockerfile new file mode 100644 index 000000000..fe9aa8ca5 --- /dev/null +++ b/scenarios/AksOpenAiTerraform/magic8ball/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.13-slim +WORKDIR /app + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . 
+EXPOSE 8501 +ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"] \ No newline at end of file diff --git a/scenarios/AksOpenAiTerraform/magic8ball/app.py b/scenarios/AksOpenAiTerraform/magic8ball/app.py new file mode 100644 index 000000000..a3de44d3a --- /dev/null +++ b/scenarios/AksOpenAiTerraform/magic8ball/app.py @@ -0,0 +1,65 @@ +import os +from openai import AzureOpenAI +import streamlit as st +from azure.identity import WorkloadIdentityCredential, get_bearer_token_provider + +azure_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT") +azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") +workload_identity_client_id = os.getenv("WORKLOAD_IDENTITY_CLIENT_ID") + +client = AzureOpenAI( + api_version="2024-10-21", + azure_endpoint=azure_endpoint, + azure_ad_token_provider=get_bearer_token_provider( + WorkloadIdentityCredential(client_id=workload_identity_client_id), + "https://cognitiveservices.azure.com/.default", + ), +) + + +def ask_openai_api(messages: list[str]): + completion = client.chat.completions.create( + messages=messages, model=azure_deployment, stream=True, max_tokens=20 + ) + return completion + + +assistant_prompt = """ +Answer as a magic 8 ball and make random predictions. +If the question is not clear, respond with "Ask the Magic 8 Ball a question about your future." +""" + +# Init state +if "messages" not in st.session_state: + st.session_state.messages = [{"role": "system", "content": assistant_prompt}] +if "disabled" not in st.session_state: + st.session_state.disabled = False + +st.title(":robot_face: Magic 8 Ball") +for message in st.session_state.messages[1:]: # Print previous messages + with st.chat_message(message["role"]): + st.markdown(message["content"]) + + +def disable_chat(): + st.session_state.disabled = True + + +if prompt := st.chat_input( + "Ask your question", on_submit=disable_chat, disabled=st.session_state.disabled +): + # Print Question + st.session_state.messages.append({"role": "user", "content": prompt}) + with st.chat_message("user"): + st.write(prompt) + + # Print Response + with st.chat_message("assistant"): + messages = st.session_state.messages + with st.spinner("Loading..."): + response = st.write_stream(ask_openai_api(messages)) + st.session_state.messages.append({"role": "assistant", "content": response}) + + # Re-enable textbox + st.session_state.disabled = False + st.rerun() diff --git a/scenarios/AksOpenAiTerraform/magic8ball/requirements.txt b/scenarios/AksOpenAiTerraform/magic8ball/requirements.txt new file mode 100644 index 000000000..89cd420f5 --- /dev/null +++ b/scenarios/AksOpenAiTerraform/magic8ball/requirements.txt @@ -0,0 +1,3 @@ +streamlit~=1.40.1 +azure-identity~=1.21.0 +openai~=1.66.2 \ No newline at end of file diff --git a/scenarios/AksOpenAiTerraform/quickstart-app.yml b/scenarios/AksOpenAiTerraform/quickstart-app.yml new file mode 100644 index 000000000..0f2bb4854 --- /dev/null +++ b/scenarios/AksOpenAiTerraform/quickstart-app.yml @@ -0,0 +1,95 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: magic8ball-configmap +data: + AZURE_OPENAI_ENDPOINT: $AZURE_OPENAI_ENDPOINT + AZURE_OPENAI_DEPLOYMENT: $AZURE_OPENAI_DEPLOYMENT + WORKLOAD_IDENTITY_CLIENT_ID: $WORKLOAD_IDENTITY_CLIENT_ID +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: magic8ball + labels: + app.kubernetes.io/name: magic8ball +spec: + replicas: 2 + selector: + matchLabels: + app.kubernetes.io/name: magic8ball + template: + metadata: + labels: + app.kubernetes.io/name: magic8ball + 
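+        # The label below opts the pod into Azure AD Workload Identity: the webhook injects the projected service account token and the Azure environment variables that the app's WorkloadIdentityCredential relies on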
azure.workload.identity/use: "true" + spec: + serviceAccountName: magic8ball-sa + containers: + - name: magic8ball + image: $IMAGE + imagePullPolicy: Always + ports: + - containerPort: 8501 + envFrom: + - configMapRef: + name: magic8ball-configmap +--- +apiVersion: v1 +kind: Service +metadata: + name: magic8ball +spec: + selector: + app.kubernetes.io/name: magic8ball + ports: + - port: 80 + targetPort: 8501 + protocol: TCP +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: magic8ball-sa + annotations: + azure.workload.identity/client-id: $WORKLOAD_IDENTITY_CLIENT_ID + azure.workload.identity/tenant-id: $TENANT_ID +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: magic8ball + annotations: + cert-manager.io/issuer: letsencrypt-dev +spec: + ingressClassName: nginx + tls: + - hosts: + - $HOSTNAME + secretName: tls-secret + rules: + - host: $HOSTNAME + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: magic8ball + port: + number: 80 +--- +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: letsencrypt-dev +spec: + acme: + server: https://acme-v02.api.letsencrypt.org/directory + email: $EMAIL + privateKeySecretRef: + name: tls-secret + solvers: + - http01: + ingress: + ingressClassName: nginx \ No newline at end of file diff --git a/scenarios/AksOpenAiTerraform/terraform/.terraform.lock.hcl b/scenarios/AksOpenAiTerraform/terraform/.terraform.lock.hcl new file mode 100644 index 000000000..3ea2ce44c --- /dev/null +++ b/scenarios/AksOpenAiTerraform/terraform/.terraform.lock.hcl @@ -0,0 +1,41 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. + +provider "registry.terraform.io/hashicorp/azurerm" { + version = "4.20.0" + constraints = "~> 4.20.0" + hashes = [ + "h1:O7hZA85M9/G5LZt+m0bppCinoyp8C346JpI+QnMjYVo=", + "zh:0d29f06abed90da7b943690244420fe1de3e28d4c6de0db441f1af2aa91ea6b8", + "zh:2345e07e91dfec9af3df25fd5119d3a09f91e37ca10af30a344f7b3c297e9ad8", + "zh:42d77650df0238333bcce5da91b4f3d62e54b1ed456f58a9c913270d80a70262", + "zh:43ce137f2644769ceada99a2c815c9c30807e42f61f2f6ce60869411217375f9", + "zh:5e4d8f6a5212f6b7ba29846a2ff328214c7f983ce772196f8e6721edcefd4c59", + "zh:69613d671884fc568a075359e2920d7c19e6d588717b4532b90fb4a4ca8aabd0", + "zh:827ca4fcc25958c731677cb1d87cb09764e3a24ae4117fd9776429341fcdeabe", + "zh:8fad25f949dff7c6f40ea22b13a8b4de6ea0de3c5a975c4a3281529e4797e897", + "zh:b3d175e2725fe38f2a71d5fb346a9d4ff70d449a9d229c95c24f88e764dd2d47", + "zh:c53f3fef67aa64664c85bb8603b0a9730a267a76d7d84ceae16416de7ccb2437", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + "zh:f7d9ff06344547232e6c84bc3f6bf9c29cf978ba7cd585c10f4c3361a4b81f22", + ] +} + +provider "registry.terraform.io/hashicorp/random" { + version = "3.7.1" + hashes = [ + "h1:/qtweZW2sk0kBNiQM02RvBXmlVdI9oYqRMCyBZ8XA98=", + "zh:3193b89b43bf5805493e290374cdda5132578de6535f8009547c8b5d7a351585", + "zh:3218320de4be943e5812ed3de995946056db86eb8d03aa3f074e0c7316599bef", + "zh:419861805a37fa443e7d63b69fb3279926ccf98a79d256c422d5d82f0f387d1d", + "zh:4df9bd9d839b8fc11a3b8098a604b9b46e2235eb65ef15f4432bde0e175f9ca6", + "zh:5814be3f9c9cc39d2955d6f083bae793050d75c572e70ca11ccceb5517ced6b1", + "zh:63c6548a06de1231c8ee5570e42ca09c4b3db336578ded39b938f2156f06dd2e", + "zh:697e434c6bdee0502cc3deb098263b8dcd63948e8a96d61722811628dce2eba1", + "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", + "zh:a0b8e44927e6327852bbfdc9d408d802569367f1e22a95bcdd7181b1c3b07601", + 
"zh:b7d3af018683ef22794eea9c218bc72d7c35a2b3ede9233b69653b3c782ee436", + "zh:d63b911d618a6fe446c65bfc21e793a7663e934b2fef833d42d3ccd38dd8d68d", + "zh:fa985cd0b11e6d651f47cff3055f0a9fd085ec190b6dbe99bf5448174434cdea", + ] +} diff --git a/scenarios/AksOpenAiTerraform/terraform/main.tf b/scenarios/AksOpenAiTerraform/terraform/main.tf new file mode 100644 index 000000000..037b78f70 --- /dev/null +++ b/scenarios/AksOpenAiTerraform/terraform/main.tf @@ -0,0 +1,144 @@ +############################################################################### +# azurerm plugin setup +############################################################################### +terraform { + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 4.20.0" + } + } +} + +provider "azurerm" { + features {} +} + +############################################################################### +# Resource Group +############################################################################### +data "azurerm_client_config" "current" { +} + +resource "random_string" "this" { + length = 8 + special = false + lower = true + upper = false + numeric = false +} + +locals { + tenant_id = data.azurerm_client_config.current.tenant_id + subscription_id = data.azurerm_client_config.current.subscription_id + random_id = random_string.this.result +} + +resource "azurerm_resource_group" "main" { + name = "${var.resource_group_name_prefix}-${local.random_id}-rg" + location = var.location + + lifecycle { + ignore_changes = [tags] + } +} + +############################################################################### +# Kubernetes +############################################################################### +resource "azurerm_kubernetes_cluster" "main" { + name = "AksCluster-${local.random_id}" + location = var.location + resource_group_name = azurerm_resource_group.main.name + + sku_tier = "Standard" + dns_prefix = "AksCluster${local.random_id}" + kubernetes_version = var.kubernetes_version + automatic_upgrade_channel = "stable" + + workload_identity_enabled = true + oidc_issuer_enabled = true + + image_cleaner_enabled = true + image_cleaner_interval_hours = 72 + + default_node_pool { + name = "agentpool" + vm_size = "Standard_DS2_v2" + node_count = 2 + + upgrade_settings { + max_surge = "10%" + drain_timeout_in_minutes = 0 + node_soak_duration_in_minutes = 0 + } + } + + identity { + type = "UserAssigned" + identity_ids = tolist([azurerm_user_assigned_identity.workload.id]) + } +} + +resource "azurerm_user_assigned_identity" "workload" { + name = "WorkloadManagedIdentity" + resource_group_name = azurerm_resource_group.main.name + location = var.location +} + +resource "azurerm_federated_identity_credential" "this" { + name = azurerm_user_assigned_identity.workload.name + resource_group_name = azurerm_user_assigned_identity.workload.resource_group_name + parent_id = azurerm_user_assigned_identity.workload.id + audience = ["api://AzureADTokenExchange"] + issuer = azurerm_kubernetes_cluster.main.oidc_issuer_url + subject = "system:serviceaccount:default:magic8ball-sa" +} + +############################################################################### +# OpenAI +############################################################################### +resource "azurerm_cognitive_account" "openai" { + name = "OpenAi-${local.random_id}" + location = var.location + resource_group_name = azurerm_resource_group.main.name + + kind = "OpenAI" + custom_subdomain_name = "magic8ball-${local.random_id}" + sku_name = "S0" +} + +resource 
"azurerm_cognitive_deployment" "deployment" { + name = var.model_name + cognitive_account_id = azurerm_cognitive_account.openai.id + + model { + format = "OpenAI" + name = var.model_name + version = var.model_version + } + + sku { + name = "Standard" + } +} + +resource "azurerm_role_assignment" "cognitive_services_user" { + scope = azurerm_cognitive_account.openai.id + role_definition_name = "Cognitive Services User" + principal_id = azurerm_user_assigned_identity.workload.principal_id + principal_type = "ServicePrincipal" + + skip_service_principal_aad_check = true +} + +############################################################################### +# Networking +############################################################################### +resource "azurerm_public_ip" "this" { + name = "PublicIp" + domain_name_label = "magic8ball-${local.random_id}" + location = var.location + resource_group_name = azurerm_kubernetes_cluster.main.node_resource_group + allocation_method = "Static" +} \ No newline at end of file diff --git a/scenarios/AksOpenAiTerraform/terraform/outputs.tf b/scenarios/AksOpenAiTerraform/terraform/outputs.tf new file mode 100644 index 000000000..4d58c75ac --- /dev/null +++ b/scenarios/AksOpenAiTerraform/terraform/outputs.tf @@ -0,0 +1,31 @@ +output "resource_group_name" { + value = azurerm_resource_group.main.name +} + +output "aks_cluster_name" { + value = azurerm_kubernetes_cluster.main.name +} + +output "workload_identity_client_id" { + value = azurerm_user_assigned_identity.workload.client_id +} + +output "openai_endpoint" { + value = azurerm_cognitive_account.openai.endpoint +} + +output "openai_deployment" { + value = azurerm_cognitive_deployment.deployment.name +} + +output "hostname" { + value = azurerm_public_ip.this.fqdn +} + +output "static_ip" { + value = azurerm_public_ip.this.ip_address +} + +output "dns_label" { + value = azurerm_public_ip.this.domain_name_label +} \ No newline at end of file diff --git a/scenarios/AksOpenAiTerraform/terraform/variables.tf b/scenarios/AksOpenAiTerraform/terraform/variables.tf new file mode 100644 index 000000000..05ce7856e --- /dev/null +++ b/scenarios/AksOpenAiTerraform/terraform/variables.tf @@ -0,0 +1,20 @@ +variable "resource_group_name_prefix" { + type = string + default = "AksOpenAiTerraform" +} + +variable "location" { + type = string +} + +variable "kubernetes_version" { + type = string +} + +variable "model_name" { + type = string +} + +variable "model_version" { + type = string +} \ No newline at end of file diff --git a/scenarios/ConfigurePythonContainer/configure-python-container.md b/scenarios/ConfigurePythonContainer/configure-python-container.md new file mode 100644 index 000000000..4ef412bc2 --- /dev/null +++ b/scenarios/ConfigurePythonContainer/configure-python-container.md @@ -0,0 +1,207 @@ +--- +title: 'Quickstart: Configure a Linux Python app in Azure App Service' +description: Learn how to configure a Linux Python app in Azure App Service, including setting Python versions and customizing build automation. +ms.topic: quickstart +ms.date: 10/07/2023 +author: msangapu +ms.author: msangapu +ms.custom: innovation-engine, devx-track-python, devx-track-azurecli, linux-related-content +--- + +# Quickstart: Configure a Linux Python app in Azure App Service + +In this quickstart, you'll learn how to configure a Python app deployed on Azure App Service using the Azure CLI. 
This includes setting and checking the Python version, listing the supported Python versions for App Service, and customizing build automation during deployment. + +## Prerequisites + +Ensure you have the following: + +- An Azure subscription. +- [Azure CLI installed](https://learn.microsoft.com/cli/azure/install-azure-cli) locally or access to [Azure Cloud Shell](https://ms.portal.azure.com/#cloudshell/). +- Permissions to manage resources in your Azure subscription. + +## Step 1: Create necessary resources + +The following commands create the required resources: a resource group, an App Service plan, and an App Service instance. **Random suffixes are included for resource names to avoid conflicts.** + +### Create a resource group + +```bash +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export REGION="centralindia" +export RESOURCE_GROUP="MyResourceGroup$RANDOM_SUFFIX" +az group create --name $RESOURCE_GROUP --location $REGION +``` + +Results: + + + +```json +{ + "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/MyResourceGroupxxx", + "location": "centralindia", + "managedBy": null, + "name": "MyResourceGroupxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +### Create an App Service plan + +```bash +export APP_SERVICE_PLAN="MyAppServicePlan$RANDOM_SUFFIX" +az appservice plan create --name $APP_SERVICE_PLAN --resource-group $RESOURCE_GROUP --sku FREE --is-linux +``` + +Results: + + + +```json +{ + "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/MyResourceGroupxxx/providers/Microsoft.Web/serverfarms/MyAppServicePlanxxx", + "location": "centralindia", + "name": "MyAppServicePlanxxx", + "sku": { + "name": "F1", + "tier": "Free", + "size": "F1", + "family": "F", + "capacity": 1 + }, + "reserved": true +} +``` + +### Create an App Service instance + +```bash +export APP_NAME="MyPythonApp$RANDOM_SUFFIX" +export RUNTIME="PYTHON|3.10" +az webapp create --resource-group $RESOURCE_GROUP --plan $APP_SERVICE_PLAN --name $APP_NAME --runtime $RUNTIME +``` + +Results: + + + +```json +{ + "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/MyResourceGroupxxx/providers/Microsoft.Web/sites/MyPythonAppxxx", + "name": "MyPythonAppxxx", + "state": "Running", + "defaultHostName": "MyPythonAppxxx.azurewebsites.net" +} +``` + +## Show the current Python version + +The following command retrieves the Python runtime version currently used by your Azure App Service. + +```bash +az webapp config show --resource-group $RESOURCE_GROUP --name $APP_NAME --query linuxFxVersion -o jsonc +``` + +Results: + + + +```jsonc +"PYTHON|3.10" +``` + +## Set the desired Python version + +Update your Azure App Service instance to use a specific Python version. Replace the desired Python version (e.g., "PYTHON|3.11") as needed. + +```bash +export DESIRED_PYTHON_VERSION="PYTHON|3.11" +az webapp config set --resource-group $RESOURCE_GROUP --name $APP_NAME --linux-fx-version $DESIRED_PYTHON_VERSION +``` + +## Verify Version +Verify the updated Python version: + +```bash +az webapp config show --resource-group $RESOURCE_GROUP --name $APP_NAME --query linuxFxVersion -o jsonc +``` + +Results: + + + +```jsonc +"PYTHON|3.11" +``` + +## List all supported Python runtime versions + +Use the following command to view all Python versions supported by Azure App Service on Linux. 
+ +```bash +az webapp list-runtimes --os linux --query "[?contains(@, 'PYTHON')]" -o jsonc +``` + +Results: + + + +```jsonc +[ + "PYTHON|3.7", + "PYTHON|3.8", + "PYTHON|3.9", + "PYTHON|3.10", + "PYTHON|3.11" +] +``` + +## Step 5: Customize build automation + +Azure App Service automates the Python app-building process during deployment. These steps demonstrate how to configure or modify its behavior. + +### Enable build automation + +The following command configures App Service to run the build process during deployment by setting the `SCM_DO_BUILD_DURING_DEPLOYMENT` variable to `1`. + +```bash +az webapp config appsettings set --resource-group $RESOURCE_GROUP --name $APP_NAME --settings SCM_DO_BUILD_DURING_DEPLOYMENT="1" +``` + +## Step 6: Add application settings + +App settings in Azure App Service act as environment variables within your app. Below, we add and verify a sample setting. + +### Add a new App Service environment variable + +For example, set a `DATABASE_SERVER` variable for your app as shown below: + +```bash +export DATABASE_SERVER="https://mydatabase.example" +az webapp config appsettings set --resource-group $RESOURCE_GROUP --name $APP_NAME --settings DATABASE_SERVER=$DATABASE_SERVER +``` + +### Verify the setting + +```bash +az webapp config appsettings list --resource-group $RESOURCE_GROUP --name $APP_NAME --query "[?name=='DATABASE_SERVER']" -o jsonc +``` + +Results: + + + +```jsonc +[ + { + "name": "DATABASE_SERVER", + "slotSetting": false, + "value": "https://mydatabase.example" + } +] +``` \ No newline at end of file diff --git a/scenarios/CreateAKSWebApp/README.md b/scenarios/CreateAKSWebApp/README.md index 8888f5bc7..f3527485f 100644 --- a/scenarios/CreateAKSWebApp/README.md +++ b/scenarios/CreateAKSWebApp/README.md @@ -14,31 +14,14 @@ ms.custom: innovation-engine Welcome to this tutorial where we will take you step by step in creating an Azure Kubernetes Web Application that is secured via https. This tutorial assumes you are logged into Azure CLI already and have selected a subscription to use with the CLI. It also assumes that you have Helm installed ([Instructions can be found here](https://helm.sh/docs/intro/install/)). -## Define Environment Variables +## Create a resource group -The first step in this tutorial is to define environment variables. +A resource group is a container for related resources. All resources must be placed in a resource group. We will create one for this tutorial. The following command creates a resource group with the previously defined $MY_RESOURCE_GROUP_NAME and $REGION parameters. ```bash export RANDOM_ID="$(openssl rand -hex 3)" -export NETWORK_PREFIX="$(($RANDOM % 254 + 1))" -export SSL_EMAIL_ADDRESS="$(az account show --query user.name --output tsv)" export MY_RESOURCE_GROUP_NAME="myAKSResourceGroup$RANDOM_ID" export REGION="westeurope" -export MY_AKS_CLUSTER_NAME="myAKSCluster$RANDOM_ID" -export MY_PUBLIC_IP_NAME="myPublicIP$RANDOM_ID" -export MY_DNS_LABEL="mydnslabel$RANDOM_ID" -export MY_VNET_NAME="myVNet$RANDOM_ID" -export MY_VNET_PREFIX="10.$NETWORK_PREFIX.0.0/16" -export MY_SN_NAME="mySN$RANDOM_ID" -export MY_SN_PREFIX="10.$NETWORK_PREFIX.0.0/22" -export FQDN="${MY_DNS_LABEL}.${REGION}.cloudapp.azure.com" -``` - -## Create a resource group - -A resource group is a container for related resources. All resources must be placed in a resource group. We will create one for this tutorial. The following command creates a resource group with the previously defined $MY_RESOURCE_GROUP_NAME and $REGION parameters. 
- -```bash az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION ``` @@ -65,6 +48,11 @@ Results: A virtual network is the fundamental building block for private networks in Azure. Azure Virtual Network enables Azure resources like VMs to securely communicate with each other and the internet. ```bash +export NETWORK_PREFIX="$(($RANDOM % 254 + 1))" +export MY_VNET_NAME="myVNet$RANDOM_ID" +export MY_VNET_PREFIX="10.$NETWORK_PREFIX.0.0/16" +export MY_SN_NAME="mySN$RANDOM_ID" +export MY_SN_PREFIX="10.$NETWORK_PREFIX.0.0/22" az network vnet create \ --resource-group $MY_RESOURCE_GROUP_NAME \ --location $REGION \ @@ -129,6 +117,7 @@ This will take a few minutes. ```bash export MY_SN_ID=$(az network vnet subnet list --resource-group $MY_RESOURCE_GROUP_NAME --vnet-name $MY_VNET_NAME --query "[0].id" --output tsv) +export MY_AKS_CLUSTER_NAME="myAKSCluster$RANDOM_ID" az aks create \ --resource-group $MY_RESOURCE_GROUP_NAME \ --name $MY_AKS_CLUSTER_NAME \ @@ -176,6 +165,8 @@ kubectl get nodes ## Install NGINX Ingress Controller ```bash +export MY_PUBLIC_IP_NAME="myPublicIP$RANDOM_ID" +export MY_DNS_LABEL="mydnslabel$RANDOM_ID" export MY_STATIC_IP=$(az network public-ip create --resource-group MC_${MY_RESOURCE_GROUP_NAME}_${MY_AKS_CLUSTER_NAME}_${REGION} --location ${REGION} --name ${MY_PUBLIC_IP_NAME} --dns-name ${MY_DNS_LABEL} --sku Standard --allocation-method static --version IPv4 --zone 1 2 3 --query publicIp.ipAddress -o tsv) helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx helm repo update @@ -458,6 +449,7 @@ while [[ $(date -u +%s) -le $endtime ]]; do fi; done +export FQDN="${MY_DNS_LABEL}.${REGION}.cloudapp.azure.com" curl "http://$FQDN" ``` @@ -488,92 +480,70 @@ Helm is a Kubernetes deployment tool for automating creation, packaging, configu Cert-manager provides Helm charts as a first-class method of installation on Kubernetes. -```bash -# Add the Jetstack Helm repository -# This repository is the only supported source of cert-manager charts. There are some other mirrors and copies across the internet, but those are entirely unofficial and could present a security risk. +1. Add the Jetstack Helm repository -helm repo add jetstack https://charts.jetstack.io + This repository is the only supported source of cert-manager charts. There are some other mirrors and copies across the internet, but those are entirely unofficial and could present a security risk. -# Update local Helm Chart repository cache -helm repo update + ```bash + helm repo add jetstack https://charts.jetstack.io + helm repo update + helm install cert-manager jetstack/cert-manager --namespace cert-manager --version v1.7.0 + ``` -# Install Cert-Manager addon via helm by running the following -helm install cert-manager jetstack/cert-manager --namespace cert-manager --version v1.7.0 +2. Update local Helm Chart repository cache -# ClusterIssuers are Kubernetes resources that represent certificate authorities (CAs) that are able to generate signed certificates by honoring certificate signing requests. All cert-manager certificates require a referenced issuer that is in a ready condition to attempt to honor the request. -# The issuer we are using can be found in the `cluster-issuer-prod.yml file` - -cat < cluster-issuer-prod.yml -apiVersion: cert-manager.io/v1 -kind: ClusterIssuer -metadata: - name: letsencrypt-prod -spec: - acme: - # You must replace this email address with your own. - # Let's Encrypt will use this to contact you about expiring - # certificates, and issues related to your account. 
- email: $SSL_EMAIL_ADDRESS - # ACME server URL for Let’s Encrypt’s prod environment. - # The staging environment will not issue trusted certificates but is - # used to ensure that the verification process is working properly - # before moving to production - server: https://acme-v02.api.letsencrypt.org/directory - # Secret resource used to store the account's private key. - privateKeySecretRef: - name: letsencrypt - # Enable the HTTP-01 challenge provider - # you prove ownership of a domain by ensuring that a particular - # file is present at the domain - solvers: - - http01: - ingress: - class: nginx - podTemplate: - spec: - nodeSelector: - "kubernetes.io/os": linux -EOF + ```bash + ``` -cluster_issuer_variables=$( azure-vote-nginx-ssl.yml ---- -# INGRESS WITH SSL PROD -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: vote-ingress - namespace: default - annotations: - kubernetes.io/tls-acme: "true" - nginx.ingress.kubernetes.io/ssl-redirect: "true" - cert-manager.io/cluster-issuer: letsencrypt-prod -spec: - ingressClassName: nginx - tls: - - hosts: - - $FQDN - secretName: azure-vote-nginx-secret - rules: - - host: $FQDN - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: azure-vote-front - port: - number: 80 -EOF +4. Apply Certificate Issuer YAML File -azure_vote_nginx_ssl_variables=$( cluster-issuer-prod.yml + apiVersion: cert-manager.io/v1 + kind: ClusterIssuer + metadata: + name: letsencrypt-prod + spec: + acme: + # You must replace this email address with your own. + # Let's Encrypt will use this to contact you about expiring + # certificates, and issues related to your account. + email: $SSL_EMAIL_ADDRESS + # ACME server URL for Let’s Encrypt’s prod environment. + # The staging environment will not issue trusted certificates but is + # used to ensure that the verification process is working properly + # before moving to production + server: https://acme-v02.api.letsencrypt.org/directory + # Secret resource used to store the account's private key. + privateKeySecretRef: + name: letsencrypt + # Enable the HTTP-01 challenge provider + # you prove ownership of a domain by ensuring that a particular + # file is present at the domain + solvers: + - http01: + ingress: + class: nginx + podTemplate: + spec: + nodeSelector: + "kubernetes.io/os": linux + EOF + cluster_issuer_variables=$(Create one for free. +- Access granted to Azure OpenAI in the desired Azure subscription. +- Access permissions to [create Azure OpenAI resources and to deploy models](../how-to/role-based-access-control.md). +- The Azure CLI. For more information, see [How to install the Azure CLI](/cli/azure/install-azure-cli). + +> [!NOTE] +> Currently, you must submit an application to access Azure OpenAI Service. To apply for access, complete [this form](https://aka.ms/oai/access). If you need assistance, open an issue on this repository to contact Microsoft. + +## Sign in to the Azure CLI + +[Sign in](/cli/azure/authenticate-azure-cli) to the Azure CLI or select **Open Cloudshell** in the following steps. + +## Create an Azure resource group + +To create an Azure OpenAI resource, you need an Azure resource group. When you create a new resource through the Azure CLI, you can also create a new resource group or instruct Azure to use an existing group. The following example shows how to create a new resource group named _$MY_RESOURCE_GROUP_NAME_ with the [az group create](/cli/azure/group?view=azure-cli-latest&preserve-view=true#az-group-create) command. 
The resource group is created in the East US region as defined by the enviornment variable _$REGION_. + +```bash +export RANDOM_ID="$(openssl rand -hex 3)" +export MY_RESOURCE_GROUP_NAME="myAOAIResourceGroup$RANDOM_ID" +export REGION="eastus" +export TAGS="owner=user" + +az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION --tags $TAGS +``` + +Results: + +```JSON +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myAOAIResourceGroupxxxxxx", + "location": "eastus", + "managedBy": null, + "name": "myAIResourceGroupxxxxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": { + "owner": "user" + }, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Create a resource + +Use the [az cognitiveservices account create](/cli/azure/cognitiveservices/account?view=azure-cli-latest&preserve-view=true#az-cognitiveservices-account-create) command to create an Azure OpenAI resource in the resource group. In the following example, you create a resource named _$MY_OPENAI_RESOURCE_NAME_ in the _$MY_RESOURCE_GROUP_NAME_ resource group. When you try the example, update the environment variables to use your desired values for the resource group and resource name. + +```bash +export MY_OPENAI_RESOURCE_NAME="myOAIResource$RANDOM_ID" +az cognitiveservices account create \ +--name $MY_OPENAI_RESOURCE_NAME \ +--resource-group $MY_RESOURCE_GROUP_NAME \ +--location $REGION \ +--kind OpenAI \ +--sku s0 \ +``` +Results: + +```JSON +{ + "etag": "\"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\"", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myAOAIResourceGroupxxxxxx/providers/Microsoft.CognitiveServices/accounts/myOAIResourcexxxxxx", + "identity": null, + "kind": "OpenAI", + "location": "eastus", + "name": "myOAIResourcexxxxxx", + "properties": { + "abusePenalty": null, + "allowedFqdnList": null, + "apiProperties": null, + "callRateLimit": { + "count": null, + "renewalPeriod": null, + "rules": [ + { + "count": 30.0, + "dynamicThrottlingEnabled": null, + "key": "openai.dalle.post", + "matchPatterns": [ + { + "method": "POST", + "path": "dalle/*" + }, + { + "method": "POST", + "path": "openai/images/*" + } + ], + "minCount": null, + "renewalPeriod": 1.0 + }, + { + "count": 30.0, + "dynamicThrottlingEnabled": null, + "key": "openai.dalle.other", + "matchPatterns": [ + { + "method": "*", + "path": "dalle/*" + }, + { + "method": "*", + "path": "openai/operations/images/*" + } + ], + "minCount": null, + "renewalPeriod": 1.0 + }, + { + "count": 30.0, + "dynamicThrottlingEnabled": null, + "key": "openai", + "matchPatterns": [ + { + "method": "*", + "path": "openai/*" + } + ], + "minCount": null, + "renewalPeriod": 1.0 + }, + { + "count": 30.0, + "dynamicThrottlingEnabled": null, + "key": "default", + "matchPatterns": [ + { + "method": "*", + "path": "*" + } + ], + "minCount": null, + "renewalPeriod": 1.0 + } + ] + }, + "capabilities": [ + { + "name": "VirtualNetworks", + "value": null + }, + { + "name": "CustomerManagedKey", + "value": null + }, + { + "name": "MaxFineTuneCount", + "value": "100" + }, + { + "name": "MaxRunningFineTuneCount", + "value": "1" + }, + { + "name": "MaxUserFileCount", + "value": "50" + }, + { + "name": "MaxTrainingFileSize", + "value": "512000000" + }, + { + "name": "MaxUserFileImportDurationInHours", + "value": "1" + }, + { + "name": "MaxFineTuneJobDurationInHours", + "value": "720" + }, + { + "name": "TrustedServices", + "value": "Microsoft.CognitiveServices,Microsoft.MachineLearningServices,Microsoft.Search" + } 
+ ], + "commitmentPlanAssociations": null, + "customSubDomainName": null, + "dateCreated": "xxxx-xx-xxxxx:xx:xx.xxxxxxxx", + "deletionDate": null, + "disableLocalAuth": null, + "dynamicThrottlingEnabled": null, + "encryption": null, + "endpoint": "https://eastus.api.cognitive.microsoft.com/", + "endpoints": { + "OpenAI Dall-E API": "https://eastus.api.cognitive.microsoft.com/", + "OpenAI Language Model Instance API": "https://eastus.api.cognitive.microsoft.com/", + "OpenAI Model Scaleset API": "https://eastus.api.cognitive.microsoft.com/", + "OpenAI Whisper API": "https://eastus.api.cognitive.microsoft.com/" + }, + "internalId": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "isMigrated": false, + "locations": null, + "migrationToken": null, + "networkAcls": null, + "privateEndpointConnections": [], + "provisioningState": "Succeeded", + "publicNetworkAccess": "Enabled", + "quotaLimit": null, + "restore": null, + "restrictOutboundNetworkAccess": null, + "scheduledPurgeDate": null, + "skuChangeInfo": null, + "userOwnedStorage": null + }, + "resourceGroup": "myAOAIResourceGroupxxxxxx", + "sku": { + "capacity": null, + "family": null, + "name": "S0", + "size": null, + "tier": null + }, + "systemData": { + "createdAt": "xxxx-xx-xxxxx:xx:xx.xxxxxxxx", + "createdBy": "yyyyyyyyyyyyyyyyyyyyyyyy", + "createdByType": "User", + "lastModifiedAt": "xxxx-xx-xxxxx:xx:xx.xxxxxx+xx:xx", + "lastModifiedBy": "yyyyyyyyyyyyyyyyyyyyyyyy", + "lastModifiedByType": "User" + }, + "tags": null, + "type": "Microsoft.CognitiveServices/accounts" +} +``` + +## Retrieve information about the resource + +After you create the resource, you can use different commands to find useful information about your Azure OpenAI Service instance. The following examples demonstrate how to retrieve the REST API endpoint base URL and the access keys for the new resource. + +### Get the endpoint URL + +Use the [az cognitiveservices account show](/cli/azure/cognitiveservices/account?view=azure-cli-latest&preserve-view=true#az-cognitiveservices-account-show) command to retrieve the REST API endpoint base URL for the resource. In this example, we direct the command output through the [jq](https://jqlang.github.io/jq/) JSON processor to locate the `.properties.endpoint` value. + +When you try the example, update the environment variables to use your values for the resource group _$MY_RESOURCE_GROUP_NAME_ and resource _$MY_OPENAI_RESOURCE_NAME_. + +```bash +az cognitiveservices account show \ +--name $MY_OPENAI_RESOURCE_NAME \ +--resource-group $MY_RESOURCE_GROUP_NAME \ +| jq -r .properties.endpoint +``` + +### Get the primary API key + +To retrieve the access keys for the resource, use the [az cognitiveservices account keys list](/cli/azure/cognitiveservices/account?view=azure-cli-latest&preserve-view=true#az-cognitiveservices-account-keys-list) command. In this example, we direct the command output through the [jq](https://jqlang.github.io/jq/) JSON processor to locate the `.key1` value. + +When you try the example, update the environment variables to use your values for the resource group and resource. + +```bash +az cognitiveservices account keys list \ +--name $MY_OPENAI_RESOURCE_NAME \ +--resource-group $MY_RESOURCE_GROUP_NAME \ +| jq -r .key1 +``` + +## Deploy a model + +To deploy a model, use the [az cognitiveservices account deployment create](/cli/azure/cognitiveservices/account/deployment?view=azure-cli-latest&preserve-view=true#az-cognitiveservices-account-deployment-create) command. 
In the following example, you deploy an instance of the `text-embedding-ada-002` model and give it the name _$MY_MODEL_NAME_. When you try the example, update the variables to use your values for the resource group and resource. You don't need to change the `model-version`, `model-format` or `sku-capacity`, and `sku-name` values. + +```bash +export MY_MODEL_NAME="myModel$RANDOM_ID" +az cognitiveservices account deployment create \ +--name $MY_OPENAI_RESOURCE_NAME \ +--resource-group $MY_RESOURCE_GROUP_NAME \ +--deployment-name $MY_MODEL_NAME \ +--model-name text-embedding-ada-002 \ +--model-version "2" \ +--model-format OpenAI \ +--sku-capacity "1" \ +--sku-name "Standard" +``` + +`--sku-name` accepts the following deployment types: `Standard`, `GlobalStandard`, and `ProvisionedManaged`. Learn more about [deployment type options](../how-to/deployment-types.md). + + +> [!IMPORTANT] +> When you access the model via the API, you need to refer to the deployment name rather than the underlying model name in API calls, which is one of the [key differences](../how-to/switching-endpoints.yml) between OpenAI and Azure OpenAI. OpenAI only requires the model name. Azure OpenAI always requires deployment name, even when using the model parameter. In our docs, we often have examples where deployment names are represented as identical to model names to help indicate which model works with a particular API endpoint. Ultimately your deployment names can follow whatever naming convention is best for your use case. + +Results: + +```JSON +{ + "etag": "\"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\"", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myAOAIResourceGroupxxxxxx/providers/Microsoft.CognitiveServices/accounts/myOAIResourcexxxxxx/deployments/myModelxxxxxx", + "name": "myModelxxxxxx", + "properties": { + "callRateLimit": null, + "capabilities": { + "embeddings": "true", + "embeddingsMaxInputs": "1" + }, + "model": { + "callRateLimit": null, + "format": "OpenAI", + "name": "text-embedding-ada-002", + "source": null, + "version": "1" + }, + "provisioningState": "Succeeded", + "raiPolicyName": null, + "rateLimits": [ + { + "count": 1.0, + "dynamicThrottlingEnabled": null, + "key": "request", + "matchPatterns": null, + "minCount": null, + "renewalPeriod": 10.0 + }, + { + "count": 1000.0, + "dynamicThrottlingEnabled": null, + "key": "token", + "matchPatterns": null, + "minCount": null, + "renewalPeriod": 60.0 + } + ], + "scaleSettings": null, + "versionUpgradeOption": "OnceNewDefaultVersionAvailable" + }, + "resourceGroup": "myAOAIResourceGroupxxxxxx", + "sku": { + "capacity": 1, + "family": null, + "name": "Standard", + "size": null, + "tier": null + }, + "systemData": { + "createdAt": "xxxx-xx-xxxxx:xx:xx.xxxxxx+xx:xx", + "createdBy": "yyyyyyyyyyyyyyyyyyyyyyyy", + "createdByType": "User", + "lastModifiedAt": "xxxx-xx-xxxxx:xx:xx.xxxxxx+xx:xx", + "lastModifiedBy": "yyyyyyyyyyyyyyyyyyyyyyyy", + "lastModifiedByType": "User" + }, + "type": "Microsoft.CognitiveServices/accounts/deployments" +} +``` +## Delete a model from your resource + +You can delete any model deployed from your resource with the [az cognitiveservices account deployment delete](/cli/azure/cognitiveservices/account/deployment?view=azure-cli-latest&preserve-view=true#az-cognitiveservices-account-deployment-delete) command. 
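+
+For example, the following is a minimal sketch of removing the deployment created earlier in this quickstart. It assumes the $MY_OPENAI_RESOURCE_NAME, $MY_RESOURCE_GROUP_NAME, and $MY_MODEL_NAME environment variables from the previous steps are still set.
+
+```bash
+# Delete only the model deployment; the Azure OpenAI resource itself is left in place.
+az cognitiveservices account deployment delete \
+--name $MY_OPENAI_RESOURCE_NAME \
+--resource-group $MY_RESOURCE_GROUP_NAME \
+--deployment-name $MY_MODEL_NAME
+```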
\ No newline at end of file diff --git a/scenarios/CreateContainerAppDeploymentFromSource/create-container-app-deployment-from-source.md b/scenarios/CreateContainerAppDeploymentFromSource/create-container-app-deployment-from-source.md new file mode 100644 index 000000000..5be70797f --- /dev/null +++ b/scenarios/CreateContainerAppDeploymentFromSource/create-container-app-deployment-from-source.md @@ -0,0 +1,636 @@ +---
+title: Create a Container App leveraging Blob Store, SQL, and Computer Vision
+description: This tutorial shows how to create a Container App leveraging Blob Store, SQL, and Computer Vision
+author: mbifeld
+ms.author: mbifeld
+ms.topic: article
+ms.date: 12/06/2023
+ms.custom: innovation-engine
+---
+
+# Create a Container App leveraging Blob Store, SQL, and Computer Vision
+
+In this guide, we'll be walking through deploying the necessary resources for a web app that allows users to cast votes using their name, email and an image. Users can vote for their preference of cat or dog, using an image of a cat or a dog that will be analyzed by our infrastructure. For this to work, we will be deploying resources across several different Azure services:
+
+- **Azure Storage Account** to store the images
+- **Azure Database for PostgreSQL** to store users and votes
+- **Azure Computer Vision** to analyze the images for cats or dogs
+- **Azure Container App** to deploy our code
+
+Note: If you've never created a Computer Vision resource before, you will not be able to create one using the Azure CLI. You must create your first Computer Vision resource from the Azure portal to review and acknowledge the Responsible AI terms and conditions. You can do so here: [Create a Computer Vision Resource](https://portal.azure.com/#create/Microsoft.CognitiveServicesComputerVision). After that, you can create subsequent resources using any deployment tool (SDK, CLI, or ARM template, etc.) under the same Azure subscription.
+
+## Clone the sample repository
+
+First, we're going to clone this repository onto our local machines. This will provide the starter code required to provide the functionality for the simple application outlined above. We can clone with a simple git command.
+
+```bash
+git clone https://github.com/Azure/computer-vision-nextjs-webapp.git
+```
+
+To preserve saved environment variables, it's important that this terminal window stays open for the duration of the deployment.
+
+## Log in to Azure using the CLI
+
+In order to run commands against Azure using [the CLI](https://learn.microsoft.com/cli/azure/install-azure-cli), you need to log in. This is done through the `az login` command.
+
+## Create a resource group
+
+A resource group is a container for related resources. All resources must be placed in a resource group. We will create one for this tutorial. The following command creates a resource group with the previously defined $MY_RESOURCE_GROUP_NAME and $REGION parameters. 
+ +```bash +export SUFFIX="$(openssl rand -hex 3)" +export MY_RESOURCE_GROUP_NAME=rg$SUFFIX +export REGION="eastus2" +az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION +``` + +Results: + + +```json +{ + "id": "/subscriptions/xxxxx-xxxxxx-xxxxxx-xxxxxx/resourceGroups/$MY_RESOURCE_GROUP_NAME", + "location": "$REGION", + "managedBy": null, + "name": "$MY_RESOURCE_GROUP_NAME", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Create the storage account + +To create a storage account in this resource group we need to run a simple command. To this command, we are passing the name of the storage account, the resource group to deploy it in, the physical region to deploy it in, and the SKU of the storage account. All values are configured using environment variables. + +```bash +export MY_STORAGE_ACCOUNT_NAME=storage$SUFFIX +az storage account create --name $MY_STORAGE_ACCOUNT_NAME --resource-group $MY_RESOURCE_GROUP_NAME --location $REGION --sku Standard_LRS +``` + +Results: + + +```json +{ + "accessTier": "Hot", + "allowBlobPublicAccess": false, + "allowCrossTenantReplication": null, + "allowSharedKeyAccess": null, + "allowedCopyScope": null, + "azureFilesIdentityBasedAuthentication": null, + "blobRestoreStatus": null, + "creationTime": "xxxx-xx-xxxxx:xx:xx.xxxxxx+xx:xx", + "customDomain": null, + "defaultToOAuthAuthentication": null, + "dnsEndpointType": null, + "enableHttpsTrafficOnly": true, + "enableNfsV3": null, + "encryption": { + "encryptionIdentity": null, + "keySource": "Microsoft.Storage", + "keyVaultProperties": null, + "requireInfrastructureEncryption": null, + "services": { + "blob": { + "enabled": true, + "keyType": "Account", + "lastEnabledTime": "xxxx-xx-xxxxx:xx:xx.xxxxxx+xx:xx" + }, + "file": { + "enabled": true, + "keyType": "Account", + "lastEnabledTime": "xxxx-xx-xxxxx:xx:xx.xxxxxx+xx:xx" + }, + "queue": null, + "table": null + } + }, + "extendedLocation": null, + "failoverInProgress": null, + "geoReplicationStats": null, + "id": "/subscriptions/xxxxx-xxxxxx-xxxxxx-xxxxxx/resourceGroups/$MY_RESOURCE_GROUP_NAME/providers/Microsoft.Storage/storageAccounts/$MY_STORAGE_ACCOUNT_NAME", + "identity": null, + "immutableStorageWithVersioning": null, + "isHnsEnabled": null, + "isLocalUserEnabled": null, + "isSftpEnabled": null, + "keyCreationTime": { + "key1": "xxxx-xx-xxxxx:xx:xx.xxxxxx+xx:xx", + "key2": "xxxx-xx-xxxxx:xx:xx.xxxxxx+xx:xx" + }, + "keyPolicy": null, + "kind": "StorageV2", + "largeFileSharesState": null, + "lastGeoFailoverTime": null, + "location": "$REGION", + "minimumTlsVersion": "TLS1_0", + "name": "$MY_STORAGE_ACCOUNT_NAME", + "networkRuleSet": { + "bypass": "AzureServices", + "defaultAction": "Allow", + "ipRules": [], + "resourceAccessRules": null, + "virtualNetworkRules": [] + }, + "primaryEndpoints": { + "blob": "https://$MY_STORAGE_ACCOUNT_NAME.blob.core.windows.net/", + "dfs": "https://$MY_STORAGE_ACCOUNT_NAME.dfs.core.windows.net/", + "file": "https://$MY_STORAGE_ACCOUNT_NAME.file.core.windows.net/", + "internetEndpoints": null, + "microsoftEndpoints": null, + "queue": "https://$MY_STORAGE_ACCOUNT_NAME.queue.core.windows.net/", + "table": "https://$MY_STORAGE_ACCOUNT_NAME.table.core.windows.net/", + "web": "https://$MY_STORAGE_ACCOUNT_NAME.z22.web.core.windows.net/" + }, + "primaryLocation": "$REGION", + "privateEndpointConnections": [], + "provisioningState": "Succeeded", + "publicNetworkAccess": null, + "resourceGroup": "$MY_RESOURCE_GROUP_NAME", + 
"routingPreference": null, + "sasPolicy": null, + "secondaryEndpoints": null, + "secondaryLocation": null, + "sku": { + "name": "Standard_LRS", + "tier": "Standard" + }, + "statusOfPrimary": "available", + "statusOfSecondary": null, + "storageAccountSkuConversionStatus": null, + "tags": {}, + "type": "Microsoft.Storage/storageAccounts" +} +``` + +We also need to store one of the API keys for the storage account into an environment variable for later use (to create a container, and put it into an environment file for the code). We are calling the `keys list` command on the storage account and storing the first one in a `STORAGE_ACCOUNT_KEY` environment variable. + +```bash +export STORAGE_ACCOUNT_KEY=$(az storage account keys list --account-name $MY_STORAGE_ACCOUNT_NAME --resource-group $MY_RESOURCE_GROUP_NAME --query "[0].value" --output tsv) +``` + +## Create a container in the storage account + +Run the following command to create an `images` container in the storage account we just created. User uploaded images will be stored as blobs in this container. + +```bash +az storage container create --name images --account-name $MY_STORAGE_ACCOUNT_NAME --account-key $STORAGE_ACCOUNT_KEY --public-access blob +``` + +Results: + + +```json +{ + "created": true +} +``` + +## Create a database + +We will be creating an Azure Database for PostgreSQL flexible server for the application to store users and their votes. We are passing several arguments to the `create` command: + +- The basics: database name, resource group, and physical region to deploy in. +- The tier (which determines the capabilities of the server) as `burstable`, which is for workloads that don't need full CPU continuously. +- The SKU as `Standard_B1ms`. + - `Standard` for the performance tier. + - `B` for burstable workload. + - `1` for a single vCore. + - `ms` for memory optimized. +- The storage size, 32 GiB +- The PostgreSQL major version, 15 +- The datatabase credentials: username and password + +```bash +export MY_DATABASE_SERVER_NAME=dbserver$SUFFIX +export MY_DATABASE_NAME=db$SUFFIX +export MY_DATABASE_USERNAME=dbuser$SUFFIX +export MY_DATABASE_PASSWORD=dbpass$SUFFIX +az postgres flexible-server create \ + --name $MY_DATABASE_SERVER_NAME \ + --database-name $MY_DATABASE_NAME \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --location $REGION \ + --tier Burstable \ + --sku-name Standard_B1ms \ + --storage-size 32 \ + --version 15 \ + --admin-user $MY_DATABASE_USERNAME \ + --admin-password $MY_DATABASE_PASSWORD \ + --yes +``` + +Results: + + +```json +{ + "connectionString": "postgresql://$MY_DATABASE_USERNAME:$MY_DATABASE_PASSWORD@$MY_DATABASE_NAME.postgres.database.azure.com/flexibleserverdb?sslmode=require", + "databaseName": "$MY_DATABASE_NAME", + "firewallName": "FirewallIPAddress_xxxx-xx-xx-xx-xx", + "host": "$MY_DATABASE_NAME.postgres.database.azure.com", + "id": "/subscriptions/xxxx-xx-xxxxx:xx:xx.xxxxxx+xx:xx/resourceGroups/$MY_RESOURCE_GROUP_NAME/providers/Microsoft.DBforPostgreSQL/flexibleServers/$MY_DATABASE_NAME", + "location": "$REGION", + "password": "$MY_DATABASE_PASSWORD", + "resourceGroup": "$MY_RESOURCE_GROUP_NAME", + "skuname": "Standard_B1ms", + "username": "$MY_DATABASE_USERNAME", + "version": "15" +} +``` + +We also need to store the connection string to the database into an environment variable for later use. This URL will allow us to access the database within the resource we just created. 
+ +```bash +export DATABASE_URL="postgres://$MY_DATABASE_USERNAME:$MY_DATABASE_PASSWORD@$MY_DATABASE_SERVER_NAME.postgres.database.azure.com/$MY_DATABASE_NAME" +``` + +## Create a Computer Vision resource + +We will be creating a Computer Vision resource to be able to identify cats or dogs in the pictures users upload. Creating a Computer Vision resource can be done with a single command. We are passing several arguments to the `create` command: + +- The basics: resource name, resource group, the region, and to create a Computer Vision resource. +- The SKU as `S1`, or the most cost-effective paid performance tier. + +```bash +export MY_COMPUTER_VISION_NAME=computervision$SUFFIX + +az cognitiveservices account create \ + --name $MY_COMPUTER_VISION_NAME \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --location $REGION \ + --kind ComputerVision \ + --sku S1 \ + --yes +``` + +Results: + + +```json +{ + "etag": "xxxxxxx-xxxxxx-xxxxxxx-xxxxxxxxxx", + "id": "/subscriptions/xxxx-xx-xxxxx:xx:xx.xxxxxx+xx:xx/resourceGroups/$MY_RESOURCE_GROUP_NAME/providers/Microsoft.CognitiveServices/accounts/$MY_COMPUTER_VISION_NAME", + "identity": null, + "kind": "ComputerVision", + "location": "$REGION", + "name": "$MY_COMPUTER_VISION_NAME", + "properties": { + "allowedFqdnList": null, + "apiProperties": null, + "callRateLimit": { + "count": null, + "renewalPeriod": null, + "rules": [ + { + "count": 30.0, + "dynamicThrottlingEnabled": true, + "key": "vision.recognizeText", + "matchPatterns": [ + { + "method": "POST", + "path": "vision/recognizeText" + }, + { + "method": "GET", + "path": "vision/textOperations/*" + }, + { + "method": "*", + "path": "vision/read/*" + } + ], + "minCount": null, + "renewalPeriod": 1.0 + }, + { + "count": 15.0, + "dynamicThrottlingEnabled": true, + "key": "vision", + "matchPatterns": [ + { + "method": "*", + "path": "vision/*" + } + ], + "minCount": null, + "renewalPeriod": 1.0 + }, + { + "count": 500.0, + "dynamicThrottlingEnabled": null, + "key": "container.billing", + "matchPatterns": [ + { + "method": "*", + "path": "billing/*" + } + ], + "minCount": null, + "renewalPeriod": 10.0 + }, + { + "count": 20.0, + "dynamicThrottlingEnabled": true, + "key": "default", + "matchPatterns": [ + { + "method": "*", + "path": "*" + } + ], + "minCount": null, + "renewalPeriod": 1.0 + } + ] + }, + "capabilities": [ + { + "name": "DynamicThrottling", + "value": null + }, + { + "name": "VirtualNetworks", + "value": null + }, + { + "name": "Container", + "value": "ComputerVision.VideoAnalytics,ComputerVision.ComputerVisionRead,ComputerVision.ocr,ComputerVision.readfile,ComputerVision.readfiledsd,ComputerVision.recognizetext,ComputerVision.ComputerVision,ComputerVision.ocrlayoutworker,ComputerVision.ocrcontroller,ComputerVision.ocrdispatcher,ComputerVision.ocrbillingprocessor,ComputerVision.ocranalyzer,ComputerVision.ocrpagesplitter,ComputerVision.ocrapi,ComputerVision.ocrengineworker" + } + ], + "customSubDomainName": null, + "dateCreated": "xxxx-xx-xxxxx:xx:xx.xxxxxx+xx:xx", + "deletionDate": null, + "disableLocalAuth": null, + "dynamicThrottlingEnabled": null, + "encryption": null, + "endpoint": "https://$REGION.api.cognitive.microsoft.com/", + "endpoints": { + "Computer Vision": "https://$REGION.api.cognitive.microsoft.com/", + "Container": "https://$REGION.api.cognitive.microsoft.com/" + }, + "internalId": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "isMigrated": false, + "migrationToken": null, + "networkAcls": null, + "privateEndpointConnections": [], + "provisioningState": "Succeeded", + 
"publicNetworkAccess": "Enabled", + "quotaLimit": null, + "restore": null, + "restrictOutboundNetworkAccess": null, + "scheduledPurgeDate": null, + "skuChangeInfo": null, + "userOwnedStorage": null + }, + "resourceGroup": "$MY_RESOURCE_GROUP_NAME", + "sku": { + "capacity": null, + "family": null, + "name": "S1", + "size": null, + "tier": null + }, + "systemData": { + "createdAt": "xxxx-xx-xxxxx:xx:xx.xxxxxx+xx:xx", + "createdBy": "username@domain.com", + "createdByType": "User", + "lastModifiedAt": "xxxx-xx-xxxxx:xx:xx.xxxxxx+xx:xx", + "lastModifiedBy": "username@domain.com", + "lastModifiedByType": "User" + }, + "tags": null, + "type": "Microsoft.CognitiveServices/accounts" +} +``` + +To access our computer vision resource, we need both the endpoint and the key. With the Azure CLI, we have access to two `az cognitiveservices account` commands: `show` and `keys list`, which give us what we need. + +```bash +export COMPUTER_VISION_ENDPOINT=$(az cognitiveservices account show --name $MY_COMPUTER_VISION_NAME --resource-group $MY_RESOURCE_GROUP_NAME --query "properties.endpoint" --output tsv) +export COMPUTER_VISION_KEY=$(az cognitiveservices account keys list --name $MY_COMPUTER_VISION_NAME --resource-group $MY_RESOURCE_GROUP_NAME --query "key1" --output tsv) +``` + +## Deploy the code into a Container App + +Now that we've got our storage, database, and Computer Vision resources all set up, we are ready to deploy the application code. To do this, we're going to use Azure Container Apps to host a containerized build of our Next.js app. The `Dockerfile` is already created at the root of the repository, so all we need to do is run a single command to deploy the code. + +This command will create an Azure Container Registry resource to host our Docker image, an Azure Container App resource which runs the image, and an Azure Container App Environment resource for our image. Let's break down what we're passing into the command. 
+ +- The basics: resource name, resource group, and the region +- The name of the Azure Container App Environment resource to use or create +- The path to the source code + +```bash +export MY_CONTAINER_APP_NAME=containerapp$SUFFIX +export MY_CONTAINER_APP_ENV_NAME=containerappenv$SUFFIX + +az containerapp up \ + --name $MY_CONTAINER_APP_NAME \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --location $REGION \ + --environment $MY_CONTAINER_APP_ENV_NAME \ + --context-path computer-vision-nextjs-webapp \ + --source computer-vision-nextjs-webapp \ + --target-port 3000 \ + --ingress external \ + --env-vars \ + AZURE_DATABASE_URL=$DATABASE_URL \ + AZURE_COMPUTER_VISION_KEY=$COMPUTER_VISION_KEY \ + AZURE_COMPUTER_VISION_ENDPOINT=$COMPUTER_VISION_ENDPOINT \ + AZURE_STORAGE_ACCOUNT_NAME=$MY_STORAGE_ACCOUNT_NAME \ + AZURE_STORAGE_ACCOUNT_KEY=$STORAGE_ACCOUNT_KEY +``` + +We can verify that the command was successful by using: + +```bash +az containerapp show --name $MY_CONTAINER_APP_NAME --resource-group $MY_RESOURCE_GROUP_NAME +``` + +Results: + + +```json +{ + "id": "/subscriptions/xxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxxx/resourceGroups/$MY_RESOURCE_GROUP_NAME/providers/Microsoft.App/containerapps/$MY_CONTAINER_APP_NAME", + "identity": { + "type": "None" + }, + "location": "West US", + "name": "$MY_CONTAINER_APP_NAME", + "properties": { + "configuration": { + "activeRevisionsMode": "Single", + "dapr": null, + "ingress": { + "allowInsecure": false, + "clientCertificateMode": null, + "corsPolicy": null, + "customDomains": null, + "exposedPort": 0, + "external": true, + "fqdn": "$MY_CONTAINER_APP_NAME.xxxxxxx-xxxxxxxxxx.$REGION.azurecontainerapps.io", + "ipSecurityRestrictions": null, + "stickySessions": null, + "targetPort": 3000, + "traffic": [ + { + "latestRevision": true, + "weight": 100 + } + ], + "transport": "Auto" + }, + "maxInactiveRevisions": null, + "registries": null, + "secrets": null, + "service": null + }, + "customDomainVerificationId": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "environmentId": "/subscriptions/xxxxxxxx-xxxxxxxx-xxxxxxxxx-xxxxxxxxx/resourceGroups/$MY_RESOURCE_GROUP_NAME/providers/Microsoft.App/managedEnvironments/$MY_CONTAINER_APP_ENV_NAME", + "eventStreamEndpoint": "https://$REGION.azurecontainerapps.dev/subscriptions/xxxxxxxx-xxxxxxxx-xxxxxxxxx-xxxxxxxxx/resourceGroups/$MY_RESOURCE_GROUP_NAME/containerApps/$MY_CONTAINER_APP_NAME/eventstream", + "latestReadyRevisionName": "$MY_CONTAINER_APP_NAME-xxxxxxx", + "latestRevisionFqdn": "$MY_CONTAINER_APP_NAME-xxxxxxx.kindocean-xxxxxxxx.$REGION.azurecontainerapps.io", + "latestRevisionName": "$MY_CONTAINER_APP_NAME-xxxxxxx", + "managedEnvironmentId": "/subscriptions/xxxxxxxx-xxxxxxxx-xxxxxxxxx-xxxxxxxxx/resourceGroups/$MY_RESOURCE_GROUP_NAME/providers/Microsoft.App/managedEnvironments/$MY_CONTAINER_APP_ENV_NAME", + "outboundIpAddresses": ["xx.xxx.xx.xxxx"], + "provisioningState": "Succeeded", + "runningStatus": "Running", + "template": { + "containers": [ + { + "env": [ + { + "name": "AZURE_DATABASE_URL", + "value": "$DATABASE_URL" + }, + { + "name": "AZURE_COMPUTER_VISION_KEY", + "value": "$COMPUTER_VISION_KEY" + }, + { + "name": "AZURE_COMPUTER_VISION_ENDPOINT", + "value": "$COMPUTER_VISION_ENDPOINT" + }, + { + "name": "AZURE_STORAGE_ACCOUNT_NAME", + "value": "$MY_STORAGE_ACCOUNT_NAME" + }, + { + "name": "AZURE_STORAGE_ACCOUNT_KEY", + "value": "$STORAGE_ACCOUNT_KEY" + } + ], + "image": "xxxxxx/xx-xxxx", + "name": "$MY_CONTAINER_APP_NAME", + "resources": { + "cpu": 0.5, + "ephemeralStorage": 
"2Gi", + "memory": "1Gi" + } + } + ], + "initContainers": null, + "revisionSuffix": "", + "scale": { + "maxReplicas": 10, + "minReplicas": null, + "rules": null + }, + "serviceBinds": null, + "terminationGracePeriodSeconds": null, + "volumes": null + }, + "workloadProfileName": null + }, + "resourceGroup": "$MY_RESOURCE_GROUP_NAME", + "systemData": { + "createdAt": "xxxx-xx-xxxxx:xx:xx.xxxxxx+xx:xx", + "createdBy": "username@domain.com", + "createdByType": "User", + "lastModifiedAt": "xxxx-xx-xxxxx:xx:xx.xxxxxx+xx:xx", + "lastModifiedBy": "username@domain.com", + "lastModifiedByType": "User" + }, + "type": "Microsoft.App/containerApps" +} +``` + +## Create a database firewall rule + +By default, our database is configured to allow traffic from an allowlist of IP addresses. We need to add the IP of our newly deployed Container App to this allowlist. We can get the IP from the `az containerapp show` command. + +```bash +export CONTAINER_APP_IP=$(az containerapp show --name $MY_CONTAINER_APP_NAME --resource-group $MY_RESOURCE_GROUP_NAME --query "properties.outboundIpAddresses[0]" --output tsv) +``` + +We can now add this IP as a firewall rule with this command: + +```bash +az postgres flexible-server firewall-rule create \ + --name $MY_DATABASE_SERVER_NAME \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --rule-name allow-container-app \ + --start-ip-address $CONTAINER_APP_IP \ + --end-ip-address $CONTAINER_APP_IP +``` + +Results: + + +```json +{ + "endIpAddress": "xx.xxx.xx.xxx", + "id": "/subscriptions/xxxxxxxx-xxxxxxxx-xxxxxxx-xxxxxxx/resourceGroups/$MY_RESOURCE_GROUP_NAME/providers/Microsoft.DBforPostgreSQL/flexibleServers/$MY_DATABASE_SERVER_NAME/firewallRules/allow-container-app", + "name": "allow-container-app", + "resourceGroup": "$MY_RESOURCE_GROUP_NAME", + "startIpAddress": "xx.xxx.xx.xxx", + "systemData": null, + "type": "Microsoft.DBforPostgreSQL/flexibleServers/firewallRules" +} +``` + +## Create a storage CORS rule + +Web browsers implement a security restriction known as same-origin policy that prevents a web page from calling APIs in a different domain. CORS provides a secure way to allow one domain (the origin domain) to call APIs in another domain. We need to add a CORS rule on the URL of our web app to our storage account. First, let's get the URL with a similar `az containerapp show` command as earlier. + +```bash +export CONTAINER_APP_URL=https://$(az containerapp show --name $MY_CONTAINER_APP_NAME --resource-group $MY_RESOURCE_GROUP_NAME --query "properties.configuration.ingress.fqdn" --output tsv) +``` + +Next, we're ready to add a CORS rule with the following command. Let's break down the different parts of this command. + +- We are specifying blob service as the storage type to add the rule to. +- We are allowing all operations to be performed. +- We are allowing only the container app URL we just saved. +- We are allowing all HTTP headers from this URL. +- Max age is the amount of time, in seconds, that a browser should cache the preflight response for a specific request. +- We are passing the storage account name and key from earlier. + +```bash +az storage cors add \ + --services b \ + --methods DELETE GET HEAD MERGE OPTIONS POST PUT PATCH \ + --origins $CONTAINER_APP_URL \ + --allowed-headers '*' \ + --max-age 3600 \ + --account-name $MY_STORAGE_ACCOUNT_NAME \ + --account-key $STORAGE_ACCOUNT_KEY +``` + +That's it! Feel free to access the newly deployed web app in your browser printing the CONTAINER_APP_URL environment variable we added earlier. 
+ +```bash +echo $CONTAINER_APP_URL +``` + +## Next Steps + +- [Azure Container Apps documentation](https://learn.microsoft.com/azure/container-apps/) +- [Azure Database for PostgreSQL documentation](https://learn.microsoft.com/azure/postgresql/) +- [Azure Blob Storage documentation](https://learn.microsoft.com/azure/storage/blobs/) +- [Azure Computer (AI) Vision Documentation](https://learn.microsoft.com/azure/ai-services/computer-vision/) diff --git a/scenarios/CreateLinuxVMSecureWebServer/create-linux-vm-secure-web-server.md b/scenarios/CreateLinuxVMSecureWebServer/create-linux-vm-secure-web-server.md new file mode 100644 index 000000000..4c62190b8 --- /dev/null +++ b/scenarios/CreateLinuxVMSecureWebServer/create-linux-vm-secure-web-server.md @@ -0,0 +1,837 @@ +--- +title: Create a NGINX Webserver Secured via HTTPS +description: This tutorial shows how to create a NGINX Webserver Secured via HTTPS. +author: mbifeld@microsoft.com +ms.topic: article +ms.date: 11/10/2023 +ms.custom: innovation-engine +--- + +# Create a NGINX Webserver Secured via HTTPS + +To secure web servers, a Transport Layer Security (TLS), previously known as Secure Sockets Layer (SSL), certificate can be used to encrypt web traffic. These TLS/SSL certificates can be stored in Azure Key Vault, and allow secure deployments of certificates to Linux virtual machines (VMs) in Azure. In this tutorial you learn how to: + +> [!div class="checklist"] + +> * Setup and secure Azure Networking +> * Create an Azure Key Vault +> * Generate or upload a certificate to the Key Vault +> * Create a VM and install the NGINX web server +> * Inject the certificate into the VM and configure NGINX with a TLS binding + +If you choose to install and use the CLI locally, this tutorial requires that you're running the Azure CLI version 2.0.30 or later. Run `az --version` to find the version. If you need to install or upgrade, see [Install Azure CLI]( https://learn.microsoft.com//cli/azure/install-azure-cli ). + +## Create a Resource Group + +Before you can create a secure Linux VM, create a resource group with az group create. The following example creates a resource group equal to the contents of the variable *MY_RESOURCE_GROUP_NAME* in the location specified by the variable contents *REGION*: + +```bash +export RANDOM_ID="$(openssl rand -hex 3)" +export MY_RESOURCE_GROUP_NAME="myResourceGroup$RANDOM_ID" +export REGION="centralindia" + +az group create \ + --name $MY_RESOURCE_GROUP_NAME \ + --location $REGION -o JSON +``` + +Results: + + +```JSON +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupb1404f", + "location": "centralindia", + "managedBy": null, + "name": "myResourceGroupb1404f", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Set up VM Network + +Use az network vnet create to create a virtual network named *$MY_VNET_NAME* with a subnet named *$MY_SN_NAME*in the *$MY_RESOURCE_GROUP_NAME*resource group. 
+ +```bash +export NETWORK_PREFIX="$(($RANDOM % 254 + 1))" +export MY_VNET_NAME="myVNet$RANDOM_ID" +export MY_VNET_PREFIX="10.$NETWORK_PREFIX.0.0/16" +export MY_SN_NAME="mySN$RANDOM_ID" +export MY_SN_PREFIX="10.$NETWORK_PREFIX.0.0/24" + +az network vnet create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_VNET_NAME \ + --location $REGION \ + --address-prefix $MY_VNET_PREFIX \ + --subnet-name $MY_SN_NAME \ + --subnet-prefix $MY_SN_PREFIX -o JSON +``` + +Results: + + +```JSON +{ + "newVNet": { + "addressSpace": { + "addressPrefixes": [ + "10.168.0.0/16" + ] + }, + "bgpCommunities": null, + "ddosProtectionPlan": null, + "dhcpOptions": { + "dnsServers": [] + }, + "enableDdosProtection": false, + "enableVmProtection": null, + "encryption": null, + "extendedLocation": null, + "flowTimeoutInMinutes": null, + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupb1404f/providers/Microsoft.Network/virtualNetworks/myVNetb1404f", + "ipAllocations": null, + "location": "eastus", + "name": "myVNetb1404f", + "provisioningState": "Succeeded", + "resourceGroup": "myResourceGroupb1404f", + "subnets": [ + { + "addressPrefix": "10.168.0.0/24", + "addressPrefixes": null, + "applicationGatewayIpConfigurations": null, + "delegations": [], + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupb1404f/providers/Microsoft.Network/virtualNetworks/myVNetb1404f/subnets/mySNb1404f", + "ipAllocations": null, + "ipConfigurationProfiles": null, + "ipConfigurations": null, + "name": "mySNb1404f", + "natGateway": null, + "networkSecurityGroup": null, + "privateEndpointNetworkPolicies": "Disabled", + "privateEndpoints": null, + "privateLinkServiceNetworkPolicies": "Enabled", + "provisioningState": "Succeeded", + "purpose": null, + "resourceGroup": "myResourceGroupb1404f", + "resourceNavigationLinks": null, + "routeTable": null, + "serviceAssociationLinks": null, + "serviceEndpointPolicies": null, + "serviceEndpoints": null, + "type": "Microsoft.Network/virtualNetworks/subnets" + } + ], + "tags": {}, + "type": "Microsoft.Network/virtualNetworks", + "virtualNetworkPeerings": [] + } +} +``` + +Use az network public-ip create to create a standard zone-redundant public IPv4 address named *$MY_PUBLIC_IP_NAME* in *$MY_RESOURCE_GROUP_NAME*. 
+ +```bash +export MY_PUBLIC_IP_NAME="myPublicIP$RANDOM_ID" +export MY_DNS_LABEL="mydnslabel$RANDOM_ID" + +az network public-ip create \ + --name $MY_PUBLIC_IP_NAME \ + --location $REGION \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --dns-name $MY_DNS_LABEL \ + --sku Standard \ + --allocation-method static \ + --version IPv4 \ + --zone 1 2 3 -o JSON +``` + +Results: + + +```JSON +{ + "publicIp": { + "ddosSettings": null, + "deleteOption": null, + "dnsSettings": { + "domainNameLabel": "mydnslabelb1404f", + "fqdn": "mydnslabelb1404f.eastus.cloudapp.azure.com", + "reverseFqdn": null + }, + "extendedLocation": null, + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupb1404f/providers/Microsoft.Network/publicIPAddresses/myPublicIPb1404f", + "idleTimeoutInMinutes": 4, + "ipAddress": "20.88.178.210", + "ipConfiguration": null, + "ipTags": [], + "linkedPublicIpAddress": null, + "location": "eastus", + "migrationPhase": null, + "name": "myPublicIPb1404f", + "natGateway": null, + "provisioningState": "Succeeded", + "publicIpAddressVersion": "IPv4", + "publicIpAllocationMethod": "Static", + "publicIpPrefix": null, + "resourceGroup": "myResourceGroupb1404f", + "servicePublicIpAddress": null, + "sku": { + "name": "Standard", + "tier": "Regional" + }, + "tags": null, + "type": "Microsoft.Network/publicIPAddresses", + "zones": [ + "1", + "2", + "3" + ] + } +} +``` + +Security rules in network security groups enable you to filter the type of network traffic that can flow in and out of virtual network subnets and network interfaces. To learn more about network security groups, see [Network security group overview](https://learn.microsoft.com/azure/virtual-network/network-security-groups-overview). + +```bash +export MY_NSG_NAME="myNSGName$RANDOM_ID" + +az network nsg create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_NSG_NAME \ + --location $REGION -o JSON +``` + +Results: + + +```JSON +{ + "NewNSG": { + "defaultSecurityRules": [ + { + "access": "Allow", + "description": "Allow inbound traffic from all VMs in VNET", + "destinationAddressPrefix": "VirtualNetwork", + "destinationAddressPrefixes": [], + "destinationPortRange": "*", + "destinationPortRanges": [], + "direction": "Inbound", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupb1404f/providers/Microsoft.Network/networkSecurityGroups/myNSGNameb1404f/defaultSecurityRules/AllowVnetInBound", + "name": "AllowVnetInBound", + "priority": 65000, + "protocol": "*", + "provisioningState": "Succeeded", + "resourceGroup": "myResourceGroupb1404f", + "sourceAddressPrefix": "VirtualNetwork", + "sourceAddressPrefixes": [], + "sourcePortRange": "*", + "sourcePortRanges": [], + "type": "Microsoft.Network/networkSecurityGroups/defaultSecurityRules" + } + ], + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupb1404f/providers/Microsoft.Network/networkSecurityGroups/myNSGNameb1404f", + "location": "eastus", + "name": "myNSGNameb1404f", + "provisioningState": "Succeeded", + "resourceGroup": "myResourceGroupb1404f", + "securityRules": [], + "type": "Microsoft.Network/networkSecurityGroups" + } +} +``` + +Open ports 22 (SSH), 80 (HTTP) and 443 (HTTPS) to allow SSH and Web traffic + +```bash +export MY_NSG_SSH_RULE="Allow-Access$RANDOM_ID" + +az network nsg rule create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --nsg-name $MY_NSG_NAME \ + --name $MY_NSG_SSH_RULE \ + --access Allow \ + --protocol Tcp \ + --direction Inbound \ + 
--priority 100 \ + --source-address-prefix '*' \ + --source-port-range '*' \ + --destination-address-prefix '*' \ + --destination-port-range 22 80 443 -o JSON +``` + +Results: + + +```JSON +{ + "access": "Allow", + "description": null, + "destinationAddressPrefix": "*", + "destinationAddressPrefixes": [], + "destinationApplicationSecurityGroups": null, + "destinationPortRange": null, + "destinationPortRanges": [ + "22", + "80", + "443" + ], + "direction": "Inbound", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupb1404f/providers/Microsoft.Network/networkSecurityGroups/myNSGNameb1404f/securityRules/MY_NSG_SSH_RULE", + "name": "MY_NSG_SSH_RULE", + "priority": 100, + "protocol": "Tcp", + "provisioningState": "Succeeded", + "resourceGroup": "myResourceGroupb1404f", + "sourceAddressPrefix": "*", + "sourceAddressPrefixes": [], + "sourceApplicationSecurityGroups": null, + "sourcePortRange": "*", + "sourcePortRanges": [], + "type": "Microsoft.Network/networkSecurityGroups/securityRules" +} +``` + +And finally create the Network Interface Card (NIC): + +```bash +export MY_VM_NIC_NAME="myVMNicName$RANDOM_ID" + +az network nic create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_VM_NIC_NAME \ + --location $REGION \ + --ip-forwarding false \ + --subnet $MY_SN_NAME \ + --vnet-name $MY_VNET_NAME \ + --network-security-group $MY_NSG_NAME \ + --public-ip-address $MY_PUBLIC_IP_NAME -o JSON +``` + +Results: + + +```JSON +{ + "NewNIC": { + "auxiliaryMode": "None", + "auxiliarySku": "None", + "disableTcpStateTracking": false, + "dnsSettings": { + "appliedDnsServers": [], + "dnsServers": [] + }, + "enableAcceleratedNetworking": false, + "enableIPForwarding": false, + "hostedWorkloads": [], + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupb1404f/providers/Microsoft.Network/networkInterfaces/myVMNicNameb1404f", + "ipConfigurations": [ + { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupb1404f/providers/Microsoft.Network/networkInterfaces/myVMNicNameb1404f/ipConfigurations/ipconfig1", + "name": "ipconfig1", + "primary": true, + "privateIPAddress": "10.168.0.4", + "privateIPAddressVersion": "IPv4", + "privateIPAllocationMethod": "Dynamic", + "provisioningState": "Succeeded", + "resourceGroup": "myResourceGroupb1404f", + "subnet": { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupb1404f/providers/Microsoft.Network/virtualNetworks/myVNetb1404f/subnets/mySNb1404f", + "resourceGroup": "myResourceGroupb1404f" + }, + "type": "Microsoft.Network/networkInterfaces/ipConfigurations" + } + ], + "location": "eastus", + "name": "myVMNicNameb1404f", + "networkSecurityGroup": { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupb1404f/providers/Microsoft.Network/networkSecurityGroups/myNSGNameb1404f", + "resourceGroup": "myResourceGroupb1404f" + }, + "nicType": "Standard", + "provisioningState": "Succeeded", + "resourceGroup": "myResourceGroupb1404f", + "tapConfigurations": [], + "type": "Microsoft.Network/networkInterfaces", + "vnetEncryptionSupported": false + } +} +``` + +## Generate a certificate and store it in Azure Key Vault + +Azure Key Vault safeguards cryptographic keys and secrets, such as certificates or passwords. Key Vault helps streamline the certificate management process and enables you to maintain control of keys that access those certificates. 
You can create a self-signed certificate inside Key Vault, or upload an existing, trusted certificate that you already own. For this tutorial we'll create self-signed certificates inside the Key Vault and afterwards inject these certificates into a running VM. This process ensures that the most up-to-date certificates are installed on a web server during deployment. + +The following example creates an Azure Key Vault named *$MY_KEY_VAULT* in the chosen region *$REGION* with a retention policy of 7 days. This means once a secret, key, certificate, or key vault is deleted, it will remain recoverable for a configurable period of 7 to 90 calendar days. + +```bash +export MY_KEY_VAULT="mykeyvault$RANDOM_ID" + +az keyvault create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_KEY_VAULT \ + --location $REGION \ + --retention-days 7\ + --enabled-for-deployment true -o JSON +``` + +Results: + + +```JSON +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupb1404f/providers/Microsoft.KeyVault/vaults/myKeyVaultb1404f", + "location": "eastus", + "name": "myKeyVaultb1404f", + "properties": { + "accessPolicies": [ + { + "applicationId": null, + "permissions": { + "certificates": [ + "all" + ], + "keys": [ + "all" + ], + "secrets": [ + "all" + ], + "storage": [ + "all" + ] + }, + "tenantId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + } + ], + "createMode": null, + "enablePurgeProtection": null, + "enableRbacAuthorization": null, + "enableSoftDelete": true, + "enabledForDeployment": true, + "enabledForDiskEncryption": null, + "enabledForTemplateDeployment": null, + "hsmPoolResourceId": null, + "networkAcls": null, + "privateEndpointConnections": null, + "provisioningState": "Succeeded", + "publicNetworkAccess": "Enabled", + "sku": { + "family": "A", + "name": "standard" + }, + "softDeleteRetentionInDays": 7, + "tenantId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "vaultUri": "https://mykeyvaultb1404f.vault.azure.net/" + }, + "resourceGroup": "myResourceGroupb1404f", + "systemData": { + "createdAt": "2023-09-18T12:25:55.208000+00:00", + "createdBy": "example@microsoft.com", + "createdByType": "User", + "lastModifiedAt": "2023-09-18T12:25:55.208000+00:00", + "lastModifiedBy": "example@microsoft.com", + "lastModifiedByType": "User" + }, + "tags": {}, + "type": "Microsoft.KeyVault/vaults" +} +``` + +## Create a certificate and store in Azure key Vault + +Now let's generate a self-signed certificate with az keyvault certificate create that uses the default certificate policy: + +```bash +export MY_CERT_NAME="nginxcert$RANDOM_ID" + +az keyvault certificate create \ + --vault-name $MY_KEY_VAULT \ + --name $MY_CERT_NAME \ + --policy "$(az keyvault certificate get-default-policy)" -o JSON +``` + +Results: + + +```JSON +{ + "cancellationRequested": false, + "csr": "MIICr...", + "error": null, + "id": "https://mykeyvault67a7ba.vault.azure.net/certificates/nginxcert67a7ba/pending", + "issuerParameters": { + "certificateTransparency": null, + "certificateType": null, + "name": "Self" + }, + "name": "nginxcert67a7ba", + "status": "completed", + "statusDetails": null, + "target": "https://mykeyvault67a7ba.vault.azure.net/certificates/nginxcert67a7ba" +} +``` + +Finally, we need to prepare the certificate so it can be used during the VM create process. To do so we need to obtain the ID of the certificate with az keyvault secret list-versions, and convert the certificate with az vm secret format. 
The following example assigns the output of these commands to variables for ease of use in the next steps: + +```bash +export MY_VM_ID_NAME="myVMIDName$RANDOM_ID" + +az identity create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_VM_ID_NAME -o JSON +``` + +Results: + + +```JSON +{ + "clientId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourcegroups/myResourceGroupb1404f/providers/Microsoft.ManagedIdentity/userAssignedIdentities/myVMIDNameb1404f", + "location": "eastus", + "name": "myVMIDNameb1404f", + "principalId": "e09ebfce-97f0-4aff-9abd-415ebd6f915c", + "resourceGroup": "myResourceGroupb1404f", + "tags": {}, + "tenantId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "type": "Microsoft.ManagedIdentity/userAssignedIdentities" +} +``` + +```bash +MY_VM_PRINCIPALID=$(az identity show --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_ID_NAME --query principalId -o tsv) + +az keyvault set-policy \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_KEY_VAULT \ + --object-id $MY_VM_PRINCIPALID \ + --secret-permissions get list \ + --certificate-permissions get list -o JSON +``` + +Results: + + +```JSON +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupb1404f/providers/Microsoft.KeyVault/vaults/myKeyVaultb1404f", + "location": "eastus", + "name": "myKeyVaultb1404f", + "properties": { + "accessPolicies": [ + { + "applicationId": null, + "objectId": "ceeb4e98-5831-4d9f-b8ba-2ee14b3cdf80", + "permissions": { + "certificates": [ + "all" + ], + "keys": [ + "all" + ], + "secrets": [ + "all" + ], + "storage": [ + "all" + ] + }, + "tenantId": "bd7153ee-d085-4a28-a928-2f0ef402f076" + }, + { + "applicationId": null, + "objectId": "e09ebfce-97f0-4aff-9abd-415ebd6f915c", + "permissions": { + "certificates": [ + "list", + "get" + ], + "keys": null, + "secrets": [ + "list", + "get" + ], + "storage": null + }, + "tenantId": "bd7153ee-d085-4a28-a928-2f0ef402f076" + } + ], + "createMode": null, + "enablePurgeProtection": null, + "enableRbacAuthorization": null, + "enableSoftDelete": true, + "enabledForDeployment": true, + "enabledForDiskEncryption": null, + "enabledForTemplateDeployment": null, + "hsmPoolResourceId": null, + "networkAcls": null, + "privateEndpointConnections": null, + "provisioningState": "Succeeded", + "publicNetworkAccess": "Enabled", + "sku": { + "family": "A", + "name": "standard" + }, + "softDeleteRetentionInDays": 7, + "tenantId": "bd7153ee-d085-4a28-a928-2f0ef402f076", + "vaultUri": "https://mykeyvaultb1404f.vault.azure.net/" + }, + "resourceGroup": "myResourceGroupb1404f", + "systemData": { + "createdAt": "2023-09-18T12:25:55.208000+00:00", + "createdBy": "ajoian@microsoft.com", + "createdByType": "User", + "lastModifiedAt": "2023-09-18T12:48:08.966000+00:00", + "lastModifiedBy": "ajoian@microsoft.com", + "lastModifiedByType": "User" + }, + "tags": {}, + "type": "Microsoft.KeyVault/vaults" +} +``` + +## Create the VM + +Now create a VM with az vm create. Use the --custom-data parameter to pass in the cloud-init config file, named *cloud-init-nginx.txt*. +Cloud-init is a widely used approach to customize a Linux VM as it boots for the first time. You can use cloud-init to install packages and write files, or to configure users and security. As cloud-init runs during the initial boot process, there are no extra steps or required agents to apply your configuration. 
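Cloud-init configuration is plain YAML. As a minimal, hypothetical illustration of the format only (the actual *cloud-init-nginx.txt* used by this tutorial is created in the next step), a cloud-init file that installs and starts NGINX could look like this:

```yaml
#cloud-config
# Hypothetical example for illustration only; not the file written later in this tutorial.
package_upgrade: true
packages:
  - nginx
runcmd:
  # Make sure the web server is enabled and running after first boot
  - systemctl enable nginx
  - systemctl restart nginx
```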
+When you create a VM, certificates and keys are stored in the protected /var/lib/waagent/ directory. In this example, we are installing and configuring the NGINX web server. + +```bash +export FQDN="${MY_DNS_LABEL}.${REGION}.cloudapp.azure.com" + +cat > cloud-init-nginx.txt </dev/null; echo "0 * * * * /root/convert_akv_cert.sh && service nginx reload") | crontab - + - service nginx restart +EOF +``` + +The following example creates a VM named *myVMName$UNIQUE_POSTFIX*: + +```bash +MY_VM_ID=$(az identity show --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_ID_NAME --query id -o tsv) +export MY_VM_NAME="myVMName$RANDOM_ID" +export MY_VM_IMAGE='Ubuntu2204' +export MY_VM_USERNAME="azureuser" +export MY_VM_SIZE='Standard_DS2_v2' + +az vm create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_VM_NAME \ + --image $MY_VM_IMAGE \ + --admin-username $MY_VM_USERNAME \ + --generate-ssh-keys \ + --assign-identity $MY_VM_ID \ + --size $MY_VM_SIZE \ + --custom-data cloud-init-nginx.txt \ + --nics $MY_VM_NIC_NAME +``` + +Results: + + +```JSON +{ + "fqdns": "mydnslabel67a7ba.eastus.cloudapp.azure.com", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroup67a7ba/providers/Microsoft.Compute/virtualMachines/myVMName67a7ba", + "identity": { + "systemAssignedIdentity": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "userAssignedIdentities": { + "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourcegroups/myResourceGroup67a7ba/providers/Microsoft.ManagedIdentity/userAssignedIdentities/myVMIDName67a7ba": { + "clientId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "principalId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + } + } + }, + "location": "eastus", + "macAddress": "60-45-BD-D3-B5-29", + "powerState": "VM running", + "privateIpAddress": "10.56.0.4", + "publicIpAddress": "20.231.118.239", + "resourceGroup": "myResourceGroup67a7ba", + "zones": "" +} +``` + +## Deploying AKV extension for VM $vm_name to retrieve cert $cert_name from AKV $akv_name..." 
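The Key Vault VM extension for Linux (KeyVaultForLinux) periodically polls Key Vault and writes the observed certificates to the configured store location on the VM, so the web server always has an up-to-date copy of the certificate on disk. The following commands look up the certificate's secret ID and the managed identity's client ID, build the extension settings from those values, and then install the extension on the VM: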
+ +```bash +MY_CERT_ID=$(az keyvault certificate show --vault-name $MY_KEY_VAULT --name $MY_CERT_NAME --query sid -o tsv) +MY_VM_CLIENTID=$(az identity show --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_ID_NAME --query clientId -o tsv) +MY_AKV_EXT_SETTINGS="{\"secretsManagementSettings\":{\"pollingIntervalInS\":\"3600\",\"requireInitialSync\":"true",\"certificateStoreLocation\":\"/etc/nginx/ssl/\",\"observedCertificates\":[\"$MY_CERT_ID\"]},\"authenticationSettings\":{\"msiClientId\":\"${MY_VM_CLIENTID}\"}}" + +az vm extension set \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --vm-name $MY_VM_NAME \ + -n "KeyVaultForLinux" \ + --publisher Microsoft.Azure.KeyVault \ + --version 2.0 \ + --enable-auto-upgrade true \ + --settings $MY_AKV_EXT_SETTINGS -o JSON +``` + +Results: + + +```JSON +{ + "autoUpgradeMinorVersion": true, + "enableAutomaticUpgrade": true, + "forceUpdateTag": null, + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroup67a7ba/providers/Microsoft.Compute/virtualMachines/myVMName67a7ba/extensions/KeyVaultForLinux", + "instanceView": null, + "location": "eastus", + "name": "KeyVaultForLinux", + "protectedSettings": null, + "protectedSettingsFromKeyVault": null, + "provisioningState": "Succeeded", + "publisher": "Microsoft.Azure.KeyVault", + "resourceGroup": "myResourceGroup67a7ba", + "settings": { + "secretsManagementSettings": { + "certificateStoreLocation": "/etc/nginx/ssl", + "observedCertificates": [ + "https://mykeyvault67a7ba.vault.azure.net/secrets/nginxcert67a7ba/aac9b30a90c04fc58bc230ae15b1148f" + ], + "pollingIntervalInS": "3600" + } + }, + "suppressFailures": null, + "tags": null, + "type": "Microsoft.Compute/virtualMachines/extensions", + "typeHandlerVersion": "2.0", + "typePropertiesType": "KeyVaultForLinux" +} +``` + +## Enable Azure AD login for a Linux Virtual Machine in Azure + +The following example deploys a VM and then installs the extension to enable Azure AD login for a Linux VM. VM extensions are small applications that provide post-deployment configuration and automation tasks on Azure virtual machines. + +```bash +az vm extension set \ + --publisher Microsoft.Azure.ActiveDirectory \ + --name AADSSHLoginForLinux \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --vm-name $MY_VM_NAME -o JSON +``` + +Results: + + +```JSON +{ + "autoUpgradeMinorVersion": true, + "enableAutomaticUpgrade": null, + "forceUpdateTag": null, + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupfa636b/providers/Microsoft.Compute/virtualMachines/myVMNamefa636b/extensions/AADSSHLoginForLinux", + "instanceView": null, + "location": "eastus", + "name": "AADSSHLoginForLinux", + "protectedSettings": null, + "protectedSettingsFromKeyVault": null, + "provisioningState": "Succeeded", + "publisher": "Microsoft.Azure.ActiveDirectory", + "resourceGroup": "myResourceGroupfa636b", + "settings": null, + "suppressFailures": null, + "tags": null, + "type": "Microsoft.Compute/virtualMachines/extensions", + "typeHandlerVersion": "1.0", + "typePropertiesType": "AADSSHLoginForLinux" +} +``` + +## Browse your secure website + +Validate that the application is running by visiting the application url: + +```bash +curl --max-time 120 -k "https://$FQDN" +``` + +Results: + + +```html + + + +Welcome to nginx! + + + +

Welcome to nginx!

If you see this page, the nginx web server is successfully installed and
working. Further configuration is required.

For online documentation and support please refer to
nginx.org.
Commercial support is available at
nginx.com.

Thank you for using nginx.
+ + +``` \ No newline at end of file diff --git a/scenarios/CreateRHELVMAndSSH/create-rhel-vm-ssh.md b/scenarios/CreateRHELVMAndSSH/create-rhel-vm-ssh.md index 8efb38189..ac65901a3 100644 --- a/scenarios/CreateRHELVMAndSSH/create-rhel-vm-ssh.md +++ b/scenarios/CreateRHELVMAndSSH/create-rhel-vm-ssh.md @@ -28,19 +28,6 @@ To open the Cloud Shell, just select **Try it** from the upper right corner of a If you prefer to install and use the CLI locally, this quickstart requires Azure CLI version 2.0.30 or later. Run `az --version` to find the version. If you need to install or upgrade, see [Install Azure CLI]( /cli/azure/install-azure-cli). -## Define environment variables - -The first step is to define the environment variables. Environment variables are commonly used in Linux to centralize configuration data to improve consistency and maintainability of the system. Create the following environment variables to specify the names of resources that you create later in this tutorial: - -```bash -export RANDOM_ID="$(openssl rand -hex 3)" -export MY_RESOURCE_GROUP_NAME="myVMResourceGroup$RANDOM_ID" -export REGION="westeurope" -export MY_VM_NAME="myVM$RANDOM_ID" -export MY_USERNAME=azureuser -export MY_VM_IMAGE="RedHat:RHEL:8-LVM:latest" -``` - ## Log in to Azure using the CLI In order to run commands in Azure using the CLI, you need to log in first. Log in using the `az login` command. @@ -50,6 +37,9 @@ In order to run commands in Azure using the CLI, you need to log in first. Log i A resource group is a container for related resources. All resources must be placed in a resource group. The [az group create](/cli/azure/group) command creates a resource group with the previously defined $MY_RESOURCE_GROUP_NAME and $REGION parameters. ```bash +export RANDOM_ID="$(openssl rand -hex 3)" +export MY_RESOURCE_GROUP_NAME="myVMResourceGroup$RANDOM_ID" +export REGION="westeurope" az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION ``` @@ -79,6 +69,9 @@ The following example creates a VM and adds a user account. The `--generate-ssh- All other values are configured using environment variables. ```bash +export MY_VM_NAME="myVM$RANDOM_ID" +export MY_USERNAME=azureuser +export MY_VM_IMAGE="RedHat:RHEL:8-LVM:latest" az vm create \ --resource-group $MY_RESOURCE_GROUP_NAME \ --name $MY_VM_NAME \ diff --git a/scenarios/CreateSpeechService/create-speech-service.md b/scenarios/CreateSpeechService/create-speech-service.md new file mode 100644 index 000000000..08da60cd3 --- /dev/null +++ b/scenarios/CreateSpeechService/create-speech-service.md @@ -0,0 +1,198 @@ +--- +title: 'Quickstart: Create a Speech Services application on Azure' +description: Learn how to create a Speech Services application using Azure CLI. This will include creating a Speech service resource to support scenarios like speech-to-text and text-to-speech. +ms.topic: quickstart +ms.date: 10/07/2023 +author: azure-voice-guru +ms.author: azurevoice +ms.custom: cognitive-services, azure-cli, innovation-engine +--- + +# Quickstart: Create a Speech Services application on Azure + +In this quickstart, you will learn how to create a Speech Service resource using Azure CLI. This service enables scenarios such as speech-to-text, text-to-speech, and speech translation. + +--- + +## Prerequisites + +- Azure CLI installed and configured on your machine. +- Proper permissions to create resources in your Azure subscription. + +--- + +## Step 1: Create a Resource Group + +A resource group is a container that holds related resources for an Azure solution. 
+ +```bash +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export REGION="westus2" +export RESOURCE_GROUP_NAME="SpeechAppGroup$RANDOM_SUFFIX" +az group create --name $RESOURCE_GROUP_NAME --location $REGION --output json +``` + +### Results: + + + +```json +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/SpeechAppGroupxxx", + "location": "westus2", + "managedBy": null, + "name": "SpeechAppGroupxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +--- + +## Step 2: Create a Speech Service Resource + +The Speech Service is part of Azure Cognitive Services and provides functionalities like speech-to-text, text-to-speech, and translation. You will create this resource within the resource group. + +```bash +export SPEECH_SERVICE_NAME="MySpeechService$RANDOM_SUFFIX" +az cognitiveservices account create \ + --name $SPEECH_SERVICE_NAME \ + --resource-group $RESOURCE_GROUP_NAME \ + --kind SpeechServices \ + --sku S0 \ + --location $REGION \ + --yes \ + --output json +``` + +### Results: + + + +```json +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/SpeechAppGroupxxx/providers/Microsoft.CognitiveServices/accounts/MySpeechServicexxx", + "location": "westus2", + "name": "MySpeechServicexxx", + "properties": { + "provisioningState": "Succeeded" + }, + "sku": { + "name": "S0" + }, + "type": "Microsoft.CognitiveServices/accounts" +} +``` + +--- + +## Step 3: Ensure Resource Provisioning Completes + +Ensure the Speech Service resource is fully provisioned before proceeding. A polling mechanism is implemented here to verify the provisioning state. + +--- + +### Updated Polling with JSON Validation + +```bash +export PROVISIONING_STATE=$(az cognitiveservices account show \ + --only-show-errors \ + --name $SPEECH_SERVICE_NAME \ + --resource-group $RESOURCE_GROUP_NAME \ + --query "properties.provisioningState" -o tsv 2>/dev/null || echo "Unknown") +echo "Current provisioning state: $PROVISIONING_STATE" +``` + +### Results: + + + +```text +Current provisioning state: Succeeded +``` + +--- + +## Step 4: Retrieve Keys and Endpoint + +You will need the keys and endpoint to use the Speech Service in your applications. + +--- + +### Retrieve Keys + +Fetch the keys for accessing the Speech Service. + +```bash +KEYS_JSON=$(az cognitiveservices account keys list \ + --only-show-errors \ + --name $SPEECH_SERVICE_NAME \ + --resource-group $RESOURCE_GROUP_NAME \ + -o json 2>/dev/null) + +if [ -z "$KEYS_JSON" ] || [ "$KEYS_JSON" == "null" ]; then + echo "Error: Failed to retrieve keys. Verify the resource status in the Azure portal." + exit 1 +fi + +export KEY1=$(echo "$KEYS_JSON" | jq -r '.key1') +export KEY2=$(echo "$KEYS_JSON" | jq -r '.key2') + +if [ -z "$KEY1" ] || [ "$KEY2" == "null" ]; then + echo "Error: Retrieved keys are empty or invalid. Inspect the resource settings." + exit 1 +fi + +echo "Key1: Retrieved successfully" +echo "Key2: Retrieved successfully" +``` + +### Results: + + + +```output +Key1: Retrieved successfully +Key2: Retrieved successfully +``` + +--- + +### Retrieve Endpoint + +Fetch the endpoint for the Speech Service. + +--- + +### Updated Endpoint Retrieval + +```bash +ENDPOINT_JSON=$(az cognitiveservices account show \ + --name $SPEECH_SERVICE_NAME \ + --resource-group $RESOURCE_GROUP_NAME \ + -o json 2>/dev/null) + +if echo "$ENDPOINT_JSON" | grep -q '"code": "404"'; then + echo "Error: Resource not found. Verify the resource name, group, or region." 
+ exit 1 +fi + +export ENDPOINT=$(echo "$ENDPOINT_JSON" | jq -r '.properties.endpoint') +if [ -z "$ENDPOINT" ] || [ "$ENDPOINT" == "null" ]; then + echo "Error: Failed to retrieve endpoint. Verify the resource status in the Azure portal." + exit 1 +fi + +echo "Endpoint: $ENDPOINT" +``` + +### Results: + + + +```text +https://xxxxxxxxxxxxxxxxxxxxx.cognitiveservices.azure.com/ +``` diff --git a/scenarios/DeployCassandraOnAKS/deploy-cassandra-on-aks.md b/scenarios/DeployCassandraOnAKS/deploy-cassandra-on-aks.md new file mode 100644 index 000000000..9e0cab122 --- /dev/null +++ b/scenarios/DeployCassandraOnAKS/deploy-cassandra-on-aks.md @@ -0,0 +1,259 @@ +--- +title: "Deploy a Cassandra Cluster on AKS" +description: Learn how to deploy a Cassandra cluster on an Azure Kubernetes Service (AKS) cluster using Azure CLI and Kubernetes manifests. +ms.topic: tutorial +ms.date: 10/12/2023 +author: execdocwriter +ms.author: execdocwriter +ms.custom: aks, cassandra, azurecli, kubernetes, innovation-engine +--- + +# Deploy a Cassandra Cluster on AKS + +In this tutorial, you'll deploy an open-source Apache Cassandra cluster on Azure Kubernetes Service (AKS) and manage it using Kubernetes. This tutorial demonstrates creating an AKS cluster, deploying Cassandra, and verifying the deployment. + +## Prerequisites + +1. Install Azure CLI. You can follow [Install the Azure CLI](https://docs.microsoft.com/cli/azure/install-azure-cli) for instructions. +2. Install `kubectl`. You can use the `az aks install-cli` command to install it if you are using Azure Cloud Shell. + + +## Step 1: Create a Resource Group + +Create an Azure resource group to contain the AKS cluster and other resources. + +```bash +export RANDOM_SUFFIX="$(openssl rand -hex 3)" +export REGION="westus2" +export MY_RESOURCE_GROUP_NAME="MyAKSResourceGroup$RANDOM_SUFFIX" + +# Create a resource group in the specified region +az group create \ + --name $MY_RESOURCE_GROUP_NAME \ + --location $REGION +``` + +Results: + + + +```json +{ + "id": "/subscriptions/xxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/CassandraClusterRGxxx", + "location": "centralindia", + "managedBy": null, + "name": "CassandraClusterRGxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Step 2: Create an AKS Cluster + +Now, create an AKS cluster in the resource group. + +```bash +export MY_AKS_CLUSTER_NAME="MyAKSCluster$RANDOM_SUFFIX" + +# Create the AKS cluster +az aks create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_AKS_CLUSTER_NAME \ + --node-count 3 \ + --enable-addons monitoring \ + --generate-ssh-keys +``` + +## Step 3: Connect to the AKS Cluster + +Retrieve the AKS cluster credentials and configure `kubectl`. + +```bash +az aks get-credentials \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_AKS_CLUSTER_NAME +``` + +After running the command, your `kubectl` context will be set to the newly created AKS cluster. Verify the connection: + +```bash +kubectl get nodes +``` + +Results: + + + +```text +NAME STATUS ROLES AGE VERSION +aks-nodepool1-xxxxx-vmss000000 Ready agent 3m56s v1.26.0 +aks-nodepool1-xxxxx-vmss000001 Ready agent 3m52s v1.26.0 +aks-nodepool1-xxxxx-vmss000002 Ready agent 3m48s v1.26.0 +``` + +## Step 4: Deploy the Cassandra Cluster + +Create a Kubernetes manifest file in Cloud Shell to define the Cassandra deployment. Use a name like `cassandra-deployment.yaml`. 
+ +```bash +cat < cassandra-deployment.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: cassandra +spec: + selector: + matchLabels: + app: cassandra + serviceName: "cassandra" + replicas: 3 + template: + metadata: + labels: + app: cassandra + spec: + containers: + - name: cassandra + image: cassandra:latest + ports: + - containerPort: 9042 + name: cql + volumeMounts: + - mountPath: /var/lib/cassandra + name: cassandra-data + volumes: + - name: cassandra-data +EOF + +# Apply the manifest to the cluster +kubectl apply -f cassandra-deployment.yaml +``` + +Results: + + + +```text +statefulset.apps/cassandra created +``` + +## Step 5: Create a Headless Service for Cassandra + +Create a Kubernetes manifest file in Cloud Shell to define the Cassandra headless service. Use a name like `cassandra-service.yaml`. + +```bash +cat < cassandra-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: cassandra + namespace: default +spec: + clusterIP: None + selector: + app: cassandra + ports: + - name: cql + port: 9042 + targetPort: 9042 +EOF + +# Apply the service manifest to the cluster +kubectl apply -f cassandra-service.yaml +``` + +## Step 6: Verify Cassandra Deployment + +Check the status of the Cassandra pods to ensure deployment is successful. + +```bash +while true; do + POD_STATUSES=$(kubectl get pods -l app=cassandra -o jsonpath='{.items[*].status.phase}') + ALL_RUNNING=true + for STATUS in $POD_STATUSES; do + if [ "$STATUS" != "Running" ]; then + ALL_RUNNING=false + break + fi + done + + if [ "$ALL_RUNNING" = true ]; then + kubectl get pods -l app=cassandra + break + else + sleep 10 + fi +done +``` + +Results: + + + +```text +NAME READY STATUS RESTARTS AGE +cassandra-0 1/1 Running 0 3m +cassandra-1 1/1 Running 0 2m +cassandra-2 1/1 Running 0 1m +``` + +Verify the Cassandra StatefulSet. + +```bash +kubectl get statefulset cassandra +``` + +Results: + + + +```text +NAME READY AGE +cassandra 3/3 3m +``` + +## Step 7: Access Cassandra Cluster + +Create a temporary Pod to access the Cassandra cluster using `cqlsh`, the Cassandra query tool. + +```bash +kubectl run cassandra-client --rm -it --image=cassandra:latest -- /bin/bash +``` + +Once you are inside the Pod, connect to the Cassandra cluster using `cqlsh`. + +```bash +for i in {1..10}; do + echo "Attempt $i: Trying to connect to Cassandra cluster..." + # Try to run a simple cqlsh command (e.g. list keyspaces) + cql_output=$(cqlsh cassandra-0.cassandra -e "DESC KEYSPACES;" 2>&1) + if echo "$cql_output" | grep -q "system"; then + echo "Connected to Cassandra." + break + else + echo "cqlsh not ready yet. Retrying in 10 seconds..." + sleep 10 + fi +done +``` + +You should now be connected to the Cassandra database. + +> **Note:** When you're done testing, exit the shell and delete the Pod automatically. + +Results: + + + +```text +Connected to Test Cluster at cassandra-0.cassandra:9042. +[cqlsh 5.0.1 | Cassandra 4.0.0 | CQL spec 3.4.0 | Native protocol v4] +Use HELP for help. +``` + +This tutorial deployed an Apache Cassandra cluster on AKS. You managed the cluster using Kubernetes manifests and verified its deployment. + +> **IMPORTANT:** Do not forget to clean up unnecessary resources like the AKS cluster if you no longer need them. 
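The simplest way to clean up is to delete the resource group created at the start of this tutorial, which removes the AKS cluster and everything deployed in it (only do this if nothing else lives in that resource group):

```bash
# Deletes the resource group, the AKS cluster, and all resources inside it
az group delete --name $MY_RESOURCE_GROUP_NAME --yes --no-wait
```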
\ No newline at end of file diff --git a/scenarios/DeployClickhouseOnAKS/deploy-clickhouse-on-aks.md b/scenarios/DeployClickhouseOnAKS/deploy-clickhouse-on-aks.md new file mode 100644 index 000000000..7240a0af3 --- /dev/null +++ b/scenarios/DeployClickhouseOnAKS/deploy-clickhouse-on-aks.md @@ -0,0 +1,197 @@ +--- +title: 'Deploy ClickHouse Cluster on AKS' +description: Learn how to deploy a ClickHouse Cluster on Azure Kubernetes Service (AKS) using Azure CLI and Kubernetes manifests. +ms.topic: quickstart +ms.date: 10/05/2023 +author: azure-execdocwriter +ms.author: azureexecdocwriter +ms.custom: devx-track-azurecli, mode-api, innovation-engine, aks-related-content +--- + +# Deploy ClickHouse Cluster on AKS + +This Exec Doc demonstrates how to deploy a ClickHouse Cluster on Azure Kubernetes Service (AKS). ClickHouse is an open-source column-oriented database management system. By following this guide, you'll create an AKS cluster, deploy a ClickHouse cluster on it using a Kubernetes manifest, and verify the deployment. + +## Prerequisites + +Ensure that you have the following: + +1. An Azure subscription. +2. The Azure CLI installed (v2.30.0 or later). +3. Access to `kubectl` CLI to manage your Kubernetes cluster. +4. Azure CLI extensions enabled for AKS (`az extension add --name aks`). + +## Step 1: Create a Resource Group + +Create a new Azure resource group to contain all resources related to the deployment. + +```bash +export RANDOM_SUFFIX="$(openssl rand -hex 3)" +export REGION="westus2" +export MY_RESOURCE_GROUP="MyAKSResourceGroup$RANDOM_SUFFIX" +az group create --name $MY_RESOURCE_GROUP --location $REGION +``` + +Results: + + + +```json +{ + "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/MyAKSResourceGroupxxx", + "location": "centralindia", + "managedBy": null, + "name": "MyAKSResourceGroupxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Step 2: Create an AKS Cluster + +Create an Azure Kubernetes Service (AKS) cluster in the resource group. + +```bash +export MY_AKS_CLUSTER="MyAKSCluster$RANDOM_SUFFIX" +az aks create --resource-group $MY_RESOURCE_GROUP --name $MY_AKS_CLUSTER --node-count 3 --generate-ssh-keys +``` + +## Step 3: Connect to the AKS Cluster + +Obtain the Kubernetes credentials to connect to your AKS cluster. + +```bash +az aks get-credentials --resource-group $MY_RESOURCE_GROUP --name $MY_AKS_CLUSTER +``` + +Results: + + + +```text +Merged "MyAKSClusterxxx" as current context in /home/user/.kube/config +``` + +## Step 4: Create a Namespace for ClickHouse + +Create a Kubernetes namespace to host the ClickHouse deployment. + +```bash +kubectl create namespace clickhouse +``` + +Results: + + + +```text +namespace/clickhouse created +``` + +## Step 5: Deploy ClickHouse on AKS + +Use the following Kubernetes manifest to deploy ClickHouse. Save this manifest into a file named **clickhouse-deployment.yaml**. 
+ +```bash +cat < clickhouse-deployment.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: clickhouse + namespace: clickhouse +spec: + serviceName: "clickhouse" + replicas: 3 + selector: + matchLabels: + app: clickhouse + template: + metadata: + labels: + app: clickhouse + spec: + containers: + - name: clickhouse + image: yandex/clickhouse-server:latest + resources: + requests: + cpu: "500m" + memory: "512Mi" + limits: + cpu: "1" + memory: "1Gi" + ports: + - containerPort: 8123 + name: http + - containerPort: 9000 + name: native + volumeMounts: + - name: clickhouse-data + mountPath: /var/lib/clickhouse + volumeClaimTemplates: + - metadata: + name: clickhouse-data + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 10Gi +EOF +``` + +Apply the configuration to deploy ClickHouse. + +```bash +kubectl apply -f clickhouse-deployment.yaml +``` + +Results: + + + +```text +statefulset.apps/clickhouse created +persistentvolumeclaim/clickhouse-pvc created +``` + +## Step 6: Verify the Deployment + +Check if the ClickHouse pods are running correctly: + +```bash +while true; do + POD_STATUSES=$(kubectl get pods -n clickhouse -o jsonpath='{.items[*].status.phase}') + ALL_RUNNING=true + for STATUS in $POD_STATUSES; do + if [ "$STATUS" != "Running" ]; then + ALL_RUNNING=false + break + fi + done + + if [ "$ALL_RUNNING" = true ]; then + kubectl get pods -n clickhouse + break + else + sleep 10 + fi +done +``` + +Results: + + + +```text +NAME READY STATUS RESTARTS AGE +clickhouse-0 1/1 Running 0 2m +clickhouse-1 1/1 Running 0 2m +clickhouse-2 1/1 Running 0 2m +``` + +## Summary + +You have successfully deployed a ClickHouse cluster on AKS. You can now connect to the ClickHouse service using the appropriate service endpoint or Kubernetes port forwarding. 
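For a quick check from your local machine, you can forward the ClickHouse HTTP port of one replica with `kubectl port-forward` and ping the server. This sketch assumes the `clickhouse` namespace and StatefulSet names used above:

```bash
# Forward the HTTP interface (port 8123) of the first replica to localhost
kubectl port-forward -n clickhouse clickhouse-0 8123:8123 &
PF_PID=$!
sleep 5

# A healthy server answers the /ping endpoint with "Ok."
curl http://localhost:8123/ping

# Stop the background port-forward when finished
kill $PF_PID
```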
\ No newline at end of file diff --git a/scenarios/DeployHAPGOnAKSTerraform/app-deployment.yaml b/scenarios/DeployHAPGOnAKSTerraform/app-deployment.yaml new file mode 100644 index 000000000..2a9dbf000 --- /dev/null +++ b/scenarios/DeployHAPGOnAKSTerraform/app-deployment.yaml @@ -0,0 +1,20 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: pg-app +spec: + replicas: 2 + selector: + matchLabels: + app: pg-app + template: + metadata: + labels: + app: pg-app + spec: + containers: + - name: pg-app + image: postgres:11 + env: + - name: POSTGRES_DB + value: \ No newline at end of file diff --git a/scenarios/DeployHAPGOnAKSTerraform/app-service.yaml b/scenarios/DeployHAPGOnAKSTerraform/app-service.yaml new file mode 100644 index 000000000..5b4dbe06d --- /dev/null +++ b/scenarios/DeployHAPGOnAKSTerraform/app-service.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: Service +metadata: + name: pg-app-service +spec: + type: LoadBalancer + ports: + - port: 5432 + targetPort: 5432 + selector: + app: pg-app \ No newline at end of file diff --git a/scenarios/DeployHAPGOnAKSTerraform/deploy-ha-pg-on-aks-terraform.md b/scenarios/DeployHAPGOnAKSTerraform/deploy-ha-pg-on-aks-terraform.md new file mode 100644 index 000000000..da31384e5 --- /dev/null +++ b/scenarios/DeployHAPGOnAKSTerraform/deploy-ha-pg-on-aks-terraform.md @@ -0,0 +1,403 @@ +--- +title: Create a Highly Available PostgreSQL Cluster on Azure Kubernetes Service (AKS) using Terraform +description: This tutorial shows how to create a Highly Available PostgreSQL cluster on AKS using the CloudNativePG operator +author: russd2357,kenkilty +ms.author: rdepina,kenkilty +ms.topic: article +ms.date: 06/26/2024 +ms.custom: innovation-engine, linux-related content +--- +# Create a Highly Available PostgreSQL Cluster on Azure Kubernetes Service (AKS) using Terraform. + +In this guide, you will deploy a highly-available PostgreSQL cluster that spans multiple Azure availability zones. You will walk through the steps required to set up the PostgreSQL cluster running on [Azure Kubernetes Service](https://learn.microsoft.com/en-us/azure/aks/what-is-aks) (AKS) and perform basic Postgres operations such as backup and restore. + + +## Installing Terraform + +3. Download Terraform +Use wget to download the latest version of Terraform. You can find the latest version on the Terraform releases page. For example, to download version 1.5.0: + +```bash +if ! command -v terraform &> /dev/null +then + wget https://releases.hashicorp.com/terraform/1.5.0/terraform_1.5.0_linux_amd64.zip +fi +``` + +4. Unzip the Downloaded File +After downloading, you need to extract the Terraform binary from the zip file: + +```bash +if ! command -v terraform &> /dev/null +then + unzip terraform_1.5.0_linux_amd64.zip +fi +``` + + +5. Move Teffaform to a Directory in Your PATH +To make Terraform accessible from anywhere in your terminal, move it to /usr/local/bin: + +```bash +if ! command -v terraform &> /dev/null +then + # Create a bin directory in your home directory if it doesn't exist + mkdir -p $HOME/bin + + # Move Terraform to the bin directory in your home directory + mv terraform $HOME/bin/ + + # Add the bin directory to your PATH if it's not already included + if [[ ":$PATH:" != *":$HOME/bin:"* ]]; then + export PATH="$HOME/bin:$PATH" + fi +fi +``` + + +6. 
Verify the Installation +Finally, check if Terraform is installed correctly by checking its version: + +```bash +terraform -v +``` + +Results: + +```output +Terraform v1.5.0 +``` + + +## Creating a Highly Available PostgreSQL Cluster on Azure Kubernetes Service (AKS) Using Terraform + +1. Create a Terraform Configuration File Create a file named main.tf with the following content: + +```bash +# Generate a random suffix +export RANDOM_SUFFIX=$(openssl rand -hex 4) +export RESOURCE_GROUP_NAME="pg-ha-rg$RANDOM_SUFFIX" +export AKS_CLUSTER_NAME="pg-ha-aks$RANDOM_SUFFIX" +export POSTGRES_SERVER_NAME="pg-ha-server$RANDOM_SUFFIX" +export POSTGRES_DATABASE_NAME=$POSTGRES_DATABASE_NAME +export POSTGRES_DATABASE_PASSWORD=$(openssl rand -base64 32) +export POSTGRES_DATABASE_USER="pgadmin$RANDOM_SUFFIX" + +# Get the subscription ID programmatically +export TF_VAR_subscription_id=$(az account show --query id --output tsv) + +# Set additional environment variables for Terraform +export TF_VAR_resource_group_name=$RESOURCE_GROUP_NAME +export TF_VAR_location="East US" +export TF_VAR_aks_cluster_name=$AKS_CLUSTER_NAME +export TF_VAR_postgres_server_name=$POSTGRES_SERVER_NAME +export TF_VAR_postgres_database_name=$POSTGRES_DATABASE_NAME +export TF_VAR_postgres_database_user=$POSTGRES_DATABASE_USER +export TF_VAR_postgres_database_password=$POSTGRES_DATABASE_PASSWORD +``` + +```text +provider "azurerm" { + features {} +} + +resource "azurerm_resource_group" "rg" { + name = $RESOURCE_GROUP_NAME + location = "West Europe" +} + +resource "azurerm_kubernetes_cluster" "aks" { + name = $AKS_CLUSTER_NAME + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + dns_prefix = "pgha" + + agent_pool_profile { + name = "agentpool" + count = 3 + vm_size = "Standard_DS2_v2" # SKU for AKS + os_type = "Linux" + mode = "System" + } + + identity { + type = "SystemAssigned" + } + + role_based_access_control { + enabled = true + } +} + +resource "azurerm_postgresql_server" "pg_server" { + name = $POSTGRES_SERVER_NAME + resource_group_name = azurerm_resource_group.rg.name + location = azurerm_resource_group.rg.location + version = "11" + sku { + name = "B_Gen5_2" # SKU for PostgreSQL + tier = "Basic" + capacity = 2 + } + storage_profile { + storage_mb = 5120 + } + administrator_login = $POSTGRES_DATABASE_USER + administrator_login_password = $POSTGRES_DATABASE_PASSWORD + ssl_enforcement_enabled = true +} + +resource "azurerm_postgresql_database" "pg_database" { + name = $POSTGRES_DATABASE_NAME + resource_group_name = azurerm_resource_group.rg.name + server_name = azurerm_postgresql_server.pg_server.name + charset = "UTF8" + collation = "English_United States.1252" +} +``` + + +2. Initialize Terraform Run the following command to initialize your Terraform configuration: + +```bash +terraform init +``` + +Results: + +```output +Initializing the backend... + +Initializing provider plugins... +- Finding hashicorp/azurerm versions matching ">= 2.0.0"... +- Installing hashicorp/azurerm v2.0.0... +- Installed hashicorp/azurerm v2.0.0 (signed by HashiCorp) + +Terraform has been successfully initialized! +``` + + +3. Validate the Configuration Check if your configuration is valid: + +```bash +terraform validate +``` + +Results: + +```output +Success! The configuration is valid. +``` + + +4. 
Plan the Deployment Generate an execution plan: + +```bash +terraform plan +``` + +Results: + +```output +Terraform will perform the following actions: + + # azurerm_kubernetes_cluster.aks will be created + + resource "azurerm_kubernetes_cluster" "aks" { + ... + } + + # azurerm_postgresql_server.pg_server will be created + + resource "azurerm_postgresql_server" "pg_server" { + ... + } + +Plan: 3 to add, 0 to change, 0 to destroy. +``` + + +5. Apply the Configuration Deploy the resources: + +```bash +terraform apply -auto-approve +``` + +Results: + +```output +azurerm_resource_group.rg: Creating... +azurerm_resource_group.rg: Creation complete after 5s [id=/subscriptions/.../resourceGroups/pg-ha-rg] +azurerm_kubernetes_cluster.aks: Creating... +azurerm_postgresql_server.pg_server: Creating... +... +Apply complete! Resources: 3 added, 0 changed, 0 destroyed. +``` + + +6. Verify the Deployment Check the status of the AKS cluster: + +```bash +az aks show --resource-group $RESOURCE_GROUP_NAME --name $AKS_CLUSTER_NAME --output table +``` + +Results: + +```output +Name ResourceGroup Location KubernetesVersion ProvisioningState +----------- --------------- ----------- -------------------- ------------------- +pg-ha-aks pg-ha-rg West Europe 1.20.7 Succeeded +``` + + +7. Connect to PostgreSQL To connect to your PostgreSQL server, you can use the following command: + +```bash +psql "host=$POSTGRES_SERVER_NAME.postgres.database.azure.com dbname=$POSTGRES_DATABASE_NAME user=$POSTGRES_DATABASE_USER@$POSTGRES_SERVER_NAME password=$POSTGRES_DATABASE_PASSWORD sslmode=require" +``` + +Results: + +```output +psql (12.3) +Type "help" for help. + +mydatabase=# +``` + +8. Deploy a Sample Application To test the PostgreSQL setup, you can deploy a simple application. Create a file named app-deployment.yaml with the following content: + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: pg-app +spec: + replicas: 2 + selector: + matchLabels: + app: pg-app + template: + metadata: + labels: + app: pg-app + spec: + containers: + - name: pg-app + image: postgres:11 + env: + - name: POSTGRES_DB + value: +``` + +## Steps to Test Application +1. Expose the Application First, you need to create a service to expose your application. Create a file named app-service.yaml with the following content: + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: pg-app-service +spec: + type: LoadBalancer + ports: + - port: 5432 + targetPort: 5432 + selector: + app: pg-app +``` + +Apply this configuration to your AKS cluster: + +```bash +kubectl apply -f app-service.yaml +``` + +Results: + +```output +service/pg-app-service created +``` + +2. Check the Status of the Service After exposing the application, check the status of the service to get the external IP address: + +```bash +kubectl get services +``` + +Results: + +```output +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +pg-app-service LoadBalancer 10.0.0.1 5432:XXXXX/TCP 1m +``` + +Wait a few moments until the EXTERNAL-IP is assigned. It may take a couple of minutes. + + +3. Connect to the Application Once the external IP is assigned, you can connect to the PostgreSQL database using the following command. Replace with the actual external IP address you obtained from the previous step: + +```bash +# Fetch the external IP address +export EXTERNAL_IP=$(kubectl get services pg-app-service -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + +# Check if the EXTERNAL_IP is not empty +if [ -z "$EXTERNAL_IP" ]; then + echo "Error: External IP address not found. 
Please wait a few moments and try again." + exit 1 +fi + +# Connect to the PostgreSQL database +psql "host=$EXTERNAL_IP dbname=mydatabase user=pgadmin@pg-ha-server password=YourPassword123! sslmode=require" +``` + +Results: + +```output +psql (12.3) +Type "help" for help. + +mydatabase=# +``` + +4. Clean Up Resources +When done, destroy the resources: + +```bash +terraform destroy -auto-approve +``` + +Results: + +```output +Results: + +```output +psql (12.3) +Type "help" for help. + +mydatabase=# +``` + + + +To learn more about AKS and walk through a complete code-to-deployment example, continue to the Kubernetes cluster tutorial. + +> [!div class="nextstepaction"] +> [AKS tutorial][aks-tutorial] + + +[kubectl]: https://kubernetes.io/docs/reference/kubectl/ +[kubectl-apply]: https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#apply +[kubectl-get]: https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#get + + +[kubernetes-concepts]: ../concepts-clusters-workloads.md +[aks-tutorial]: ../tutorial-kubernetes-prepare-app.md +[azure-resource-group]: ../../azure-resource-manager/management/overview.md +[az-aks-create]: /cli/azure/aks#az-aks-create +[az-aks-get-credentials]: /cli/azure/aks#az-aks-get-credentials +[az-aks-install-cli]: /cli/azure/aks#az-aks-install-cli +[az-group-create]: /cli/azure/group#az-group-create +[az-group-delete]: /cli/azure/group#az-group-delete +[kubernetes-deployment]: ../concepts-clusters-workloads.md#deployments-and-yaml-manifests +[aks-solution-guidance]: /azure/architecture/reference-architectures/containers/aks-start-here?toc=/azure/aks/toc.json&bc=/azure/aks/breadcrumb/toc.json +[baseline-reference-architecture]: /azure/architecture/reference-architectures/containers/aks/baseline-aks?toc=/azure/aks/toc.json&bc=/azure/aks/breadcrumb/toc.json \ No newline at end of file diff --git a/scenarios/DeployHAPGOnAKSTerraform/main.tf b/scenarios/DeployHAPGOnAKSTerraform/main.tf new file mode 100644 index 000000000..77cdd04e4 --- /dev/null +++ b/scenarios/DeployHAPGOnAKSTerraform/main.tf @@ -0,0 +1,46 @@ +provider "azurerm" { + features {} + subscription_id = var.subscription_id +} + +resource "azurerm_resource_group" "rg" { + name = var.resource_group_name + location = var.location +} + +resource "azurerm_kubernetes_cluster" "aks" { + name = var.aks_cluster_name + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + dns_prefix = "pgha" + + default_node_pool { + name = "agentpool" + node_count = 3 + vm_size = "Standard_DS2_v2" + } + + identity { + type = "SystemAssigned" + } +} + +resource "azurerm_postgresql_server" "pg_server" { + name = var.postgres_server_name + resource_group_name = azurerm_resource_group.rg.name + location = azurerm_resource_group.rg.location + version = "11" + administrator_login = var.postgres_database_user + administrator_login_password = var.postgres_database_password + ssl_enforcement_enabled = true + sku_name = "B_Gen5_2" + storage_mb = 5120 +} + +resource "azurerm_postgresql_database" "pg_database" { + name = var.postgres_database_name + resource_group_name = azurerm_resource_group.rg.name + server_name = azurerm_postgresql_server.pg_server.name + charset = "UTF8" + collation = "English_United States.1252" +} \ No newline at end of file diff --git a/scenarios/DeployHAPGOnAKSTerraform/variables.tf b/scenarios/DeployHAPGOnAKSTerraform/variables.tf new file mode 100644 index 000000000..cbfce95d9 --- /dev/null +++ 
b/scenarios/DeployHAPGOnAKSTerraform/variables.tf @@ -0,0 +1,40 @@ +variable "subscription_id" { + description = "Azure Subscription ID" + type = string +} + +variable "resource_group_name" { + description = "Resource Group Name" + type = string +} + +variable "location" { + description = "Azure Region" + type = string +} + +variable "aks_cluster_name" { + description = "AKS Cluster Name" + type = string +} + +variable "postgres_server_name" { + description = "PostgreSQL Server Name" + type = string +} + +variable "postgres_database_name" { + description = "PostgreSQL Database Name" + type = string +} + +variable "postgres_database_user" { + description = "PostgreSQL Database User" + type = string +} + +variable "postgres_database_password" { + description = "PostgreSQL Database Password" + type = string + sensitive = true +} \ No newline at end of file diff --git a/scenarios/DeployHAPGOnARO/deploy-ha-pg-on-aro.md b/scenarios/DeployHAPGOnARO/deploy-ha-pg-on-aro.md new file mode 100644 index 000000000..572fc5dec --- /dev/null +++ b/scenarios/DeployHAPGOnARO/deploy-ha-pg-on-aro.md @@ -0,0 +1,506 @@ +--- +title: Create a Highly Available PostgreSQL Cluster on Azure Red Hat OpenShift +description: This tutorial shows how to create a Highly Available PostgreSQL cluster on Azure Red Hat OpenShift (ARO) using the CloudNativePG operator +author: russd2357 +ms.author: rdepina +ms.topic: article +ms.date: 04/30/2024 +ms.custom: innovation-engine, linux-related content +--- + +# Create a Highly Available PostgreSQL Cluster on Azure Red Hat OpenShift + +## Login to Azure using the CLI + +In order to run commands against Azure using the CLI you need to login. This is done, very simply, though the `az login` command: + +## Check for Prerequisites + +Next, check for prerequisites. This can be done by running the following commands: + +- RedHat OpenShift: `az provider register -n Microsoft.RedHatOpenShift --wait` +- kubectl: `az aks install-cli` +- Openshift Client: `mkdir ~/ocp ; wget -q https://mirror.openshift.com/pub/openshift-v4/clients/ocp/latest/openshift-client-linux.tar.gz -O ~/ocp/openshift-client-linux.tar.gz ; tar -xf ~/ocp/openshift-client-linux.tar.gz ; export PATH="$PATH:~/ocp"` + +## Create a resource group + +A resource group is a container for related resources. All resources must be placed in a resource group. We will create one for this tutorial. The following command creates a resource group with the previously defined $RG_NAME, $LOCATION, and $RGTAGS parameters. + +```bash +export RGTAGS="owner=ARO Demo" +export LOCATION="westus" +export LOCAL_NAME="arodemo" +export RG_NAME="rg-arodemo-perm" +``` + +## Create VNet + +In this section, you'll be creating a Virtual Network (VNet) in Azure. Start by defining several environment variables. These variables will hold the names of your VNet and subnets, as well as the CIDR block for your VNet. Next, create the VNet with the specified name and CIDR block in your resource group using the az network vnet create command. This process may take a few minutes. 
+ +```bash +export VNET_NAME="vnet-${LOCAL_NAME}" +export SUBNET1_NAME="sn-main" +export SUBNET2_NAME="sn-worker" +export VNET_CIDR="10.0.0.0/22" +az network vnet create -g $RG_NAME -n $VNET_NAME --address-prefixes $VNET_CIDR +``` + +Results: + + +```json +{ + "newVNet": { + "addressSpace": { + "addressPrefixes": [ + "xx.x.x.x/xx" + ] + }, + "enableDdosProtection": false, + "etag": "W/\"xxxxx-xxxxx-xxxxx-xxxxx\"", + "id": "/subscriptions/xxxxxx-xxxx-xxxx-xxxxxx/resourceGroups/xx-xxxxx-xxxxx/providers/Microsoft.Network/virtualNetworks/vnet-xx-xxxxx-xxxxx", + "location": "westus", + "name": "xxxxx-xxxxx-xxxxx-xxxxx", + "provisioningState": "Succeeded", + "resourceGroup": "xx-xxxxx-xxxxx", + "resourceGuid": "xxxxx-xxxxx-xxxxx-xxxxx", + "subnets": [], + "type": "Microsoft.Network/virtualNetworks", + "virtualNetworkPeerings": [] + } +} +``` + +## Create Main Nodes Subnet + +In this section, you'll be creating the main nodes subnet with the specified name and CIDR block within your previously created Virtual Network (VNet). Start by running the az network vnet subnet create command. This process may take a few minutes. After the subnet is successfully created, you'll be ready to deploy resources into this subnet. + +```bash +az network vnet subnet create -g $RG_NAME --vnet-name $VNET_NAME -n $SUBNET1_NAME --address-prefixes 10.0.0.0/23 +``` + +Results: + + +```json +{ + "addressPrefix": "xx.x.x.x/xx", + "delegations": [], + "etag": "W/\"xxxxx-xxxxx-xxxxx-xxxxx\"", + "id": "/subscriptions/xxxxxx-xxxx-xxxx-xxxxxx/resourceGroups/xx-xxxxx-xxxxx/providers/Microsoft.Network/virtualNetworks/vnet-xx-xxxxx-xxxxx/subnets/sn-main-xxxxx", + "name": "sn-main-xxxxx", + "privateEndpointNetworkPolicies": "Disabled", + "privateLinkServiceNetworkPolicies": "Enabled", + "provisioningState": "Succeeded", + "resourceGroup": "xx-xxxxx-xxxxx", + "type": "Microsoft.Network/virtualNetworks/subnets" +} +``` + +## Create Worker Nodes Subnet + +In this section, you'll be creating a subnet for your worker nodes with the specified name and CIDR block within your previously created Virtual Network (VNet). Start by running the az network vnet subnet create command. After the subnet is successfully created, you'll be ready to deploy your worker nodes into this subnet. + +```bash +az network vnet subnet create -g $RG_NAME --vnet-name $VNET_NAME -n $SUBNET2_NAME --address-prefixes 10.0.2.0/23 +``` + +Results: + + +```json +{ + "addressPrefix": "xx.x.x.x/xx", + "delegations": [], + "etag": "W/\"xxxxx-xxxxx-xxxxx-xxxxx\"", + "id": "/subscriptions/xxxxxx-xxxx-xxxx-xxxxxx/resourceGroups/xx-xxxxx-xxxxx/providers/Microsoft.Network/virtualNetworks/vnet-xx-xxxxx-xxxxx/subnets/sn-worker-xxxxx", + "name": "sn-worker-xxxxx", + "privateEndpointNetworkPolicies": "Disabled", + "privateLinkServiceNetworkPolicies": "Enabled", + "provisioningState": "Succeeded", + "resourceGroup": "xx-xxxxx-xxxxx", + "type": "Microsoft.Network/virtualNetworks/subnets" +} +``` + +## Create Storage accounts + +This code snippet performs the following steps: + +1. Sets the `STORAGE_ACCOUNT_NAME` environment variable to a concatenation of `stor`, `LOCAL_NAME` (converted to lowercase). +2. Sets the `BARMAN_CONTAINER_NAME` environment variable to `"barman"`. +3. Creates a storage account with the specified `STORAGE_ACCOUNT_NAME` in the specified resource group. +4. Creates a storage container with the specified `BARMAN_CONTAINER_NAME` in the created storage account. 
+ +```bash +export STORAGE_ACCOUNT_NAME="stor${LOCAL_NAME,,}" +export BARMAN_CONTAINER_NAME="barman" + +az storage account create --name "${STORAGE_ACCOUNT_NAME}" --resource-group "${RG_NAME}" --sku Standard_LRS +az storage container create --name "${BARMAN_CONTAINER_NAME}" --account-name "${STORAGE_ACCOUNT_NAME}" +``` + +## Deploy the ARO cluster + +In this section, you'll be deploying an Azure Red Hat OpenShift (ARO) cluster. The ARO_CLUSTER_NAME variable will hold the name of your ARO cluster. The az aro create command will deploy the ARO cluster with the specified name, resource group, virtual network, subnets, and the RedHat OpenShift pull secret that you previously downloaded and saved in your Key Vault. This process may take about 30 minutes to complete. + +```bash +export ARO_CLUSTER_NAME="aro-${LOCAL_NAME}" +export ARO_PULL_SECRET=$(az keyvault secret show --name AroPullSecret --vault-name kv-rdp-dev --query value -o tsv) +export ARO_SP_ID=$(az keyvault secret show --name arodemo-sp-id --vault-name kv-rdp-dev --query value -o tsv) +export ARO_SP_PASSWORD=$(az keyvault secret show --name arodemo-sp-password --vault-name kv-rdp-dev --query value -o tsv) +echo "This will take about 30 minutes to complete..." +az aro create -g $RG_NAME -n $ARO_CLUSTER_NAME --vnet $VNET_NAME --master-subnet $SUBNET1_NAME --worker-subnet $SUBNET2_NAME --tags $RGTAGS --pull-secret ${ARO_PULL_SECRET} --client-id ${ARO_SP_ID} --client-secret ${ARO_SP_PASSWORD} +``` + +Results: + +```json +{ + "apiserverProfile": { + "ip": "xx.xxx.xx.xxx", + "url": "https://api.xxxxx.xxxxxx.aroapp.io:xxxx/", + "visibility": "Public" + }, + "clusterProfile": { + "domain": "xxxxxx", + "fipsValidatedModules": "Disabled", + "pullSecret": null, + "resourceGroupId": "/subscriptions/xxxxxx-xxxxxx-xxxxxx-xxxxxx-xxxxxx/resourcegroups/xxxxxx-xxxxxx", + "version": "4.12.25" + }, + "consoleProfile": { + "url": "https://console-openshift-console.apps.xxxxxx.xxxxxx.aroapp.io/" + }, + "id": "/subscriptions/xxxxxx-xxxxxx-xxxxxx-xxxxxx-xxxxxx/resourceGroups/rg-arodemo-xxxxxx/providers/Microsoft.RedHatOpenShift/openShiftClusters/aro-arodemo-xxxxxx", + "ingressProfiles": [ + { + "ip": "xx.xxx.xx.xxx", + "name": "default", + "visibility": "Public" + } + ], + "location": "westus", + "masterProfile": { + "diskEncryptionSetId": null, + "encryptionAtHost": "Disabled", + "subnetId": "/subscriptions/xxxxxx-xxxxxx-xxxxxx-xxxxxx-xxxxxx/resourceGroups/rg-arodemo-xxxxxx/providers/Microsoft.Network/virtualNetworks/vnet-arodemo-xxxxxx/subnets/sn-main-jffspl", + "vmSize": "Standard_D8s_v3" + }, + "name": "aro-arodemo-xxxxxx", + "networkProfile": { + "outboundType": "Loadbalancer", + "podCidr": "xx.xxx.xx.xxx/xx", + "preconfiguredNsg": "Disabled", + "serviceCidr": "xx.xxx.xx.xxx/xx" + }, + "provisioningState": "Succeeded", + "resourceGroup": "rg-arodemo-xxxxxx", + "servicePrincipalProfile": { + "clientId": "xxxxxx-xxxxxx-xxxxxx-xxxxxx-xxxxxx", + "clientSecret": null + }, + "systemData": { + "createdAt": "xxxxxx-xx-xxxxxx:xx:xx.xxxxxx+xx:xx", + "createdBy": "xxxxxx@xxxxxx.xxx", + "createdByType": "User", + "lastModifiedAt": "xxxxxx-xx-xxxxxx:xx:xx.xxxxxx+xx:xx", + "lastModifiedBy": "xxxxxx@xxxxxx.xxx", + "lastModifiedByType": "User" + }, + "tags": { + "Demo": "", + "owner": "ARO" + }, + "type": "Microsoft.RedHatOpenShift/openShiftClusters", + "workerProfiles": [ + { + "count": 3, + "diskEncryptionSetId": null, + "diskSizeGb": 128, + "encryptionAtHost": "Disabled", + "name": "worker", + "subnetId": 
"/subscriptions/xxxxxx-xxxxxx-xxxxxx-xxxxxx-xxxxxx/resourceGroups/rg-arodemo-xxxxxx/providers/Microsoft.Network/virtualNetworks/vnet-arodemo-xxxxxx/subnets/sn-worker-xxxxxx", + "vmSize": "Standard_D4s_v3" + } + ], + "workerProfilesStatus": [ + { + "count": 3, + "diskEncryptionSetId": null, + "diskSizeGb": 128, + "encryptionAtHost": "Disabled", + "name": "aro-arodemo-xxxxxx-xxxxxx-worker-westus", + "subnetId": "/subscriptions/xxxxxx-xxxxxx-xxxxxx-xxxxxx-xxxxxx/resourceGroups/rg-arodemo-xxxxxx/providers/Microsoft.Network/virtualNetworks/vnet-arodemo-xxxxxx/subnets/sn-worker-xxxxxx", + "vmSize": "Standard_D4s_v3" + } + ] +} +``` + +## Obtain cluster credentials and login + +This code retrieves the API server URL and login credentials for an Azure Red Hat OpenShift (ARO) cluster using the Azure CLI. + +The `az aro show` command is used to get the API server URL by providing the resource group name and ARO cluster name. The `--query` parameter is used to extract the `apiserverProfile.url` property, and the `-o tsv` option is used to output the result as a tab-separated value. + +The `az aro list-credentials` command is used to get the login credentials for the ARO cluster. The `--name` parameter specifies the ARO cluster name, and the `--resource-group` parameter specifies the resource group name. The `--query` parameter is used to extract the `kubeadminPassword` property, and the `-o tsv` option is used to output the result as a tab-separated value. + +Finally, the `oc login` command is used to log in to the ARO cluster using the retrieved API server URL, the `kubeadmin` username, and the login credentials. + +```bash +export apiServer=$(az aro show -g $RG_NAME -n $ARO_CLUSTER_NAME --query apiserverProfile.url -o tsv) +export loginCred=$(az aro list-credentials --name $ARO_CLUSTER_NAME --resource-group $RG_NAME --query "kubeadminPassword" -o tsv) + +oc login $apiServer -u kubeadmin -p $loginCred --insecure-skip-tls-verify +``` + +## Add operators to ARO + +Set the namespace to install the operators to the built-in namespace `openshift-operators`. + +```bash +export NAMESPACE="openshift-operators" +``` + +Cloud Native Postgresql operator + +```bash +channelspec=$(oc get packagemanifests cloud-native-postgresql -o jsonpath="{range .status.channels[*]}Channel: {.name} currentCSV: {.currentCSV}{'\n'}{end}" | grep "stable-v1.22") +IFS=" " read -r -a array <<< "${channelspec}" +channel=${array[1]} +csv=${array[3]} + +catalogSource=$(oc get packagemanifests cloud-native-postgresql -o jsonpath="{.status.catalogSource}") +catalogSourceNamespace=$(oc get packagemanifests cloud-native-postgresql -o jsonpath="{.status.catalogSourceNamespace}") + +cat < +```text +subscription.operators.coreos.com/rhbk-operator created +``` + +## Create the ARO PosgreSQL Database + +Fetch secrets from Key Vault and create the ARO database login secret object. 
+ +```bash +pgUserName=$(az keyvault secret show --name AroPGUser --vault-name kv-rdp-dev --query value -o tsv) +pgPassword=$(az keyvault secret show --name AroPGPassword --vault-name kv-rdp-dev --query value -o tsv) + +oc create secret generic app-auth --from-literal=username=${pgUserName} --from-literal=password=${pgPassword} -n ${NAMESPACE} +``` + +Results: + +```text +secret/app-auth created +``` + +Create the secret for backing up to Azure Storage + +```bash +export STORAGE_ACCOUNT_KEY=$(az storage account keys list --account-name ${STORAGE_ACCOUNT_NAME} --resource-group ${RG_NAME} --query "[0].value" --output tsv) +oc create secret generic azure-storage-secret --from-literal=storage-account-name=${STORAGE_ACCOUNT_NAME} --from-literal=storage-account-key=${STORAGE_ACCOUNT_KEY} --namespace ${NAMESPACE} +``` + +Results: + +```text +secret/azure-storage-secret created +``` + +Create the Postgres Cluster + +```bash +cat < +```text +cluster.postgresql.k8s.enterprisedb.io/cluster-arodemo created +``` + +## Create the ARO Keycloak instance + +Deploy a Keycloak instance on an OpenShift cluster. It uses the `oc apply` command to apply a YAML configuration file that defines the Keycloak resource. +The YAML configuration specifies various settings for the Keycloak instance, including the database, hostname, HTTP settings, ingress, number of instances, and transaction settings. +To deploy Keycloak, run this code block in a shell environment with the necessary permissions and access to the OpenShift cluster. +Note: Make sure to replace the values of the variables `$apiServer`, `$kc_hosts`, and the database credentials (`passwordSecret` and `usernameSecret`) with the appropriate values for your environment. + +```bash +export kc_hosts=$(echo $apiServer | sed -E 's/\/\/api\./\/\/apps./' | sed -En 's/.*\/\/([^:]+).*/\1/p' ) + +cat < +```text +keycloak.k8s.keycloak.org/kc001 created +``` + +Access the workload + +```bash +URL=$(ooc get ingress kc001-ingress -o json | jq -r '.spec.rules[0].host') +curl -Iv https://$URL +``` + +Results: + +```text +* Trying 104.42.132.245:443... +* Connected to kc001.apps.foppnyl9.westus.aroapp.io (104.42.132.245) port 443 (#0) +* ALPN, offering h2 +* ALPN, offering http/1.1 +* CAfile: /etc/ssl/certs/ca-certificates.crt +* CApath: /etc/ssl/certs +* TLSv1.0 (OUT), TLS header, Certificate Status (22): +* TLSv1.3 (OUT), TLS handshake, Client hello (1): +* TLSv1.2 (IN), TLS header, Certificate Status (22): +* TLSv1.3 (IN), TLS handshake, Server hello (2): +``` \ No newline at end of file diff --git a/scenarios/DeployIGonAKS/README.md b/scenarios/DeployIGonAKS/README.md index 3443d24e3..1ae4ca382 100644 --- a/scenarios/DeployIGonAKS/README.md +++ b/scenarios/DeployIGonAKS/README.md @@ -14,22 +14,14 @@ ms.custom: innovation-engine Welcome to this tutorial where we will take you step by step in deploying [Inspektor Gadget](https://www.inspektor-gadget.io/) in an Azure Kubernetes Service (AKS) cluster with the kubectl plugin: `gadget`. This tutorial assumes you are logged into Azure CLI already and have selected a subscription to use with the CLI. -## Define Environment Variables +## Create a resource group -The First step in this tutorial is to define environment variables: +A resource group is a container for related resources. All resources must be placed in a resource group. We will create one for this tutorial. The following command creates a resource group with the previously defined $MY_RESOURCE_GROUP_NAME and $REGION parameters. 
```bash export RANDOM_ID="$(openssl rand -hex 3)" export MY_RESOURCE_GROUP_NAME="myResourceGroup$RANDOM_ID" export REGION="eastus" -export MY_AKS_CLUSTER_NAME="myAKSCluster$RANDOM_ID" -``` - -## Create a resource group - -A resource group is a container for related resources. All resources must be placed in a resource group. We will create one for this tutorial. The following command creates a resource group with the previously defined $MY_RESOURCE_GROUP_NAME and $REGION parameters. - -```bash az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION ``` @@ -57,6 +49,7 @@ Create an AKS cluster using the az aks create command. This will take a few minutes. ```bash +export MY_AKS_CLUSTER_NAME="myAKSCluster$RANDOM_ID" az aks create \ --resource-group $MY_RESOURCE_GROUP_NAME \ --name $MY_AKS_CLUSTER_NAME \ diff --git a/scenarios/DeployLLMWithTorchserveOnAKS/Dockerfile b/scenarios/DeployLLMWithTorchserveOnAKS/Dockerfile new file mode 100644 index 000000000..2d874b4a8 --- /dev/null +++ b/scenarios/DeployLLMWithTorchserveOnAKS/Dockerfile @@ -0,0 +1,10 @@ +FROM pytorch/torchserve:latest + +# Copy the model archive into the model store +COPY llm_model.mar /home/model-server/model-store/ + +# Expose TorchServe ports +EXPOSE 8080 8081 + +# Start TorchServe +CMD ["torchserve", "--start", "--model-store", "/home/model-server/model-store", "--models", "llm_model.mar"] \ No newline at end of file diff --git a/scenarios/DeployLLMWithTorchserveOnAKS/deploy-llm-with-torchserve-on-aks.md b/scenarios/DeployLLMWithTorchserveOnAKS/deploy-llm-with-torchserve-on-aks.md new file mode 100644 index 000000000..855dd509b --- /dev/null +++ b/scenarios/DeployLLMWithTorchserveOnAKS/deploy-llm-with-torchserve-on-aks.md @@ -0,0 +1,288 @@ +--- +title: 'Quickstart: Deploy a Large Language Model with TorchServe on Azure Kubernetes Service (AKS)' +description: Learn how to deploy a large language model using TorchServe on AKS. +ms.topic: quickstart +ms.date: 10/18/2023 +author: placeholder +ms.author: placeholder +ms.custom: devx-track-azurecli, mode-api, innovation-engine, linux-related-content +--- + +# Quickstart: Deploy a Large Language Model with TorchServe on Azure Kubernetes Service (AKS) + +In this quickstart, you will learn how to deploy a large language model (LLM) using TorchServe on Azure Kubernetes Service (AKS). TorchServe is a flexible and easy-to-use tool for serving PyTorch models at scale. + +## Prerequisites + +- An Azure subscription. If you don't have an Azure subscription, create a [free account](https://azure.microsoft.com/free/). +- Azure CLI installed. To install, see [Install Azure CLI](https://learn.microsoft.com/cli/azure/install-azure-cli). +- Kubernetes CLI (`kubectl`) installed. To install, see [Install kubectl](https://kubernetes.io/docs/tasks/tools/). +- Docker installed. To install, see [Install Docker](https://docs.docker.com/get-docker/). +- Basic knowledge of Docker, Kubernetes, and AKS. + +## Create a Resource Group + +Create a resource group with the `az group create` command. 
+ +```bash +export RANDOM_ID="$(openssl rand -hex 3)" +export RESOURCE_GROUP="LLMResourceGroup$RANDOM_ID" +export REGION="westus2" +az group create --name $RESOURCE_GROUP --location $REGION +``` + +Results: + + + +```json +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/LLMResourceGroupxxxxxx", + "location": "eastus", + "managedBy": null, + "name": "LLMResourceGroupxxxxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Create an Azure Container Registry + +Create an Azure Container Registry (ACR) to store your Docker images. + +```bash +export ACR_NAME="llmacr$RANDOM_ID" +az acr create --resource-group $RESOURCE_GROUP --name $ACR_NAME --sku Basic +``` + +Results: + + + +```json +{ + "adminUserEnabled": false, + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/LLMResourceGroupxxxxxx/providers/Microsoft.ContainerRegistry/registries/llmacrxxxxxx", + "location": "eastus", + "loginServer": "llmacrxxxxxx.azurecr.io", + "name": "llmacrxxxxxx", + "provisioningState": "Succeeded", + "resourceGroup": "LLMResourceGroupxxxxxx", + "sku": { + "name": "Basic", + "tier": "Basic" + }, + "type": "Microsoft.ContainerRegistry/registries" +} +``` + +## Create an AKS Cluster + +Create an AKS cluster and attach the ACR. + +```bash +export AKS_CLUSTER="LLMAKSCluster$RANDOM_ID" + +az aks create \ + --resource-group $RESOURCE_GROUP \ + --name $AKS_CLUSTER \ + --node-count 3 \ + --attach-acr $ACR_NAME +``` + +This command may take several minutes to complete. + +## Connect to the Cluster + +Configure `kubectl` to connect to your Kubernetes cluster. + +```bash +az aks get-credentials --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER +``` + +Verify the connection by listing the cluster nodes. + +```bash +kubectl get nodes +``` + +## Build and Push the Docker Image + +### Prepare Model Artifacts + +Place your model artifacts in the same directory as this markdown file. Ensure the following files are present: + +- `model.py`: Your PyTorch model definition. +- `model.pt`: Your trained model weights. +- `handler.py`: A custom handler for TorchServe. +- `requirements.txt`: Any additional Python dependencies. + +### Create a Model Archive + +Generate a TorchServe model archive (`.mar` file). + +```bash +torch-model-archiver \ + --model-name llm_model \ + --version 1.0 \ + --model-file model.py \ + --serialized-file model.pt \ + --handler handler.py \ + --extra-files requirements.txt +``` + +### Create a Dockerfile + +Create a file named `Dockerfile` in the same directory with the following content: + +```dockerfile +FROM pytorch/torchserve:latest + +# Copy the model archive into the model store +COPY llm_model.mar /home/model-server/model-store/ + +# Expose TorchServe ports +EXPOSE 8080 8081 + +# Start TorchServe +CMD ["torchserve", "--start", "--model-store", "/home/model-server/model-store", "--models", "llm_model.mar"] +``` + +### Build the Docker Image + +Build the Docker image and tag it with your ACR login server. + +```bash +export ACR_LOGIN_SERVER=$(az acr show --name $ACR_NAME --query loginServer -o tsv) +export IMAGE_TAG="$ACR_LOGIN_SERVER/llm-torchserve:latest" +docker build -t $IMAGE_TAG . +``` + +### Push the Image to ACR + +Log in to ACR and push the image. 
+ +```bash +az acr login --name $ACR_NAME +docker push $IMAGE_TAG +``` + +## Deploy the Docker Image to AKS + +### Assign the `AcrPull` Role to the AKS Cluster's Managed Identity + +```bash +AKS_RESOURCE_GROUP=$RESOURCE_GROUP +AKS_CLUSTER_NAME=$AKS_CLUSTER + +# Get the managed identity's object ID +OBJECT_ID=$(az aks show \ + --resource-group $AKS_RESOURCE_GROUP \ + --name $AKS_CLUSTER_NAME \ + --query "identityProfile.kubeletidentity.objectId" \ + --output tsv) + +# Assign the AcrPull role using the object ID +az role assignment create \ + --assignee-object-id $OBJECT_ID \ + --assignee-principal-type ServicePrincipal \ + --role AcrPull \ + --scope $(az acr show --name $ACR_NAME --query id --output tsv) +``` + +### Create a Kubernetes Deployment + +Create a Kubernetes deployment file named `torchserve-deployment.yaml` in the same directory and add the following content: + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: torchserve-deployment +spec: + replicas: 1 + selector: + matchLabels: + app: torchserve + template: + metadata: + labels: + app: torchserve + spec: + containers: + - name: torchserve-container + image: $IMAGE_TAG + ports: + - containerPort: 8080 +``` + +Apply the deployment: + +```bash +kubectl apply -f torchserve-deployment.yaml +``` + +## Expose the Service + +Create a service file named `torchserve-service.yaml` in the same directory with the following content: + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: torchserve-service +spec: + type: LoadBalancer + ports: + - port: 80 + targetPort: 8080 + selector: + app: torchserve +``` + +Apply the service: + +```bash +kubectl apply -f torchserve-service.yaml +``` + +## Test the Deployment + +Wait for the external IP to become available: + +```bash +kubectl get service torchserve-service +``` + +Once the `EXTERNAL-IP` is assigned, you can test the deployment: + +```bash +export SERVICE_IP=$(kubectl get service torchserve-service -o jsonpath='{.status.loadBalancer.ingress[0].ip}') +kubectl get service torchserve-service --watch +curl http://$SERVICE_IP/ping +``` + +Results: + + + +```json +{ + "status": "Healthy" +} +``` + +Invoke the model inference endpoint: + +```bash +curl -X POST http://$SERVICE_IP/predictions/llm_model -T input.json +``` + +Replace `input.json` with your input data file. + +## Next Steps + +In this quickstart, you deployed a large language model using TorchServe on AKS. You can now scale your deployment, monitor performance, and integrate with other Azure services. 
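+
+If you don't yet have an `input.json` for the inference call above, a minimal sketch for the bundled `SimpleModel`/`SimpleHandler` sample (a single linear layer that nominally expects a 10-element numeric vector) is shown below. The payload shape is an assumption based on that sample handler; adjust it to whatever schema your real model and handler expect.
+
+```bash
+# Hypothetical smoke-test payload for the sample SimpleModel (10 input features)
+cat > input.json <<'EOF'
+[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
+EOF
+
+curl -X POST http://$SERVICE_IP/predictions/llm_model -T input.json
+```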
\ No newline at end of file diff --git a/scenarios/DeployLLMWithTorchserveOnAKS/handler.py b/scenarios/DeployLLMWithTorchserveOnAKS/handler.py new file mode 100644 index 000000000..1539b4ae6 --- /dev/null +++ b/scenarios/DeployLLMWithTorchserveOnAKS/handler.py @@ -0,0 +1,12 @@ +from ts.torch_handler.base_handler import BaseHandler +import torch + +class SimpleHandler(BaseHandler): + def preprocess(self, data): + return torch.tensor(data[0]['body']) + + def inference(self, input_data): + return self.model(input_data).detach().numpy() + + def postprocess(self, inference_output): + return inference_output.tolist() \ No newline at end of file diff --git a/scenarios/DeployLLMWithTorchserveOnAKS/model.pt b/scenarios/DeployLLMWithTorchserveOnAKS/model.pt new file mode 100644 index 000000000..a142ea17e --- /dev/null +++ b/scenarios/DeployLLMWithTorchserveOnAKS/model.pt @@ -0,0 +1,5 @@ +import torch +from model import SimpleModel + +model = SimpleModel() +torch.save(model, 'model.pt') \ No newline at end of file diff --git a/scenarios/DeployLLMWithTorchserveOnAKS/model.py b/scenarios/DeployLLMWithTorchserveOnAKS/model.py new file mode 100644 index 000000000..46e71f58b --- /dev/null +++ b/scenarios/DeployLLMWithTorchserveOnAKS/model.py @@ -0,0 +1,9 @@ +import torch.nn as nn + +class SimpleModel(nn.Module): + def __init__(self): + super(SimpleModel, self).__init__() + self.linear = nn.Linear(10, 1) + + def forward(self, x): + return self.linear(x) \ No newline at end of file diff --git a/scenarios/DeployLLMWithTorchserveOnAKS/requirements.txt b/scenarios/DeployLLMWithTorchserveOnAKS/requirements.txt new file mode 100644 index 000000000..262c93aea --- /dev/null +++ b/scenarios/DeployLLMWithTorchserveOnAKS/requirements.txt @@ -0,0 +1,4 @@ +torch +torchserve +numpy +torch-model-archiver \ No newline at end of file diff --git a/scenarios/DeployLLMWithTorchserveOnAKS/torchserve-deployment.yaml b/scenarios/DeployLLMWithTorchserveOnAKS/torchserve-deployment.yaml new file mode 100644 index 000000000..57ea39859 --- /dev/null +++ b/scenarios/DeployLLMWithTorchserveOnAKS/torchserve-deployment.yaml @@ -0,0 +1,19 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: torchserve-deployment +spec: + replicas: 1 + selector: + matchLabels: + app: torchserve + template: + metadata: + labels: + app: torchserve + spec: + containers: + - name: torchserve-container + image: $IMAGE_TAG + ports: + - containerPort: 8080 \ No newline at end of file diff --git a/scenarios/DeployLLMWithTorchserveOnAKS/torchserve-service.yaml b/scenarios/DeployLLMWithTorchserveOnAKS/torchserve-service.yaml new file mode 100644 index 000000000..a555137b8 --- /dev/null +++ b/scenarios/DeployLLMWithTorchserveOnAKS/torchserve-service.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: Service +metadata: + name: torchserve-service +spec: + type: LoadBalancer + ports: + - port: 80 + targetPort: 8080 + selector: + app: torchserve \ No newline at end of file diff --git a/scenarios/DeployPremiumSSDV2/deploy-premium-ssd-v2.md b/scenarios/DeployPremiumSSDV2/deploy-premium-ssd-v2.md new file mode 100644 index 000000000..86ae6d81f --- /dev/null +++ b/scenarios/DeployPremiumSSDV2/deploy-premium-ssd-v2.md @@ -0,0 +1,266 @@ +--- +title: Deploy a Premium SSD v2 managed disk +description: Learn how to deploy a Premium SSD v2 and about its regional availability. 
+author: roygara +ms.author: rogarana +ms.date: 08/12/2024 +ms.topic: how-to +ms.service: azure-disk-storage +ms.custom: references_regions, devx-track-azurecli, devx-track-azurepowershell +--- + +# Deploy a Premium SSD v2 + +Azure Premium SSD v2 is designed for IO-intense enterprise workloads that require sub-millisecond disk latencies and high IOPS and throughput at a low cost. Premium SSD v2 is suited for a broad range of workloads such as SQL Server, Oracle, MariaDB, SAP, Cassandra, MongoDB, big data/analytics, and gaming, on virtual machines or stateful containers. For conceptual information on Premium SSD v2, see [Premium SSD v2](disks-types.md#premium-ssd-v2). + +Premium SSD v2 supports a 4k physical sector size by default, but can be configured to use a 512E sector size as well. While most applications are compatible with 4k sector sizes, some require 512 byte sector sizes. Oracle Database, for example, requires release 12.2 or later in order to support 4k native disks. + +## Limitations + +[!INCLUDE [disks-prem-v2-limitations](./includes/disks-prem-v2-limitations.md)] + +### Regional availability + +[!INCLUDE [disks-premv2-regions](./includes/disks-premv2-regions.md)] + +## Prerequisites + +- Install either the latest [Azure CLI](/cli/azure/install-azure-cli) or the latest [Azure PowerShell module](/powershell/azure/install-azure-powershell). + +## Determine region availability programmatically + +Since not every region and zone supports Premium SSD v2, you can use the Azure CLI or PowerShell to determine region and zone supportability. + +# [Azure CLI](#tab/azure-cli) + +To determine the regions and zones that support Premium SSD v2, replace `yourSubscriptionId` with your subscription, and then run the [az vm list-skus](/cli/azure/vm#az-vm-list-skus) command: + +```azurecli +az login + +subscriptionId="yourSubscriptionId" + +az account set --subscription $subscriptionId + +az vm list-skus --resource-type disks --query "[?name=='PremiumV2_LRS'].{Region:locationInfo[0].location, Zones:locationInfo[0].zones}" +``` + +# [PowerShell](#tab/azure-powershell) + +To determine the regions and zones that support Premium SSD v2, replace `yourSubscriptionId` with your subscription, and then run the [Get-AzComputeResourceSku](/powershell/module/az.compute/get-azcomputeresourcesku) command: + +```powershell +Connect-AzAccount + +$subscriptionId="yourSubscriptionId" + +Set-AzContext -Subscription $subscriptionId + +Get-AzComputeResourceSku | where {$_.ResourceType -eq 'disks' -and $_.Name -eq 'PremiumV2_LRS'} +``` + +# [Azure portal](#tab/portal) + +To programmatically determine the regions and zones you can deploy to, use either the Azure CLI or the Azure PowerShell module. + +--- + +## Create a resource group + +An [Azure resource group][azure-resource-group] is a logical group in which Azure resources are deployed and managed. When you create a resource group, you're prompted to specify a location. This location is the storage location of your resource group metadata and where your resources run in Azure if you don't specify another region during resource creation. + +Create a resource group using the [`az group create`][az-group-create] command. 
+ +```azurecli-interactive +export RANDOM_ID="$(openssl rand -hex 3)" +export MY_RESOURCE_GROUP_NAME="myResourceGroup$RANDOM_ID" +export REGION="eastus2" +az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION +``` + +Results: + +```JSON +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myAKSResourceGroupxxxxxx", + "location": "eastus", + "managedBy": null, + "name": "testResourceGroup", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +Now that you know the region and zone to deploy to, follow the deployment steps in this article to create a Premium SSD v2 disk and attach it to a VM. + +## Use a Premium SSD v2 + +Create a Premium SSD v2 disk in an availability zone by using the [az disk create](/cli/azure/disk#az-disk-create) command. + +The following script creates a Premium SSD v2 with a 4k sector size, to deploy one with a 512 sector size, update the `$LOGICAL_SECTOR_SIZE` parameter. Replace the values of all the variables with your own, then run the following script: + +```azurecli-interactive +## Create a Premium SSD v2 disk +export MY_DISK_NAME="myDisk$RANDOM_ID" +##Replace 4096 with 512 to deploy a disk with 512 sector size +export LOGICAL_SECTOR_SIZE=4096 +az disk create -n $MY_DISK_NAME -g $MY_RESOURCE_GROUP_NAME \ +--size-gb 100 \ +--disk-iops-read-write 5000 \ +--disk-mbps-read-write 150 \ +--location $REGION \ +--sku PremiumV2_LRS \ +--zone "1" \ +--logical-sector-size $LOGICAL_SECTOR_SIZE +``` + +## Create the VM + +Then create a VM in the same region and availability zone that supports Premium Storage and attach the disk to it by using the [az vm create](/cli/azure/vm#az-vm-create) command. + +```azurecli-interactive +export MY_VM_NAME="myVM$RANDOM_ID" +export MY_VM_IMAGE="Win2016Datacenter" +export MY_VM_SIZE="Standard_D4s_v3" +export AZURE_USERNAME=azureuser +export AZURE_PASSWORD=$(openssl rand -base64 16 | tr -dc 'a-zA-Z0-9@#%^&*()-_=+[]{}|;:,.<>?') +az vm create -n $MY_VM_NAME -g $MY_RESOURCE_GROUP_NAME \ +--image $MY_VM_IMAGE \ +--authentication-type password --admin-password $AZURE_PASSWORD --admin-username $AZURE_USERNAME \ +--size $MY_VM_SIZE \ +--location $REGION \ +--zone "1" \ +--attach-data-disks $MY_DISK_NAME +``` + +# [PowerShell](#tab/azure-powershell) + +Create a Premium SSD v2 disk in an availability zone by using the [New-AzDiskConfig](/powershell/module/az.compute/new-azdiskconfig) to define the configuration of your disk and the [New-AzDisk](/powershell/module/az.compute/new-azdisk) command to create your disk. Next, create a VM in the same region and availability zone that supports Premium Storage by using the [az vm create](/cli/azure/vm#az-vm-create). Finally, attach the disk to it by using the [Get-AzVM](/powershell/module/az.compute/get-azvm) command to identify variables for the virtual machine, the [Get-AzDisk](/powershell/module/az.compute/get-azdisk) command to identify variables for the disk, the [Add-AzVMDataDisk](/powershell/module/az.compute/add-azvmdatadisk) command to add the disk, and the [Update-AzVM](/powershell/module/az.compute/update-azvm) command to attach the new disk to the virtual machine. + +The following script creates a Premium SSD v2 with a 4k sector size, to deploy one with a 512 sector size, update the `$LOGICAL_SECTOR_SIZE` parameter. 
Replace the values of all the variables with your own, then run the following script: + +```powershell +# Initialize variables +$MY_RESOURCE_GROUP_NAME = "yourResourceGroupName" +$REGION = "eastus" +$zone = "yourZoneNumber" +$MY_DISK_NAME = "yourDiskName" +$diskSizeInGiB = 100 +$diskIOPS = 5000 +$diskThroughputInMBPS = 150 +#To use a 512 sector size, replace 4096 with 512 +$LOGICAL_SECTOR_SIZE=4096 +$lun = 1 +$MY_VM_NAME = "yourVMName" +$MY_VM_IMAGE = "Win2016Datacenter" +$MY_VM_SIZE = "Standard_D4s_v3" +$vmAdminUser = "yourAdminUserName" +$vmAdminPassword = ConvertTo-SecureString "yourAdminUserPassword" -AsPlainText -Force +$credential = New-Object System.Management.Automation.PSCredential ($vmAdminUser, $vmAdminPassword); + +# Create a Premium SSD v2 +$diskconfig = New-AzDiskConfig ` +-Location $REGION ` +-Zone $zone ` +-DiskSizeGB $diskSizeInGiB ` +-DiskIOPSReadWrite $diskIOPS ` +-DiskMBpsReadWrite $diskThroughputInMBPS ` +-AccountType PremiumV2_LRS ` +-LogicalSectorSize $LOGICAL_SECTOR_SIZE ` +-CreateOption Empty + +New-AzDisk ` +-ResourceGroupName $MY_RESOURCE_GROUP_NAME ` +-DiskName $MY_DISK_NAME ` +-Disk $diskconfig + +# Create the VM +New-AzVm ` + -ResourceGroupName $MY_RESOURCE_GROUP_NAME ` + -Name $MY_VM_NAME ` + -Location $REGION ` + -Zone $zone ` + -Image $MY_VM_IMAGE ` + -Size $MY_VM_SIZE ` + -Credential $credential + +# Attach the disk to the VM +$vm = Get-AzVM -ResourceGroupName $MY_RESOURCE_GROUP_NAME -Name $MY_VM_NAME +$disk = Get-AzDisk -ResourceGroupName $MY_RESOURCE_GROUP_NAME -Name $MY_DISK_NAME +$vm = Add-AzVMDataDisk -VM $vm -Name $MY_DISK_NAME -CreateOption Attach -ManagedDiskId $disk.Id -Lun $lun +Update-AzVM -VM $vm -ResourceGroupName $MY_RESOURCE_GROUP_NAME +``` + +# [Azure portal](#tab/portal) + +1. Sign in to the [Azure portal](https://portal.azure.com/). +1. Navigate to **Virtual machines** and follow the normal VM creation process. +1. On the **Basics** page, select a [supported region](#regional-availability) and set **Availability options** to **Availability zone**. +1. Select one of the zones. +1. Fill in the rest of the values on the page as you like. + + :::image type="content" source="media/disks-deploy-premium-v2/premv2-portal-deploy.png" alt-text="Screenshot of the basics page, region and availability options and zones highlighted." lightbox="media/disks-deploy-premium-v2/premv2-portal-deploy.png"::: + +1. Proceed to the **Disks** page. +1. Under **Data disks** select **Create and attach a new disk**. + + :::image type="content" source="media/disks-deploy-premium-v2/premv2-create-data-disk.png" alt-text="Screenshot highlighting create and attach a new disk on the disk page." lightbox="media/disks-deploy-premium-v2/premv2-create-data-disk.png"::: + +1. Select the **Disk SKU** and select **Premium SSD v2**. + + :::image type="content" source="media/disks-deploy-premium-v2/premv2-select.png" alt-text="Screenshot selecting Premium SSD v2 SKU." lightbox="media/disks-deploy-premium-v2/premv2-select.png"::: + +1. Select whether you'd like to deploy a 4k or 512 logical sector size. + + :::image type="content" source="media/disks-deploy-premium-v2/premv2-sector-size.png" alt-text="Screenshot of deployment logical sector size deployment options." lightbox="media/disks-deploy-premium-v2/premv2-sector-size.png"::: + +1. Proceed through the rest of the VM deployment, making any choices that you desire. + +You've now deployed a VM with a premium SSD v2. 
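+
+Whichever deployment method you used, it can be worth confirming that the disk was created with the IOPS, throughput, and sector size you intended before tuning it in the next section. One way to do that (a sketch, reusing the variables defined in the CLI steps above) is to inspect the disk resource and review its performance-related fields:
+
+```bash
+# Show the full disk resource; check the IOPS, throughput, and logical sector size values in the output
+az disk show --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_DISK_NAME --output json
+```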
+ +--- + +## Adjust disk performance + +You can adjust the performance of a Premium SSD v2 disk four times within a 24-hour period. Creating a disk counts as one of these times, so for the first 24 hours after creating a premium SSD v2 disk you can only adjust its performance up to three times. + +For conceptual information on adjusting disk performance, see [Premium SSD v2 performance](disks-types.md#premium-ssd-v2-performance). + +# [Azure CLI](#tab/azure-cli) + +Use the [az disk update](/cli/azure/disk#az-disk-update) command to change the performance configuration of your Premium SSD v2 disk. For example, you can use the `disk-iops-read-write` parameter to adjust the max IOPS limit, and the `disk-mbps-read-write` parameter to adjust the max throughput limit of your Premium SSD v2 disk. + +The following command adjusts the performance of your disk. Update the values in the command, and then run it: + +```azurecli +export SUBSCRIPTION_ID=$(az account show --query id --output tsv) +az disk update --subscription $SUBSCRIPTION_ID --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_DISK_NAME --disk-iops-read-write=5000 --disk-mbps-read-write=200 +``` + +# [PowerShell](#tab/azure-powershell) + +Use the [New-AzDiskUpdateConfig](/powershell/module/az.compute/new-azdiskupdateconfig) command to define your new performance configuration values for your Premium SSD v2 disks, and then use the [Update-AzDisk](/powershell/module/az.compute/update-azdisk) command to apply your configuration changes to your disk. For example, you can use the `DiskIOPSReadWrite` parameter to adjust the max IOPS limit, and the `DiskMBpsReadWrite` parameter to adjust the max throughput limit of your Premium SSD v2 disk. + +The following command adjusts the performance of your disk. Update the values in the command, and then run it: + +```azurepowershell +$diskupdateconfig = New-AzDiskUpdateConfig -DiskIOPSReadWrite 5000 -DiskMBpsReadWrite 200 +Update-AzDisk -ResourceGroupName $MY_RESOURCE_GROUP_NAME -DiskName $MY_DISK_NAME -DiskUpdate $diskupdateconfig +``` + +# [Azure portal](#tab/portal) + +1. Navigate to the disk you'd like to modify in the [Azure portal](https://portal.azure.com/). +1. Select **Size + Performance**. +1. Set the values for **Disk IOPS** or **Disk throughput (MB/s)** or both, to meet your needs, then select **Save**. + +--- + +## Next steps + +Add a data disk by using either the [Azure portal](linux/attach-disk-portal.yml), [Azure CLI](linux/add-disk.md), or [PowerShell](windows/attach-disk-ps.md). + +Provide feedback on [Premium SSD v2](https://aka.ms/premium-ssd-v2-survey). \ No newline at end of file diff --git a/scenarios/DeployTensorflowOnAKS/deploy-tensorflow-on-aks.md b/scenarios/DeployTensorflowOnAKS/deploy-tensorflow-on-aks.md new file mode 100644 index 000000000..b7998ea1d --- /dev/null +++ b/scenarios/DeployTensorflowOnAKS/deploy-tensorflow-on-aks.md @@ -0,0 +1,191 @@ +--- +title: 'Setup: Deploy a Tensorflow Cluster on Azure Kubernetes Service (AKS)' +description: Learn how to deploy a Tensorflow cluster on Azure Kubernetes Service (AKS) using Azure CLI. +ms.topic: how-to +ms.date: 10/31/2023 +author: azureexecdocs +ms.author: azureexecdocs +ms.custom: devx-track-azurecli, mode-api, innovation-engine, machine-learning, kubernetes +--- + +# Setup: Deploy a Tensorflow Cluster on Azure Kubernetes Service (AKS) + +This guide demonstrates how to deploy a Tensorflow cluster on AKS using the Azure CLI. 
The setup includes provisioning an AKS cluster, configuring a Kubernetes namespace, and deploying a TensorFlow cluster. + + +## Prerequisites + +- Azure CLI (version 2.40.0 or later) +- Kubernetes CLI (kubectl) installed and configured with the Azure AKS cluster +- Bash shell with OpenSSL for generating random suffixes + +> **Note:** Please make sure you are logged into Azure and have set your subscription in advance. + + +## Step 1: Create a Resource Group + +Create a new resource group to hold your AKS cluster. + +```bash +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export REGION="WestUS2" +export RESOURCE_GROUP_NAME="AKS-TF-ResourceGroup-$RANDOM_SUFFIX" +az group create --name $RESOURCE_GROUP_NAME --location $REGION +``` + +Results: + + + +```json +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/AKS-TF-ResourceGroup-xxx", + "location": "westus2", + "managedBy": null, + "name": "AKS-TF-ResourceGroup-xxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Step 2: Create an AKS Cluster + +Provision an AKS cluster in the resource group. + +```bash +export AKS_CLUSTER_NAME="AKS-TF-Cluster-$RANDOM_SUFFIX" +az aks create --name $AKS_CLUSTER_NAME --resource-group $RESOURCE_GROUP_NAME --node-count 3 --enable-addons monitoring --generate-ssh-keys +``` + + +## Step 3: Connect to the AKS Cluster + +Obtain the cluster credentials and configure `kubectl` to use the newly created AKS cluster. + +```bash +az aks get-credentials --name $AKS_CLUSTER_NAME --resource-group $RESOURCE_GROUP_NAME +``` + +## Step 4: Create a Kubernetes Namespace for TensorFlow + +Create a namespace to organize resources related to TensorFlow. + +```bash +export NAMESPACE="tensorflow-cluster" +kubectl create namespace $NAMESPACE +``` + +Results: + + + +```text +namespace/tensorflow-cluster created +``` + +## Step 5: Prepare TensorFlow Deployment Configuration + +Create the TensorFlow deployment configuration file. + +```bash +cat < tensorflow-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: tensorflow-deployment + namespace: $NAMESPACE +spec: + replicas: 2 + selector: + matchLabels: + app: tensorflow + template: + metadata: + labels: + app: tensorflow + spec: + containers: + - name: tensorflow-container + image: tensorflow/tensorflow:latest + ports: + - containerPort: 8501 +EOF +``` + +## Step 6: Deploy the TensorFlow Cluster + +Deploy the TensorFlow cluster by applying the configuration file. + +```bash +kubectl apply -f tensorflow-deployment.yaml +``` + +Results: + + + +```text +deployment.apps/tensorflow-deployment created +``` + +## Step 7: Create a LoadBalancer Service for TensorFlow + +Expose the TensorFlow deployment using a LoadBalancer service to make it accessible externally. + +```bash +cat < tensorflow-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: tensorflow-service + namespace: $NAMESPACE +spec: + selector: + app: tensorflow + ports: + - protocol: TCP + port: 80 + targetPort: 8501 + type: LoadBalancer +EOF + +kubectl apply -f tensorflow-service.yaml +``` + +Results: + + + +```text +service/tensorflow-service created +``` + +## Step 8: Check Service External IP + +Retrieve the external IP address of the TensorFlow service. 
+ +```bash +while true; do + ENDPOINTS=$(kubectl get endpoints tensorflow-service --namespace $NAMESPACE -o jsonpath='{.subsets[*].addresses[*].ip}') + if [ -n "$ENDPOINTS" ]; then + echo "Service endpoints: $ENDPOINTS" + break + else + echo "Waiting for service endpoints..." + sleep 10 + fi +done +``` + +Results: + + + +```text +Service endpoints: 10.244.1.5 10.244.1.6 +``` + +This confirms that the service is routing correctly to its backend pods. \ No newline at end of file diff --git a/scenarios/DeployTrinoOnAKS/deploy-trino-on-aks.md b/scenarios/DeployTrinoOnAKS/deploy-trino-on-aks.md new file mode 100644 index 000000000..22ea52146 --- /dev/null +++ b/scenarios/DeployTrinoOnAKS/deploy-trino-on-aks.md @@ -0,0 +1,222 @@ +--- +title: "Deploy a Trino Cluster on Azure Kubernetes Service (AKS)" +description: Learn how to deploy a Trino Cluster on AKS using Azure CLI for scalable and distributed SQL query processing. +ms.topic: article +ms.date: 10/10/2023 +author: azure-author +ms.author: azurealias +ms.custom: devx-track-azurecli, mode-api, innovation-engine, aks, trino, distributed-sql, data-analytics +--- + +# Deploy a Trino Cluster on Azure Kubernetes Service (AKS) + +In this Exec Doc, you will learn how to deploy a Trino (formerly PrestoSQL) cluster on Azure Kubernetes Service (AKS). Trino is a distributed SQL query engine, ideal for large-scale data analytics. + +## Prerequisites + +1. Ensure you have Azure CLI installed in your environment or use [Azure Cloud Shell](https://shell.azure.com/). +2. Ensure a Kubernetes cluster is already deployed on AKS. You can create one using [this guide](https://learn.microsoft.com/azure/aks/). + + +## Step 2: Create Azure Resource Group + +A resource group is a container that holds related resources for the Trino deployment. + +```bash +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export RESOURCE_GROUP_NAME="TrinoResourceGroup$RANDOM_SUFFIX" +export REGION="westus2" + +az group create --name $RESOURCE_GROUP_NAME --location $REGION +``` + +Results: + + + +```json +{ + "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/TrinoResourceGroupxxx", + "location": "westus2", + "managedBy": null, + "name": "TrinoResourceGroupxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Step 3: Create AKS Cluster + +We will deploy an AKS cluster to host the Trino cluster. + +```bash +export AKS_CLUSTER_NAME="TrinoAKSCluster$RANDOM_SUFFIX" +export CLUSTER_NODES=3 + +az aks create \ + --resource-group $RESOURCE_GROUP_NAME \ + --name $AKS_CLUSTER_NAME \ + --node-count $CLUSTER_NODES \ + --generate-ssh-keys +``` + +## Step 4: Configure `kubectl` Access + +We will configure `kubectl` to connect to the newly created AKS cluster. + +```bash +az aks get-credentials --resource-group $RESOURCE_GROUP_NAME --name $AKS_CLUSTER_NAME +``` + +## Step 5: Create Namespace for Trino + +Namespaces help organize your Kubernetes resources. + +```bash +export NAMESPACE="trino$RANDOM_SUFFIX" +kubectl create namespace $NAMESPACE +``` + +Results: + + + +```json +{ + "kind": "Namespace", + "apiVersion": "v1", + "metadata": { + "name": "trino", + "selfLink": "/api/v1/namespaces/trino", + "uid": "xxxxx-xxxxx-xxxxx-xxxxx", + "resourceVersion": "xxxx", + "creationTimestamp": "xxxx-xx-xxTxx:xx:xxZ" + } +} +``` + +## Step 6: Deploy Trino on AKS + +We will use a Kubernetes manifest to deploy the Trino cluster. 
+ +### Create `trino-deployment.yaml` + +```bash +cat < trino-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: trino + namespace: $NAMESPACE +spec: + replicas: 2 + selector: + matchLabels: + app: trino + template: + metadata: + labels: + app: trino + spec: + containers: + - name: trino + image: trinodb/trino:latest + ports: + - containerPort: 8080 +EOF +``` + +### Apply the Deployment + +```bash +kubectl apply -f trino-deployment.yaml +``` + +Results: + + + +```text +deployment.apps/trino created +``` + +## Step 7: Expose Trino Service + +Expose the Trino deployment via a Kubernetes service for external access. + +```bash +kubectl expose deployment trino \ + --type=LoadBalancer \ + --name=trino-service \ + --namespace=$NAMESPACE \ + --port=8080 \ + --target-port=8080 +``` + +Results: + + + +```output +service/trino-service exposed +``` + + +## Step 8: Verify Deployment + +Ensure that all Trino pods are running. + +```bash +while true; do + POD_STATUSES=$(kubectl get pods --namespace=$NAMESPACE -o jsonpath='{.items[*].status.phase}') + ALL_RUNNING=true + for STATUS in $POD_STATUSES; do + if [ "$STATUS" != "Running" ]; then + ALL_RUNNING=false + break + fi + done + + if [ "$ALL_RUNNING" = true ]; then + kubectl get pods --namespace=$NAMESPACE + break + else + sleep 10 + fi +done +``` + +Results: + + + +```text +NAME READY STATUS RESTARTS AGE +trino-xxxxx-xxxxx 1/1 Running 0 5m +trino-xxxxx-xxxxx 1/1 Running 0 5m +``` + +## Step 9: Fetch Service Public IP + +Retrieve the external IP address of the Trino service. + +```bash +EXTERNAL_IP=$(kubectl get service trino-service --namespace=$NAMESPACE -o jsonpath='{.status.loadBalancer.ingress[0].ip}') +echo "External IP: $EXTERNAL_IP" +``` + +Results: + + + +```text +External IP: xx.xx.xx.xx +``` + +The `EXTERNAL-IP` field contains the Trino service's public IP. Visit `http://:8080` to access the Trino cluster. + + +You have successfully deployed a Trino cluster on Azure Kubernetes Service! 🎉 \ No newline at end of file diff --git a/scenarios/FixFstabIssuesRepairVM/fix-fstab-issues-repair-vm.md b/scenarios/FixFstabIssuesRepairVM/fix-fstab-issues-repair-vm.md new file mode 100644 index 000000000..81e5392f1 --- /dev/null +++ b/scenarios/FixFstabIssuesRepairVM/fix-fstab-issues-repair-vm.md @@ -0,0 +1,88 @@ +--- +title: Troubleshoot Linux VM boot issues due to fstab errors | Microsoft Learn +description: Explains why Linux VM cannot start and how to solve the problem. +services: virtual-machines +documentationcenter: '' +author: divargas-msft +ms.author: divargas +manager: dcscontentpm +tags: '' +ms.custom: sap:My VM is not booting, linux-related-content, devx-track-azurecli, mode-api, innovation-engine +ms.service: azure-virtual-machines +ms.collection: linux +ms.topic: troubleshooting +ms.workload: infrastructure-services +ms.tgt_pltfrm: vm-linux +ms.devlang: azurecli +ms.date: 02/25/2025 +--- + + +# Troubleshoot Linux VM boot issues due to fstab errors + +**Applies to:** :heavy_check_mark: Linux VMs + + + +The Linux filesystem table, fstab is a configuration table which is designed to configure rules where specific file systems are detected and mounted in an orderly manner during the system boot process. +This article discusses multiple conditions where a wrong fstab configuration can lead to boot issue and provides troubleshooting guidance. 
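+
+For reference, a resilient `/etc/fstab` entry for a data disk references the filesystem by UUID and includes the `nofail` option, so a missing or misconfigured device doesn't block the boot. The following sketch uses placeholder device and UUID values:
+
+```bash
+# Look up the UUID of the data disk partition (placeholder device name)
+blkid /dev/sdc1
+
+# Example /etc/fstab entry that mounts it by UUID and tolerates a missing device:
+# UUID=aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee  /data  ext4  defaults,nofail  0  2
+```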
+ +Few common reasons that can lead to Virtual Machine Boot issues due to fstab misconfiguration are listed below: + +* Traditional filesystem name is used instead of the Universally Unique Identifier (UUID) of the filesystem. +* An incorrect UUID is used. +* An entry exists for an unattached device without `nofail` option within fstab configuration. +* Incorrect entry within fstab configuration. + +## Identify fstab issues + +Check the current boot state of the VM in the serial log within the [Boot diagnostics] (/azure/virtual-machines/boot-diagnostics#boot-diagnostics-view) blade in the Azure portal. The VM will be in an Emergency Mode. You see log entries that resemble the following example leading to the Emergency Mode state: + +```output +[K[[1;31m TIME [0m] Timed out waiting for device dev-incorrect.device. +[[1;33mDEPEND[0m] Dependency failed for /data. +[[1;33mDEPEND[0m] Dependency failed for Local File Systems. +... +Welcome to emergency mode! After logging in, type "journalctl -xb" to viewsystem logs, "systemctl reboot" to reboot, "systemctl default" to try again to boot into default mode. +Give root password for maintenance +(or type Control-D to continue) +``` + + >[!Note] + > "/data" is an example of mount point used. Dependency failure for filesystem mount point will differ based on the names used. + +## Resolution + +There are 2 ways to resolve the issue: + +* Repair the VM online + * [Use the Serial Console](#use-the-serial-console) +* Repair the vm offline + * [Use Azure Linux Auto Repair (ALAR)](#use-azure-linux-auto-repair-alar) + * [Use Manual Method](#use-manual-method) + +#### Use Azure Linux Auto Repair (ALAR) + +Azure Linux Auto Repair (ALAR) scripts is a part of VM repair extension described in [Repair a Linux VM by using the Azure Virtual Machine repair commands](./repair-linux-vm-using-azure-virtual-machine-repair-commands.md). ALAR covers automation of multiple repair scenarios including `/etc/fstab` issues. + +The ALAR scripts use the repair extension `run` command and its `--run-id` option. The script-id for the automated recovery is: **linux-alar2**. Implement the following steps to automate fstab errors via offline ALAR approach: + +```azurecli-interactive +output=$(az extension add -n vm-repair; az extension update -n vm-repair; az vm repair repair-button --button-command 'fstab' --verbose --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME) +value=$(echo "$output" | jq -r '.message') +extracted=$(echo $value) +echo "$extracted" +``` + +> [!NOTE] +> The fstab repair script will take a backup of the original file and strip off any lines in the /etc/fstab file which are not needed to boot a system. After successful start of the OS, edit the fstab again and correct any errors which didn't allow a reboot of the system before. + +[!INCLUDE [Azure Help Support](../../../includes/azure-help-support.md)] \ No newline at end of file diff --git a/scenarios/GPUNodePoolAKS/gpu-node-pool-aks.md b/scenarios/GPUNodePoolAKS/gpu-node-pool-aks.md new file mode 100644 index 000000000..516aa2783 --- /dev/null +++ b/scenarios/GPUNodePoolAKS/gpu-node-pool-aks.md @@ -0,0 +1,516 @@ +--- +title: Create a multi-instance GPU node pool in Azure Kubernetes Service (AKS) +description: Learn how to create a multi-instance GPU node pool in Azure Kubernetes Service (AKS). 
+ms.topic: article +ms.date: 08/30/2023 +ms.author: juda +ms.subservice: aks-nodes +--- + +# Create a multi-instance GPU node pool in Azure Kubernetes Service (AKS) + +Nvidia's A100 GPU can be divided in up to seven independent instances. Each instance has its own memory and Stream Multiprocessor (SM). For more information on the Nvidia A100, see [Nvidia A100 GPU][Nvidia A100 GPU]. + +This article walks you through how to create a multi-instance GPU node pool in an Azure Kubernetes Service (AKS) cluster. + +## Prerequisites and limitations + +* An Azure account with an active subscription. If you don't have one, you can [create an account for free](https://azure.microsoft.com/free/?WT.mc_id=A261C142F). +* Azure CLI version 2.2.0 or later installed and configured. Run `az --version` to find the version. If you need to install or upgrade, see [Install Azure CLI][install-azure-cli]. +* The Kubernetes command-line client, [kubectl](https://kubernetes.io/docs/reference/kubectl/), installed and configured. If you use Azure Cloud Shell, `kubectl` is already installed. If you want to install it locally, you can use the [`az aks install-cli`][az-aks-install-cli] command. +* Helm v3 installed and configured. For more information, see [Installing Helm](https://helm.sh/docs/intro/install/). +* You can't use Cluster Autoscaler with multi-instance node pools. + +## GPU instance profiles + +GPU instance profiles define how GPUs are partitioned. The following table shows the available GPU instance profile for the `Standard_ND96asr_v4`: + +| Profile name | Fraction of SM |Fraction of memory | Number of instances created | +|--|--|--|--| +| MIG 1g.5gb | 1/7 | 1/8 | 7 | +| MIG 2g.10gb | 2/7 | 2/8 | 3 | +| MIG 3g.20gb | 3/7 | 4/8 | 2 | +| MIG 4g.20gb | 4/7 | 4/8 | 1 | +| MIG 7g.40gb | 7/7 | 8/8 | 1 | + +As an example, the GPU instance profile of `MIG 1g.5gb` indicates that each GPU instance has 1g SM(Computing resource) and 5gb memory. In this case, the GPU is partitioned into seven instances. + +The available GPU instance profiles available for this instance size include `MIG1g`, `MIG2g`, `MIG3g`, `MIG4g`, and `MIG7g`. + +> [!IMPORTANT] +> You can't change the applied GPU instance profile after node pool creation. + +## Create an AKS cluster + +1. Create an Azure resource group using the [`az group create`][az-group-create] command. + + ```azurecli-interactive + export RANDOM_ID="$(openssl rand -hex 3)" + export MY_RESOURCE_GROUP_NAME="myAKSResourceGroup$RANDOM_ID" + export REGION="eastus2" + export MY_AKS_CLUSTER_NAME="myAKSCluster$RANDOM_ID" + az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION + ``` + + Results: + + + ```JSON + { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myAKSResourceGroupxxxxxx", + "location": "eastus", + "managedBy": null, + "name": "testResourceGroup", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" + } + ``` + +2. Create an AKS cluster using the [`az aks create`][az-aks-create] command. 
+ + ```azurecli-interactive + az aks create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_AKS_CLUSTER_NAME\ + --node-count 1 \ + --generate-ssh-keys + ``` + + Results: + + + ```JSON + { + "aadProfile": null, + "addonProfiles": { + "httpApplicationRouting": null, + "kubeDashboard": null, + "omsagent": { + "config": { + "logAnalyticsWorkspaceResourceID": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourcegroups/xxxxxx/providers/Microsoft.OperationalInsights/workspaces/xxxxxx" + }, + "enabled": false + } + }, + "agentPoolProfiles": [ + { + "availabilityZones": null, + "count": 1, + "enableAutoScaling": false, + "enableEncryptionAtHost": false, + "enableFips": false, + "enableNodePublicIP": false, + "gpuInstanceProfile": null, + "kubeletConfig": null, + "kubeletDiskType": "OS", + "linuxOSConfig": null, + "maxCount": null, + "maxPods": 110, + "minCount": null, + "mode": "System", + "name": "nodepool1", + "nodeImageVersion": "AKSUbuntu-xxxx.x.x.x", + "nodeLabels": null, + "nodePublicIPPrefixID": null, + "nodeTaints": null, + "orchestratorVersion": "x.x.x", + "osDiskSizeGB": 128, + "osDiskType": "Managed", + "osSKU": "Ubuntu", + "osType": "Linux", + "podSubnetID": null, + "powerState": { + "code": "Running" + }, + "provisioningState": "Succeeded", + "proximityPlacementGroupID": null, + "scaleSetEvictionPolicy": null, + "scaleSetPriority": "Regular", + "spotMaxPrice": null, + "tags": null, + "type": "VirtualMachineScaleSets", + "upgradeSettings": { + "maxSurge": null + }, + "vmSize": "Standard_DS2_v2", + "vnetSubnetID": null + } + ], + "apiServerAccessProfile": null, + "autoScalerProfile": null, + "autoUpgradeProfile": null, + "azurePortalFQDN": null, + "azurePortalURL": "https://portal.azure.com/#resource/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/xxxxxx/providers/Microsoft.ContainerService/managedClusters/xxxxxx", + "creationData": null, + "currentKubernetesVersion": "x.x.x", + "diskEncryptionSetID": null, + "dnsPrefix": "xxxxxx", + "enablePodSecurityPolicy": null, + "enableRBAC": true, + "extendedLocation": null, + "fqdn": "xxxxxx-xxxxxx-xxxxxx.hcp.xxxxxx.azmk8s.io", + "fqdnSubdomain": null, + "httpProxyConfig": null, + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourcegroups/xxxxxx/providers/Microsoft.ContainerService/managedClusters/xxxxxx", + "identity": { + "principalId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "tenantId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "type": "SystemAssigned", + "userAssignedIdentities": null + }, + "identityProfile": null, + "ingressProfile": null, + "keyVaultSecretsProvider": null, + "kubernetesVersion": "x.x.x", + "location": "xxxxxx", + "maxAgentPools": 10, + "monitoringAddonProfile": null, + "name": "xxxxxx", + "networkProfile": { + "dnsServiceIP": "10.0.0.10", + "dockerBridgeCidr": "172.17.0.1/16", + "loadBalancerProfile": { + "allocatedOutboundPorts": null, + "effectiveOutboundIPs": [ + { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/xxxxxx/providers/Microsoft.Network/publicIPAddresses/xxxxxx", + "resourceGroup": "xxxxxx" + } + ], + "enableMultipleStandardLoadBalancers": null, + "idleTimeoutInMinutes": null, + "managedOutboundIPs": { + "count": 1 + }, + "outboundIPPrefixes": null, + "outboundIPs": null, + "outboundPortsAllocated": null + }, + "loadBalancerSku": "Standard", + "networkMode": null, + "networkPlugin": "kubenet", + "networkPolicy": null, + "outboundType": "loadBalancer", + "podCidr": null, + "serviceCidr": "10.0.0.0/16" + }, + "nodeResourceGroup": 
"MC_xxxxxx_xxxxxx_xxxxxx", + "oidcIssuerProfile": null, + "podIdentityProfile": null, + "powerState": { + "code": "Running" + }, + "privateFQDN": null, + "privateLinkResources": null, + "provisioningState": "Succeeded", + "publicNetworkAccess": "Enabled", + "resourceGroup": "xxxxxx", + "securityProfile": null, + "servicePrincipalProfile": { + "clientId": "msi" + }, + "sku": { + "name": "Basic", + "tier": "Free" + }, + "storageProfile": { + "blobCsiDriver": { + "enabled": true + }, + "diskCsiDriver": { + "enabled": true + }, + "fileCsiDriver": { + "enabled": true + }, + "snapshotController": { + "enabled": true + } + }, + "tags": null, + "type": "Microsoft.ContainerService/ManagedClusters", + "windowsProfile": null + } + ``` + +## Create a multi-instance GPU node pool + +You can use either the Azure CLI or an HTTP request to the ARM API to create the node pool. + +### [Azure CLI](#tab/azure-cli) + +* Create a multi-instance GPU node pool using the [`az aks nodepool add`][az-aks-nodepool-add] command and specify the GPU instance profile. + + ```azurecli-interactive + export MY_NODE_POOL_NAME="mignode" + az aks nodepool add \ + --name $MY_NODE_POOL_NAME \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --cluster-name $MY_AKS_CLUSTER_NAME \ + --node-vm-size Standard_NC24ads_A100_v4 \ + --gpu-instance-profile MIG1g + ``` + + Results: + + + ```JSON + { + "agentPoolProfile": { + "count": 1, + "enableAutoScaling": false, + "enableEncryptionAtHost": false, + "enableFips": false, + "enableNodePublicIp": false, + "gpuInstanceProfile": "MIG1g", + "kubeletConfig": null, + "linuxOsConfig": null, + "maxCount": null, + "maxPods": 110, + "minCount": null, + "mode": "User", + "name": "mignode", + "nodeImageVersion": "AKSUbuntu-xxxx.x.x.x", + "nodeLabels": {}, + "nodePublicIpPrefixId": null, + "nodeTaints": [], + "orchestratorVersion": "x.x.x", + "osDiskSizeGb": 128, + "osDiskType": "Managed", + "osSku": "Ubuntu", + "osType": "Linux", + "podSubnetId": null, + "provisioningState": "Succeeded", + "proximityPlacementGroupId": null, + "scaleSetEvictionPolicy": null, + "scaleSetPriority": "Regular", + "spotMaxPrice": null, + "tags": null, + "type": "VirtualMachineScaleSets", + "upgradeSettings": { + "maxSurge": "1" + }, + "vmSize": "Standard_NC96ads_A100_v4", + "vnetSubnetId": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/xxxxxx/providers/Microsoft.Network/virtualNetworks/xxxxxx/subnets/xxxxxx" + }, + "creationData": null, + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/xxxxxx/providers/Microsoft.ContainerService/managedClusters/xxxxxx/agentPools/mignode", + "name": "mignode", + "provisioningState": "Succeeded", + "resourceGroup": "xxxxxx", + "type": "Microsoft.ContainerService/managedClusters/agentPools" + } + ``` + +### [HTTP request](#tab/http-request) + +* Create a multi-instance GPU node pool by placing the GPU instance profile in the request body. + + ```http + { + "properties": { + "count": 1, + "vmSize": "Standard_ND96asr_v4", + "type": "VirtualMachineScaleSets", + "gpuInstanceProfile": "MIG1g" + } + } + ``` + +--- + +## Determine multi-instance GPU (MIG) strategy + +Before you install the Nvidia plugins, you need to specify which multi-instance GPU (MIG) strategy to use for GPU partitioning: *Single strategy* or *Mixed strategy*. The two strategies don't affect how you execute CPU workloads, but how GPU resources are displayed. + +* **Single strategy**: The single strategy treats every GPU instance as a GPU. 
If you use this strategy, the GPU resources are displayed as `nvidia.com/gpu: 1`. +* **Mixed strategy**: The mixed strategy exposes the GPU instances and the GPU instance profile. If you use this strategy, the GPU resource are displayed as `nvidia.com/mig1g.5gb: 1`. + +## Install the NVIDIA device plugin and GPU feature discovery + +1. Set your MIG strategy as an environment variable. You can use either single or mixed strategy. + + ```azurecli-interactive + # Single strategy + export MIG_STRATEGY=single + + # Mixed strategy + export MIG_STRATEGY=mixed + ``` + +2. Add the Nvidia device plugin and GPU feature discovery helm repos using the `helm repo add` and `helm repo update` commands. + + ```azurecli-interactive + helm repo add nvdp https://nvidia.github.io/k8s-device-plugin + helm repo add nvgfd https://nvidia.github.io/gpu-feature-discovery + helm repo update + ``` + +3. Install the Nvidia device plugin using the `helm install` command. + + ```azurecli-interactive + helm install \ + --version=0.14.0 \ + --generate-name \ + --set migStrategy=${MIG_STRATEGY} \ + nvdp/nvidia-device-plugin + ``` + +4. Install the GPU feature discovery using the `helm install` command. + + ```azurecli-interactive + helm install \ + --version=0.2.0 \ + --generate-name \ + --set migStrategy=${MIG_STRATEGY} \ + nvgfd/gpu-feature-discovery + ``` + +## Confirm multi-instance GPU capability + +1. Configure `kubectl` to connect to your AKS cluster using the [`az aks get-credentials`][az-aks-get-credentials] command. + + ```azurecli-interactive + az aks get-credentials --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_AKS_CLUSTER_NAME + ``` + +2. Verify the connection to your cluster using the `kubectl get` command to return a list of cluster nodes. + + ```azurecli-interactive + kubectl get nodes -o wide + ``` + +3. Confirm the node has multi-instance GPU capability using the `kubectl describe node` command. The following example command describes the node named *mignode*, which uses MIG1g as the GPU instance profile. + + ```azurecli-interactive + kubectl describe node mignode + ``` + + Your output should resemble the following example output: + + ```output + # Single strategy output + Allocatable: + nvidia.com/gpu: 56 + + # Mixed strategy output + Allocatable: + nvidia.com/mig-1g.5gb: 56 + ``` + +## Schedule work + +The following examples are based on cuda base image version 12.1.1 for Ubuntu22.04, tagged as `12.1.1-base-ubuntu22.04`. + +### Single strategy + +1. Create a file named `single-strategy-example.yaml` and copy in the following manifest. + + ```bash + cat < single-strategy-example.yaml + apiVersion: v1 + kind: Pod + metadata: + name: nvidia-single + spec: + containers: + - name: nvidia-single + image: nvidia/cuda:12.1.1-base-ubuntu22.04 + command: ["/bin/sh"] + args: ["-c","sleep 1000"] + resources: + limits: + "nvidia.com/gpu": 1 + EOF + ``` + +2. Deploy the application using the `kubectl apply` command and specify the name of your YAML manifest. + + ```azurecli-interactive + kubectl apply -f single-strategy-example.yaml + ``` + +3. Verify the allocated GPU devices using the `kubectl exec` command. This command returns a list of the cluster nodes. 
+ + ```azurecli-interactive + kubectl exec nvidia-single -- nvidia-smi -L + ``` + + The following example resembles output showing successfully created deployments and services: + + ```output + GPU 0: NVIDIA A100 40GB PCIe (UUID: GPU-48aeb943-9458-4282-da24-e5f49e0db44b) + MIG 1g.5gb Device 0: (UUID: MIG-fb42055e-9e53-5764-9278-438605a3014c) + MIG 1g.5gb Device 1: (UUID: MIG-3d4db13e-c42d-5555-98f4-8b50389791bc) + MIG 1g.5gb Device 2: (UUID: MIG-de819d17-9382-56a2-b9ca-aec36c88014f) + MIG 1g.5gb Device 3: (UUID: MIG-50ab4b32-92db-5567-bf6d-fac646fe29f2) + MIG 1g.5gb Device 4: (UUID: MIG-7b6b1b6e-5101-58a4-b5f5-21563789e62e) + MIG 1g.5gb Device 5: (UUID: MIG-14549027-dd49-5cc0-bca4-55e67011bd85) + MIG 1g.5gb Device 6: (UUID: MIG-37e055e8-8890-567f-a646-ebf9fde3ce7a) + ``` + +### Mixed strategy + +1. Create a file named `mixed-strategy-example.yaml` and copy in the following manifest. + + ```yaml + cat < mixed-strategy-example.yaml + apiVersion: v1 + kind: Pod + metadata: + name: nvidia-mixed + spec: + containers: + - name: nvidia-mixed + image: nvidia/cuda:12.1.1-base-ubuntu22.04 + command: ["/bin/sh"] + args: ["-c","sleep 100"] + resources: + limits: + "nvidia.com/mig-1g.5gb": 1 + EOF + ``` + +2. Deploy the application using the `kubectl apply` command and specify the name of your YAML manifest. + + ```azurecli-interactive + kubectl apply -f mixed-strategy-example.yaml + ``` + +3. Verify the allocated GPU devices using the `kubectl exec` command. This command returns a list of the cluster nodes. + + ```azurecli-interactive + kubectl exec nvidia-mixed -- nvidia-smi -L + ``` + + The following example resembles output showing successfully created deployments and services: + + ```output + GPU 0: NVIDIA A100 40GB PCIe (UUID: GPU-48aeb943-9458-4282-da24-e5f49e0db44b) + MIG 1g.5gb Device 0: (UUID: MIG-fb42055e-9e53-5764-9278-438605a3014c) + ``` + +> [!IMPORTANT] +> The `latest` tag for CUDA images has been deprecated on Docker Hub. Please refer to [NVIDIA's repository](https://hub.docker.com/r/nvidia/cuda/tags) for the latest images and corresponding tags. + +## Troubleshooting + +If you don't see multi-instance GPU capability after creating the node pool, confirm the API version isn't older than *2021-08-01*. + +## Next steps + +For more information on AKS node pools, see [Manage node pools for a cluster in AKS](./manage-node-pools.md). + + +[az-group-create]: /cli/azure/group#az_group_create +[az-aks-create]: /cli/azure/aks#az_aks_create +[az-aks-nodepool-add]: /cli/azure/aks/nodepool#az_aks_nodepool_add +[install-azure-cli]: /cli/azure/install-azure-cli +[az-aks-install-cli]: /cli/azure/aks#az_aks_install_cli +[az-aks-get-credentials]: /cli/azure/aks#az_aks_get_credentials + + +[Nvidia A100 GPU]:https://www.nvidia.com/en-us/data-center/a100/ \ No newline at end of file diff --git a/scenarios/KernelBootIssuesRepairVM/kernel-related-boot-issues-repairvm.md b/scenarios/KernelBootIssuesRepairVM/kernel-related-boot-issues-repairvm.md new file mode 100644 index 000000000..3b230795c --- /dev/null +++ b/scenarios/KernelBootIssuesRepairVM/kernel-related-boot-issues-repairvm.md @@ -0,0 +1,84 @@ +--- +title: Recover Azure Linux VM from kernel panic due to missing initramfs +description: Provides solutions to an issue in which a Linux virtual machine (VM) can't boot after applying kernel changes. 
+author: divargas-msft +ms.author: divargas +ms.date: 02/25/2025 +ms.reviewer: jofrance +ms.service: azure-virtual-machines +ms.custom: sap:Cannot start or stop my VM, devx-track-azurecli, mode-api, innovation-engine, linux-related-content +ms.workload: infrastructure-services +ms.tgt_pltfrm: vm-linux +ms.collection: linux +ms.topic: troubleshooting +--- + +# Azure Linux virtual machine fails to boot after applying kernel changes + +**Applies to:** :heavy_check_mark: Linux VMs + + + + +## Prerequisites + +Make sure the [serial console](serial-console-linux.md) is enabled and functional in the Linux VM. + +## Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) + +This error occurs because of a recent system update (kernel). It's most commonly seen in RHEL-based distributions. +You can [identify this issue from the Azure serial console](#identify-kernel-boot-issue). You'll see any of the following error messages: + +1. "Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0)" + + ```output + [ 301.026129] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) + [ 301.027122] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G ------------ T 3.10.0-1160.36.2.el7.x86_64 #1 + [ 301.027122] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090008 12/07/2018 + [ 301.027122] Call Trace: + [ 301.027122] [] dump_stack+0x19/0x1b + [ 301.027122] [] panic+0xe8/0x21f + [ 301.027122] [] mount_block_root+0x291/0x2a0 + [ 301.027122] [] mount_root+0x53/0x56 + [ 301.027122] [] prepare_namespace+0x13c/0x174 + [ 301.027122] [] kernel_init_freeable+0x222/0x249 + [ 301.027122] [] ? initcall_blcklist+0xb0/0xb0 + [ 301.027122] [] ? rest_init+0x80/0x80 + [ 301.027122] [] kernel_init+0xe/0x100 + [ 301.027122] [] ret_from_fork_nospec_begin+0x21/0x21 + [ 301.027122] [] ? rest_init+0x80/0x80 + [ 301.027122] Kernel Offset: 0xc00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) + ``` + +2. "error: file '/initramfs-*.img' not found" + + > error: file '/initramfs-3.10.0-1160.36.2.el7.x86_64.img' not found. + +This kind of error indicates that the initramfs file isn't generated, the GRUB configuration file has the initrd entry missing after a patching process, or a GRUB manual misconfiguration. + +### Regenerate missing initramfs by using Azure Repair VM ALAR scripts + +1. Create a repair VM by running the following Bash command line with [Azure Cloud Shell](/azure/cloud-shell/overview). For more information, see [Use Azure Linux Auto Repair (ALAR) to fix a Linux VM - initrd option](repair-linux-vm-using-ALAR.md#initrd). This command will regenerate the initrd/initramfs image, regenerate the GRUB configuration file if it has the initrd entry missing, and swap the OS disk + +```azurecli-interactive +output=$(az extension add -n vm-repair; az extension update -n vm-repair; az vm repair repair-button --button-command 'initrd' --verbose --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME) +value=$(echo "$output" | jq -r '.message') +extracted=$(echo $value) +echo "$extracted" +``` + +2. Once the repair VM command has been executed, restart the original VM and validate that it's able to boot up. + +## Next steps + +If the specific boot error isn't a kernel related boot issue, see [Troubleshoot Azure Linux Virtual Machines boot errors](./boot-error-troubleshoot-linux.md) for further troubleshooting options. 
+ +[!INCLUDE [Azure Help Support](../../../includes/azure-help-support.md)] \ No newline at end of file diff --git a/scenarios/ObtainPerformanceMetricsLinuxSustem/obtain-performance-metrics-linux-system.md b/scenarios/ObtainPerformanceMetricsLinuxSustem/obtain-performance-metrics-linux-system.md new file mode 100644 index 000000000..2424ff0dd --- /dev/null +++ b/scenarios/ObtainPerformanceMetricsLinuxSustem/obtain-performance-metrics-linux-system.md @@ -0,0 +1,641 @@ +--- +title: Obtaining Performance metrics from a Linux system +description: Learn how to obtainer Performance metrics from a Linux system. +author: divargas-msft +ms.author: esflores +editor: divargas-msft +ms.reviewer: divargas +ms.service: virtual-machines +ms.collection: linux +ms.topic: troubleshooting-general +ms.workload: infrastructure-services +ms.tgt_pltfrm: vm-linux +ms.date: 07/16/2024 +ms.custom: devx-track-azurecli, mode-api, innovation-engine, linux-related-content +--- + +# Obtaining Performance metrics from a Linux system + +**Applies to:** :heavy_check_mark: Linux VMs + +This article is going to cover instructions to determine how to quickly obtain performance metrics from a Linux System. + +There are several commands that can be used to obtain performance counters on Linux. Commands such as `vmstat` and `uptime`, provide general system metrics such as CPU usage, System Memory, and System load. +Most of the commands are already installed by default with others being readily available in default repositories. +The commands can be separated into: + +* CPU +* Memory +* Disk I/O +* Processes + +## Sysstat utilities installation + + + +> [!NOTE] +> Some of these commands need to be run as `root` to be able to gather all relevant details. + +> [!NOTE] +> Some commands are part of the `sysstat` package which might not be installed by default. The package can be easily installed with `sudo apt install sysstat`, `dnf install sysstat` or `zypper install sysstat` for those popular distros. + +The full command for installation of the `sysstat` package on some popular Distros is: + +```bash +output=$(az vm run-command invoke --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --command-id RunShellScript --scripts "/bin/bash -c 'OS=\$(cat /etc/os-release|grep NAME|head -1|cut -d= -f2 | sed \"s/\\\"//g\"); if [[ \$OS =~ \"Ubuntu\" ]] || [[ \$OS =~ \"Debian\" ]]; then sudo apt install sysstat -y; elif [[ \$OS =~ \"Red Hat\" ]]; then sudo dnf install sysstat -y; elif [[ \$OS =~ \"SUSE\" ]]; then sudo zypper install sysstat --non-interactive; else echo \"Unknown distribution\"; fi'") +value=$(echo "$output" | jq -r '.value[0].message') +extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d') +echo "$extracted" +``` + +## CPU + +### mpstat + +The `mpstat` utility is part of the `sysstat` package. It displays per CPU utilization and averages, which is helpful to quickly identify CPU usage. `mpstat` provides an overview of CPU utilization across the available CPUs, helping identify usage balance and if a single CPU is heavily loaded. 
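
If you're connected to the VM directly over SSH, the underlying command is short. A minimal direct invocation, assuming the `sysstat` package from the previous section is installed, is shown below; the rest of this article instead wraps each command in `az vm run-command` so it can be executed remotely through the Azure CLI.

```bash
# Per-CPU statistics for all online CPUs, refreshed every second, collected twice
mpstat -P ALL 1 2
```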
+ +The full command is: + +```azurecli-interactive +output=$(az vm run-command invoke --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --command-id RunShellScript --scripts 'mpstat -P ALL 1 2') +value=$(echo "$output" | jq -r '.value[0].message') +extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d') +echo "$extracted" +``` + +The options and arguments are: + +* `-P`: Indicates the processor to display statistics, the ALL argument indicates to display statistics for all the online CPUs in the system. +* `1`: The first numeric argument indicates how often to refresh the display in seconds. +* `2`: The second numeric argument indicates how many times the data refreshes. + +The number of times the `mpstat` command displays data can be changed by increasing the second numeric argument to accommodate for longer data collection times. Ideally 3 or 5 seconds should suffice, for systems with increased core counts 2 seconds can be used to reduce the amount of data displayed. +From the output: + +```output +Linux 5.14.0-362.8.1.el9_3.x86_64 (alma9) 02/21/24 _x86_64_ (8 CPU) + +16:55:50 CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle +16:55:51 all 69.09 0.00 30.16 0.00 0.38 0.38 0.00 0.00 0.00 0.00 +16:55:51 0 77.23 0.00 21.78 0.00 0.99 0.00 0.00 0.00 0.00 0.00 +16:55:51 1 97.03 0.00 0.99 0.00 0.99 0.99 0.00 0.00 0.00 0.00 +16:55:51 2 11.11 0.00 88.89 0.00 0.00 0.00 0.00 0.00 0.00 0.00 +16:55:51 3 11.00 0.00 88.00 0.00 0.00 1.00 0.00 0.00 0.00 0.00 +16:55:51 4 83.84 0.00 16.16 0.00 0.00 0.00 0.00 0.00 0.00 0.00 +16:55:51 5 76.00 0.00 23.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 +16:55:51 6 96.00 0.00 3.00 0.00 0.00 1.00 0.00 0.00 0.00 0.00 +16:55:51 7 100.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 +[...] + +Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle +Average: all 74.02 0.00 25.52 0.00 0.25 0.21 0.00 0.00 0.00 0.00 +Average: 0 63.00 0.00 36.67 0.00 0.33 0.00 0.00 0.00 0.00 0.00 +Average: 1 97.33 0.00 1.67 0.00 0.33 0.67 0.00 0.00 0.00 0.00 +Average: 2 42.33 0.00 57.33 0.00 0.33 0.00 0.00 0.00 0.00 0.00 +Average: 3 34.33 0.00 65.00 0.00 0.33 0.33 0.00 0.00 0.00 0.00 +Average: 4 88.63 0.00 11.04 0.00 0.00 0.33 0.00 0.00 0.00 0.00 +Average: 5 71.33 0.00 28.33 0.00 0.33 0.00 0.00 0.00 0.00 0.00 +Average: 6 95.65 0.00 4.01 0.00 0.00 0.33 0.00 0.00 0.00 0.00 +Average: 7 99.67 0.00 0.00 0.00 0.33 0.00 0.00 0.00 0.00 0.00 +``` + +There are a couple of important things to note. The first line displays useful information: + +* Kernel and release: `5.14.0-362.8.1.el9_3.x86_64` +* Hostname: `alma9` +* Date: `02/21/24` +* Architecture: `_x86_64_` +* Total amount of CPUs (this information is useful to interpret the output from other commands): `(8 CPU)` + +Then the metrics for the CPUs are displayed, to explain each of the columns: + +* `Time`: The time the sample was collected +* `CPU`: The CPU numeric identifier, the ALL identifier is an average for all the CPUs. +* `%usr`: The percentage of CPU utilization for user space, normally user applications. +* `%nice`: The percentage of CPU utilization for user space processes with a nice (priority) value. +* `%sys`: The percentage of CPU utilization for kernel space processes. +* `%iowait`: The percentage of CPU time spent idle waiting for outstanding I/O. +* `%irq`: The percentage of CPU time spent serving hardware interrupts. +* `%soft`: The percentage of CPU time spent serving software interrupts. 
+* `%steal`: The percentage of CPU time spent serving other virtual machines (not applicable to Azure due to no overprovisioning of CPU). +* `%guest`: The percentage of CPU time spent serving virtual CPUs (not applicable to Azure, only applicable to bare metal systems running virtual machines). +* `%gnice`: The percentage of CPU time spent serving virtual CPUs with a nice value (not applicable to Azure, only applicable to bare metal systems running virtual machines). +* `%idle`: The percentage of CPU time spent idle, and without waiting for I/O requests. + +#### Things to look out for + +Some details to keep in mind when reviewing the output for `mpstat`: + +* Verify that all CPUs are properly loaded and not a single CPU is serving all the load. This information could indicate a single threaded application. +* Look for a healthy balance between `%usr` and `%sys` as the opposite would indicate more time spent on the actual workload than serving kernel processes. +* Look for `%iowait` percentages as high values could indicate a system that is constantly waiting for I/O requests. +* High `%soft` usage could indicate high network traffic. + +### `vmstat` + +The `vmstat` utility is widely available in most Linux distributions, it provides high level overview for CPU, Memory, and Disk I/O utilization in a single pane. +The command for `vmstat` is: + +```azurecli-interactive +output=$(az vm run-command invoke --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --command-id RunShellScript --scripts 'vmstat -w 1 5') +value=$(echo "$output" | jq -r '.value[0].message') +extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d') +echo "$extracted" +``` + +The options and arguments are: + +* `-w`: Use wide printing to keep consistent columns. +* `1`: The first numeric argument indicates how often to refresh the display in seconds. +* `5`: The second numeric argument indicates how many times the data refreshes. + +The output: + +```output +--procs-- -----------------------memory---------------------- ---swap-- -----io---- -system-- --------cpu-------- + r b swpd free buff cache si so bi bo in cs us sy id wa st + 14 0 0 26059408 164 137468 0 0 89 3228 56 122 3 1 95 1 0 + 14 1 0 24388660 164 145468 0 0 0 7811 3264 13870 76 24 0 0 0 + 18 1 0 23060116 164 155272 0 0 44 8075 3704 15129 78 22 0 0 0 + 18 1 0 21078640 164 165108 0 0 295 8837 3742 15529 73 27 0 0 0 + 15 2 0 19015276 164 175960 0 0 9 8561 3639 15177 73 27 0 0 0 +``` + +`vmstat` splits the output in six groups: + +* `procs`: statistics for processes. +* `memory`: statistics for system memory. +* `swap`: statistics for swap. +* `io`: statistics for disk io. +* `system`: statistics for context switches and interrupts. +* `cpu`: statistics for CPU usage. + +>Note: `vmstat` shows overall statistics for the entire system (that is, all CPUs, all block devices aggregated). + +#### `procs` + +The `procs` section has two columns: + +* `r`: The number of runnable processes in the run queue. +* `b`: The number of processes blocked waiting for I/O. + +This section immediately shows if there's any bottleneck on the system. High numbers on either of the columns indicate processes queuing up waiting for resources. + +The `r` column indicates the number of processes that are waiting for CPU time to be able to run. 
An easy way to interpret this number is as follows: if the number of processes in the `r` queue is higher than the total number of CPUs, it can be inferred that the CPU is heavily loaded and can't allocate CPU time for all the processes waiting to run.

The `b` column indicates the number of processes waiting to run that are blocked by I/O requests. A high number in this column indicates a system that's experiencing high I/O, where processes are unable to run because they're waiting for I/O requests to complete. This could also indicate high disk latency.

#### `memory`

The memory section has four columns:

* `swpd`: The amount of swap memory used.
* `free`: The amount of free memory.
* `buff`: The amount of memory used for buffers.
* `cache`: The amount of memory used for cache.

> [!NOTE]
> The values are shown in kilobytes (KiB).

This section provides a high level overview of memory usage.

#### `swap`

The swap section has two columns:

* `si`: The amount of memory swapped in (moved from swap back into system memory) per second.
* `so`: The amount of memory swapped out (moved from system memory to swap) per second.

If a high `so` value is observed, it might represent a system that is running out of system memory and is moving pages to swap (swapping). A high `si` value indicates that previously swapped pages are being read back into memory, which also points to memory pressure.

#### `io`

The `io` section has two columns:

* `bi`: The number of blocks received from a block device (reads) per second.
* `bo`: The number of blocks sent to a block device (writes) per second.

> [!NOTE]
> These values are in blocks per second.

#### `system`

The `system` section has two columns:

* `in`: The number of interrupts per second.
* `cs`: The number of context switches per second.

A high number of interrupts per second might indicate a system that is busy with hardware devices (for example, network operations).

A high number of context switches might indicate a busy system with many short running processes. There's no good or bad number here.

#### `cpu`

This section has five columns:

* `us`: User space percent utilization.
* `sy`: System (kernel space) percent utilization.
* `id`: Percentage of time the CPU is idle.
* `wa`: Percentage of time the CPU is idle while waiting for processes with outstanding I/O.
* `st`: Percentage of time the CPU spent serving other virtual CPUs (not applicable to Azure).

The values are presented as percentages. These values are the same as those presented by the `mpstat` utility and provide a high level overview of CPU usage. Follow a similar process to "[Things to look out for](#mpstat)" for `mpstat` when reviewing these values.

### `uptime`

Lastly, for CPU related metrics, the `uptime` utility provides a broad overview of the system load with the load average values.

```azurecli-interactive
output=$(az vm run-command invoke --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --command-id RunShellScript --scripts 'uptime')
value=$(echo "$output" | jq -r '.value[0].message')
extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d')
echo "$extracted"
```

```output
16:55:53 up 9 min, 2 users, load average: 9.26, 2.91, 1.18
```

The load average displays three numbers. These numbers are for `1`, `5` and `15` minute intervals of system load.
+ +To interpret these values, it's important to know the number of available CPUs in the system, obtained from the `mpstat` output before. The value depends on the total CPUs, so as an example of the `mpstat` output the system has 8 CPUs, a load average of 8 would mean that ALL cores are loaded to a 100%. + +A value of `4` would mean that half of the CPUs were loaded at 100% (or a total of 50% load on ALL CPUs). In the previous output, the load average is `9.26`, which means the CPU is loaded at about 115%. + +The `1m`, `5m`, `15m` intervals help identify if load is increasing or decreasing over time. + +> [NOTE] +> The `nproc` command can also be used to obtain the number of CPUs. + +## Memory + +For memory, there are two commands that can obtain details about usage. + +### `free` + +The `free` command shows system memory utilization. + +To run it: + +```azurecli-interactive +output=$(az vm run-command invoke --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --command-id RunShellScript --scripts 'free -h') +value=$(echo "$output" | jq -r '.value[0].message') +extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d') +echo "$extracted" +``` + +The options and arguments are: + +* `-h`: Display values dynamically as human readable (for example: Mib, Gib, Tib) + +The output: + +```output + total used free shared buff/cache available +Mem: 31Gi 19Gi 12Gi 23Mi 87Mi 11Gi +Swap: 23Gi 0B 23Gi +``` + +From the output, look for the total system memory vs the available, and the used vs total swap. The available memory takes into consideration memory allocated for cache, which can be returned for user applications. + +Some swap usage is normal in modern kernels as some less often used memory pages can be moved to swap. + +### `swapon` + +The `swapon` command displays where swap is configured and the respective priorities of the swap devices or files. + +To run the command: + +```azurecli-interactive +output=$(az vm run-command invoke --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --command-id RunShellScript --scripts 'swapon -s') +value=$(echo "$output" | jq -r '.value[0].message') +extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d') +echo "$extracted" +``` + +The output: + +```output +Filename Type Size Used Priority +/dev/zram0 partition 16G 0B 100 +/mnt/swapfile file 8G 0B -2 +``` + +This information is important to verify if swap is configured on a location that isn't ideal, for example on a data or OS disk. In the Azure frame of reference, swap should be configured on the ephemeral drive as it provides the best performance. + +### Things to look out for + +* Keep in mind the memory is a finite resource, once both system memory (RAM) and swap is exhausted, the processes are to be killed by the Out Of Memorry killer (OOM). +* Verify swap isn't configured on a data disk or the OS disk, as that would create issues with I/O due to latency differences. Swap should be configured on the ephemeral drive. +* Keep also in consideration that it's common to see on the `free -h` output that the free values are close to zero, this behavior is due to page cache, the kernel releases those pages as needed. + +## I/O + +Disk I/O is one of the areas Azure suffers the most when throttled, as disks can reach `100ms+` latencies. The following commands help to identify these scenarios. + +### `iostat` + +The `iostat` utility is part of the `sysstat` package. 
It displays per block device usage statistics and helps identify block related performance issues. + +The `iostat` utility provides details for metrics such as throughput, latency, and queue size. These metrics help understand if disk I/O becomes a limiting factor. +To run, use the command: + +```azurecli-interactive +output=$(az vm run-command invoke --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --command-id RunShellScript --scripts 'iostat -dxtm 1 5') +value=$(echo "$output" | jq -r '.value[0].message') +extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d') +echo "$extracted" +``` + +The options and arguments are: + +* `-d`: Per device usage report. +* `-x`: Extended statistics. +* `-t`: Display the timestamp for each report. +* `-m`: Display in MB/s. +* `1`: The first numeric argument indicates how often to refresh the display in seconds. +* `2`: The second numeric argument indicates how many times the data refreshes. + +The output: + +```output +Linux 5.14.0-362.8.1.el9_3.x86_64 (alma9) 02/21/24 _x86_64_ (8 CPU) + +02/21/24 16:55:50 +Device r/s rMB/s rrqm/s %rrqm r_await rareq-sz w/s wMB/s wrqm/s %wrqm w_await wareq-sz d/s dMB/s drqm/s %drqm d_await dareq-sz f/s f_await aqu-sz %util +sda 1.07 0.02 0.00 0.00 1.95 20.40 23.25 24.55 3.30 12.42 113.75 1081.06 0.26 537.75 0.26 49.83 0.03 2083250.04 0.00 0.00 2.65 2.42 +sdb 16.99 0.67 0.36 2.05 2.00 40.47 65.26 0.44 1.55 2.32 1.32 6.92 0.00 0.00 0.00 0.00 0.00 0.00 30.56 1.30 0.16 7.16 +zram0 0.51 0.00 0.00 0.00 0.00 4.00 0.00 0.00 0.00 0.00 0.00 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + +``` + +The output has several columns that aren't important (extra columns due to the `-x` option), some of the important ones are: + +* `r/s`: Read operations per second (IOPS). +* `rMB/s`: Read megabytes per second. +* `r_await`: Read latency in milliseconds. +* `rareq-sz`: Average read request size in kilobytes. +* `w/s`: Write operations per second (IOPS). +* `wMB/s`: Write megabytes per second. +* `w_await`: Write latency in milliseconds. +* `wareq-size`: Average write request size in kilobytes. +* `aqu-sz`: Average queue size. + +#### Things to look out for + +* Look for `r/s` and `w/s` (IOPS) and `rMB/s` and `wMB/s` and verify that these values are within the limits of the given disk. If the values are close or higher the limits, the disk are going to be throttled, leading to high latency. This information can also be corroborated with the `%iowait` metric from `mpstat`. +* The latency is an excellent metric to verify if the disk is performing as expected. Normally, less than `9ms` is the expected latency for PremiumSSD, other offerings have different latency targets. +* The queue size is a great indicator of saturation. Normally, requests would be served near real time and the number remains close to one (as the queue never grows). A higher number could indicate disk saturation (that is, requests queuing up). There's no good or bad number for this metric. Understanding that anything higher than one means that requests are queuing up helps determine if there's disk saturation. + +### `lsblk` + +The `lsblk` utility shows the block devices attached to the system, while it doesn't provide performance metrics, it allows a quick overview of how these devices are configured and which mountpoints are being used. 
+ +To run, use the command: + +```azurecli-interactive +output=$(az vm run-command invoke --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --command-id RunShellScript --scripts 'lsblk') +value=$(echo "$output" | jq -r '.value[0].message') +extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d') +echo "$extracted" +``` + +The output: + +```output +NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS +sda 8:0 0 300G 0 disk +└─sda1 8:1 0 300G 0 part /mnt +sdb 8:16 0 30G 0 disk +├─sdb1 8:17 0 1M 0 part +├─sdb2 8:18 0 200M 0 part /boot/efi +├─sdb3 8:19 0 1G 0 part /boot +└─sdb4 8:20 0 28.8G 0 part / +zram0 252:0 0 16G 0 disk [SWAP] +``` + +#### Things to look out for + +* Look for where the devices are mounted. +* Verify swap it's not configured inside of a data disk or OS disk, if enabled. + +> Note: An easy way to correlate the block device to a LUN in Azure is by running `ls -lr /dev/disk/azure`. + +## Process + +Gathering details on a per process basis helps understand where the load of the system is coming from. + +The main utility to gather process statics is `pidstat` as it provides details per process for CPU, Memory, and I/O statistics. + +Lastly, a simple `ps` to sort process by top CPU, and memory usage complete the metrics. + +> [!NOTE] +> Since these commands display details about running processes, they need to run as root with `sudo`. This command allows all processes to be displayed and not just the user's. + +### `pidstat` + +The `pidstat` utility is also part of the `sysstat` package. It's like `mpstat` or iostat where it displays metrics for a given amount of time. By default, `pidstat` only displays metrics for processes with activity. + +Arguments for `pidstat` are the same for other `sysstat` utilities: + +* 1: The first numeric argument indicates how often to refresh the display in seconds. +* 2: The second numeric argument indicates how many times the data refreshes. + +> [!NOTE] +> The output can grow considerably if there are many processes with activity. 
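
If the output is too large to review, you can limit `pidstat` to specific tasks before collecting statistics. The following is a minimal sketch that uses the same `az vm run-command` wrapper as the rest of this article; the process name `stress-ng` is only an example and should be replaced with the command name you're interested in:

```bash
# Report statistics only for tasks whose command name matches "stress-ng"
output=$(az vm run-command invoke --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --command-id RunShellScript --scripts 'pidstat -C "stress-ng" 1 2')
value=$(echo "$output" | jq -r '.value[0].message')
extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d')
echo "$extracted"
```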
+ +#### Process CPU statistics + +To gather process CPU statistics, run `pidstat` without any options: + +The following commands can be used if you want to execute it from Azure CLI: + +```azurecli-interactive +output=$(az vm run-command invoke --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --command-id RunShellScript --scripts 'pidstat 1 2') +value=$(echo "$output" | jq -r '.value[0].message') +extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d') +echo "$extracted" +``` + +The output: + +```output +Linux 5.14.0-362.8.1.el9_3.x86_64 (alma9) 02/21/24 _x86_64_ (8 CPU) + +# Time UID PID %usr %system %guest %wait %CPU CPU Command +16:55:48 0 66 0.0% 1.0% 0.0% 0.0% 1.0% 0 kworker/u16:2-xfs-cil/sdb4 +16:55:48 0 70 0.0% 1.0% 0.0% 0.0% 1.0% 0 kworker/u16:6-xfs-cil/sdb4 +16:55:48 0 92 0.0% 1.0% 0.0% 0.0% 1.0% 3 kworker/3:1H-kblockd +16:55:48 0 308 0.0% 1.0% 0.0% 0.0% 1.0% 1 kworker/1:1H-kblockd +16:55:48 0 2068 0.0% 1.0% 0.0% 0.0% 1.0% 1 kworker/1:3-xfs-conv/sdb4 +16:55:48 0 2181 63.1% 1.0% 0.0% 35.9% 64.1% 5 stress-ng-cpu +16:55:48 0 2182 28.2% 0.0% 0.0% 70.9% 28.2% 6 stress-ng-cpu +16:55:48 0 2183 28.2% 0.0% 0.0% 69.9% 28.2% 7 stress-ng-cpu +16:55:48 0 2184 62.1% 0.0% 0.0% 36.9% 62.1% 0 stress-ng-cpu +16:55:48 0 2185 43.7% 0.0% 0.0% 54.4% 43.7% 2 stress-ng-cpu +16:55:48 0 2186 30.1% 0.0% 0.0% 68.0% 30.1% 7 stress-ng-cpu +16:55:48 0 2187 64.1% 0.0% 0.0% 34.0% 64.1% 3 stress-ng-cpu +``` + +The command displays per process usage for `%usr`, `%system`, `%guest` (not applicable to Azure), `%wait`, and total `%CPU` usage. + +##### Things to look out for + +* Look for processes with high %wait (iowait) percentage as it might indicate processes that are blocked waiting for I/O, which might also indicate disk saturation. +* Verify that no single process consumes 100% of the CPU as it might indicate a single threaded application. + +#### Process Memory statistics + +To gather process memory statistics, use the `-r` option: + +```azurecli-interactive +output=$(az vm run-command invoke --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --command-id RunShellScript --scripts 'pidstat -r 1 2') +value=$(echo "$output" | jq -r '.value[0].message') +extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d') +echo "$extracted" +``` + +The output: + +```output +Linux 5.14.0-362.8.1.el9_3.x86_64 (alma9) 02/21/24 _x86_64_ (8 CPU) + +# Time UID PID minflt/s majflt/s VSZ RSS %MEM Command +16:55:49 0 2199 119244.12 0.00 13.6G 7.4G 23.5% stress-ng-vm +16:55:49 0 2200 392911.76 0.00 13.6G 9.3G 29.7% stress-ng-vm +16:55:49 0 2211 1129.41 0.00 72.3M 3.2M 0.0% stress-ng-iomix +16:55:49 0 2220 0.98 0.00 71.8M 2.4M 0.0% stress-ng-iomix +16:55:49 0 2239 1129.41 0.00 72.3M 3.2M 0.0% stress-ng-iomix +16:55:49 0 2240 1129.41 0.00 72.3M 3.2M 0.0% stress-ng-iomix +16:55:49 0 2256 0.98 0.00 71.8M 2.4M 0.0% stress-ng-iomix +16:55:49 0 2265 1129.41 0.00 72.3M 3.2M 0.0% stress-ng-iomix +``` + +The metrics collected are: + +* `minflt/s`: Minor faults per second, this metric indicates the number of pages loaded from system memory (RAM). +* `mjflt/s`: Major faults per second, this metric indicates the number of pages loaded from disk (SWAP). +* `VSZ`: Virtual memory used in bytes. +* `RSS`: Resident memory used (actual allocated memory) in bytes. +* `%MEM`: Percentage of total memory used. +* `Command`: The name of the process. 
+ +##### Things to look out for + +* Look for major faults per second, as this value would indicate a process that is swapping pages to or from disk. This behavior could indicate memory exhaustion, and could lead to `OOM` events or performance degradation due to slower swap. +* Verify that a single process doesn't consume 100% of the available memory. This behavior could indicate a memory leak. + +> [!NOTE] +> the `--human` option can be used to display numbers in human readable format (that is, `Kb`, `Mb`, `GB`). + +#### Process I/O statistics + +To gather process memory statistics, use the `-d` option: + +```azurecli-interactive +output=$(az vm run-command invoke --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --command-id RunShellScript --scripts 'pidstat -d 1 2') +value=$(echo "$output" | jq -r '.value[0].message') +extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d') +echo "$extracted" +``` + +The output: + +```outputLinux 5.14.0-362.8.1.el9_3.x86_64 (alma9) 02/21/24 _x86_64_ (8 CPU) + +# Time UID PID kB_rd/s kB_wr/s kB_ccwr/s iodelay Command +16:55:50 0 86 55.4k 0.0B 0.0B 0 kworker/1:1-xfs-conv/sdb4 +16:55:50 0 2201 4.0k 194.1k 0.0B 0 stress-ng-iomix +16:55:50 0 2202 0.0B 99.0k 0.0B 0 stress-ng-iomix +16:55:50 0 2203 0.0B 23.8k 0.0B 0 stress-ng-iomix +16:55:50 0 2204 0.0B 15.8k 0.0B 0 stress-ng-iomix +16:55:50 0 2212 0.0B 103.0k 0.0B 0 stress-ng-iomix +16:55:50 0 2213 4.0k 99.0k 0.0B 0 stress-ng-iomix +16:55:50 0 2215 0.0B 178.2k 0.0B 0 stress-ng-iomix +16:55:50 0 2216 7.9k 237.6k 0.0B 0 stress-ng-iomix +16:55:50 0 2218 0.0B 95.0k 0.0B 0 stress-ng-iomix +16:55:50 0 2221 0.0B 15.8k 0.0B 0 stress-ng-iomix +``` + +The metrics collected are: + +* `kB_rd/s`: Read kilobytes per second. +* `kB_wr/s`: Write kilobytes per second. +* `Command`: Name of the process. + +##### Things to look out for + +* Look for single processes with high read/write rates per second. This information is a guidance for processes with I/O more than identifying issues. +Note: the `--human` option can be used to display numbers in human readable format (that is, `Kb`, `Mb`, `GB`). + +### Top CPU processes + +Lastly `ps` command displays system processes, and can be either sorted by CPU or Memory. 
+ +To sort by CPU and obtain the top 10 processes: + +```azurecli-interactive +output=$(az vm run-command invoke --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --command-id RunShellScript --scripts 'ps aux --sort=-%cpu | head -10') +value=$(echo "$output" | jq -r '.value[0].message') +extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d') +echo "$extracted" +``` + +```output +USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND +root 2190 94.8 0.0 73524 5588 pts/1 R+ 16:55 0:14 stress-ng --cpu 12 --vm 2 --vm-bytes 120% --iomix 4 --timeout 240 +root 2200 56.8 43.1 14248092 14175632 pts/1 R+ 16:55 0:08 stress-ng --cpu 12 --vm 2 --vm-bytes 120% --iomix 4 --timeout 240 +root 2192 50.6 0.0 73524 5836 pts/1 R+ 16:55 0:07 stress-ng --cpu 12 --vm 2 --vm-bytes 120% --iomix 4 --timeout 240 +root 2184 50.4 0.0 73524 5836 pts/1 R+ 16:55 0:07 stress-ng --cpu 12 --vm 2 --vm-bytes 120% --iomix 4 --timeout 240 +root 2182 44.3 0.0 73524 5808 pts/1 R+ 16:55 0:06 stress-ng --cpu 12 --vm 2 --vm-bytes 120% --iomix 4 --timeout 240 +root 2187 43.4 0.0 73524 5708 pts/1 R+ 16:55 0:06 stress-ng --cpu 12 --vm 2 --vm-bytes 120% --iomix 4 --timeout 240 +root 2199 42.9 33.0 14248092 10845272 pts/1 R+ 16:55 0:06 stress-ng --cpu 12 --vm 2 --vm-bytes 120% --iomix 4 --timeout 240 +root 2186 42.0 0.0 73524 5836 pts/1 R+ 16:55 0:06 stress-ng --cpu 12 --vm 2 --vm-bytes 120% --iomix 4 --timeout 240 +root 2191 41.2 0.0 73524 5592 pts/1 R+ 16:55 0:06 stress-ng --cpu 12 --vm 2 --vm-bytes 120% --iomix 4 --timeout 240 +``` + +## Top memory processes +To sort by `MEM%` and obtain the top 10 processes: + +```azurecli-interactive +output=$(az vm run-command invoke --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --command-id RunShellScript --scripts 'ps aux --sort=-%mem| head -10') +value=$(echo "$output" | jq -r '.value[0].message') +extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d') +echo "$extracted" +``` + +```output + PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND +root 2200 57.0 43.1 14248092 14175632 pts/1 R+ 16:55 0:08 stress-ng --cpu 12 --vm 2 --vm-bytes 120% --iomix 4 --timeout 240 +root 2199 43.0 33.0 14248092 10871144 pts/1 R+ 16:55 0:06 stress-ng --cpu 12 --vm 2 --vm-bytes 120% --iomix 4 --timeout 240 +root 1231 0.2 0.1 336308 33764 ? Sl 16:46 0:01 /usr/bin/python3 -u bin/WALinuxAgent-2.9.1.1-py3.8.egg -run-exthandlers +root 835 0.0 0.0 127076 24860 ? Ssl 16:46 0:00 /usr/bin/python3 -s /usr/sbin/firewalld --nofork --nopid +root 1199 0.0 0.0 30164 15600 ? Ss 16:46 0:00 /usr/bin/python3 -u /usr/sbin/waagent -daemon +root 1 0.2 0.0 173208 12356 ? Ss 16:46 0:01 /usr/lib/systemd/systemd --switched-root --system --deserialize 31 +root 966 0.0 0.0 3102460 10936 ? Sl 16:46 0:00 /var/lib/waagent/Microsoft.GuestConfiguration.ConfigurationforLinux-1.26.60/GCAgent/GC/gc_linux_service +panzer 1803 0.0 0.0 22360 8220 ? 
Ss 16:49 0:00 /usr/lib/systemd/systemd --user +root 2180 0.0 0.0 73524 6968 pts/1 SL+ 16:55 0:00 stress-ng --cpu 12 --vm 2 --vm-bytes 120% --iomix 4 --timeout 240 +``` + +## Putting all together + +A simple bash script can collect all details in a single run, and append the output to a file for later use: + +```azurecli-interactive +output=$(az vm run-command invoke --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --command-id RunShellScript --scripts 'mpstat -P ALL 1 2 && vmstat -w 1 5 && uptime && free -h && swapon && iostat -dxtm 1 1 && lsblk && ls -l /dev/disk/azure && pidstat 1 1 -h --human && pidstat -r 1 1 -h --human && pidstat -d 1 1 -h --human && ps aux --sort=-%cpu | head -20 && ps aux --sort=-%mem | head -20') +value=$(echo "$output" | jq -r '.value[0].message') +extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d') +echo "$extracted" +``` + +To run, you can create a file with the above contents, add execute permissions by running `chmod +x gather.sh`, and run with `sudo ./gather.sh`. + +This script saves the output of the commands in a file located in the same directory where the script was invoked. \ No newline at end of file diff --git a/scenarios/PostgresRagLlmDemo/README.md b/scenarios/PostgresRagLlmDemo/README.md index 9419ccb98..c32d4c412 100644 --- a/scenarios/PostgresRagLlmDemo/README.md +++ b/scenarios/PostgresRagLlmDemo/README.md @@ -11,12 +11,12 @@ ms.custom: innovation-engine, linux-related-content ## Introduction In this doc, we go over how to host the infrastructure required to run a basic LLM model with RAG capabilities on Azure. -We first set up a Postgres database capable of storing vector embeddings for documents/knowledge files that we want to use to -augment our queries. We then create an Azure OpenAI deployment capable of generating embeddings and answering questions using the latest 'gpt-4-turbo' model. -We then use a python script to fill our postgres database with embeddings from a sample "knowledge.txt" file containing information about an imaginary -resource called 'Zytonium'. Once the database is filled with those embeddings, we use the same python script to answer any -questions we have about 'Zytonium'. The script will search the database for relevant information for our query using an embeddings search and -then augment our query with that relevant information before being sent our LLM to answer. + +We first set up a Postgres database capable of storing vector embeddings for documents/knowledge files that we want to use to augment our queries. We then create an Azure OpenAI deployment capable of generating embeddings and answering questions using the latest 'gpt-4-turbo' model. + +We then use a python script to fill our postgres database with embeddings from a sample "knowledge.txt" file containing information about an imaginary resource called 'Zytonium'. Once the database is filled with those embeddings, we use the same python script to answer any questions we have about 'Zytonium'. + +The script will search the database for relevant information for our query using an embeddings search and then augment our query with that relevant information before being sent our LLM to answer. 
## Set up resource group @@ -29,7 +29,7 @@ export REGION="centralus" az group create \ --name $RG_NAME \ - --location $REGION \ + --location $REGION ``` ## Create OpenAI resources @@ -46,7 +46,7 @@ az cognitiveservices account create \ --resource-group $RG_NAME \ --location westus \ --kind OpenAI \ - --sku s0 \ + --sku s0 ``` ## Create OpenAI deployments @@ -123,10 +123,8 @@ psql \ ## Populate with data from knowledge file -The chat bot uses a local file called "knowledge.txt" as the sample document to generate embeddings for -and to store those embeddings in the newly created postgres database. Then any questions you ask will -be augmented with context from the "knowledge.txt" after searching the document for the most relevant -pieces of context using the embeddings. The "knowledge.txt" is about a fictional material called Zytonium. +The chat bot uses a local file called "knowledge.txt" as the sample document to generate embeddings for and to store those embeddings in the newly created postgres database. Then any questions you ask will be augmented with context from the "knowledge.txt" after searching the document for the most relevant pieces of context using the embeddings. The "knowledge.txt" is about a fictional material called Zytonium. + You can view the full knowledge.txt and the code for the chatbot by looking in the "scenarios/PostgresRagLlmDemo" directory. ```bash @@ -140,10 +138,10 @@ python chat.py --populate --api-key $API_KEY --endpoint $ENDPOINT --pguser $PGUS ## Run Chat bot -This final step prints out the command you can copy/paste into the terminal to run the chatbot. `cd ~/scenarios/PostgresRagLlmDemo && python chat.py --api-key $API_KEY --endpoint $ENDPOINT --pguser $PGUSER --phhost $PGHOST --pgpassword $PGPASSWORD --pgdatabase $PGDATABASE` +To run the chatbot, paste this following command to the terminal: `cd ~/scenarios/PostgresRagLlmDemo && python chat.py --api-key $API_KEY --endpoint $ENDPOINT --pguser $PGUSER --phhost $PGHOST --pgpassword $PGPASSWORD --pgdatabase $PGDATABASE` ```bash echo " To run the chatbot, see the last step for more info. 
" -``` +``` \ No newline at end of file diff --git a/scenarios/PostgresRagLlmDemo/__pycache__/db.cpython-310.pyc b/scenarios/PostgresRagLlmDemo/__pycache__/db.cpython-310.pyc new file mode 100644 index 000000000..f6aebc19d Binary files /dev/null and b/scenarios/PostgresRagLlmDemo/__pycache__/db.cpython-310.pyc differ diff --git a/scenarios/PostgresRagLlmDemo/chat.py b/scenarios/PostgresRagLlmDemo/chat.py index bc4450a3f..0b2cbaaa4 100644 --- a/scenarios/PostgresRagLlmDemo/chat.py +++ b/scenarios/PostgresRagLlmDemo/chat.py @@ -1,4 +1,5 @@ import argparse +import logging from textwrap import dedent from langchain_text_splitters import RecursiveCharacterTextSplitter @@ -6,6 +7,9 @@ from db import VectorDatabase +# Configure logging +logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser() parser.add_argument('--api-key', dest='api_key', type=str) parser.add_argument('--endpoint', dest='endpoint', type=str) @@ -33,6 +37,7 @@ def __init__(self): ) def load_file(self, text_file: str): + logging.info(f"Loading file: {text_file}") with open(text_file, encoding="UTF-8") as f: data = f.read() chunks = self.text_splitter.create_documents([data]) @@ -40,9 +45,7 @@ def load_file(self, text_file: str): text = chunk.page_content embedding = self.__create_embedding(text) self.db.save_embedding(i, text, embedding) - - def __create_embedding(self, text: str): - return self.api.embeddings.create(model="text-embedding-ada-002", input=text).data[0].embedding + logging.info("Done loading data.") def get_answer(self, question: str): question_embedding = self.__create_embedding(question) @@ -71,22 +74,22 @@ def get_answer(self, question: str): ) return response.choices[0].message.content + def __create_embedding(self, text: str): + return self.api.embeddings.create(model="text-embedding-ada-002", input=text).data[0].embedding + def main(): chat_bot = ChatBot() if args.populate: - print("Loading embedding data into database...") chat_bot.load_file("knowledge.txt") - print("Done loading data.") - return - - while True: - q = input("Ask a question (q to exit): ") - if q == "q": - break - print(chat_bot.get_answer(q)) + else: + while True: + q = input("Ask a question (q to exit): ") + if q == "q": + break + print(chat_bot.get_answer(q)) if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/scenarios/PostgresRagLlmDemo/requirements.txt b/scenarios/PostgresRagLlmDemo/requirements.txt index 9b2c99cd9..c640a75ec 100644 --- a/scenarios/PostgresRagLlmDemo/requirements.txt +++ b/scenarios/PostgresRagLlmDemo/requirements.txt @@ -1,4 +1,4 @@ azure-identity==1.17.1 -openai==1.42.0 +openai==1.55.3 psycopg2==2.9.9 -langchain-text-splitters==0.2.2 +langchain-text-splitters==0.2.2 \ No newline at end of file diff --git a/scenarios/README.md b/scenarios/README.md deleted file mode 100644 index 970ea482a..000000000 --- a/scenarios/README.md +++ /dev/null @@ -1,2 +0,0 @@ -This is a test -This is a test diff --git a/scenarios/SpringBoot/spring-boot.md b/scenarios/SpringBoot/spring-boot.md new file mode 100644 index 000000000..688514935 --- /dev/null +++ b/scenarios/SpringBoot/spring-boot.md @@ -0,0 +1,87 @@ +# SpringBootDemo + +Spring Boot application that we will deploy to Kubernetes clusters in Azure. 
+ +## Deploying to VM + +### Create and connect to the VM + +Log in and create VM: + +```bash +export RANDOM_ID="$(openssl rand -hex 3)" +export RESOURCE_GROUP="SpringBoot$RANDOM_ID" +export REGION="westus2" + +az group create --name ${RESOURCE_GROUP} --location ${REGION} +``` + +```bash +export VM_NAME="springboot-vm$RANDOM_ID" +export ADMIN_USERNAME="vm-admin-name$RANDOM_ID" +export VM_IMAGE="Ubuntu2204" + +az vm create \ + --resource-group ${RESOURCE_GROUP} \ + --name ${VM_NAME} \ + --image ${VM_IMAGE} \ + --admin-username ${ADMIN_USERNAME} \ + --generate-ssh-keys \ + --public-ip-sku Standard --size standard_d4s_v3 +``` + +Store the VM IP address for later: + +```bash +export VM_IP_ADDRESS=`az vm show -d -g ${RESOURCE_GROUP} -n ${VM_NAME} --query publicIps -o tsv` +``` + +Run the following to open port 8080 on the vm since SpringBoot uses it + +```bash +az vm open-port --port 8080 --resource-group ${RESOURCE_GROUP} --name ${VM_NAME} --priority 1100 +``` + +Connect to the VM: + +```bash +ssh -o StrictHostKeyChecking=no -t ${ADMIN_USERNAME}@${VM_IP_ADDRESS} +``` + +### Deploy the application + +Install Java and maven needed for application + +```bash +sudo apt-get update +sudo apt-get install default-jdk +sudo apt-get install maven +``` + +Now it's time to clone the project into the vm and give it proper permissions: + +```bash +cd /opt +sudo git clone https://github.com/dasha91/SpringBootDemo +cd SpringBootDemo +sudo chmod -R 777 /opt/SpringBootDemo/ +``` + +Run and deploy the app + +```bash +mvn clean install +mvn spring-boot:run +``` + +### Verify the application + +Finally, go to http://[$VM_IP_ADDRESS]:8080 to confirm that it's working :D :D :D + +To verify if the application is running, you can use the `curl` command: + +```bash +curl http://[$VM_IP_ADDRESS]:8080 +``` + +If the application is running, you should see the HTML content of the Spring Boot application's home page. \ No newline at end of file diff --git a/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/availability-performance/cluster-service-health-probe-mode-issues.md b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/availability-performance/cluster-service-health-probe-mode-issues.md new file mode 100644 index 000000000..3386bf637 --- /dev/null +++ b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/availability-performance/cluster-service-health-probe-mode-issues.md @@ -0,0 +1,244 @@ +--- +title: Troubleshoot the health probe mode for AKS cluster service load balancer +description: Diagnoses and fixes common issues with the health probe mode feature. +ms.date: 06/03/2024 +ms.reviewer: niqi, cssakscic, v-weizhu +ms.service: azure-kubernetes-service +ms.custom: sap:Node/node pool availability and performance, devx-track-azurecli, innovation-engine +--- + +# Troubleshoot issues when enabling the AKS cluster service health probe mode + +The health probe mode feature allows you to configure how Azure Load Balancer probes the health of the nodes in your Azure Kubernetes Service (AKS) cluster. You can choose between two modes: Shared and ServiceNodePort. The Shared mode uses a single health probe for all external traffic policy cluster services that use the same load balancer. In contrast, the ServiceNodePort mode uses a separate health probe for each service. The Shared mode can reduce the number of health probes and improve the performance of the load balancer, but it requires some additional components to work properly. 
To enable this feature, see [How to enable the health probe mode feature using the Azure CLI](#how-to-enable-the-health-probe-mode-feature-using-the-azure-cli). + +This article describes some common issues about using the health probe mode feature in an AKS cluster and helps you troubleshoot and resolve these issues. + +## Symptoms + +When creating or updating an AKS cluster by using the Azure CLI, if you enable the health probe mode feature using the `--cluster-service-load-balancer-health-probe-mode Shared` flag, the following issues occur: + +- The load balancer doesn't distribute traffic to the nodes as expected. + +- The load balancer reports unhealthy nodes even if they're healthy. + +- The health-probe-proxy sidecar container crashes or doesn't start. + +- The cloud-node-manager pod crashes or doesn't start. + +The following operations also happen: + +1. RP frontend checks if the request is valid and updates the corresponding property in the LoadBalancerProfile. + +2. RP async calls the cloud provider config secret reconciler to update the cloud provider config secret based on the LoadBalancerProfile. + +3. Overlaymgr reconciles the cloud-node-manager chart to enable the health-probe-proxy sidecar. + +## Initial troubleshooting + +To troubleshoot these issues, follow these steps: + +0. First, connect to your AKS cluster using the Azure CLI: + + ```azurecli + export RESOURCE_GROUP="aks-rg" + export AKS_CLUSTER_NAME="aks-cluster" + az aks get-credentials --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER_NAME --overwrite-existing + ``` + +1. Next, check the RP frontend log to see if the health probe mode in the LoadBalancerProfile is properly configured. You can use the `az aks show` command to view the LoadBalancerProfile property of your cluster. + + ```azurecli + export RESOURCE_GROUP="aks-rg" + export AKS_CLUSTER_NAME="aks-cluster" + az aks show --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER_NAME --query "networkProfile.loadBalancerProfile" + ``` + Results: + + + + ```output + { + "clusterServiceLoadBalancerHealthProbeMode": "Shared", + "managedOutboundIPs": null, + "outboundIPs": null, + "outboundIPPrefixes": null, + "allocatedOutboundPorts": null, + "effectiveOutboundIPs": [ + { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/MC_aks-rg_aks-cluster_eastus2/providers/Microsoft.Network/publicIPAddresses/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + } + ], + "idleTimeoutInMinutes": 30, + "loadBalancerSku": "standard", + "managedOutboundIPv6": null + } + ``` + +2. Check the cloud provider configuration. In modern AKS clusters, the cloud provider configuration is managed internally and the `ccp` namespace doesn't exist. Instead, check for cloud provider related resources and verify the cloud-node-manager pods are running properly: + + + ```bash + # Check for cloud provider related ConfigMaps in kube-system + kubectl get configmap -n kube-system | grep -i azure + + # Check if cloud-node-manager pods are running (indicates cloud provider integration is working) + kubectl get pods -n kube-system | grep cloud-node-manager + + # Check the azure-ip-masq-agent-config if it exists + kubectl get configmap azure-ip-masq-agent-config-reconciled -n kube-system -o yaml 2>/dev/null || echo "ConfigMap not found" + ``` + Results: + + + + ```output + configmap/azure-ip-masq-agent-config-reconciled 1 11h + + cloud-node-manager-rfb2w 2/2 Running 0 16m + ``` + +3. 
Check the chart or overlay daemonset cloud-node-manager to see if the health-probe-proxy sidecar container is enabled. You can use the `kubectl get ds` command to view the daemonset. + + ```shell + kubectl get ds -n kube-system cloud-node-manager -o yaml + ``` + Results: + + + + ```output + apiVersion: apps/v1 + kind: DaemonSet + metadata: + name: cloud-node-manager + namespace: kube-system + ... + spec: + template: + spec: + containers: + - name: cloud-node-manager + image: mcr.microsoft.com/oss/kubernetes/azure-cloud-node-manager:xxxxxxxx + - name: health-probe-proxy + image: mcr.microsoft.com/oss/kubernetes/azure-health-probe-proxy:xxxxxxxx + ... + ``` + +## Cause 1: The health probe mode isn't Shared or ServiceNodePort + +The health probe mode feature only works with these two modes. If you use any other mode, the feature won't work. + +### Solution 1: Use the correct health probe mode + +Make sure you use the Shared or ServiceNodePort mode when creating or updating your cluster. You can use the `--cluster-service-load-balancer-health-probe-mode` flag to specify the mode. + +## Cause 2: The toggle for the health probe mode feature is off + +The health probe mode feature is controlled by a toggle that can be enabled or disabled by the AKS team. If the toggle is off, the feature won't work. + +### Solution 2: Turn on the toggle + +Contact the AKS team to check if the toggle for the health probe mode feature is on or off. If it's off, ask them to turn it on for your subscription. + +## Cause 3: The load balancer SKU is Basic + +The health probe mode feature only works with the Standard Load Balancer SKU. If you use the Basic Load Balancer SKU, the feature won't work. + +### Solution 3: Use the Standard Load Balancer SKU + +Make sure you use the Standard Load Balancer SKU when creating or updating your cluster. You can use the `--load-balancer-sku` flag to specify the SKU. + +## Cause 4: The feature isn't registered + +The health probe mode feature requires you to register the feature on your subscription. If the feature isn't registered, it won't work. + +### Solution 4: Register the feature + +Make sure you register the feature for your subscription before creating or updating your cluster. You can use the `az feature register` command to register the feature. + +```azurecli +export FEATURE_NAME="EnableSLBSharedHealthProbePreview" +export PROVIDER_NAMESPACE="Microsoft.ContainerService" +az feature register --name $FEATURE_NAME --namespace $PROVIDER_NAMESPACE +``` +Results: + + + +```output +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/providers/Microsoft.Features/providers/Microsoft.ContainerService/features/EnableAKSClusterServiceLoadBalancerHealthProbeMode", + "name": "Microsoft.ContainerService/EnableAKSClusterServiceLoadBalancerHealthProbeMode", + "properties": { + "state": "Registering" + }, + "type": "Microsoft.Features/providers/features" +} +``` + +## Cause 5: The Kubernetes version is earlier than v1.28.0 + +The health probe mode feature requires a minimum Kubernetes version of v1.28.0. If you use an older version, the feature won't work. + +### Solution 5: Upgrade the Kubernetes version + +Make sure you use Kubernetes v1.28.0 or a later version when creating or updating your cluster. You can use the `--kubernetes-version` flag to specify the version. + +## Known issues + +For Windows, the kube-proxy component doesn't start until you create the first non-HPC pod in a node. 
This issue affects the health probe mode feature and causes the load balancer to report unhealthy nodes. It will be fixed in a future update. + +## How to enable the health probe mode feature using the Azure CLI + +To enable the health probe mode feature, run one of the following commands: + +Enable `ServiceNodePort` health probe mode (default) for a cluster: + +```shell +export RESOURCE_GROUP="aks-rg" +export AKS_CLUSTER_NAME="aks-cluster" +az aks update --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER_NAME --cluster-service-load-balancer-health-probe-mode ServiceNodePort +``` +Results: + +```output +{ + "name": "aks-cluster", + "location": "eastus2", + "resourceGroup": "aks-rg", + "kubernetesVersion": "1.28.x", + "provisioningState": "Succeeded", + "loadBalancerProfile": { + "clusterServiceLoadBalancerHealthProbeMode": "ServiceNodePort", + ... + }, + ... +} +``` + +Enable `Shared` health probe mode for a cluster: + +```shell +export RESOURCE_GROUP="MyAksResourceGroup" +export AKS_CLUSTER_NAME="MyAksCluster" +az aks update --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER_NAME --cluster-service-load-balancer-health-probe-mode Shared +``` + +Results: + +```output +{ + "name": "MyAksCluster", + "location": "eastus2", + "resourceGroup": "MyAksResourceGroup", + "kubernetesVersion": "1.28.x", + "provisioningState": "Succeeded", + "loadBalancerProfile": { + "clusterServiceLoadBalancerHealthProbeMode": "Shared", + ... + }, + ... +} +``` + +[!INCLUDE [Azure Help Support](../../../includes/azure-help-support.md)] diff --git a/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/availability-performance/node-not-ready-after-being-healthy.md b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/availability-performance/node-not-ready-after-being-healthy.md new file mode 100644 index 000000000..0ece14b57 --- /dev/null +++ b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/availability-performance/node-not-ready-after-being-healthy.md @@ -0,0 +1,181 @@ +--- +title: Node Not Ready status after node is in a healthy state +description: Troubleshoot scenarios in which an Azure Kubernetes Service (AKS) cluster node goes to a Not Ready status after is in a healthy state. +ms.date: 08/27/2024 +ms.reviewer: rissing, chiragpa, momajed, v-leedennis +ms.service: azure-kubernetes-service +#Customer intent: As an Azure Kubernetes user, I want to prevent an Azure Kubernetes Service (AKS) cluster node from regressing to a Not Ready status so that I can continue to use the cluster node successfully. +ms.custom: sap:Node/node pool availability and performance, innovation-engine +--- + +# Troubleshoot a change in a healthy node to Not Ready status + +This article discusses a scenario in which the status of an Azure Kubernetes Service (AKS) cluster node changes to **Not Ready** after the node is in a healthy state for some time. This article outlines the particular cause and provides a possible solution. + +## Prerequisites + +- The Kubernetes [kubectl](https://kubernetes.io/docs/reference/kubectl/overview/) tool. To install kubectl by using Azure CLI, run the [az aks install-cli](/cli/azure/aks#az-aks-install-cli) command. +- The Kubernetes [kubelet](https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/) tool. +- The Kubernetes [containerd](https://kubernetes.io/docs/setup/production-environment/container-runtimes/#containerd) tool. 
+- The following Linux tools: + - [awk](https://man7.org/linux/man-pages/man1/awk.1p.html) + - [head](https://man7.org/linux/man-pages/man1/head.1.html) + - [journalctl](https://man7.org/linux/man-pages/man1/journalctl.1.html) + - [ps](https://man7.org/linux/man-pages/man1/ps.1.html) + - [sort](https://man7.org/linux/man-pages/man1/sort.1.html) + - [watch](https://man7.org/linux/man-pages/man1/watch.1.html) + +## Connect to the AKS cluster + +Before you can troubleshoot the issue, you must connect to the AKS cluster. To do so, run the following commands: + +```bash +export RANDOM_SUFFIX=$(head -c 3 /dev/urandom | xxd -p) +export RESOURCE_GROUP="my-resource-group$RANDOM_SUFFIX" +export AKS_CLUSTER="my-aks-cluster$RANDOM_SUFFIX" +az aks get-credentials --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER --overwrite-existing +``` + +## Symptoms + +The status of a cluster node that has a healthy state (all services running) unexpectedly changes to **Not Ready**. To view the status of a node, run the following [kubectl describe](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#describe) command: + +```bash +kubectl describe nodes +``` + +## Cause + +The [kubelet](https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/) stopped posting its **Ready** status. + +Examine the output of the `kubectl describe nodes` command to find the [Conditions](https://kubernetes.io/docs/reference/node/node-status/#condition) field and the [Capacity and Allocatable](https://kubernetes.io/docs/reference/node/node-status/#capacity) blocks. Do the content of these fields appear as expected? (For example, in the **Conditions** field, does the `message` property contain the "kubelet is posting ready status" string?) In this case, if you have direct Secure Shell (SSH) access to the node, check the recent events to understand the error. Look within the */var/log/syslog* file instead of */var/log/messages* (not available on all distributions). Or, generate the kubelet and container daemon log files by running the following shell commands: + +```bash +# First, identify the NotReady node +export NODE_NAME=$(kubectl get nodes --no-headers | grep NotReady | awk '{print $1}' | head -1) + +if [ -z "$NODE_NAME" ]; then + echo "No NotReady nodes found" + kubectl get nodes +else + echo "Found NotReady node: $NODE_NAME" + + # Use kubectl debug to access the node + kubectl debug node/$NODE_NAME -it --image=mcr.microsoft.com/dotnet/runtime-deps:6.0 -- chroot /host bash -c " + echo '=== Checking syslog ===' + if [ -f /var/log/syslog ]; then + tail -100 /var/log/syslog + else + echo 'syslog not found' + fi + + echo '=== Checking kubelet logs ===' + journalctl -u kubelet --no-pager | tail -100 + + echo '=== Checking containerd logs ===' + journalctl -u containerd --no-pager | tail -100 + " +fi +``` + +After you run these commands, examine the syslog and daemon log files for more information about the error. + +## Solution + +### Step 1: Check for changes in network-level + +If all cluster nodes regressed to a **Not Ready** status, check whether any changes occurred at the network level. Examples of network-level changes include: + +- Domain name system (DNS) changes +- Firewall rule changes, such as port, fully qualified domain names (FQDNs), and so on. +- Added network security groups (NSGs) +- Applied or changed route table configurations for AKS traffic + +If there were changes at the network level, make any necessary corrections. 
If you have direct Secure Shell (SSH) access to the node, you can use the `curl` or `telnet` command to check the connectivity to [AKS outbound requirements](/azure/aks/outbound-rules-control-egress). After you've fixed the issues, stop and restart the nodes. If the nodes stay in a healthy state after these fixes, you can safely skip the remaining steps. + +### Step 2: Stop and restart the nodes + +If only a few nodes regressed to a **Not Ready** status, simply stop and restart the nodes. This action alone might return the nodes to a healthy state. Then, check [Azure Kubernetes Service diagnostics overview](/azure/aks/concepts-diagnostics) to determine whether there are any issues, such as the following: + +- Node faults +- Source network address translation (SNAT) failures +- Node input/output operations per second (IOPS) performance issues +- Other issues + +If the diagnostics don't discover any underlying issues and the nodes returned to Ready status, you can safely skip the remaining steps. + +### Step 3: Fix SNAT issues for public AKS API clusters + +Did AKS diagnostics uncover any SNAT issues? If so, take some of the following actions, as appropriate: + +- Check whether your connections remain idle for a long time and rely on the default idle time-out to release their ports. If the connections exhibit this behavior, you might have to reduce the default time-out of 30 minutes. + +- Determine how your application creates outbound connectivity (for example, through a code review or a packet capture). + +- Determine whether this activity represents the expected behavior or, instead, shows that the application is misbehaving. Use metrics and logs in Azure Monitor to substantiate your findings. For example, you can use the **Failed** category of the **SNAT Connections** metric. + +- Evaluate whether appropriate patterns are followed. + +- Evaluate whether you should mitigate SNAT port exhaustion by using extra outbound IP addresses and more allocated outbound ports. For more information, see [Scale the number of managed outbound public IPs](/azure/aks/load-balancer-standard#scale-the-number-of-managed-outbound-public-ips) and [Configure the allocated outbound ports](/azure/aks/load-balancer-standard#configure-the-allocated-outbound-ports). + +For more information about how to troubleshoot SNAT port exhaustion, see [Troubleshoot SNAT port exhaustion on AKS nodes](../connectivity/snat-port-exhaustion.md?tabs=for-a-linux-pod). + +### Step 4: Fix IOPS performance issues + +If AKS diagnostics uncover issues that reduce IOPS performance, take some of the following actions, as appropriate: + +- To increase IOPS on virtual machine (VM) scale sets, deploy a new node pool that uses a larger disk size with better IOPS performance, as shown in the sketch after this list. Resizing a scale set directly isn't supported. For more information on resizing node pools, see [Resize node pools in Azure Kubernetes Service (AKS)](/azure/aks/resize-node-pool?tabs=azure-cli). + +- Increase the node SKU size for more memory and CPU processing capability. + +- Consider using [Ephemeral OS](/azure/aks/cluster-configuration#ephemeral-os) disks. + +- Limit the CPU and memory usage for pods. These limits help prevent node CPU consumption and out-of-memory situations. + +- Use scheduling topology methods to add more nodes and distribute the load among the nodes. For more information, see [Pod topology spread constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/).
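For example, a replacement node pool with a larger VM size and OS disk might be created as in the following sketch. The pool name, VM size, and disk size are placeholders; adjust them for your workload, and reuse the `$RESOURCE_GROUP` and `$AKS_CLUSTER` variables exported earlier in this article:

```azurecli
# Sketch only: pool name, VM size, and OS disk size are placeholder values.
az aks nodepool add \
    --resource-group $RESOURCE_GROUP \
    --cluster-name $AKS_CLUSTER \
    --name largerpool \
    --node-count 3 \
    --node-vm-size Standard_D8s_v3 \
    --node-osdisk-size 256
```

After the new pool is ready, cordon and drain the nodes in the old pool so that workloads move to the nodes with faster disks.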
+ +### Step 5: Fix threading issues + +Kubernetes components such as kubelets and [containerd runtimes](https://kubernetes.io/docs/setup/production-environment/container-runtimes/#containerd) rely heavily on threading, and they spawn new threads regularly. If the allocation of new threads is unsuccessful, this failure can affect service readiness, as follows: + +- The node status changes to **Not Ready**, but it's restarted by a remediator, and is able to recover. + +- In the */var/log/messages* and */var/log/syslog* log files, there are repeated occurrences of the following error entries: + + > pthread_create failed: Resource temporarily unavailable by various processes + + The processes that are cited include containerd and possibly kubelet. + +- The node status changes to **Not Ready** soon after the `pthread_create` failure entries are written to the log files. + +Process IDs (PIDs) represent threads. The default number of PIDs that a pod can use might be dependent on the operating system. However, the default number is at least 32,768. This amount is more than enough PIDs for most situations. Are there any known application requirements for higher PID resources? If there aren't, then even an eight-fold increase to 262,144 PIDs might not be enough to accommodate a high-resource application. + +Instead, identify the offending application, and then take the appropriate action. Consider other options, such as increasing the VM size or upgrading AKS. These actions can mitigate the issue temporarily, but they aren't a guarantee that the issue won't reappear again. + +To monitor the thread count for each control group (cgroup) and print the top eight cgroups, run the following shell command: + +```bash +# Show current thread count for each cgroup (top 8) +ps -e -w -o "thcount,cgname" --no-headers | awk '{a[$2] += $1} END{for (i in a) print a[i], i}' | sort --numeric-sort --reverse | head --lines=8 +``` + +For more information, see [Process ID limits and reservations](https://kubernetes.io/docs/concepts/policy/pid-limiting/). + +Kubernetes offers two methods to manage PID exhaustion at the node level: + +1. Configure the maximum number of PIDs that are allowed on a pod within a kubelet by using the `--pod-max-pids` parameter. This configuration sets the `pids.max` setting within the cgroup of each pod. You can also use the `--system-reserved` and `--kube-reserved` parameters to configure the system and kubelet limits, respectively. + +1. Configure PID-based eviction. + +> [!NOTE] +> By default, neither of these methods are set up. Additionally, you can't currently configure either method by using [Node configuration for AKS node pools](/azure/aks/custom-node-configuration). + +### Step 6: Use a higher service tier + +You can make sure that the AKS API server has high availability by using a higher service tier. For more information, see the [Azure Kubernetes Service (AKS) Uptime SLA](/azure/aks/uptime-sla). + +## More information + +- To view the health and performance of the AKS API server and kubelets, see [Managed AKS components](/azure/aks/monitor-aks#level-2---managed-aks-components). + +- For general troubleshooting steps, see [Basic troubleshooting of node not ready failures](node-not-ready-basic-troubleshooting.md). 
diff --git a/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/availability-performance/node-not-ready-custom-script-extension-errors.md b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/availability-performance/node-not-ready-custom-script-extension-errors.md new file mode 100644 index 000000000..a08f76e3d --- /dev/null +++ b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/availability-performance/node-not-ready-custom-script-extension-errors.md @@ -0,0 +1,150 @@ +--- +title: Node Not Ready because of custom script extension (CSE) errors +description: Troubleshoot scenarios in which custom script extension (CSE) errors cause Node Not Ready states in an Azure Kubernetes Service (AKS) cluster node pool. +ms.date: 06/08/2024 +ms.reviewer: rissing, chiragpa, momajed, v-leedennis +ms.service: azure-kubernetes-service +ms.custom: sap:Node/node pool availability and performance, devx-track-azurecli, innovation-engine +author: MicrosoftDocs +ms.author: MicrosoftDocs +--- + +# Troubleshoot node not ready failures caused by CSE errors + +This article helps you troubleshoot scenarios in which a Microsoft Azure Kubernetes Service (AKS) cluster isn't in the `Succeeded` state and an AKS node isn't ready within a node pool because of custom script extension (CSE) errors. + +## Prerequisites + +- [Azure CLI](/cli/azure/install-azure-cli) + +## Symptoms + +Because of CSE errors, an AKS cluster node isn't ready within a node pool, and the AKS cluster isn't in the `Succeeded` state. + +## Cause + +The node extension deployment fails and returns more than one error code when you provision the [kubelet](https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/) and other components. This is the most common cause of errors. To verify that the node extension deployment is failing when you provision the kubelet, follow these steps: + +1. To better understand the current failure on the cluster, run the [az aks show](/cli/azure/aks#az-aks-show) and [az resource update](/cli/azure/resource#az-resource-update) commands to set up debugging: + + Set your environment variables and run the commands to view the cluster's status and debug information. + + ```azurecli + export RG_NAME="my-aks-rg" + export CLUSTER_NAME="myakscluster" + clusterResourceId=$(az aks show \ + --resource-group $RG_NAME --name $CLUSTER_NAME --output tsv --query id) + az resource update --debug --verbose --ids $clusterResourceId + ``` + + Results: + + + + ```output + { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/my-aks-rg-xxx/providers/Microsoft.ContainerService/managedClusters/myaksclusterxxx", + "name": "myaksclusterxxx", + "type": "Microsoft.ContainerService/managedClusters", + "location": "eastus2", + "tags": null, + "properties": { + ... + } + } + ``` + +1. Check the debugging output and the error messages that you received from the `az resource update` command against the error list in the [CSE helper](https://github.com/Azure/AgentBaker/blob/1bf9892afd715a34e0c6b7312e712047f10319ce/parts/linux/cloud-init/artifacts/cse_helpers.sh) executable file on GitHub. + +If any of the errors involve the CSE deployment of the kubelet, then you've verified that the scenario that's described here's the cause of the Node Not Ready failure. + +In general, exit codes identify the specific issue that's causing the failure. For example, you see messages such as "Unable to communicate with API server" or "Unable to connect to internet." 
Or the exit codes might alert you to API network time-outs, or a node fault that needs a replacement. + +## Solution 1: Make sure your custom DNS server is configured correctly + +Set up your custom Domain Name System (DNS) server so that it can do name resolution correctly. Configure the server to meet the following requirements: + +- If you're using custom DNS servers, make sure that the servers are healthy and reachable over the network. + +- Make sure that custom DNS servers have the required [conditional forwarders to the Azure DNS IP address](/azure/private-link/private-endpoint-dns#on-premises-workloads-using-a-dns-forwarder) (or the forwarder to that address). + +- Make sure that your private AKS DNS zone is linked to your custom DNS virtual networks if they're hosted on Azure. + +- Don't use the Azure DNS IP address with the IP addresses of your custom DNS server. Doing this isn't recommended. + +- Avoid using IP addresses instead of the DNS server in DNS settings. You can use Azure CLI commands to check for this situation on a Virtual Machine Scale Set or availability set. + + - For Virtual Machine Scale Set nodes, use the [az vmss run-command invoke](/cli/azure/vmss/run-command#az-vmss-run-command-invoke) command: + + > **Important:** You must specify the `--instance-id` of the VM scale set. Here, we demonstrate querying for a valid instance ID (e.g., 0) and a likely VMSS in an AKS node resource group. Update values appropriately to match your environment. + + ```azurecli + export NODE_RESOURCE_GROUP=$(az aks show --resource-group $RG_NAME --name $CLUSTER_NAME --query nodeResourceGroup -o tsv) + export VMSS_NAME=$(az vmss list --resource-group $NODE_RESOURCE_GROUP --query "[0].name" -o tsv) + export DNS_IP_ADDRESS="10.0.0.10" + export INSTANCE_ID=$(az vmss list-instances --resource-group $NODE_RESOURCE_GROUP --name $VMSS_NAME --query "[0].instanceId" -o tsv) + export API_FQDN=$(az aks show --resource-group $RG_NAME --name $CLUSTER_NAME --query fqdn -o tsv) + + az vmss run-command invoke \ + --resource-group $NODE_RESOURCE_GROUP \ + --name $VMSS_NAME \ + --instance-id $INSTANCE_ID \ + --command-id RunShellScript \ + --output tsv \ + --query "value[0].message" \ + --scripts "telnet $DNS_IP_ADDRESS 53" + az vmss run-command invoke \ + --resource-group $NODE_RESOURCE_GROUP \ + --name $VMSS_NAME \ + --instance-id $INSTANCE_ID \ + --command-id RunShellScript \ + --output tsv \ + --query "value[0].message" \ + --scripts "nslookup $API_FQDN $DNS_IP_ADDRESS" + ``` + + - For VM availability set nodes, use the [az vm run-command invoke](/cli/azure/vm/run-command#az-vm-run-command-invoke) command: + + > **Important:** You must specify the `--name` of a valid VM in an availability set in your resource group. Here is a template for running network checks. + + ```azurecli + az vm run-command invoke \ + --resource-group $RG_NAME \ + --name $AVAILABILITY_SET_VM \ + --command-id RunShellScript \ + --output tsv \ + --query "value[0].message" \ + --scripts "telnet $DNS_IP_ADDRESS 53" + az vm run-command invoke \ + --resource-group $RG_NAME \ + --name $AVAILABILITY_SET_VM \ + --command-id RunShellScript \ + --output tsv \ + --query "value[0].message" \ + --scripts "nslookup $API_FQDN $DNS_IP_ADDRESS" + ``` + +For more information, see [Name resolution for resources in Azure virtual networks](/azure/virtual-network/virtual-networks-name-resolution-for-vms-and-role-instances) and [Hub and spoke with custom DNS](/azure/aks/private-clusters#hub-and-spoke-with-custom-dns). 
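If your cluster is private, you can also confirm that the private DNS zone is linked to the virtual network that hosts your custom DNS servers. The following sketch assumes the default setup in which AKS places the private DNS zone in the node resource group, and it reuses the `$NODE_RESOURCE_GROUP` variable from the previous example:

```azurecli
# Sketch only: assumes the private DNS zone lives in the AKS node resource group (the default).
export PRIVATE_DNS_ZONE=$(az network private-dns zone list --resource-group $NODE_RESOURCE_GROUP --query "[0].name" -o tsv)

az network private-dns link vnet list \
    --resource-group $NODE_RESOURCE_GROUP \
    --zone-name $PRIVATE_DNS_ZONE \
    --query "[].{link:name, vnet:virtualNetwork.id}" \
    --output table
```

If the virtual network that hosts your custom DNS servers isn't listed, add a link to it so that the API server name can be resolved.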
+ +## Solution 2: Fix API network time-outs + +Make sure that the API server can be reached and isn't subject to delays. To do this, follow these steps: + +- Check the AKS subnet to see whether the assigned network security group (NSG) is blocking the egress traffic port 443 to the API server. + +- Check the node itself to see whether the node has another NSG that's blocking the traffic. + +- Check the AKS subnet for any assigned route table. If a route table has a network virtual appliance (NVA) or firewall, make sure that port 443 is available for egress traffic. For more information, see [Control egress traffic for cluster nodes in AKS](/azure/aks/limit-egress-traffic). + +- If the DNS resolves names successfully and the API is reachable, but the node CSE failed because of an API time-out, take the appropriate action as shown in the following table. + + | Set type | Action | + | -------- | ------ | + | VM availability set | Delete the node from the Azure portal and the AKS API by using the [kubectl delete](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#delete) node command, and then scale up the cluster again. | + | Virtual Machine Scale Set | Either reimage the node from the Azure portal, or delete the node, and then scale up the cluster again. To delete the specific node, use [az aks nodepool delete-machines](/cli/azure/aks/nodepool#az-aks-nodepool-delete-machines) command. It will cordon & drain first and then delete the node. | + +- If the requests are being throttled by the AKS API server, upgrade to a higher service tier. For more information, see [Pricing tiers for AKS](/azure/aks/free-standard-pricing-tiers). + +## More information + +- For general troubleshooting steps, see [Basic troubleshooting of Node Not Ready failures](node-not-ready-basic-troubleshooting.md). diff --git a/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/client-ip-address-cannot-access-api-server.md b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/client-ip-address-cannot-access-api-server.md new file mode 100644 index 000000000..1c6380502 --- /dev/null +++ b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/client-ip-address-cannot-access-api-server.md @@ -0,0 +1,122 @@ +--- +title: Client IP address can't access the API server +description: Troubleshoot issues caused when the client IP address can't access the API server on an Azure Kubernetes Service (AKS) cluster. +ms.topic: article +ms.date: 06/11/2024 +author: microsoftdocs +ms.author: microsoftdocs +ms.custom: sap:Connectivity, innovation-engine +--- + +# Client IP address can't access the API server + +This article describes how to fix issues that occur when you can't connect to an Azure Kubernetes Service (AKS) cluster because your client IP address can't access the AKS API server. + +## Prerequisites + +- [Azure CLI](/cli/azure/install-azure-cli). +- The client URL ([curl](https://techcommunity.microsoft.com/t5/containers/tar-and-curl-come-to-windows/ba-p/382409)) tool. + +## Symptoms + +### [Azure portal](#tab/azure-portal) + +When you try to access Kubernetes resources such as mamespaces and workloads from the Azure portal, you might encounter the following errors: + +> Network error +> +> Unable to reach the api server 'https://\' or api server is too busy to respond. Check your network settings and refresh to try again. 
+ +:::image type="content" source="media/client-ip-address-cannot-access-api-server/network-error.png" alt-text="Screenshot of mamespaces in the AKS resource." lightbox="media/client-ip-address-cannot-access-api-server/network-error.png"::: + +### [Azure CLI](#tab/azure-cli) + +When you try to connect to a cluster using the Azure CLI, you might see the following errors: + +```output +"Unhandled Error" err="couldn't get current server API group list: Get \"https://:443/api?timeout=32s\": dial tcp :443: i/o timeout" + +Unable to connect to the server: dial tcp :443: i/o timeout + +Unable to connect to the server: dial tcp :443: connectex: A connection attempt failed because the connected party did not properly respond after a period, or established connection failed because connected host has failed to respond. +``` + +--- + +## Cause + +[API server-authorized IP ranges](/azure/aks/api-server-authorized-ip-ranges) may have been enabled on the cluster's API server, but the client's IP address wasn't included in the IP ranges. To check whether this feature has been enabled, see if the following [az aks show](/cli/azure/aks#az-aks-show) command in Azure CLI produces a list of IP ranges: + +```azurecli +az aks show --resource-group ${RG_NAME} \ + --name ${CLUSTER_NAME} \ + --query apiServerAccessProfile.authorizedIpRanges +``` + +## Solution + +Look at the cluster's API server-authorized ranges, and add your client's IP address within that range. + +> [!NOTE] +> +> 1. Do you access the API server from a corporate network where traffic is routed through a proxy server or firewall? Then ask your network administrator before you add your client IP address to the list of authorized ranges for the API server. +> +> 1. Also ask your cluster administrator before you add your client IP address, because there might be security concerns with adding a temporary IP address to the list of authorized ranges. + +### [Azure portal](#tab/azure-portal) + +1. Navigate to the cluster from the Azure portal. +2. In the left menu, locate **Settings** and then select **Networking**. +3. On the **Networking** page, select the **Overview** tab. +4. Select **Manage** under **Resource settings**. +5. In the **Authorized IP ranges** pane, add your client IP address as shown in the following screenshot: + + :::image type="content" source="media/client-ip-address-cannot-access-api-server/authorized-ip-ranges.png" alt-text="Screenshot of Authorized-ip-ranges pane." lightbox="media/client-ip-address-cannot-access-api-server/authorized-ip-ranges.png"::: + +### [Azure CLI](#tab/azure-cli) + +1. Get your client IP address by running this [curl](https://curl.se/docs/manpage.html) command: + + ```azurecli + export CLIENT_IP=$(curl --silent https://ipinfo.io/ip | tr -d '\n') + echo $CLIENT_IP + ``` + + Results: + + + + ```output + 0.255.127.63 + ``` + +2. Update the API server-authorized range with the [az aks update](/cli/azure/aks#az-aks-update) command in Azure CLI, using your client IP address: + + ```azurecli + az aks update --resource-group $RG_NAME \ + --name $CLUSTER_NAME \ + --api-server-authorized-ip-ranges $CLIENT_IP + ``` + + Results: + + + + ```output + { + "apiServerAccessProfile": { + "authorizedIpRanges": [ + "0.255.127.63/32" + ], + ... + }, + ... + "name": "aks-cluster-xxx", + "resourceGroup": "aks-rg-xxx", + ... 
+ } + ``` + +--- + +[!INCLUDE [Azure Help Support](../../../includes/azure-help-support.md)] diff --git a/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/tcp-timeouts-dial-tcp-nodeip-10250-io-timeout.md b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/tcp-timeouts-dial-tcp-nodeip-10250-io-timeout.md new file mode 100644 index 000000000..9c1406b85 --- /dev/null +++ b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/tcp-timeouts-dial-tcp-nodeip-10250-io-timeout.md @@ -0,0 +1,50 @@ +--- +title: TCP 10250 I/O timeout errors when connecting to a node's Kubelet for log retrieval +description: Learn how to troubleshoot TCP 10250 I/O timeout errors that occur when retrieving kubectl logs from a pod in an Azure Kubernetes Service (AKS) cluster. +ms.topic: article +ms.date: 06/03/2025 +author: '' +ms.author: '' +ms.custom: sap:Connectivity, innovation-engine +ms.reviewer: chiragpa, nickoman, v-leedennis +ms.service: azure-kubernetes-service +keywords: +#Customer intent: As an Azure Kubernetes user, I want to troubleshoot why I'm receiving TCP timeouts (such as 'dial tcp :10250: i/o timeout') so that I can use my Azure Kubernetes Service (AKS) cluster successfully. +--- + +# 10250 I/O timeouts error when running kubectl log command + +TCP timeouts can be caused by blockages of internal traffic that runs between nodes. To investigate TCP time-outs, verify that this traffic isn't being blocked, for example, by [network security groups](/azure/aks/concepts-security#azure-network-security-groups) (NSGs) on the subnet for your cluster nodes. + +## Connect to the cluster + +First, connect to your Azure Kubernetes Service (AKS) cluster by running the following command: + +```bash +export RESOURCE_GROUP= +export CLUSTER_NAME= + +az aks get-credentials --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME +``` + +## Symptoms + +Tunnel functionalities, such as `kubectl logs` and code execution, work only for pods that are hosted on nodes on which tunnel service pods are deployed. Pods on other nodes that have no tunnel service pods cannot reach to the tunnel. When viewing the logs of these pods, you receive the following error message: + +```bash +kubectl logs $POD_NAME +``` + +Results: + + + +```output +Error from server: Get "https://aks-agentpool-xxxxxxxxx-vmssxxxxxxxxx:10250/containerLogs/vsm-mba-prod/mba-api-app-xxxxxxxxxx/technosvc": dial tcp :10250: i/o timeout +``` + +## Solution + +To resolve this issue, allow traffic on port 10250 as described in this [article](tunnel-connectivity-issues.md). + +[!INCLUDE [Azure Help Support](../../../includes/azure-help-support.md)] diff --git a/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/tcp-timeouts-kubetctl-third-party-tools-connect-api-server.md b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/tcp-timeouts-kubetctl-third-party-tools-connect-api-server.md new file mode 100644 index 000000000..82cea04cc --- /dev/null +++ b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/tcp-timeouts-kubetctl-third-party-tools-connect-api-server.md @@ -0,0 +1,143 @@ +--- +title: TCP time-outs when kubectl or other 3rd-party tools connect to API +description: Troubleshoot TCP time-outs that occur when kubectl or other third-party tools connect to the API server in Azure Kubernetes Service (AKS). 
+ms.topic: article +ms.date: 06/03/2024 +author: azureuser +ms.author: azureuser +ms.custom: sap:Connectivity,innovation-engine +--- + +# TCP time-outs when kubectl or other third-party tools connect to the API server + +This article discusses how to troubleshoot TCP time-outs that occur when [kubectl](https://kubernetes.io/docs/reference/kubectl/) or other third-party tools are used to connect to the API server in Microsoft Azure Kubernetes Service (AKS). To ensure its service-level objectives (SLOs) and service-level agreements (SLAs), AKS uses high-availability (HA) control planes that scale vertically and horizontally, based on the number of cores. + +## Symptoms + +You experience repeated connection time-outs. + +## Cause 1: Pods that are responsible for node-to-control plane communication aren't running + +If only a few of your API commands are timing out consistently, the following pods might not be in a running state: + +- `konnectivity-agent` +- `tunnelfront` +- `aks-link` + +> [!NOTE] +> In newer AKS versions, `tunnelfront` and `aks-link` are replaced with `konnectivity-agent`, so you'll only see `konnectivity-agent`. + +These pods are responsible for communication between a node and the control plane. + +### Solution: Reduce the utilization or stress of the node hosts + +Make sure the nodes that host these pods aren't overly utilized or under stress. Consider moving the nodes to their own [system node pool](/azure/aks/use-system-pools). + +To check which node the `konnectivity-agent` pod is hosted on and the usage of the node, run the following commands: + +Set access to the AKS cluster. Replace the values of `ResourceGroupName` and `AKSClusterName` with your own. + +```bash +az aks get-credentials --resource-group ${ResourceGroupName} --name ${AKSClusterName} --overwrite-existing +``` + +Check the running pods in the kube-system namespace and which node each one is assigned to: + +```bash +kubectl get pod -n kube-system -o wide +``` + +Results: + + + +```output +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +konnectivity-agent-xxxxx 1/1 Running 0 22h 10.xxx.xx.xxx aks-nodepool1-xxxxx-vmss000000 +coredns-xxxxx 1/1 Running 0 22h 10.xxx.xx.xxx aks-nodepool1-xxxxx-vmss000001 +# ...other pods... +``` + +Check the usage of the nodes and see resource utilization for each node: + +```bash +kubectl top node +``` + +Results: + + + +```output +NAME CPU(cores) CPU% MEMORY(bytes) MEMORY% +aks-nodepool1-xxxxx-vmss000000 125m 12% 1510Mi 37% +aks-nodepool1-xxxxx-vmss000001 106m 10% 1203Mi 42% +# ...other nodes... +``` + +## Cause 2: Access is blocked on some required ports, FQDNs, and IP addresses + +If the required ports, fully qualified domain names (FQDNs), and IP addresses aren't all opened, several command calls might fail. Secure, tunneled communication on AKS between the API server and the [kubelet](https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/) (through the `konnectivity-agent` pod) requires some of those items to work successfully. + +### Solution: Open the necessary ports, FQDNs, and IP addresses + +For more information about what ports, FQDNs, and IP addresses need to be opened, see [Outbound network and FQDN rules for Azure Kubernetes Service (AKS) clusters](/azure/aks/outbound-rules-control-egress). 
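If you suspect that a required FQDN is blocked, one way to test is to run a name lookup and an HTTPS request from one of the nodes. The following sketch uses the same `az vmss run-command` approach that other AKS troubleshooting articles use; the variable values and the `mcr.microsoft.com` endpoint are examples only, so substitute the FQDNs that your cluster actually needs:

```azurecli
# Sketch only: variable values are examples; mcr.microsoft.com is one of the FQDNs that AKS requires.
export NODE_RESOURCE_GROUP=$(az aks show --resource-group ${ResourceGroupName} --name ${AKSClusterName} --query nodeResourceGroup -o tsv)
export VMSS_NAME=$(az vmss list --resource-group $NODE_RESOURCE_GROUP --query "[0].name" -o tsv)
export INSTANCE_ID=$(az vmss list-instances --resource-group $NODE_RESOURCE_GROUP --name $VMSS_NAME --query "[0].instanceId" -o tsv)

az vmss run-command invoke \
    --resource-group $NODE_RESOURCE_GROUP \
    --name $VMSS_NAME \
    --instance-id $INSTANCE_ID \
    --command-id RunShellScript \
    --query "value[0].message" \
    --output tsv \
    --scripts "nslookup mcr.microsoft.com && curl -sI https://mcr.microsoft.com/v2/"
```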
+ +## Cause 3: The Application-Layer Protocol Negotiation TLS extension is blocked + +To establish a connection between the control plane and nodes, the `konnectivity-agent` pod requires the [Transport Layer Security (TLS) extension for Application-Layer Protocol Negotiation (ALPN)](https://datatracker.ietf.org/doc/html/rfc7301). You might have previously blocked this extension. + +### Solution: Enable the ALPN extension + +Enable the ALPN extension on the `konnectivity-agent` pod to prevent TCP time-outs. + +## Cause 4: The API server's IP authorized ranges doesn't cover your current IP address + +If you use authorized IP address ranges on your API server, your API calls will be blocked if your IP isn't included in the authorized ranges. + +### Solution: Modify the authorized IP address ranges so that it covers your IP address + +Change the authorized IP address ranges so that your IP address is covered. For more information, see [Update a cluster's API server authorized IP ranges](/azure/aks/api-server-authorized-ip-ranges#update-a-clusters-api-server-authorized-ip-ranges). + +## Cause 5: A client or application leaks calls to the API server + +Frequent GET calls can accumulate and overload the API server. + +### Solution: Use watches instead of GET calls, but make sure the application doesn't leak those calls + +Make sure that you use watches instead of frequent GET calls to the API server. You also have to make sure that your third-party applications don't leak any watch connections or GET calls. For example, in the [Istio microservice architecture](https://istio-releases.github.io/v0.1/docs/concepts/what-is-istio/overview.html), a [bug in the mixer application](https://github.com/istio/istio/issues/19481) creates a new API server watch connection whenever a secret is read internally. Because this behavior happens at a regular interval, the watch connections quickly accumulate. These connections eventually cause the API server to become overloaded no matter the scaling pattern. + +## Cause 6: Too many releases in your Helm deployments + +If you use too many releases in your deployments of [Helm](https://helm.sh/) (the Kubernetes package manager), the nodes start to consume too much memory. It also results in a large amount of `ConfigMap` (configuration data) objects, which might cause unnecessary usage spikes on the API server. + +### Solution: Limit the maximum number of revisions for each release + +Because the maximum number of revisions for each release is infinite by default, you need to run a command to set this maximum number to a reasonable value. For Helm 2, the command is [helm init](https://v2.helm.sh/docs/helm/#helm-init). For Helm 3, the command is [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/). Set the `--history-max ` parameter when you run the command. + +| Version | Command | +|---------|--------------------------------------------------------------------------------| +| Helm 2 | `helm init --history-max ...` | +| Helm 3 | `helm upgrade ... --history-max ...` | + +## Cause 7: Internal traffic between nodes is being blocked + +There might be internal traffic blockages between nodes in your AKS cluster. + +### Solution: Troubleshoot the "dial tcp :10250: i/o timeout" error + +See [Troubleshoot TCP timeouts, such as "dial tcp :10250: i/o timeout"](tcp-timeouts-dial-tcp-nodeip-10250-io-timeout.md). 
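Before you follow that article, a quick check is to list the rules on the network security group in the node resource group and look for anything that denies traffic on port 10250. This is only a sketch; if you bring your own subnet, the NSG might live in a different resource group:

```azurecli
# Sketch only: assumes the AKS-managed NSG in the node resource group; adjust if you attach a custom NSG to the subnet.
export NODE_RESOURCE_GROUP=$(az aks show --resource-group ${ResourceGroupName} --name ${AKSClusterName} --query nodeResourceGroup -o tsv)
export NSG_NAME=$(az network nsg list --resource-group $NODE_RESOURCE_GROUP --query "[0].name" -o tsv)

az network nsg rule list \
    --resource-group $NODE_RESOURCE_GROUP \
    --nsg-name $NSG_NAME \
    --include-default \
    --query "[?access=='Deny' || destinationPortRange=='10250'].{name:name, access:access, port:destinationPortRange, priority:priority}" \
    --output table
```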
+ +## Cause 8: Your cluster is private + +Your cluster is a private cluster, but the client from which you're trying to access the API server is in a public or different network that can't connect to the subnet used by AKS. + +### Solution: Use a client that can access the AKS subnet + +Since your cluster is private and its control plane is in the AKS subnet, it can't be connected to the API server unless it's in a network that can connect to the AKS subnet. It's an expected behavior. + +In this case, try to access the API server from a client in a network that can communicate with the AKS subnet. Additionally, verify network security groups (NSGs) or other appliances between networks aren't blocking packets. + +[!INCLUDE [Third-party disclaimer](../../../includes/third-party-disclaimer.md)] + +[!INCLUDE [Azure Help Support](../../../includes/azure-help-support.md)] diff --git a/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/troubleshoot-cluster-connection-issues-api-server.md b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/troubleshoot-cluster-connection-issues-api-server.md new file mode 100644 index 000000000..cf8f9023d --- /dev/null +++ b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/troubleshoot-cluster-connection-issues-api-server.md @@ -0,0 +1,90 @@ +--- +title: Troubleshoot cluster connection issues with the API server +description: Troubleshoot issues that occur when you attempt to connect to the API server of an Azure Kubernetes Service (AKS) cluster. +ms.date: 08/30/2024 +ms.reviewer: rissing chiragpa, beleite, v-leedennis, v-weizhu +ms.service: azure-kubernetes-service +#Customer intent: As an Azure Kubernetes user, I want to take basic troubleshooting measures so that I can avoid cluster connectivity issues with the API server. +ms.custom: sap:Connectivity,innovation-engine +--- + +# Basic troubleshooting of cluster connection issues with the API server + +This article discusses connection issues to an Azure Kubernetes Service (AKS) cluster when you can't reach the cluster's API server through the Kubernetes cluster command-line tool ([kubectl](https://kubernetes.io/docs/reference/kubectl/overview/)) or any other tool, such as using REST API through a programming language. + +## Prerequisites + +- [Azure CLI](/cli/azure/install-azure-cli). + +## Root cause and solutions + +Connection issues to the API server can occur for many reasons, but the root cause is often related to an error with one of these items: + +- Network +- Authentication +- Authorization + +You can take these common troubleshooting steps to check the connectivity to the AKS cluster's API server: + +1. Enter the following [az aks show](/cli/azure/aks#az-aks-show) command in Azure CLI. This command gets the fully qualified domain name (FQDN) of your AKS cluster. + + First, export your resource names to environment variables and add a random suffix to the resource group and cluster names for unique testing. + + ```azurecli + export RANDOM_SUFFIX=$(head -c 3 /dev/urandom | xxd -p) + export RESOURCE_GROUP="my-aks-rg$RANDOM_SUFFIX" + export AKS_CLUSTER="myakscluster$RANDOM_SUFFIX" + az aks show --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER --query fqdn + ``` + + Results: + + + + ```output + "xxxxxx-xxxxxxxx.hcp.eastus2.azmk8s.io" + ``` + +2. 
With the FQDN, check whether the API server is reachable from the client machine by using the name server lookup ([nslookup](/windows-server/administration/windows-commands/nslookup)), client URL ([curl](https://curl.se/docs/manpage.html)), and [telnet](/windows-server/administration/windows-commands/telnet) commands: + + Replace `` with the actual FQDN returned from the previous step. For demonstration, we use a variable. + + ```bash + export CLUSTER_FQDN=$(az aks show --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER --query fqdn -o tsv) + + # Check if the DNS Resolution is working: + nslookup $CLUSTER_FQDN + + # Then check if the API Server is reachable: + curl -k -Iv https://$CLUSTER_FQDN + + # Test raw TCP connectivity (output will vary depending on environment) + timeout 5 telnet $CLUSTER_FQDN 443 || echo "Connection test completed" + ``` + +3. If the AKS cluster is private, make sure you run the command from a virtual machine (VM) that can access the AKS cluster's Azure Virtual Network. See [Options for connecting to the private cluster](/azure/aks/private-clusters#options-for-connecting-to-the-private-cluster). + +4. If necessary, follow the steps in the troubleshooting article [Client IP address can't access the API server](client-ip-address-cannot-access-api-server.md), so the API server adds your client IP address to the IP ranges it authorizes. + +5. Make sure the version of kubectl on your client machine isn't two or more minor versions behind the AKS cluster's version of that tool. To install the latest version of kubectl, run the [az aks install-cli](/cli/azure/aks#az-aks-install-cli) command in Azure CLI. You can then run [kubectl version](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#version) command to check the version number of the new installation. + + For example, on Linux you would run these commands: + + ```shell + sudo az aks install-cli + kubectl version --client + ``` + + For other client operating systems, use these [kubectl installation instructions](https://kubernetes.io/docs/tasks/tools/). + +6. If necessary, follow the steps in the troubleshooting article [Config file isn't available when connecting](config-file-is-not-available-when-connecting.md), so your Kubernetes configuration file (*config*) is valid and can be found at connection time. + +7. If necessary, follow the steps in the troubleshooting article [User can't get cluster resources](user-cannot-get-cluster-resources.md), so you can list the details of your cluster nodes. + +8. If you're using a firewall to control egress traffic from AKS worker nodes, make sure the firewall allows the [minimum required egress rules for AKS](/azure/aks/limit-egress-traffic). + +9. Make sure the [network security group that's associated with AKS nodes](/azure/aks/concepts-security#azure-network-security-groups) allows communication on TCP port 10250 within the AKS nodes. + +For other common troubleshooting steps, see [TCP time-outs when kubectl or other third-party tools connect to the API server](tcp-timeouts-kubetctl-third-party-tools-connect-api-server.md). 
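As a companion to step 5, the following sketch compares the kubectl client version with the Kubernetes version that the cluster reports; it reuses the `$RESOURCE_GROUP` and `$AKS_CLUSTER` variables exported earlier:

```bash
# Client version of kubectl
kubectl version --client --output=yaml | grep gitVersion

# Kubernetes version that the AKS cluster is running
az aks show --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER --query kubernetesVersion -o tsv
```

If the client is two or more minor versions behind the cluster, reinstall kubectl as described in step 5.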
+ +[!INCLUDE [Azure Help Support](../../../includes/azure-help-support.md)] diff --git a/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/user-cannot-get-cluster-resources.md b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/user-cannot-get-cluster-resources.md new file mode 100644 index 000000000..35096c287 --- /dev/null +++ b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/user-cannot-get-cluster-resources.md @@ -0,0 +1,107 @@ +--- +title: Troubleshoot "Forbidden" error when trying to access AKS cluster resources +description: Troubleshoot "Error from server (Forbidden)" RBAC-related errors that occur when you try to view Kubernetes resources in an AKS cluster. +ms.date: 08/26/2024 +ms.reviewer: rissing chiragpa, v-leedennis +ms.service: azure-kubernetes-service +#Customer intent: As an Azure Kubernetes administrator, I want to fix RBAC-related errors so that users can access their cluster resources. +ms.custom: sap:Connectivity,innovation-engine +--- + +# Troubleshoot "Forbidden" error when trying to access AKS cluster resources + +This article explains how to troubleshoot and resolve "Error from server (Forbidden)" errors that are related to Role-Based Access Control (RBAC) when you try to view Kubernetes resources in an Azure Kubernetes Service (AKS) cluster. + +## Prerequisites + +The Kubernetes cluster command-line tool ([kubectl](https://kubernetes.io/docs/tasks/tools/)) + +> [!NOTE] +> If you use [Azure Cloud Shell](/azure/cloud-shell/overview) to run shell commands, kubectl is already installed. If you use a local shell and already have [Azure CLI](/cli/azure/install-azure-cli) installed, you can alternatively install kubectl by running the [az aks install-cli](/cli/azure/aks#az-aks-install-cli) command. + +## Symptoms + +When you run `kubectl` commands to view details of a Kubernetes resource type, such as a deployment, pod, or worker node, you receive the following error message: + +```output +$ kubectl get nodes +Error from server (Forbidden): nodes is forbidden: User "aaaa11111-11aa-aa11-a1a1-111111aaaaa" cannot list resource "nodes" in API group "" at the cluster scope +``` + +## Cause + +This error indicates that you're trying to access Kubernetes resources by using a Microsoft Entra ID account that doesn’t have the required role-based access control (RBAC) permissions. + +## Solution + +Depending on the RBAC type that's configured for the cluster ([Kubernetes RBAC](/azure/aks/azure-ad-rbac) or [Azure RBAC](/azure/aks/manage-azure-rbac)), different solutions might apply. Run the following command to determine which RBAC type the cluster is using: + +Run the following command to determine which RBAC type your AKS cluster is using: + +```bash +az aks show -g $RESOURCE_GROUP -n $CLUSTER_NAME --query aadProfile.enableAzureRbac +``` + +Results: + +```output +false +``` + +- If the result is **null** or empty, the cluster doesn't have Azure AD integration enabled. See [Solving permission issues in local Kubernetes RBAC clusters](#solving-permissions-issues-in-local-kubernetes-rbac-clusters). +- If the result is **false**, the cluster uses Kubernetes RBAC. See [Solving permission issues in Kubernetes RBAC-based AKS clusters](#solving-permissions-issues-in-kubernetes-rbac-based-aks-clusters). +- If the result is **true**, the cluster uses Azure RBAC. See [Solving permission issues in Azure RBAC-based AKS clusters](#solving-permissions-issues-in-azure-rbac-based-aks-clusters). 
+ +### Solving permissions issues in local Kubernetes RBAC clusters + +If your cluster doesn't have Azure AD integration (result was null), it uses cluster admin credentials: + +```bash +# Get admin credentials for full access +az aks get-credentials --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --admin + +# Verify access +kubectl get nodes +``` + +**Warning**: Admin credentials provide full cluster access. Use carefully and consider enabling Azure AD integration for better security. + +### Solving permissions issues in Kubernetes RBAC-based AKS clusters + +If the cluster uses Kubernetes RBAC, permissions for the user account are configured through the creation of RoleBinding or ClusterRoleBinding Kubernetes resources. For more information, see [Kubernetes RBAC documentation](https://kubernetes.io/docs/reference/access-authn-authz/rbac/). + +Additionally, in Microsoft Entra ID integrated clusters, a ClusterRoleBinding resource is automatically created to grant the administrator access to the cluster to members of a pre-designated Microsoft Entra ID group. + +To resolve the "Error from server (Forbidden)" error for a specific user, use one of the following methods. + +#### Method 1: Create a custom RoleBinding or ClusterRoleBinding resource + +You can create a custom RoleBinding or ClusterRoleBinding resource to grant the necessary permissions to the user (or a group of which the user is a member). For detailed steps, see [Use Kubernetes role-based access control with Microsoft Entra ID in Azure Kubernetes Service](/azure/aks/azure-ad-rbac). + +#### Method 2: Add the user to the pre-designated Microsoft Entra ID admin group + +1. Retrieve the ID of the pre-designated Microsoft Entra ID admin group. To do this, run the following command: + + ```bash + az aks show -g $RESOURCE_GROUP -n $CLUSTER_NAME --query aadProfile.adminGroupObjectIDs + ``` + + Results: + + ```output + [ + "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + ] + ``` + +2. Add the user to the pre-designated Microsoft Entra ID admin group by using the group ID that you retrieved in the previous step. For more detailed steps, see [Add members or owners of a group](/entra/fundamentals/how-to-manage-groups#add-members-or-owners-of-a-group). + +### Solving permissions issues in Azure RBAC-based AKS clusters + +If the cluster uses Azure RBAC, permissions for users are configured through the creation of [Azure role assignments](/azure/role-based-access-control/role-assignments). + +AKS provides a set of built-in roles that can be used to create role assignments for the Microsoft Entra ID users or groups to give them access to Kubernetes objects in a specific namespace or at cluster scope. For detailed steps to assign built-in roles to users or groups in Azure RBAC-based clusters, see [AKS built-in roles](/azure/aks/manage-azure-rbac#aks-built-in-roles). + +Alternatively, you can create your own custom Azure role definitions to provide a more granular management of permissions over specific types of Kubernetes objects and operations. For detailed guidance to create and assign custom roles to users and groups in Azure RBAC-based clusters, see [Create custom roles definitions](/azure/aks/manage-azure-rbac#create-custom-roles-definitions). 
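As a sketch, assigning a built-in role that grants a user read access to a single namespace might look like the following; the user object ID and the namespace name are placeholders:

```azurecli
# Sketch only: the assignee object ID and the namespace name are placeholders.
export AKS_ID=$(az aks show -g $RESOURCE_GROUP -n $CLUSTER_NAME --query id -o tsv)
export USER_OBJECT_ID="aaaa1111-11aa-aa11-a1a1-111111aaaaaa"

az role assignment create \
    --role "Azure Kubernetes Service RBAC Reader" \
    --assignee $USER_OBJECT_ID \
    --scope "$AKS_ID/namespaces/default"
```

To grant access at cluster scope instead, set `--scope` to the cluster resource ID without the `namespaces` segment.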
+ +[!INCLUDE [Azure Help Support](../../../includes/azure-help-support.md)] diff --git a/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/cannot-scale-cluster-autoscaler-enabled-node-pool.md b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/cannot-scale-cluster-autoscaler-enabled-node-pool.md new file mode 100644 index 000000000..a2ca2e94f --- /dev/null +++ b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/cannot-scale-cluster-autoscaler-enabled-node-pool.md @@ -0,0 +1,72 @@ +--- +title: Cluster autoscaler fails to scale with cannot scale cluster autoscaler enabled node pool error +description: Learn how to troubleshoot the cannot scale cluster autoscaler enabled node pool error when your autoscaler isn't scaling up or down. +author: sgeannina +ms.author: ninasegares +ms.date: 06/09/2024 +ms.reviewer: aritraghosh, chiragpa +ms.service: azure-kubernetes-service +ms.custom: sap:Create, Upgrade, Scale and Delete operations (cluster or nodepool), innovation-engine +--- + +# Cluster autoscaler fails to scale with "cannot scale cluster autoscaler enabled node pool" error + +This article discusses how to resolve the "cannot scale cluster autoscaler enabled node pool" error that appears when scaling a cluster with an autoscaler enabled node pool. + +## Symptoms + +You receive an error message that resembles the following message: + +> `kubectl get nodes` outputs "No resources found" +> All pods state is `Pending` +> Scale operations are failing with "Cannot scale cluster autoscaler enabled node pool" error + +## Troubleshooting checklist + +Azure Kubernetes Service (AKS) uses virtual machine scale sets-based agent pools, which contain cluster nodes and [cluster autoscaling capabilities](/azure/aks/cluster-autoscaler) if enabled. + +### Check that the cluster virtual machine scale set exists + +1. Sign in to [Azure portal](https://portal.azure.com). +1. Find the node resource group by searching the following names: + + - The default name `MC_{AksResourceGroupName}_{YourAksClusterName}_{AksResourceLocation}`. + - The custom name (if it was provided at creation). + + > [!NOTE] + > When you create a new cluster, AKS automatically creates a second resource group to store the AKS resources. For more information, see [Why are two resource groups created with AKS?](/azure/aks/faq#why-are-two-resource-groups-created-with-aks) + +1. Check the list of resources and make sure that there's a virtual machine scale set. + +## Cause 1: The cluster virtual machine scale set was deleted + +Deleting the virtual machine scale set attached to the cluster causes the cluster autoscaler to fail. It also causes issues when provisioning resources such as nodes and pods. + +> [!NOTE] +> Modifying any resource under the node resource group in the AKS cluster is an unsupported action and will cause cluster operation failures. You can prevent changes from being made to the node resource group by [blocking users from modifying resources](/azure/aks/cluster-configuration#fully-managed-resource-group-preview) managed by the AKS cluster. + +## Cause 2: Tags or any other properties were modified from the node resource group + +You may receive scaling errors if you modify or delete Azure-created tags and other resource properties in the node resource group. 
For more information, see [Can I modify tags and other properties of the AKS resources in the node resource group?](/azure/aks/faq#can-i-modify-tags-and-other-properties-of-the-aks-resources-in-the-node-resource-group) + +## Cause 3: The cluster node resource group was deleted + +Deleting the cluster node resource group causes issues when provisioning the infrastructure resources required by the cluster, which causes the cluster autoscaler to fail. + +## Solution: Update the cluster to the goal state without changing the configuration + +To resolve this issue, you can run the following command to recover the deleted virtual machine scale set or any tags (missing or modified): + +> [!NOTE] +> It might take a few minutes until the operation completes. + +Set your environment variables for the AKS cluster resource group and cluster name before running the command. A random suffix is included to prevent name collisions during repeatable executions, but you must ensure the resource group and cluster exist. + +```azurecli +export RANDOM_SUFFIX=$(head -c 3 /dev/urandom | xxd -p) +export AKS_RG_NAME="MyAksResourceGroup$RANDOM_SUFFIX" +export AKS_CLUSTER_NAME="MyAksCluster$RANDOM_SUFFIX" +az aks update --resource-group $AKS_RG_NAME --name $AKS_CLUSTER_NAME --no-wait +``` + +[!INCLUDE [Azure Help Support](../../../includes/azure-help-support.md)] diff --git a/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/error-code-badrequest-or-invalidclientsecret.md b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/error-code-badrequest-or-invalidclientsecret.md new file mode 100644 index 000000000..5d0bbf509 --- /dev/null +++ b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/error-code-badrequest-or-invalidclientsecret.md @@ -0,0 +1,73 @@ +--- +title: AADSTS7000222 - BadRequest or InvalidClientSecret error +description: Learn how to troubleshoot the BadRequest or InvalidClientSecret error when you try to create or upgrade an Azure Kubernetes Service (AKS) cluster. +ms.topic: article +ms.date: 06/13/2024 +author: axelgMS +ms.author: axelg +ms.custom: sap:Create, Upgrade, Scale and Delete operations (cluster or nodepool), innovation-engine +--- + +# AADSTS7000222 - BadRequest or InvalidClientSecret error + +This article discusses how to identify and resolve the `AADSTS7000222` error (`BadRequest` or `InvalidClientSecret`) that occurs when you try to create or upgrade a Microsoft Azure Kubernetes Service (AKS) cluster. + +## Prerequisites + +- [Azure CLI](/cli/azure/install-azure-cli) + +## Symptoms + +When you try to create or upgrade an AKS cluster, you receive one of the following error messages. + +| Error code | Message | +|--|--| +| `BadRequest` | **The credentials in ServicePrincipalProfile were invalid.** Please see for more details. (Details: adal: Refresh request failed. Status Code = '401'. Response body: {"error": "invalid_client", "error_description": "**AADSTS7000222: The provided client secret keys for app '\' are expired.** Visit the Azure portal to create new keys for your app: , or consider using certificate credentials for added security: ." | +| `InvalidClientSecret` | **Customer auth is not valid for tenant: \**: adal: Refresh request failed. Status Code = '401'. 
Response body: {"error": "invalid_client", "error_description": "**AADSTS7000222: The provided client secret keys for app '\' are expired.** Visit the Azure portal to create new keys for your app: , or consider using certificate credentials for added security: ." | + +## Cause + +The issue that generates this service principal alert usually occurs for one of the following reasons: + +- The client secret expired. + +- Incorrect credentials were provided. + +- The service principal doesn't exist within the Microsoft Entra ID tenant of the subscription. + +#### Verify the cause + +Use the following commands to retrieve the service principal profile for your AKS cluster and check the expiration date of the service principal. Make sure to set the appropriate variables for your AKS resource group and cluster name. + +```azurecli +SP_ID=$(az aks show --resource-group $RESOURCE_GROUP_NAME \ + --name $AKS_CLUSTER_NAME \ + --query servicePrincipalProfile.clientId \ + --output tsv) +az ad app credential list --id "$SP_ID" +``` + +Alternatively, you can verify that the service principal name and secret are correct and aren't expired. To do this, follow these steps: + +1. In the [Azure portal](https://portal.azure.com), search for and select **Microsoft Entra ID**. + +1. In the navigation pane of Microsoft Entra ID, select **App registrations**. + +1. On the **Owned applications** tab, select the affected application. + +1. Find the service principal name and secret information, and verify that the information is correct and current. + +## Solution + +1. In the [Update or rotate the credentials for an AKS cluster](/azure/aks/update-credentials) article, follow the instructions in one of the following article sections, as appropriate: + + - [Reset the existing service principal credentials](/azure/aks/update-credentials#reset-the-existing-service-principal-credentials) + - [Create a new service principal](/azure/aks/update-credentials#create-a-new-service-principal) + +1. Using your new service principal credentials, follow the instructions in the [Update AKS cluster with service principal credentials](/azure/aks/update-credentials#update-aks-cluster-with-service-principal-credentials) section of that article. + +## More information + +- [Use a service principal with Azure Kubernetes Service (AKS)](/azure/aks/kubernetes-service-principal) (especially the [Troubleshoot](/azure/aks/kubernetes-service-principal#troubleshoot) section) + +[!INCLUDE [Azure Help Support](../../../includes/azure-help-support.md)] diff --git a/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/error-code-cnidownloadtimeoutvmextensionerror.md b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/error-code-cnidownloadtimeoutvmextensionerror.md new file mode 100644 index 000000000..62fe39a22 --- /dev/null +++ b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/error-code-cnidownloadtimeoutvmextensionerror.md @@ -0,0 +1,131 @@ +--- +title: Troubleshoot Container Network Interface download failures +description: Learn how to resolve Container Network Interface download failures when you try to create and deploy an Azure Kubernetes Service (AKS) cluster. 
+ms.topic: article +ms.date: 06/12/2024 +author: v-jsitser +ms.author: v-jsitser +ms.custom: sap:Create, Upgrade, Scale and Delete operations (cluster or nodepool), innovation-engine +editor: v-jsitser +ms.reviewer: axelg, chiragpa, mariochaves, v-weizhu, v-leedennis +#Customer intent: As an Azure Kubernetes user, I want to troubleshoot the container network interface download failures so that I can successfully create and deploy an Azure Kubernetes Service (AKS) cluster. +--- + +# Troubleshoot Container Network Interface download failures + +This article discusses how to identify and resolve the `CniDownloadTimeoutVMExtensionError` error code (also known as error code `ERR_CNI_DOWNLOAD_TIMEOUT`, error number 41) or the `WINDOWS_CSE_ERROR_DOWNLOAD_CNI_PACKAGE` error code (error number 35) that occurs when you try to create and deploy a Microsoft Azure Kubernetes Service (AKS) cluster. + +## Prerequisites + +- The [Curl](https://curl.se/download.html) command-line tool +- Network access from the same environment where AKS nodes will be deployed (same VNet, firewall rules, etc.) + +## Symptoms + +When you try to create a Linux-based AKS cluster, you receive the following error message: + +```output +Message: We are unable to serve this request due to an internal error +SubCode: CniDownloadTimeoutVMExtensionError; +Message="VM has reported a failure when processing extension 'vmssCSE'. +Error message: "Enable failed: failed to execute command: command terminated with exit status=41\n[stdout]\n{ +"ExitCode": "41", +``` + +When you try to create a Windows-based AKS cluster, you receive the following error message: + +```output +Message="VM has reported a failure when processing extension 'vmssCSE' (publisher 'Microsoft.Compute' and type 'CustomScriptExtension'). +Error message: 'Command execution finished, but failed because it returned a non-zero exit code of: '1'. The command had an error output of: 'ExitCode: |35|, +Output: |WINDOWS_CSE_ERROR_DOWNLOAD_CNI_PACKAGE|, Error: |Failed in downloading \r\nhttps://acs-mirror.azureedge.net/azure-cni/v1.4.56/binaries/azure-vnet-cni-overlay-windows-amd64-v1.4.56.zip. +Error: \r\nUnable to connect to the r|\r\nAt line:1 ...' +For more information, check the instance view by executing Get-AzVmssVm or Get-AzVm (https://aka.ms/GetAzVm). These commands can be executed using CloudShell (https://aka.ms/CloudShell)'. More information on troubleshooting is available at https://aka.ms/VMExtensionCSEWindowsTroubleshoot. +``` + +## Cause + +Your cluster nodes can't connect to the endpoint that's used to download the Container Network Interface (CNI) libraries. In most cases, this issue occurs because a network virtual appliance is blocking Secure Sockets Layer (SSL) communication or an SSL certificate. + +## Solution + +Run a Curl command to verify that your nodes can download the binaries: + +First, attempt a test download of the Azure CNI package for Linux from the official mirror endpoint. + +```bash +curl -I https://acs-mirror.azureedge.net/cni/azure-vnet-cni-linux-amd64-v1.0.25.tgz +``` + +Results: + + + +```output +HTTP/2 200 +content-length: 970752 +content-type: application/x-gzip +last-modified: Wed, 22 Jun 2022 00:00:00 GMT +etag: "0x8DA53F1234567" +server: ECAcc (dab/4B9E) +x-cache: HIT +cache-control: public, max-age=86400 +accept-ranges: bytes +date: Thu, 05 Jun 2025 00:00:00 GMT +``` + +This command checks if the endpoint is reachable and returns the HTTP headers. If you see a `200 OK` response, it indicates that the endpoint is accessible. 
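If you want a compact pass/fail check that's easier to script (for example, when testing from several subnets), a small variation of the same test prints only the HTTP status code. This is a convenience sketch and doesn't replace the full download test that follows:

```bash
# Print only the HTTP status code; 200 indicates the endpoint is reachable.
curl -s -o /dev/null -w "%{http_code}\n" https://acs-mirror.azureedge.net/cni/azure-vnet-cni-linux-amd64-v1.0.25.tgz
```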
+ +Next, attempt a download with validation and save the file locally for further troubleshooting. This will help determine if SSL or outbound connectivity is correctly configured. + +```bash +# Create a temporary directory for testing +mkdir -p /tmp/cni-test + +# Download the CNI package to the temp directory +curl -L --fail https://acs-mirror.azureedge.net/cni/azure-vnet-cni-linux-amd64-v1.0.25.tgz --output /tmp/cni-test/azure-vnet-cni-linux-amd64-v1.0.25.tgz && echo "Download successful" || echo "Download failed" +``` + +Results: + + + +```output + % Total % Received % Xferd Average Speed Time Time Time Current + Dload Upload Total Spent Left Speed +100 6495k 100 6495k 0 0 8234k 0 --:--:-- --:--:-- --:--:-- 8230k +Download successful +``` + +Verify the downloaded file: + +```bash +ls -la /tmp/cni-test/ +file /tmp/cni-test/azure-vnet-cni-linux-amd64-v1.0.25.tgz +``` + +Results: + + + +```output +total 6500 +drwxr-xr-x 2 user user 4096 Jun 20 10:30 . +drwxrwxrwt 8 root root 4096 Jun 20 10:30 .. +-rw-r--r-- 1 user user 6651392 Jun 20 10:30 azure-vnet-cni-linux-amd64-v1.0.25.tgz + +/tmp/cni-test/azure-vnet-cni-linux-amd64-v1.0.25.tgz: gzip compressed data, from Unix, original size modulo 2^32 20070400 +``` + +Clean up the test files: + +```bash +rm -rf /tmp/cni-test/ +``` + +If you can't download these files, make sure that traffic is allowed to the downloading endpoint. For more information, see [Azure Global required FQDN/application rules](/azure/aks/outbound-rules-control-egress#azure-global-required-fqdn--application-rules). + +## References + +- [General troubleshooting of AKS cluster creation issues](troubleshoot-aks-cluster-creation-issues.md) + +[!INCLUDE [Azure Help Support](../../../includes/azure-help-support.md)] diff --git a/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/upgrading-or-scaling-does-not-succeed.md b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/upgrading-or-scaling-does-not-succeed.md new file mode 100644 index 000000000..4d57eca7d --- /dev/null +++ b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/upgrading-or-scaling-does-not-succeed.md @@ -0,0 +1,79 @@ +--- +title: Troubleshoot cluster upgrading and scaling errors +description: Troubleshoot errors that occur when you try to upgrade or scale an Azure Kubernetes Service (AKS) cluster. +ms.topic: article +ms.date: 06/12/2024 +author: v-jsitser +ms.author: v-jsitser +ms.custom: sap:Create, Upgrade, Scale and Delete operations (cluster or nodepool), innovation-engine +--- + +# Troubleshoot cluster upgrading and scaling errors + +This article discusses how to troubleshoot errors that occur when you try to upgrade or scale a Microsoft Azure Kubernetes Service (AKS) cluster. + +Some causes of failure when you try to upgrade or scale an AKS cluster are as follows. + +## Cause 1: Cluster is in a failed state + +If a cluster is in a `failed` state, `upgrade` or `scale` operations won't succeed. A cluster can enter a failed state for many reasons. + +Here are the most common reasons and corresponding solutions: + +- Scaling while having an insufficient Compute Resource Provider (CRP) quota. + + To resolve this issue, increase your resource quota before you scale by following these steps: + + 1. Scale your cluster back to a stable goal state within the quota. + + 2. [Request an increase in your resource quota](/azure/azure-resource-manager/troubleshooting/error-resource-quota#solution). + + 3. 
Try to scale up again beyond the initial quota limits. + + 4. Retry the original operation. This second operation should bring your cluster to a successful state. + +- Scaling a cluster that uses advanced networking such as Azure Container Networking Interface (CNI), Azure CNI for dynamic IP allocation but has insufficient subnet (networking) resources. + + To resolve this issue, see [Troubleshoot the SubnetIsFull error code](error-code-subnetisfull.md). + +- Upgrading a cluster that has Pod Disruption Budgets (PDBs) which may cause eviction failures. + + To resolve this issue, remove or adjust the PDB so that the pod can be drained. For more information, see [Troubleshoot UpgradeFailed errors due to eviction failures caused by PDBs](error-code-poddrainfailure.md). + +- Upgrading a cluster that uses deprecated APIs. + + For Kubernetes versions upgrading to 1.26 or later, AKS checks whether deprecated APIs are used before starting the cluster upgrade. To resolve this issue and start to upgrade, see [How to mitigate stopped upgrade operations due to deprecated APIs](/azure/aks/stop-cluster-upgrade-api-breaking-changes#mitigate-stopped-upgrade-operations). + +## Cause 2: You're trying to upgrade and scale at the same time + +A cluster or node pool can't simultaneously upgrade and scale. Instead, each operation type must finish on the target resource before the next request runs on that same resource. Therefore, operations are limited when active upgrade or scale operations are occurring or attempted. + +To resolve this issue, follow these steps: + +1. Determine the current status of your cluster before you try an operation. + + To retrieve detailed status about your cluster, run the following [az aks show](/cli/azure/aks#az-aks-show) command: + + ```azurecli + az aks show --resource-group $RESOURCE_GROUP_NAME --name $CLUSTER_NAME --output table + ``` + + Results: + + + + ```output + Name Location ResourceGroup KubernetesVersion ProvisioningState Fqdn + ------------- ----------- ------------------- ------------------- ------------------- --------------- + myAKSClusterx eastus2 myResourceGroupx 1.27.x Succeeded xxxxx.xxxxxx.x + ``` + +2. Refer to the following table to take the appropriate action based on the cluster's status: + + | ProvisioningState | Action | + |-------------------------------|-----------------------------------------------------------------------------------------| + | Upgrading | Wait until the operation finishes. | + | Failed | Follow the solutions that are outlined in [Cause 1](#cause-1-cluster-is-in-a-failed-state). | + | Succeeded | Retry the scale or other previously failed operation. | + +[!INCLUDE [Azure Help Support](../../../includes/azure-help-support.md)] \ No newline at end of file diff --git a/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/extensions/aks-cost-analysis-add-on-issues.md b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/extensions/aks-cost-analysis-add-on-issues.md new file mode 100644 index 000000000..779b5aaf6 --- /dev/null +++ b/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/extensions/aks-cost-analysis-add-on-issues.md @@ -0,0 +1,122 @@ +--- +title: Azure Kubernetes Service Cost Analysis add-on issues +description: Learn how to resolve issues that occur when you try to enable the Azure Kubernetes Service (AKS) Cost Analysis add-on. 
+ms.date: 06/25/2024 +author: kaysieyu +ms.author: kaysieyu +ms.reviewer: pram, chiragpa, joharder, cssakscic, dafell, v-leedennis, v-weizhu +editor: v-jsitser +ms.service: azure-kubernetes-service +ms.custom: sap:Extensions, Policies and Add-Ons, references_regions, innovation-engine +--- + +# AKS Cost Analysis add-on issues + +This article discusses how to troubleshoot problems that you might experience when you enable the Microsoft Azure Kubernetes Service (AKS) Cost Analysis add-on during cluster creation or a cluster update. + +## Prerequisites + +- [Azure CLI](/cli/azure/install-azure-cli) + +## Symptoms + +After you create or update an AKS cluster, you receive an error message in the following format: + +| Error code | Cause | +|--|--| +| `InvalidDiskCSISettingForCostAnalysis` | [Cause 1: Azure Disk CSI driver is disabled](#cause-1-azure-disk-csi-driver-is-disabled) | +| `InvalidManagedIdentitySettingForCostAnalysis` | [Cause 2: Managed identity is disabled](#cause-2-managed-identity-is-disabled) | +| `CostAnalysisNotEnabledInRegion` | [Cause 3: The add-on is unavailable in your region](#cause-3-the-add-on-is-unavailable-in-your-region) | +| `InvalidManagedClusterSKUForFeature` | [Cause 4: The add-on is unavailable on the free pricing tier](#cause-4-the-add-on-is-unavailable-on-the-free-pricing-tier) | +| Pod `OOMKilled` | [Cause 5: The cost-analysis-agent pod gets the OOMKilled error](#cause-5-the-cost-analysis-agent-pod-gets-the-oomkilled-error) | +| Pod `Pending` | [Cause 6:The cost-analysis-agent pod is stuck in the Pending state](#cause-6-the-cost-analysis-agent-pod-is-stuck-in-the-pending-state) | + +## Cause 1: Azure Disk CSI driver is disabled + +You can't enable the Cost Analysis add-on on a cluster in which the [Azure Disk Container Storage Interface (CSI) driver](/azure/aks/azure-disk-csi) is disabled. + +### Solution: Update the cluster to enable the Azure Disk CSI driver + +Run the [az aks update][aks-update] command, and specify the `--enable-disk-driver` parameter. This parameter enables the Azure Disk CSI driver in AKS. + +First, define the environment variables for your resource group and AKS cluster, using unique values for repeated runs: + +```azurecli +export RANDOM_SUFFIX=$(head -c 3 /dev/urandom | xxd -p) +export RESOURCE_GROUP="my-aks-resource-group$RANDOM_SUFFIX" +export AKS_CLUSTER="my-aks-cluster$RANDOM_SUFFIX" +az aks update --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER --enable-disk-driver +``` + +For more information, see [CSI drivers on AKS](/azure/aks/csi-storage-drivers). + +## Cause 2: Managed identity is disabled + +You can enable the Cost Analysis add-on only on a cluster that has a system-assigned or user-assigned managed identity. + +### Solution: Update the cluster to enable managed identity + +Run the [az aks update][aks-update] command, and specify the `--enable-managed-identity` parameter: + +```azurecli +export RANDOM_SUFFIX=$(head -c 3 /dev/urandom | xxd -p) +export RESOURCE_GROUP="my-aks-resource-group$RANDOM_SUFFIX" +export AKS_CLUSTER="my-aks-cluster$RANDOM_SUFFIX" +az aks update --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER --enable-managed-identity +``` + +For more information, see [Use a managed identity in AKS](/azure/aks/use-managed-identity). + +## Cause 3: The add-on is unavailable in your region + +The Cost Analysis add-on isn't currently enabled in your region. 
+ +> [!NOTE] +> The AKS Cost Analysis add-on is currently unavailable in the following regions: +> +> - `usnateast` +> - `usnatwest` +> - `usseceast` +> - `ussecwest` + + +## Cause 4: The add-on is unavailable on the free pricing tier + +You can't enable the Cost Analysis add-on on AKS clusters that are on the free pricing tier. + +### Solution: Update the cluster to use the Standard or Premium pricing tier + +Upgrade the AKS cluster to the Standard or Premium pricing tier. To do this, run the below [az aks update][aks-update] command that specify the `--tier` parameter. The `--tier` parameter can be set to either `standard` or `premium` (example below shows `standard`): + +```azurecli +export RANDOM_SUFFIX=$(head -c 3 /dev/urandom | xxd -p) +export RESOURCE_GROUP="my-aks-resource-group$RANDOM_SUFFIX" +export AKS_CLUSTER="my-aks-cluster$RANDOM_SUFFIX" +az aks update --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER --tier standard +``` + +For more information, see [Free and Standard pricing tiers for AKS cluster management](/azure/aks/free-standard-pricing-tiers). + +## Cause 5: The cost-analysis-agent pod gets the OOMKilled error + +The current memory limit for the cost-analysis-agent pod is set to 4 GB. + +The pod's usage depends on the number of deployed containers, which can be roughly 200 MB + 0.5 MB per container. The current memory limit supports approximately 7000 containers per cluster. + +When the pod's usage exceeds the allocated 4 GB limit, large clusters may experience the `OOMKill` error. + +### Solution: Disable the add-on + +Currently, customizing or manually increasing memory limits for the add-on isn't supported. To resolve this issue, disable the add-on. + +## Cause 6: The cost-analysis-agent pod is stuck in the Pending state + +If the pod is stuck in the Pending state with the FailedScheduling error, the nodes in the cluster have exhausted memory capacity. + +### Solution: Ensure there's sufficient allocatable memory + +The current memory request of the cost-analysis-agent pod is set to 500 MB. Ensure that there's sufficient allocatable memory for the pod to be scheduled + +[!INCLUDE [Azure Help Support](../../../includes/azure-help-support.md)] + +[aks-update]: /cli/azure/aks#az-aks-update \ No newline at end of file diff --git a/scenarios/TroubleshootVMGrubError/troubleshoot-vm-grub-error-repairvm.md b/scenarios/TroubleshootVMGrubError/troubleshoot-vm-grub-error-repairvm.md new file mode 100644 index 000000000..2bbc7dc15 --- /dev/null +++ b/scenarios/TroubleshootVMGrubError/troubleshoot-vm-grub-error-repairvm.md @@ -0,0 +1,104 @@ +--- +title: Linux VM boots to GRUB rescue +description: Provides troubleshooting guidance for GRUB rescue issues with Linux virtual machines. +services: virtual-machines +documentationcenter: '' +author: divargas +ms.service: azure-virtual-machines +ms.collection: linux +ms.workload: infrastructure-services +ms.tgt_pltfrm: vm-linux +ms.custom: sap:My VM is not booting, linux-related-content +ms.topic: troubleshooting +ms.date: 02/25/2025 +ms.author: divargas +ms.reviewer: ekpathak, v-leedennis, v-weizhu +--- + +# Linux virtual machine boots to GRUB rescue + +**Applies to:** :heavy_check_mark: Linux VMs + + + + + +This article discusses multiple conditions that cause GRUB rescue issues and provides troubleshooting guidance. + +During the boot process, the boot loader tries to locate the Linux kernel and hand off the boot control. If this handoff can't be performed, the virtual machine (VM) enters a GRUB rescue console. 
The GRUB rescue console prompt isn't shown in the Azure serial console log, but it can be shown in the [Azure boot diagnostics screenshot](/azure/virtual-machines/boot-diagnostics#boot-diagnostics-view). + +## Identify GRUB rescue issue + +[View a boot diagnostics screenshot](/azure/virtual-machines/boot-diagnostics#boot-diagnostics-view) in the VM **Boot diagnostics** page of the Azure portal. This screenshot helps diagnose the GRUB rescue issue and determine if a boot error causes the issue. + +The following text is an example of a GRUB rescue issue: + +```output +error: file '/boot/grub2/i386-pc/normal.mod' not found. +Entering rescue mode... +grub rescue> +``` + +## Troubleshoot GRUB rescue issue offline + +1. To troubleshoot a GRUB rescue issue, a rescue/repair VM is required. Use [vm repair commands](repair-linux-vm-using-azure-virtual-machine-repair-commands.md) to create a repair VM that has a copy of the affected VM's OS disk attached. Mount the copy of the OS file systems in the repair VM by using [chroot](chroot-environment-linux.md). + + > [!NOTE] + > Alternatively, you can create a rescue VM manually by using the Azure portal. For more information, see [Troubleshoot a Linux VM by attaching the OS disk to a recovery VM using the Azure portal](troubleshoot-recovery-disks-portal-linux.md). + +2. [Identify GRUB rescue issue](#identify-grub-rescue-issue). When you encounter one of the following GRUB rescue issues, go to the corresponding section to resolve it: + + * [Error: unknown filesystem](#unknown-filesystem) + * [Error 15: File not found](#error15) + * [Error: file '/boot/grub2/i386-pc/normal.mod' not found](#normal-mod-file-not-found) + * [Error: no such partition](#no-such-partition) + * [Error: symbol 'grub_efi_get_secure_boot' not found](#grub_efi_get_secure_boot) + * [Other GRUB rescue errors](#other-grub-rescue-errors) + +3. After the GRUB rescue issue is resolved, perform the following actions: + + 1. Unmount the copy of the file systems from the rescue/repair VM. + + 2. Run the `az vm repair restore` command to swap the repaired OS disk with the original OS disk of the VM. For more information, see Step 5 in [Repair a Linux VM by using the Azure Virtual Machine repair commands](repair-linux-vm-using-azure-virtual-machine-repair-commands.md). + + 3. Check whether the VM can start by taking a look at the Azure serial console or by trying to connect to the VM. + +4. If the entire /boot partition or other important contents are missing and can't be recovered, we recommend restoring the VM from a backup. For more information, see [How to restore Azure VM data in Azure portal](/azure/backup/backup-azure-arm-restore-vms). + +See the following sections for detailed errors, possible causes, and solutions. + +> [!NOTE] +> In the commands mentioned in the following sections, replace `/dev/sdX` with the corresponding Operating System (OS) disk device. + +### Reinstall GRUB and regenerate GRUB configuration file using Auto Repair (ALAR) + +Azure Linux Auto Repair (ALAR) scripts are part of the VM repair extension described in [Use Azure Linux Auto Repair (ALAR) to fix a Linux VM](./repair-linux-vm-using-alar.md). ALAR covers the automation of multiple repair scenarios, including GRUB rescue issues. + +The ALAR scripts use the repair extension `repair-button` to fix GRUB issues by specifying `--button-command grubfix` for Generation 1 VMs, or `--button-command efifix` for Generation 2 VMs. This parameter triggers the automated recovery. 
Implement the following step to automate the fix of common GRUB errors that could be fixed by reinstalling GRUB and regenerating the corresponding configuration file: + +```azurecli-interactive +GEN=$(az vm get-instance-view --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --query "instanceView.hyperVGeneration" --output tsv) +if [[ "$GEN" =~ "[Vv]?2" ]]; then ALAR="efifix"; else ALAR="grubfix"; fi +output=$(az extension add -n vm-repair; az extension update -n vm-repair; az vm repair repair-button --button-command $ALAR --verbose --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME) +value=$(echo "$output" | jq -r '.message') +extracted=$(echo $value) +echo "$extracted" +``` + +The repair VM script, in conjunction with the ALAR script, temporarily creates a resource group, a repair VM, and a copy of the affected VM's OS disk. It reinstalls GRUB and regenerates the corresponding GRUB configuration file and then it swaps the OS disk of the broken VM with the copied fixed disk. Finally, the `repair-button` script will automatically delete the resource group containing the temporary repair VM. + +## Next steps + +If the specific boot error isn't a GRUB rescue issue, refer to [Troubleshoot Azure Linux Virtual Machines boot errors](boot-error-troubleshoot-linux.md) for further troubleshooting options. + +[!INCLUDE [Third-party disclaimer](../../../includes/third-party-disclaimer.md)] + +[!INCLUDE [Third-party contact disclaimer](../../../includes/third-party-contact-disclaimer.md)] \ No newline at end of file diff --git a/scenarios/UseIGOnAKS/alert-bad-process.yaml b/scenarios/UseIGOnAKS/alert-bad-process.yaml new file mode 100644 index 000000000..c7f812a2f --- /dev/null +++ b/scenarios/UseIGOnAKS/alert-bad-process.yaml @@ -0,0 +1,13 @@ +apiVersion: 1 +kind: instance-spec +image: trace_exec:v0.38.0 +name: alert-bad-process +paramValues: + # monitor all namespaces + operator.KubeManager.all-namespaces: true + # monitor shell executions (only bash on this example) + operator.filter.filter: proc.comm==bash + # name of the metric to export + operator.otel-metrics.otel-metrics-name: 'exec:shell_executions' + # annotate gadget to enable metrics collection + operator.oci.annotate: exec:metrics.collect=true,exec:metrics.implicit-counter.name=shell_executions,exec.k8s.namespace:metrics.type=key,exec.k8s.podname:metrics.type=key,exec.k8s.containername:metrics.type=key diff --git a/scenarios/UseIGOnAKS/ama-metrics-settings-configmap.yaml b/scenarios/UseIGOnAKS/ama-metrics-settings-configmap.yaml new file mode 100644 index 000000000..73481cb01 --- /dev/null +++ b/scenarios/UseIGOnAKS/ama-metrics-settings-configmap.yaml @@ -0,0 +1,84 @@ +kind: ConfigMap +apiVersion: v1 +data: + schema-version: + #string.used by agent to parse config. supported versions are {v1}. Configs with other schema versions will be rejected by the agent. 
+ v1 + config-version: + #string.used by customer to keep track of this config file's version in their source control/repository (max allowed 10 chars, other chars will be truncated) + ver1 + prometheus-collector-settings: |- + cluster_alias = "" + default-scrape-settings-enabled: |- + kubelet = true + coredns = false + cadvisor = true + kubeproxy = false + apiserver = false + kubestate = true + nodeexporter = true + windowsexporter = false + windowskubeproxy = false + kappiebasic = true + networkobservabilityRetina = true + networkobservabilityHubble = true + networkobservabilityCilium = true + prometheuscollectorhealth = false + controlplane-apiserver = true + controlplane-cluster-autoscaler = false + controlplane-kube-scheduler = false + controlplane-kube-controller-manager = false + controlplane-etcd = true + acstor-capacity-provisioner = true + acstor-metrics-exporter = true + # Regex for which namespaces to scrape through pod annotation based scraping. + # This is none by default. + # Ex: Use 'namespace1|namespace2' to scrape the pods in the namespaces 'namespace1' and 'namespace2'. + pod-annotation-based-scraping: |- + podannotationnamespaceregex = "" + default-targets-metrics-keep-list: |- + kubelet = "" + coredns = "" + cadvisor = "" + kubeproxy = "" + apiserver = "" + kubestate = "" + nodeexporter = "" + windowsexporter = "" + windowskubeproxy = "" + podannotations = "" + kappiebasic = "" + networkobservabilityRetina = "" + networkobservabilityHubble = "" + networkobservabilityCilium = "" + controlplane-apiserver = "" + controlplane-cluster-autoscaler = "" + controlplane-kube-scheduler = "" + controlplane-kube-controller-manager = "" + controlplane-etcd = "" + acstor-capacity-provisioner = "" + acstor-metrics-exporter = "" + minimalingestionprofile = true + default-targets-scrape-interval-settings: |- + kubelet = "30s" + coredns = "30s" + cadvisor = "30s" + kubeproxy = "30s" + apiserver = "30s" + kubestate = "30s" + nodeexporter = "30s" + windowsexporter = "30s" + windowskubeproxy = "30s" + kappiebasic = "30s" + networkobservabilityRetina = "30s" + networkobservabilityHubble = "30s" + networkobservabilityCilium = "30s" + prometheuscollectorhealth = "30s" + acstor-capacity-provisioner = "30s" + acstor-metrics-exporter = "30s" + podannotations = "30s" + debug-mode: |- + enabled = false +metadata: + name: ama-metrics-settings-configmap + namespace: kube-system \ No newline at end of file diff --git a/scenarios/UseIGOnAKS/use-ig-on-aks.md b/scenarios/UseIGOnAKS/use-ig-on-aks.md new file mode 100644 index 000000000..38acf8803 --- /dev/null +++ b/scenarios/UseIGOnAKS/use-ig-on-aks.md @@ -0,0 +1,198 @@ +--- +title: Comprehensive Guide to Using Inspektor Gadget in Kubernetes +description: This Exec Doc provides a detailed walkthrough of a shell script that demonstrates various operations with the Inspektor Gadget in a Kubernetes environment. It explains each functional block, how the gadget plugin is installed, deployed, and used to run examples, export metrics, and verify configurations. +ms.topic: article +ms.date: 03/19/2025 +author: yourgithubusername +ms.author: yourmsalias +ms.custom: innovation-engine, kubernetes, gadget, monitoring +--- + +# Detailed Walkthrough: Inspektor Gadget Shell Script + +This document provides a step-by-step explanation of the provided shell script. The script demonstrates several operations related to the Inspektor Gadget in a Kubernetes environment. Each section below explains the purpose and the functionality of the code blocks that follow. 
The commands remain unchanged; only the documentation around them has been added for clarity. + +--- + +## Connecting to Your AKS Cluster + +Before running any commands, ensure that your local environment is connected to the desired AKS (Azure Kubernetes Service) cluster. Use the following command to retrieve the cluster credentials and configure `kubectl` to interact with the cluster: + +```bash +# Retrieve AKS cluster credentials: +az aks get-credentials --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME +``` + +After executing this command, `kubectl` will be configured to communicate with the specified AKS cluster. + +--- + +## Viewing AKS Cluster Nodes + +In this section, the script lists the nodes of the current AKS (Azure Kubernetes Service) cluster using the Kubernetes CLI (`kubectl`). This allows you to verify that your cluster is up and running and view the status of the nodes. + +```bash +# Show AKS cluster: + +kubectl get nodes +``` + +After executing this block, the output will display the current nodes in the cluster along with their status, roles, and version information. + +--- + +## Installing the Inspektor Gadget Plugin + +This section installs the Inspektor Gadget plugin using `kubectl krew`. The gadget plugin extends kubectl with additional functionalities, enabling more effective monitoring and tracing within the cluster. + +```bash +# Install kubectl plugin: + +kubectl krew install gadget +``` + +Once installed, the gadget plugin is available for subsequent commands in the script. + +--- + +## Verifying Gadget Plugin Version + +Here, the script verifies the version and server status of the gadget plugin. It checks that the plugin is correctly installed and provides details about its client and server versions. The expected output is a client version (e.g., vX.Y.Z) and a note that the server version is not available. + +```bash +# Verify version and server status: + +kubectl gadget version +# Expected output: +# Client version: vX.Y.Z +# Server version: not available +``` + +This output helps determine that the gadget plugin is operational on your local client. You may compare the shown version with the expected output. + +--- + +## Deploying Inspektor Gadget and Re-Verification + +In this section, the script deploys the Inspektor Gadget in the Kubernetes environment. The command includes options to enable the OpenTelemetry (OTEL) metrics listener on the specified address (0.0.0.0:2223). After deploying, the version command is run again to verify that the gadget deployment is correctly configured, even though the server version remains "not available". + +```bash +# Deploy Inspektor Gadget: + +kubectl gadget deploy --otel-metrics-listen --otel-metrics-listen-address 0.0.0.0:2223 + +# Verify version and server status: + +kubectl gadget version +# Expected output: +# Client version: vX.Y.Z +# Server version: not available +``` + +This deployment sets up the gadget to collect the required metrics, and the follow-up version check confirms that the plugin is still active. + +--- + +## Demonstrating Gadget Usage with trace_exec + +This section illustrates different methods to run the gadget plugin using the `trace_exec` example. The commands include: + +1. Running the gadget with a specific trace_exec version. +2. Creating a test pod running Ubuntu in an interactive session, which is automatically removed after exit. +3. Running the gadget with JSON formatted output. +4. Running the gadget with filtering to display only processes with the command matching "bash". 
+ +These examples show various ways to leverage the gadget for tracing executions in the cluster. + +```bash +# Run simple example with trace_exec with a 10-second timeout to prevent indefinite execution: +timeout 5s kubectl gadget run trace_exec || true + +kubectl delete pod demo-pod + +# Create a background pod that will generate events for us to trace: +kubectl run demo-pod --image=ubuntu -- /bin/bash -c "for i in {1..11}; do echo Running commands...; ls -la /; sleep 1; done" + +# Wait briefly for the pod to start generating events +sleep 5 + +# Run gadget with JSON output and timeout +timeout 5s kubectl gadget run trace_exec --output jsonpretty || true + +# Run gadget with filtering and timeout +timeout 5s kubectl gadget run trace_exec --all-namespaces --filter proc.comm=bash || echo "Attachment timed out, continuing with demo" +``` + +Each command demonstrates a different facet of the gadget's capabilities, from initiating traces to filtering outputs based on process names. + +--- + +## Creating Metrics Configuration for Alerting + +In this part of the script, a metrics configuration file is edited. The file (alert-bad-process.yaml) is intended to define rules to generate a metric based on certain events in the cluster. The metric, in this context, is used to track shell executions. + +```bash +# Generate a metric based on these events: + +cat alert-bad-process.yaml +``` + +--- + +## Exporting Metrics and Managing Gadget Lifecycle + +This section deploys the gadget manifest using the YAML file created in the previous section. The command includes several annotations to instruct the gadget to collect metrics. The process is detached so that it runs in the background. Subsequently, the script lists the running gadget instances. + +```bash +# Clean up any existing instance of the same name +kubectl gadget delete alert-bad-process + +# Run gadget manifest to export metrics: +kubectl gadget run -f alert-bad-process.yaml --annotate exec:metrics.collect=true,exec:metrics.implicit-counter.name=shell_executions,exec.k8s.namespace:metrics.type=key,exec.k8s.podname:metrics.type=key,exec.k8s.containername:metrics.type=key --detach +``` + +These commands ensure that metrics are being collected as defined in the YAML manifest and verify that the gadget is running correctly in headless mode. + +--- + +## Verifying Prometheus Configuration for Metrics Collection + +This section checks the managed Prometheus configuration to ensure that it is set up to scrape metrics from the OTEL listener endpoint exposed on each Inspektor Gadget pod. The first command retrieves the relevant configmap, and the second command displays its full YAML definition with a pager for detailed inspection. Review the output to confirm that the configuration contains the expected annotation for pod-based scraping related to the gadget. + +```bash +# Configure managed Prometheus to collect data from the OTEL listener endpoint we expose on each IG pod? 
+# Documentation: https://learn.microsoft.com/en-us/azure/azure-monitor/containers/prometheus-metrics-scrape-configuration?tabs=CRDConfig%2CCRDScrapeConfig%2CConfigFileScrapeConfigBasicAuth%2CConfigFileScrapeConfigTLSAuth#configmaps + +kubectl get configmaps -n kube-system ama-metrics-settings-configmap + +# It should contain: pod-annotation-based-scraping: podannotationnamespaceregex = "gadget" +kubectl get configmaps -n kube-system ama-metrics-settings-configmap -o yaml | grep -A 5 "pod-annotation-based-scraping" +``` + +--- + +## Monitoring, Alerting, and Cleanup + +In the final part of the script, the focus shifts to monitoring and alerting: + +1. It provides guidance for viewing the `shell_executions_total` metric in the Grafana dashboard. +2. It suggests creating a Prometheus group alert with a rule that triggers when `shell_executions_total` exceeds 0. +3. Finally, the script undeploys the Inspektor Gadget to clean up resources. + +```bash +# Show shell_executions_total metric in Grafana dashboard: shell_executions_total +# Documentation: https://learn.microsoft.com/en-us/azure/managed-grafana/overview + +# Create a prometheus group alert with the rule "shell_executions_total > 0" +# Documentation: https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/prometheus-rule-groups + +# Undeploy IG +kubectl gadget undeploy +``` + +These steps ensure that your metrics are visually accessible via Grafana and that alerts are configured for proactive monitoring. The final undeploy command removes the deployed gadget from the cluster, wrapping up the execution workflow. + +## Next Steps +- [Real-world scenarios where Inspektor Gadget can help you](https://go.microsoft.com/fwlink/p/?linkid=2260402#use-cases) +- [Explore the available gadgets](https://go.microsoft.com/fwlink/p/?linkid=2260070) +- [Run your own eBPF program](https://go.microsoft.com/fwlink/p/?linkid=2259865) \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/access-control-managed-azure-ad.md b/scenarios/azure-aks-docs/articles/aks/access-control-managed-azure-ad.md new file mode 100644 index 000000000..6ff342dcd --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/access-control-managed-azure-ad.md @@ -0,0 +1,89 @@ +--- +title: Control cluster access using Conditional Access with AKS-managed Microsoft Entra integration +description: Learn how to access clusters using Conditional Access when integrating Microsoft Entra ID in your Azure Kubernetes Service (AKS) clusters. +ms.topic: concept-article +ms.subservice: aks-integration +ms.date: 06/25/2024 +author: nickomang +ms.author: nickoman +ms.custom: devx-track-azurecli, innovation-engine +--- + +# Control cluster access using Conditional Access with AKS-managed Microsoft Entra integration + +When you integrate Microsoft Entra ID with your AKS cluster, you can use [Conditional Access][aad-conditional-access] for just-in-time requests to control access to your cluster. This article shows you how to enable Conditional Access on your AKS clusters. + +> [!NOTE] +> Microsoft Entra Conditional Access has Microsoft Entra ID P1, P2, or Governance capabilities requiring a Premium P2 SKU. For more on Microsoft Entra ID licenses and SKUs, see [Microsoft Entra ID Governance licensing fundamentals][licensing-fundamentals] and [pricing guide][aad-pricing]. + +## Before you begin + +* See [AKS-managed Microsoft Entra integration](./managed-azure-ad.md) for an overview and setup instructions. 
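Optionally, before you create the policy, you can confirm from the command line that the cluster already has AKS-managed Microsoft Entra integration enabled. A quick check, assuming `$RESOURCE_GROUP` and `$AKS_CLUSTER` are set to your existing cluster (the same variables used in the verification steps later in this article):

```azurecli-interactive
# A non-empty aadProfile indicates that Microsoft Entra integration is configured.
az aks show --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER --query aadProfile --output json
```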
+ +## Use Conditional Access with Microsoft Entra ID and AKS + +1. In the Azure portal, go to the **Microsoft Entra ID** page and select **Enterprise applications**. +1. Select **Conditional Access** > **Policies** > **New policy**. +1. Enter a name for the policy, such as *aks-policy*. +1. Under **Assignments**, select **Users and groups**. Choose the users and groups you want to apply the policy to. In this example, choose the same Microsoft Entra group that has administrator access to your cluster. +1. Under **Cloud apps or actions** > **Include**, select **Select apps**. Search for **Azure Kubernetes Service** and select **Azure Kubernetes Service Microsoft Entra Server**. +1. Under **Access controls** > **Grant**, select **Grant access**, **Require device to be marked as compliant**, and **Require all the selected controls**. +1. Confirm your settings, set **Enable policy** to **On**, and then select **Create**. + +## Verify your Conditional Access policy has been successfully listed + +After implementing your Conditional Access policy, verify that it works as expected by accessing the AKS cluster and checking the sign-in activity. + +1. Get the user credentials to access the cluster using the [`az aks get-credentials`][az-aks-get-credentials] command. + + Assign values to the required environment variables. The AKS cluster and resource group must exist. + + ```azurecli-interactive + export RANDOM_SUFFIX=$(head -c 3 /dev/urandom | xxd -p) + export RESOURCE_GROUP="myResourceGroup$RANDOM_SUFFIX" + export AKS_CLUSTER="myManagedCluster$RANDOM_SUFFIX" + ``` + + Download credentials required to access your AKS cluster. + + ```azurecli-interactive + az aks get-credentials --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER --overwrite-existing + ``` + +1. Follow the instructions to sign in. + +1. View the nodes in the cluster using the `kubectl get nodes` command. + + ```azurecli-interactive + kubectl get nodes + ``` + + Results: + + + + ```output + NAME STATUS ROLES AGE VERSION + aks-nodepool1-xxxxx-vmss000000 Ready agent 3d2h v1.xx.x + aks-nodepool1-xxxxx-vmss000001 Ready agent 3d2h v1.xx.x + ``` + +1. In the Azure portal, navigate to **Microsoft Entra ID** and select **Enterprise applications** > **Activity** > **Sign-ins**. + +1. Under the **Conditional Access** column you should see a status of *Success*. Select the event and then select the **Conditional Access** tab. Your Conditional Access policy will be listed. + +## Next steps + +For more information, see the following articles: + +* Use [kubelogin](https://github.com/Azure/kubelogin) to access features for Azure authentication that aren't available in kubectl. +* [Use Privileged Identity Management (PIM) to control access to your Azure Kubernetes Service (AKS) clusters][pim-aks]. 
+ + +[aad-pricing]: https://azure.microsoft.com/pricing/details/active-directory/ + + +[aad-conditional-access]: /azure/active-directory/conditional-access/overview +[licensing-fundamentals]: /entra/id-governance/licensing-fundamentals +[az-aks-get-credentials]: /cli/azure/aks#az_aks_get_credentials +[pim-aks]: ./privileged-identity-management.md diff --git a/scenarios/azure-aks-docs/articles/aks/access-private-cluster.md b/scenarios/azure-aks-docs/articles/aks/access-private-cluster.md new file mode 100644 index 000000000..f03aa86d9 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/access-private-cluster.md @@ -0,0 +1,215 @@ +--- +title: 'Access a private Azure Kubernetes Service (AKS) cluster using the command invoke or Run command feature' +description: Learn how to access a private Azure Kubernetes Service (AKS) cluster using the Azure CLI command invoke feature or the Azure portal Run command feature. +ms.topic: concept-article +ms.subservice: aks-security +ms.custom: devx-track-azurecli,innovation-engine +ms.date: 06/13/2024 +author: schaffererin +ms.author: schaffererin +--- + +# Access a private Azure Kubernetes Service (AKS) cluster using the command invoke or Run command feature + +When you access a private AKS cluster, you need to connect to the cluster from the cluster virtual network, a peered network, or a configured private endpoint. These approaches require configuring a VPN, Express Route, deploying a *jumpbox* within the cluster virtual network, or creating a private endpoint inside of another virtual network. + +With the Azure CLI, you can use `command invoke` to access private clusters without the need to configure a VPN or Express Route. `command invoke` allows you to remotely invoke commands, like `kubectl` and `helm`, on your private cluster through the Azure API without directly connecting to the cluster. The `Microsoft.ContainerService/managedClusters/runcommand/action` and `Microsoft.ContainerService/managedclusters/commandResults/read` actions control the permissions for using `command invoke`. + +With the Azure portal, you can use the `Run command` feature to run commands on your private cluster. The `Run command` feature uses the same `command invoke` functionality to run commands on your cluster. + +The pod created by the `Run command` provides `kubectl` and `helm` for operating your cluster. `jq`, `xargs`, `grep`, and `awk` are available for Bash support. + +## Before you begin + +Before you begin, make sure you have the following resources and permissions: + +* An existing private cluster. If you don't have one, see [Create a private AKS cluster](./private-clusters.md). +* The Azure CLI version 2.24.0 or later. Run `az --version` to find the version. If you need to install or upgrade, see [Install Azure CLI](/cli/azure/install-azure-cli). +* Access to the `Microsoft.ContainerService/managedClusters/runcommand/action` and `Microsoft.ContainerService/managedclusters/commandResults/read` roles on the cluster. + +### Limitations + +This feature is designed to simplify cluster access and is ***not designed for programmatic access***. If you have a program invoke Kubernetes using `Run command`, the following disadvantages apply: + +* You only get *exitCode* and *text output*, and you lose API level details. +* One extra hop introduces extra failure points. + +The pod created by the `Run command` is hard coded with a `200m CPU` and `500Mi memory` request, and a `500m CPU` and `1Gi memory` limit. 
In rare cases where all your node is packed, the pod can't be scheduled within the ARM API limitation of 60 seconds. This means that the `Run command` would fail, even if it's configured to autoscale. + +`command invoke` runs the commands from your cluster, so any commands run in this manner are subject to your configured networking restrictions and any other configured restrictions. Make sure there are enough nodes and resources in your cluster to schedule this command pod. + +> [!NOTE] +> The output for `command invoke` is limited to 512kB in size. + +## Run commands on your AKS cluster + +### [Azure CLI - `command invoke`](#tab/azure-cli) + +Below are examples of how to use `az aks command invoke` to execute commands against a private AKS cluster. These examples assume you have an existing resource group and AKS cluster. + +#### Use `command invoke` to run a single command + +You can run a command on your cluster using the `az aks command invoke --command` command. The following example command runs the `kubectl get pods -n kube-system` command on the *myPrivateCluster* cluster in *myResourceGroup*. + +First, set environment variables for your resource group and cluster name to use in subsequent commands. + +```bash +export AKS_RESOURCE_GROUP="myResourceGroup" +export AKS_CLUSTER_NAME="myPrivateCluster" +``` + +The environment variables above will allow you to run AKS commands in the next sections without having to rewrite their names. + +To run a single kubectl command on your AKS cluster: + +```azurecli +az aks command invoke \ + --resource-group $AKS_RESOURCE_GROUP \ + --name $AKS_CLUSTER_NAME \ + --command "kubectl get pods -n kube-system" +``` + +#### Use `command invoke` to run multiple commands + +You can also run multiple commands. The following example executes three `helm` commands in sequence on the cluster. + +```azurecli +az aks command invoke \ + --resource-group $AKS_RESOURCE_GROUP \ + --name $AKS_CLUSTER_NAME \ + --command "helm repo add bitnami https://charts.bitnami.com/bitnami && helm repo update && helm install my-release bitnami/nginx" +``` + +#### Use `command invoke` to run commands with an attached file + +When using the `--file` parameter with `az aks command invoke`, the file must exist and be accessible in your current working directory. Below, we create a minimal deployment file for demonstration. + +To run a command with a file attached, first create a Kubernetes manifest file named `deployment.yaml`. The following example creates a small nginx deployment and applies it with `command invoke`: + +```bash +cat < deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-demo +spec: + replicas: 1 + selector: + matchLabels: + app: nginx-demo + template: + metadata: + labels: + app: nginx-demo + spec: + containers: + - name: nginx + image: nginx:1.21.6 + ports: + - containerPort: 80 +EOF + +az aks command invoke \ + --resource-group $AKS_RESOURCE_GROUP \ + --name $AKS_CLUSTER_NAME \ + --command "kubectl apply -f deployment.yaml -n default" \ + --file deployment.yaml +``` + +#### Use `command invoke` to run commands with all files in the current directory attached + +Use only small, necessary files to avoid exceeding system size limits. Below, two minimal YAML files are created before attaching them. 
+
+```bash
+cat <<EOF > deployment.yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: nginx-demo
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: nginx-demo
+  template:
+    metadata:
+      labels:
+        app: nginx-demo
+    spec:
+      containers:
+      - name: nginx
+        image: nginx:1.21.6
+        ports:
+        - containerPort: 80
+EOF
+
+cat <<EOF > configmap.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: nginx-config
+data:
+  welcome-message: "Hello from configmap"
+EOF
+
+az aks command invoke \
+  --resource-group $AKS_RESOURCE_GROUP \
+  --name $AKS_CLUSTER_NAME \
+  --command "kubectl apply -f deployment.yaml -f configmap.yaml -n default" \
+  --file deployment.yaml \
+  --file configmap.yaml
+```
+
+### [Azure portal - `Run command`](#tab/azure-portal)
+
+To get started with `Run command`, navigate to your private cluster in the Azure portal. In the service menu, under **Kubernetes resources**, select **Run command**.
+
+### `Run command` commands
+
+You can use the following kubectl commands with the `Run command` feature:
+
+* `kubectl get nodes`
+* `kubectl get deployments`
+* `kubectl get pods`
+* `kubectl describe nodes`
+* `kubectl describe pod <pod-name>`
+* `kubectl describe deployment <deployment-name>`
+* `kubectl apply -f <file-name>`
+
+### Use `Run command` to run a single command
+
+1. In the Azure portal, navigate to your private cluster.
+2. In the service menu, under **Kubernetes resources**, select **Run command**.
+3. Enter the command you want to run and select **Run**.
+
+### Use `Run command` to run commands with attached files
+
+1. In the Azure portal, navigate to your private cluster.
+2. In the service menu, under **Kubernetes resources**, select **Run command**.
+3. Select **Attach files** > **Browse for files**.
+
+   :::image type="content" source="media/access-private-cluster/azure-portal-run-command-attach-files.png" alt-text="Screenshot of attaching files to the Azure portal Run command.":::
+
+4. Select the file(s) you want to attach and then select **Attach**.
+5. Enter the command you want to run and select **Run**.
+
+## Disable `Run command`
+
+Currently, the only way you can disable the `Run command` feature is by setting [`.properties.apiServerAccessProfile.disableRunCommand` to `true`](/rest/api/aks/managed-clusters/create-or-update).
+
+---
+
+## Troubleshooting
+
+For information on the most common issues with `az aks command invoke` and how to fix them, see [Resolve `az aks command invoke` failures][command-invoke-troubleshoot].
+
+## Next steps
+
+In this article, you learned how to access a private cluster and run commands on that cluster.
For more information on AKS clusters, see the following articles: + +* [Use a private endpoint connection in AKS](./private-clusters.md#use-a-private-endpoint-connection) +* [Virtual networking peering in AKS](./private-clusters.md#virtual-network-peering) +* [Hub and spoke with custom DNS in AKS](./private-clusters.md#hub-and-spoke-with-custom-dns) + + +[command-invoke-troubleshoot]: /troubleshoot/azure/azure-kubernetes/resolve-az-aks-command-invoke-failures \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/airflow-create-infrastructure.md b/scenarios/azure-aks-docs/articles/aks/airflow-create-infrastructure.md new file mode 100644 index 000000000..7953b8bf8 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/airflow-create-infrastructure.md @@ -0,0 +1,240 @@ +--- +title: Create the infrastructure for deploying Apache Airflow on Azure Kubernetes Service (AKS) +description: In this article, you create the infrastructure needed to deploy Apache Airflow on Azure Kubernetes Service (AKS) using Helm. +ms.topic: how-to +ms.custom: azure-kubernetes-service +ms.date: 12/19/2024 +author: schaffererin +ms.author: schaffererin +--- + +# Create the infrastructure for running Apache Airflow on Azure Kubernetes Service (AKS) + +In this article, you create the infrastructure required to run Apache Airflow on Azure Kubernetes Service (AKS). + +## Prerequisites + +* If you haven't already, review the [Overview for deploying an Apache Airflow cluster on Azure Kubernetes Service (AKS)](./airflow-overview.md). +* An Azure subscription. If you don't have one, create a [free account](https://azure.microsoft.com/free/?WT.mc_id=A261C142F). +* Azure CLI version 2.61.0. To install or upgrade, see [Install Azure CLI](/cli/azure/install-azure-cli). +* Helm version 3 or later. To install, see [Installing Helm](https://helm.sh/docs/intro/install/). +* `kubectl`, which is installed in Azure Cloud Shell by default. +* GitHub Repo to store Airflow Dags. +* Docker installed on your local machine. To install, see [Get Docker](https://docs.docker.com/get-docker/). + +## Set environment variables + +* Set the required environment variables for use throughout this guide: + + ```bash + export random=$(echo $RANDOM | tr '[0-9]' '[a-z]') + export MY_LOCATION=canadacentral + export MY_RESOURCE_GROUP_NAME=apache-airflow-rg$(echo $random) + export MY_IDENTITY_NAME=airflow-identity-123$(echo $random) + export MY_ACR_REGISTRY=mydnsrandomname$(echo $random) + export MY_KEYVAULT_NAME=airflow-vault-$(echo $random)-kv + export MY_CLUSTER_NAME=apache-airflow-aks$(echo $random) + export SERVICE_ACCOUNT_NAME=airflow$(echo $random) + export SERVICE_ACCOUNT_NAMESPACE=airflow + export AKS_AIRFLOW_NAMESPACE=airflow + export AKS_AIRFLOW_CLUSTER_NAME=cluster-aks-airflow$(echo $random) + export AKS_AIRFLOW_LOGS_STORAGE_ACCOUNT_NAME=airflowsasa$(echo $random) + export AKS_AIRFLOW_LOGS_STORAGE_CONTAINER_NAME=airflow-logs$(echo $random) + export AKS_AIRFLOW_LOGS_STORAGE_SECRET_NAME=storage-account-credentials$(echo $random) + ``` + +## Create a resource group + +* Create a resource group using the [`az group create`](/cli/azure/group#az-group-create) command. 
+ + ```azurecli-interactive + az group create --name $MY_RESOURCE_GROUP_NAME --location $MY_LOCATION --output table + ``` + + Example output: + + ```output + Location Name + ------------- ----------------- + $MY_LOCATION $MY_RESOURCE_GROUP_NAME + ``` + +## Create an identity to access secrets in Azure Key Vault + +In this step, we create a user-assigned managed identity that the External Secrets Operator uses to access the Airflow passwords stored in Azure Key Vault. + +* Create a user-assigned managed identity using the [`az identity create`](/cli/azure/identity#az-identity-create) command. + + ```azurecli-interactive + az identity create --name $MY_IDENTITY_NAME --resource-group $MY_RESOURCE_GROUP_NAME --output table + export MY_IDENTITY_NAME_ID=$(az identity show --name $MY_IDENTITY_NAME --resource-group $MY_RESOURCE_GROUP_NAME --query id --output tsv) + export MY_IDENTITY_NAME_PRINCIPAL_ID=$(az identity show --name $MY_IDENTITY_NAME --resource-group $MY_RESOURCE_GROUP_NAME --query principalId --output tsv) + export MY_IDENTITY_NAME_CLIENT_ID=$(az identity show --name $MY_IDENTITY_NAME --resource-group $MY_RESOURCE_GROUP_NAME --query clientId --output tsv) + ``` + + Example output: + + ```output + ClientId Location Name PrincipalId ResourceGroup TenantId + ------------------------------------ ------------- -------------------- ------------------------------------ ----------------------- ------------------------------------ + 00001111-aaaa-2222-bbbb-3333cccc4444 $MY_LOCATION $MY_IDENTITY_NAME aaaaaaaa-bbbb-cccc-1111-222222222222 $MY_RESOURCE_GROUP_NAME aaaabbbb-0000-cccc-1111-dddd2222eeee + ``` + +## Create an Azure Key Vault instance + +* Create an Azure Key Vault instance using the [`az keyvault create`](/cli/azure/keyvault#az-keyvault-create) command. + + ```azurecli-interactive + az keyvault create --name $MY_KEYVAULT_NAME --resource-group $MY_RESOURCE_GROUP_NAME --location $MY_LOCATION --enable-rbac-authorization false --output table + export KEYVAULTID=$(az keyvault show --name $MY_KEYVAULT_NAME --query "id" --output tsv) + export KEYVAULTURL=$(az keyvault show --name $MY_KEYVAULT_NAME --query "properties.vaultUri" --output tsv) + ``` + + Example output: + + ```output + Location Name ResourceGroup + ------------- -------------------- ---------------------- + $MY_LOCATION $MY_KEYVAULT_NAME $MY_RESOURCE_GROUP_NAME + ``` + +## Create an Azure Container Registry + +* Create an Azure Container Registry to store and manage your container images using the [`az acr create`](/cli/azure/acr#az-acr-create) command. + + ```azurecli-interactive + az acr create \ + --name ${MY_ACR_REGISTRY} \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --sku Premium \ + --location $MY_LOCATION \ + --admin-enabled true \ + --output table + export MY_ACR_REGISTRY_ID=$(az acr show --name $MY_ACR_REGISTRY --resource-group $MY_RESOURCE_GROUP_NAME --query id --output tsv) + ``` + + Example output: + + ```output + NAME RESOURCE GROUP LOCATION SKU LOGIN SERVER CREATION DATE ADMIN ENABLED + -------------------- ---------------------- ------------- ------- ------------------------------- -------------------- --------------- + mydnsrandomnamebfbje $MY_RESOURCE_GROUP_NAME $MY_LOCATION Premium mydnsrandomnamebfbje.azurecr.io 2024-11-07T00:32:48Z True + ``` + +## Create an Azure storage account + +* Create an Azure Storage Account to store the Airflow logs using the [`az acr create`](/cli/azure/storage/account#az-storage-account-create) command. 
+ + ```azurecli-interactive + az storage account create --name $AKS_AIRFLOW_LOGS_STORAGE_ACCOUNT_NAME --resource-group $MY_RESOURCE_GROUP_NAME --location $MY_LOCATION --sku Standard_ZRS --output table + export AKS_AIRFLOW_LOGS_STORAGE_ACCOUNT_KEY=$(az storage account keys list --account-name $AKS_AIRFLOW_LOGS_STORAGE_ACCOUNT_NAME --query "[0].value" -o tsv) + az storage container create --name $AKS_AIRFLOW_LOGS_STORAGE_CONTAINER_NAME --account-name $AKS_AIRFLOW_LOGS_STORAGE_ACCOUNT_NAME --output table --account-key $AKS_AIRFLOW_LOGS_STORAGE_ACCOUNT_KEY + az keyvault secret set --vault-name $MY_KEYVAULT_NAME --name AKS-AIRFLOW-LOGS-STORAGE-ACCOUNT-NAME --value $AKS_AIRFLOW_LOGS_STORAGE_ACCOUNT_NAME + az keyvault secret set --vault-name $MY_KEYVAULT_NAME --name AKS-AIRFLOW-LOGS-STORAGE-ACCOUNT-KEY --value $AKS_AIRFLOW_LOGS_STORAGE_ACCOUNT_KEY + ``` + + Example output: + + ```output + AccessTier AllowBlobPublicAccess AllowCrossTenantReplication CreationTime EnableHttpsTrafficOnly Kind Location MinimumTlsVersion Name PrimaryLocation ProvisioningState ResourceGroup StatusOfPrimary + ------------ ----------------------- ----------------------------- -------------------------------- ------------------------ --------- ------------- ------------------- ---------------- ----------------- ------------------- ----------------- ----------------- + Hot False False 2024-11-07T00:22:13.323104+00:00 True StorageV2 $MY_LOCATION TLS1_0 airflowsasabfbje $MY_LOCATION Succeeded $MY_RESOURCE_GROUP_NAME available + Created + --------- + True + ``` + +## Create an AKS cluster + +In this step, we create an AKS cluster with workload identity and OIDC issuer enabled. The workload identity gives the External Secrets Operator service account permission to access the Airflow passwords stored in your key vault. + +1. Create an AKS cluster using the [`az aks create`](/cli/azure/aks#az-aks-create) command. + + ```azurecli-interactive + az aks create \ + --location $MY_LOCATION \ + --name $MY_CLUSTER_NAME \ + --tier standard \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --network-plugin azure \ + --node-vm-size Standard_DS4_v2 \ + --node-count 3 \ + --auto-upgrade-channel stable \ + --node-os-upgrade-channel NodeImage \ + --attach-acr ${MY_ACR_REGISTRY} \ + --enable-oidc-issuer \ + --enable-blob-driver \ + --enable-workload-identity \ + --zones 1 2 3 \ + --generate-ssh-keys \ + --output table + ``` + + Example output: + + ```output + AzurePortalFqdn CurrentKubernetesVersion DisableLocalAccounts DnsPrefix EnableRbac Fqdn KubernetesVersion Location MaxAgentPools Name NodeResourceGroup ProvisioningState ResourceGroup ResourceUid SupportPlan + ------------------------------------------------------------------------------ -------------------------- ---------------------- ---------------------------------- ------------ ----------------------------------------------------------------------- ------------------- ------------- --------------- ------------------ ----------------------------------------------------- ------------------- ----------------------- ------------------------------------ ------------------ + apache-air-apache-airflow-r-363a0a-rhf6saad.portal.hcp.$MY_LOCATION.azmk8s.io 1.29.9 False apache-air-apache-airflow-r-363a0a True apache-air-apache-airflow-r-363a0a-rhf6saad.hcp.$MY_LOCATION.azmk8s.io 1.29 $MY_LOCATION 100 $MY_CLUSTER_NAME MC_apache-airflow-rg_apache-airflow-aks_$MY_LOCATION Succeeded $MY_RESOURCE_GROUP_NAME b1b1b1b1-cccc-dddd-eeee-f2f2f2f2f2f2 KubernetesOfficial + ``` + +2. 
Get the OIDC issuer URL to use for the workload identity configuration using the [`az aks show`](/cli/azure/aks#az-aks-show) command. + + ```azurecli-interactive + export OIDC_URL=$(az aks show --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_CLUSTER_NAME --query oidcIssuerProfile.issuerUrl --output tsv) + ``` + +3. Assign the `AcrPull` role to the kubelet identity using the [`az role assignment create`](/cli/azure/role/assignment#az-role-assignment-create) command. + + ```azurecli-interactive + export KUBELET_IDENTITY=$(az aks show -g $MY_RESOURCE_GROUP_NAME --name $MY_CLUSTER_NAME --output tsv --query identityProfile.kubeletidentity.objectId) + az role assignment create \ + --assignee ${KUBELET_IDENTITY} \ + --role "AcrPull" \ + --scope ${MY_ACR_REGISTRY_ID} \ + --output table + ``` + + Example output: + + ```output + CreatedBy CreatedOn Name PrincipalId PrincipalName PrincipalType ResourceGroup RoleDefinitionId RoleDefinitionName Scope UpdatedBy UpdatedOn + ------------------------------------ -------------------------------- ------------------------------------ ------------------------------------ ------------------------------------ ---------------- ----------------------- ------------------------------------------------------------------------------------------------------------------------------------------ -------------------- ---------------------------------------------------------------------------------------------------------------------------------------------------------- ------------------------------------ -------------------------------- + ccccdddd-2222-eeee-3333-ffff4444aaaa 2024-11-07T00:43:26.905445+00:00 b1b1b1b1-cccc-dddd-eeee-f2f2f2f2f2f2 bbbbbbbb-cccc-dddd-2222-333333333333 cccccccc-dddd-eeee-3333-444444444444 ServicePrincipal $MY_RESOURCE_GROUP_NAME /subscriptions/aaaa0a0a-bb1b-cc2c-dd3d-eeeeee4e4e4e/providers/Microsoft.Authorization/roleDefinitions/7f951dda-4ed3-4680-a7ca-43fe172d538d AcrPull /subscriptions/aaaa0a0a-bb1b-cc2c-dd3d-eeeeee4e4e4e/resourceGroups/$MY_RESOURCE_GROUP_NAME/providers/Microsoft.ContainerRegistry/registries/mydnsrandomnamebfbje ccccdddd-2222-eeee-3333-ffff4444aaaa 2024-11-07T00:43:26.905445+00:00 + ``` + +## Connect to the AKS cluster + +* Configure `kubectl` to connect to your AKS cluster using the [`az aks get-credentials`](/cli/azure/aks#az-aks-get-credentials) command. + + ```azurecli-interactive + az aks get-credentials --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_CLUSTER_NAME --overwrite-existing --output table + ``` + +## Upload Apache Airflow images to your container registry + +In this section, we download the Apache Airflow images from Docker Hub and upload them to Azure Container Registry. This step ensures that the images are available in your private registry and can be used in your AKS cluster. We don't recommend consuming the public image in a production environment. + +* Import the Airflow images from Docker Hub and upload them to your container registry using the [`az acr import`](/cli/azure/acr#az-acr-import) command. 
+ + ```azurecli-interactive + az acr import --name $MY_ACR_REGISTRY --source docker.io/apache/airflow:airflow-pgbouncer-2024.01.19-1.21.0 --image airflow:airflow-pgbouncer-2024.01.19-1.21.0 + az acr import --name $MY_ACR_REGISTRY --source docker.io/apache/airflow:airflow-pgbouncer-exporter-2024.06.18-0.17.0 --image airflow:airflow-pgbouncer-exporter-2024.06.18-0.17.0 + az acr import --name $MY_ACR_REGISTRY --source docker.io/bitnami/postgresql:16.1.0-debian-11-r15 --image postgresql:16.1.0-debian-11-r15 + az acr import --name $MY_ACR_REGISTRY --source quay.io/prometheus/statsd-exporter:v0.26.1 --image statsd-exporter:v0.26.1 + az acr import --name $MY_ACR_REGISTRY --source docker.io/apache/airflow:2.9.3 --image airflow:2.9.3 + az acr import --name $MY_ACR_REGISTRY --source registry.k8s.io/git-sync/git-sync:v4.1.0 --image git-sync:v4.1.0 + ``` + +## Next step + +> [!div class="nextstepaction"] +> [Deploy Apache Airflow on Azure Kubernetes Service (AKS)](./airflow-deploy.md) + +## Contributors + +*Microsoft maintains this article. The following contributors originally wrote it:* + +* Don High | Principal Customer Engineer +* Satya Chandragiri | Senior Digital Cloud Solution Architect +* Erin Schaffer | Content Developer 2 \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/airflow-deploy.md b/scenarios/azure-aks-docs/articles/aks/airflow-deploy.md new file mode 100644 index 000000000..5fbbb7bd2 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/airflow-deploy.md @@ -0,0 +1,524 @@ +--- +title: Configure and deploy Apache Airflow on Azure Kubernetes Service (AKS) +description: In this article, you configure and deploy Apache Airflow on Azure Kubernetes Service (AKS) using Helm. +ms.topic: how-to +ms.custom: azure-kubernetes-service +ms.date: 12/19/2024 +author: schaffererin +ms.author: schaffererin +--- + +# Configure and deploy Airflow on Azure Kubernetes Service (AKS) + +In this article, you configure and deploy Apache Airflow on Azure Kubernetes Service (AKS) using Helm. + +## Configure workload identity + +1. Create a namespace for the Airflow cluster using the `kubectl create namespace` command. + + ```bash + kubectl create namespace ${AKS_AIRFLOW_NAMESPACE} --dry-run=client --output yaml | kubectl apply -f - + ``` + + Example output: + + ```output + namespace/airflow created + ``` + +2. Create a service account and configure workload identity using the `kubectl apply` command. + + ```bash + export TENANT_ID=$(az account show --query tenantId -o tsv) + cat < + ```output + serviceaccount/airflow created + ``` + +## Install the External Secrets Operator + +In this section, we use Helm to install the External Secrets Operator. The External Secrets Operator is a Kubernetes operator that manages the lifecycle of external secrets stored in external secret stores like Azure Key Vault. + +1. Add the External Secrets Helm repository and update the repository using the `helm repo add` and `helm repo update` commands. + + ```bash + helm repo add external-secrets https://charts.external-secrets.io + helm repo update + ``` + + Example output: + + ```output + Hang tight while we grab the latest from your chart repositories... + ...Successfully got an update from the "external-secrets" chart repository + ``` + +2. Install the External Secrets Operator using the `helm install` command. 
+ + ```bash + helm install external-secrets \ + external-secrets/external-secrets \ + --namespace ${AKS_AIRFLOW_NAMESPACE} \ + --create-namespace \ + --set installCRDs=true \ + --wait + ``` + + Example output: + + ```output + NAME: external-secrets + LAST DEPLOYED: Thu Nov 7 11:16:07 2024 + NAMESPACE: airflow + STATUS: deployed + REVISION: 1 + TEST SUITE: None + NOTES: + external-secrets has been deployed successfully in namespace airflow! + + In order to begin using ExternalSecrets, you will need to set up a SecretStore + or ClusterSecretStore resource (for example, by creating a 'vault' SecretStore). + + More information on the different types of SecretStores and how to configure them + can be found in our Github: https://github.com/external-secrets/external-secrets + ``` + +### Create secrets + +1. Create a `SecretStore` resource to access the Airflow passwords stored in your key vault using the `kubectl apply` command. + + ```bash + kubectl apply -f - < + ```output + secretstore.external-secrets.io/azure-store created + ``` + +2. Create an `ExternalSecret` resource, which creates a Kubernetes `Secret` in the `airflow` namespace with the `Airflow` secrets stored in your key vault, using the `kubectl apply` command. + + ```bash + kubectl apply -f - < + ```output + externalsecret.external-secrets.io/airflow-aks-azure-logs-secrets created + ``` + +3. Create a federated credential using the `az identity federated-credential create` command. + + ```azurecli-interactive + az identity federated-credential create \ + --name external-secret-operator \ + --identity-name ${MY_IDENTITY_NAME} \ + --resource-group ${MY_RESOURCE_GROUP_NAME} \ + --issuer ${OIDC_URL} \ + --subject system:serviceaccount:${AKS_AIRFLOW_NAMESPACE}:${SERVICE_ACCOUNT_NAME} \ + --output table + ``` + + Example output: + + ```output + Issuer Name ResourceGroup Subject + ----------------------------------------------------------------------------------------------------------------------- ------------------------ ----------------------- ------------------------------------- + https://$MY_LOCATION.oic.prod-aks.azure.com/c2c2c2c2-dddd-eeee-ffff-a3a3a3a3a3a3/aaaa0a0a-bb1b-cc2c-dd3d-eeeeee4e4e4e/ external-secret-operator $MY_RESOURCE_GROUP_NAME system:serviceaccount:airflow:airflow + ``` + +4. Give permission to the user-assigned identity to access the secret using the [`az keyvault set-policy`](/cli/azure/keyvault#az-keyvault-set-policy) command. + + ```azurecli-interactive + az keyvault set-policy --name $MY_KEYVAULT_NAME --object-id $MY_IDENTITY_NAME_PRINCIPAL_ID --secret-permissions get --output table + ``` + + Example output: + + ```output + Location Name ResourceGroup + ------------- ---------------------- ----------------------- + $MY_LOCATION $MY_KEYVAULT_NAME $MY_RESOURCE_GROUP_NAME + ``` + +## Create a persistent volume for Apache Airflow logs + +* Create a persistent volume using the `kubectl apply` command. + + ```bash + kubectl apply -f - < airflow_values.yaml + + images: + airflow: + repository: $MY_ACR_REGISTRY.azurecr.io/airflow + tag: 2.9.3 + # Specifying digest takes precedence over tag. + digest: ~ + pullPolicy: IfNotPresent + # To avoid images with user code, you can turn this to 'true' and + # all the 'run-airflow-migrations' and 'wait-for-airflow-migrations' containers/jobs + # will use the images from 'defaultAirflowRepository:defaultAirflowTag' values + # to run and wait for DB migrations . 
+ useDefaultImageForMigration: false + # timeout (in seconds) for airflow-migrations to complete + migrationsWaitTimeout: 60 + pod_template: + # Note that `images.pod_template.repository` and `images.pod_template.tag` parameters + # can be overridden in `config.kubernetes` section. So for these parameters to have effect + # `config.kubernetes.worker_container_repository` and `config.kubernetes.worker_container_tag` + # must be not set . + repository: $MY_ACR_REGISTRY.azurecr.io/airflow + tag: 2.9.3 + pullPolicy: IfNotPresent + flower: + repository: $MY_ACR_REGISTRY.azurecr.io/airflow + tag: 2.9.3 + pullPolicy: IfNotPresent + statsd: + repository: $MY_ACR_REGISTRY.azurecr.io/statsd-exporter + tag: v0.26.1 + pullPolicy: IfNotPresent + pgbouncer: + repository: $MY_ACR_REGISTRY.azurecr.io/airflow + tag: airflow-pgbouncer-2024.01.19-1.21.0 + pullPolicy: IfNotPresent + pgbouncerExporter: + repository: $MY_ACR_REGISTRY.azurecr.io/airflow + tag: airflow-pgbouncer-exporter-2024.06.18-0.17.0 + pullPolicy: IfNotPresent + gitSync: + repository: $MY_ACR_REGISTRY.azurecr.io/git-sync + tag: v4.1.0 + pullPolicy: IfNotPresent + + + # Airflow executor + executor: "KubernetesExecutor" + + # Environment variables for all airflow containers + env: + - name: ENVIRONMENT + value: dev + + extraEnv: | + - name: AIRFLOW__CORE__DEFAULT_TIMEZONE + value: 'America/New_York' + + # Configuration for postgresql subchart + # Not recommended for production! Instead, spin up your own Postgresql server and use the `data` attribute in this + # yaml file. + postgresql: + enabled: true + + # Enable pgbouncer. See https://airflow.apache.org/docs/helm-chart/stable/production-guide.html#pgbouncer + pgbouncer: + enabled: true + + dags: + gitSync: + enabled: true + repo: https://github.com/donhighmsft/airflowexamples.git + branch: main + rev: HEAD + depth: 1 + maxFailures: 0 + subPath: "dags" + # sshKeySecret: airflow-git-ssh-secret + # knownHosts: | + # github.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk= + + logs: + persistence: + enabled: true + existingClaim: pvc-airflow-logs + storageClassName: azureblob-fuse-premium + + # We disable the log groomer sidecar because we use Azure Blob Storage for logs, with lifecyle policy set. + triggerer: + logGroomerSidecar: + enabled: false + + scheduler: + logGroomerSidecar: + enabled: false + + workers: + logGroomerSidecar: + enabled: false + + EOF + ``` + +2. Add the Apache Airflow Helm repository and update the repository using the `helm repo add` and `helm repo update` commands. + + ```bash + helm repo add apache-airflow https://airflow.apache.org + helm repo update + ``` + + Example output: + + ```output + "apache-airflow" has been added to your repositories + Hang tight while we grab the latest from your chart repositories... + ...Successfully got an update from the "apache-airflow" chart repository + ``` + +3. Search the Helm repository for the Apache Airflow chart using the `helm search repo` command. 
+
+    ```bash
+    helm search repo airflow
+    ```
+
+    Example output:
+
+    ```output
+    NAME                    CHART VERSION   APP VERSION     DESCRIPTION
+    apache-airflow/airflow  1.15.0          2.9.3           The official Helm chart to deploy Apache Airflo...
+    ```
+
+4. Install the Apache Airflow chart using the `helm install` command, passing in the `airflow_values.yaml` values file.
+
+    ```bash
+    helm install airflow apache-airflow/airflow \
+      --namespace ${AKS_AIRFLOW_NAMESPACE} \
+      --create-namespace \
+      -f airflow_values.yaml \
+      --wait
+    ```
+
+    Example output:
+
+    ```output
+    NAME: airflow
+    LAST DEPLOYED: Fri Nov 8 11:59:43 2024
+    NAMESPACE: airflow
+    STATUS: deployed
+    REVISION: 1
+    TEST SUITE: None
+    NOTES:
+    Thank you for installing Apache Airflow 2.9.3!
+
+    Your release is named airflow.
+    You can now access your dashboard(s) by executing the following command(s) and visiting the corresponding port at localhost in your browser:
+
+    Airflow Webserver: kubectl port-forward svc/airflow-webserver 8080:8080 --namespace airflow
+    Default Webserver (Airflow UI) Login credentials:
+        username: admin
+        password: admin
+    Default Postgres connection credentials:
+        username: postgres
+        password: postgres
+        port: 5432
+
+    You can get Fernet Key value by running the following:
+
+    echo Fernet Key: $(kubectl get secret --namespace airflow airflow-fernet-key -o jsonpath="{.data.fernet-key}" | base64 --decode)
+
+    ###########################################################
+    #  WARNING: You should set a static webserver secret key  #
+    ###########################################################
+
+    You are using a dynamically generated webserver secret key, which can lead to
+    unnecessary restarts of your Airflow components.
+
+    Information on how to set a static webserver secret key can be found here:
+    https://airflow.apache.org/docs/helm-chart/stable/production-guide.html#webserver-secret-key
+    ```
+
+5. Verify the installation using the `kubectl get pods` command.
+
+    ```bash
+    kubectl get pods -n airflow
+    ```
+
+    Example output:
+
+    ```output
+    NAME                                                READY   STATUS      RESTARTS   AGE
+    airflow-create-user-kklqf                           1/1     Running     0          12s
+    airflow-pgbouncer-d7bf9f649-25fnt                   2/2     Running     0          61s
+    airflow-postgresql-0                                1/1     Running     0          61s
+    airflow-run-airflow-migrations-zns2b                0/1     Completed   0          60s
+    airflow-scheduler-5c45c6dbdd-7t6hv                  1/2     Running     0          61s
+    airflow-statsd-6df8564664-6rbw8                     1/1     Running     0          61s
+    airflow-triggerer-0                                 2/2     Running     0          61s
+    airflow-webserver-7df76f944c-vcd5s                  0/1     Running     0          61s
+    external-secrets-748f44c8b8-w7qrk                   1/1     Running     0          3h6m
+    external-secrets-cert-controller-57b9f4cb7c-vl4m8   1/1     Running     0          3h6m
+    external-secrets-webhook-5954b69786-69rlp           1/1     Running     0          3h6m
+    ```
+
+## Access Airflow UI
+
+1. Securely access the Airflow UI through port-forwarding using the `kubectl port-forward` command.
+
+    `kubectl port-forward svc/airflow-webserver 8080:8080 -n airflow`
+
+2. Open your browser and navigate to `localhost:8080` to access the Airflow UI.
+3. Use the default webserver URL and login credentials provided during the Airflow Helm chart installation to log in.
+4. Explore and manage your workflows securely through the Airflow UI.
+
+## Integrate Git with Airflow
+
+**Integrating Git with Apache Airflow** enables seamless version control and streamlined management of your workflow definitions, ensuring that all DAGs are both organized and easily auditable.
+
+1. **Set up a Git repository for DAGs**.
Create a dedicated Git repository to house all your Airflow DAG definitions. This repository serves as the central source of truth for your workflows, allowing you to manage, track, and collaborate on DAGs effectively. +2. **Configure Airflow to sync DAGs from Git**. Update Airflow’s configuration to automatically pull DAGs from your Git repository by setting the Git repository URL and any required authentication credentials directly in Airflow’s configuration files or through Helm chart values. This setup enables automated synchronization of DAGs, ensuring that Airflow is always up to date with the latest version of your workflows. + +This integration enhances the development and deployment workflow by introducing full version control, enabling rollbacks, and supporting team collaboration in a production-grade setup. + +## Make your Airflow on Kubernetes production-grade + +The following best practices can help you make your **Apache Airflow on Kubernetes** deployment production-grade: + +* Ensure you have a robust setup focused on scalability, security, and reliability. +* Use dedicated, autoscaling nodes, and select a resilient executor like **KubernetesExecutor**, **CeleryExecutor**, or **CeleryKubernetesExecutor**. +* Use a managed, high-availability database back end like MySQL or [PostgreSQL](./deploy-postgresql-ha.md). +* Establish comprehensive monitoring and centralized logging to maintain performance insights. +* Secure your environment with network policies, SSL, and Role-Based Access Control (RBAC), and configure Airflow components (Scheduler, Web Server, Workers) for high availability. +* Implement CI/CD pipelines for smooth DAG deployment, and set up regular backups for disaster recovery. + +## Next steps + +To learn more about deploy open-source software on Azure Kubernetes Service (AKS), see the following articles: + +* [Deploy a MongoDB cluster on Azure Kubernetes Service (AKS)](./mongodb-overview.md) +* [Deploy a highly available PostgreSQL database on Azure Kubernetes Service (AKS)](./postgresql-ha-overview.md) +* [Deploy a Valkey cluster on Azure Kubernetes Service (AKS)](./valkey-overview.md) + +## Contributors + +*Microsoft maintains this article. The following contributors originally wrote it:* + +* Don High | Principal Customer Engineer +* Satya Chandragiri | Senior Digital Cloud Solution Architect +* Erin Schaffer | Content Developer 2 \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/aks-migration.md b/scenarios/azure-aks-docs/articles/aks/aks-migration.md new file mode 100644 index 000000000..c2873003a --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/aks-migration.md @@ -0,0 +1,308 @@ +--- +title: Migrate to Azure Kubernetes Service (AKS) +description: This article shows you how to migrate to Azure Kubernetes Service (AKS). +ms.topic: concept-article +ms.date: 06/12/2024 +author: your-github-username +ms.author: your-alias +ms.custom: mvc, devx-track-azurecli, innovation-engine +ms.collection: + - migration +--- + +# Migrate to Azure Kubernetes Service (AKS) + +To help you plan and execute a successful migration to Azure Kubernetes Service (AKS), this guide provides details for the current recommended AKS configuration. While this article doesn't cover every scenario, it contains links to more detailed information for planning a successful migration. 
+ +In this article, we summarize migration details for: + +> [!div class="checklist"] +> +> * Containerizing applications through Azure Migrate +> * AKS with Azure Load Balancer (Standard) and Virtual Machine Scale Sets +> * Existing attached Azure services +> * Ensure valid quotas +> * High availability and business continuity +> * Considerations for stateless applications +> * Considerations for stateful applications +> * Deployment of your cluster configuration + +> [!NOTE] +> Depending on your scenario, the following open-source tools might help with your migration: +> +> * [Velero](https://velero.io/) (Requires Kubernetes 1.7+) +> * [Azure Kube CLI extension](https://github.com/yaron2/azure-kube-cli) + +## Before you begin + +* Ensure your target Kubernetes version is within the supported window for AKS. Older versions may not be within the supported range and require a version upgrade for AKS support. For more information, see [AKS supported Kubernetes versions](./supported-kubernetes-versions.md). +* If you're migrating to a newer version of Kubernetes, review the [Kubernetes version and version skew support policy](https://kubernetes.io/docs/setup/release/version-skew-policy/#supported-versions). + +An important practice that you should include as part of your migration process is remembering to follow commonly used deployment and testing patterns. Testing your application before deployment is an important step to ensure its quality, functionality, and compatibility with the target environment. It can help you identify and fix any errors, bugs, or issues that might affect the performance, security, or usability of the application or underlying infrastructure. + +## Use Azure Migrate to migrate your applications to AKS + +Azure Migrate offers a unified platform to assess and migrate to Azure on-premises servers, infrastructure, applications, and data. For AKS, you can use Azure Migrate for the following tasks: + +* [Containerizing ASP.NET applications and migrating to AKS](/azure/migrate/tutorial-app-containerization-aspnet-kubernetes). +* [Containerizing Java web applications and migrating to AKS](/azure/migrate/tutorial-app-containerization-java-kubernetes). + +## AKS with Standard Load Balancer and Virtual Machine Scale Sets + +AKS is a managed service offering unique capabilities with lower management overhead. Since AKS is a managed service, you must select from a set of AKS-supported [regions](./quotas-skus-regions.md). You may need to modify your existing applications to keep them healthy on the AKS-managed control plane during the transition from your existing cluster to AKS. + +We recommend using AKS clusters backed by [Virtual Machine Scale Sets](/azure/virtual-machine-scale-sets/) and [Load Balancer (Standard)](./load-balancer-standard.md) to ensure you get the following features: + +* [Multiple node pools](./create-node-pools.md), +* [Availability zones](/azure/reliability/availability-zones-overview), +* [Authorized IP ranges](./api-server-authorized-ip-ranges.md), +* [Cluster autoscaler](./cluster-autoscaler.md), +* [Azure Policy for AKS](/azure/governance/policy/concepts/policy-for-kubernetes), and +* Other new features as they're released. + +AKS clusters backed by [virtual machine (VM) availability sets](/azure/virtual-machines/availability#availability-sets) lack support for many of these features. 
+ +### Create an AKS cluster with Load Balancer (Standard) and Virtual Machine Scale Sets + +### [Azure CLI](#tab/azure-cli) + +The following example creates an AKS cluster with single node pool backed by a Virtual Machine Scale Set. It enables the cluster autoscaler on the node pool for the cluster and sets a minimum of *one* and a maximum of *three* nodes. + +1. Create a resource group using the [`az group create`][az-group-create] command. + + First, export variables and add a random suffix to ensure resource names are unique. A reliable VM size is also specified for broad subscription compatibility. + + ```azurecli-interactive + export RANDOM_SUFFIX=$(head -c 3 /dev/urandom | xxd -p) + export RESOURCE_GROUP="myResourceGroup$RANDOM_SUFFIX" + export REGION="eastus2" + az group create --name $RESOURCE_GROUP --location $REGION + ``` + + Results: + + + + ```output + { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxx", + "location": "eastus2", + "managedBy": null, + "name": "myResourceGroupxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" + } + ``` + +2. Create an AKS cluster using the [`az aks create`][az-aks-create] command. + + Set the cluster name and create an AKS cluster with autoscaler and standard load balancer. The VM size is set to Standard_DS2_v2 for reliability in most subscriptions. + + ```azurecli-interactive + export CLUSTER_NAME="myAKSCluster$RANDOM_SUFFIX" + az aks create \ + --resource-group $RESOURCE_GROUP \ + --name $CLUSTER_NAME \ + --node-count 1 \ + --node-vm-size Standard_DS2_v2 \ + --vm-set-type VirtualMachineScaleSets \ + --load-balancer-sku standard \ + --enable-cluster-autoscaler \ + --min-count 1 \ + --max-count 3 \ + --generate-ssh-keys + ``` + + Results: + + + + ```output + { + "aadProfile": null, + "addonProfiles": {}, + "agentPoolProfiles": [ + { + "count": 1, + "enableAutoScaling": true, + "maxCount": 3, + "minCount": 1, + "name": "nodepool1", + "orchestratorVersion": "x.xx.x", + "osType": "Linux", + "provisioningState": "Succeeded", + "vmSize": "Standard_DS2_v2", + "type": "VirtualMachineScaleSets" + } + ], + "dnsPrefix": "myaksclusterxxx-dns-xxxxxxxx", + "enableRBAC": true, + "fqdn": "myaksclusterxxx-dns-xxxxxxxx.eastus2.cloudapp.azure.com", + ... + "provisioningState": "Succeeded", + ... + } + ``` + +### [Terraform](#tab/terraform) + +The following code creates a resource group and a Kubernetes cluster in Azure, with auto-scaling enabled and specific network settings, using Terraform. + +> [!NOTE] +> The sample code for this article is located in the [Azure Terraform GitHub repo](https://github.com/Azure/terraform/tree/master/quickstart/101-aks-standard-lb-and-vmss). You can view the log file containing the [test results from current and previous versions of Terraform](https://github.com/Azure/terraform/tree/master/quickstart/101-aks-standard-lb-and-vmss/TestRecord.md). +> +> See more [articles and sample code showing how to use Terraform to manage Azure resources](/azure/terraform) + +1. Create a directory in which to test and run the sample Terraform code, and make it the current directory. + +1. Create a file named `main.tf`, and insert the following code: + :::code language="Terraform" source="~/terraform_samples/quickstart/101-aks-standard-lb-and-vmss/main.tf"::: + +1. 
Create a file named `outputs.tf`, and insert the following code: + :::code language="Terraform" source="~/terraform_samples/quickstart/101-aks-standard-lb-and-vmss/outputs.tf"::: + +1. Create a file named `providers.tf`, and insert the following code: + :::code language="Terraform" source="~/terraform_samples/quickstart/101-aks-standard-lb-and-vmss/providers.tf"::: + +1. Create a file named `variables.tf`, and insert the following code: + :::code language="Terraform" source="~/terraform_samples/quickstart/101-aks-standard-lb-and-vmss/variables.tf"::: + +1. Initialize Terraform. + + [!INCLUDE [terraform-init.md](~/azure-dev-docs-pr/articles/terraform/includes/terraform-init.md)] + +1. Create a Terraform execution plan. + + [!INCLUDE [terraform-plan.md](~/azure-dev-docs-pr/articles/terraform/includes/terraform-plan.md)] + +1. Apply a Terraform execution plan. + + [!INCLUDE [terraform-apply-plan.md](~/azure-dev-docs-pr/articles/terraform/includes/terraform-apply-plan.md)] + +--- + +## Existing attached Azure Services + +When migrating clusters, you may have attached external Azure services. While the following services don't require resource recreation, they require updating connections from previous to new clusters to maintain functionality: + +* Azure Container Registry +* Azure Log Analytics +* Azure Application Insights +* Azure Traffic Manager +* Azure Storage account +* External databases + +## Ensure valid quotas + +Since other VMs are deployed into your subscription during migration, you should verify your quotas and limits are sufficient for these resources. If necessary, request an increase in [vCPU quota](/azure/azure-portal/supportability/per-vm-quota-requests). + +You may need to request an increase for [network quotas](/azure/azure-portal/supportability/networking-quota-requests) to ensure you don't exhaust IPs. For more information, see [networking and IP ranges for AKS](./configure-kubenet.md). + +For more information, see [Azure subscription and service limits](/azure/azure-resource-manager/management/azure-subscription-service-limits). To check your current quotas, in the Azure portal, go to the [subscriptions blade](https://portal.azure.com/#blade/Microsoft_Azure_Billing/SubscriptionsBlade), select your subscription, and then select **Usage + quotas**. + +## High availability and business continuity + +If your application can't handle downtime, you need to follow best practices for high availability migration scenarios. Read more about [Best practices for complex business continuity planning, disaster recovery, and maximizing uptime in Azure Kubernetes Service (AKS)](./operator-best-practices-multi-region.md). + +For complex applications, you typically migrate over time rather than all at once, meaning the old and new environments might need to communicate over the network. Applications previously using `ClusterIP` services to communicate might need to be exposed as type `LoadBalancer` and secured appropriately. + +To complete the migration, you want to point clients to the new services that run on AKS. We recommend you redirect traffic by updating DNS to point to the load balancer sitting in front of your AKS cluster. + +[Azure Traffic Manager](/azure/traffic-manager/) can direct customers to the desired Kubernetes cluster and application instance. Traffic Manager is a DNS-based traffic load balancer that can distribute network traffic across regions. 
For the best performance and redundancy, direct all application traffic through Traffic Manager before it goes to your AKS cluster. + +In a multi-cluster deployment, customers should connect to a Traffic Manager DNS name that points to the services on each AKS cluster. Define these services by using Traffic Manager endpoints. Each endpoint is the *service load balancer IP*. Use this configuration to direct network traffic from the Traffic Manager endpoint in one region to the endpoint in a different region. + +![AKS with Traffic Manager](media/operator-best-practices-bc-dr/aks-azure-traffic-manager.png) + +[Azure Front Door](/azure/frontdoor/front-door-overview) is another option for routing traffic for AKS clusters. With Azure Front Door, you can define, manage, and monitor the global routing for your web traffic by optimizing for best performance and instant global failover for high availability. + +### Considerations for stateless applications + +Stateless application migration involves the following steps: + +1. Apply your resource definitions (YAML or Helm) to the new cluster. +2. Ensure everything works as expected. +3. Redirect traffic to activate your new cluster. + +### Considerations for stateful applications + +Carefully plan your migration of stateful applications to avoid data loss or unexpected downtime. + +* If you use Azure Files, you can mount the file share as a volume into the new cluster. See [Mount Static Azure Files as a Volume](./azure-csi-files-storage-provision.md#mount-file-share-as-a-persistent-volume). +* If you use Azure Managed Disks, you can only mount the disk if unattached to any VM. See [Mount Static Azure Disk as a Volume](./azure-csi-disk-storage-provision.md#mount-disk-as-a-volume). +* If neither of those approaches work, you can use a backup and restore options. See [Velero on Azure](https://github.com/vmware-tanzu/velero-plugin-for-microsoft-azure/blob/master/README.md). + +#### Azure Files + +Unlike disks, Azure Files can be mounted to multiple hosts concurrently. In your AKS cluster, Azure and Kubernetes don't prevent you from creating a pod that your AKS cluster still uses. To prevent data loss and unexpected behavior, ensure the clusters don't simultaneously write to the same files. + +If your application can host multiple replicas that point to the same file share, follow the stateless migration steps and deploy your YAML definitions to your new cluster. + +If not, a possible migration approach involves the following steps: + +1. Validate your application is working correctly. +2. Point your live traffic to your new AKS cluster. +3. Disconnect the old cluster. + +If you want to start with an empty share and make a copy of the source data, you can use the [`az storage file copy`](/cli/azure/storage/file/copy) command to migrate your data. + +#### Migrating persistent volumes + +If you're migrating existing persistent volumes to AKS, you generally follow these steps: + +1. Quiesce writes to the application. + * This step is optional and requires downtime. +1. Take snapshots of the disks. +1. Create new managed disks from the snapshots. +1. Create persistent volumes in AKS. +1. Update pod specifications to [use existing volumes](./azure-disk-csi.md) rather than PersistentVolumeClaims (static provisioning). +1. Deploy your application to AKS. +1. Validate your application is working correctly. +1. Point your live traffic to your new AKS cluster. 
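+
+For the snapshot and disk-creation steps (steps 2 and 3), a minimal Azure CLI sketch looks like the following. The resource group, disk, and snapshot names are placeholders, so substitute values from your own environment before running anything:
+
+```console
+# 2. Take a snapshot of the source disk (all names below are placeholders).
+az snapshot create \
+  --resource-group myResourceGroup \
+  --name mySourceDiskSnapshot \
+  --source "/subscriptions/<subscription-id>/resourceGroups/<node-resource-group>/providers/Microsoft.Compute/disks/<source-disk-name>"
+
+# 3. Create a new managed disk from that snapshot.
+az disk create \
+  --resource-group myResourceGroup \
+  --name myMigratedDisk \
+  --source mySourceDiskSnapshot
+```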
+ +> [!IMPORTANT] +> If you choose not to quiesce writes, you need to replicate data to the new deployment. Otherwise, you miss the data that was written after you took the disk snapshots. + +The following open-source tools can help you create managed disks and migrate volumes between Kubernetes clusters: + +* [Azure CLI Disk Copy extension](https://github.com/noelbundick/azure-cli-disk-copy-extension) copies and converts disks across resource groups and Azure regions. +* [Azure Kube CLI extension](https://github.com/yaron2/azure-kube-cli) enumerates ACS Kubernetes volumes and migrates them to an AKS cluster. + +### Deployment of your cluster configuration + +We recommend you use your existing continuous integration and continuous delivery pipeline to deploy a known-good configuration to AKS. You can use Azure Pipelines to [build and deploy your applications to AKS](/azure/devops/pipelines/ecosystems/kubernetes/aks-template). Clone your existing deployment tasks and ensure `kubeconfig` points to the new AKS cluster. + +If that's not possible, export resource definitions from your existing Kubernetes cluster, and then apply them to AKS. You can use `kubectl` to export objects. For example: + +```console +kubectl get deployment -o yaml > deployments.yaml +``` + +Be sure to examine the output and remove any unnecessary live data fields. + +### Moving existing resources to another region + +You might want to move your AKS cluster to a [different region supported by AKS][region-availability]. We recommend you create a new cluster in the other region and then deploy your resources and applications to your new cluster. + +If you have any services running on your AKS cluster, you need to install and configure those services on your cluster in the new region. + +In this article, we summarized migration details for: + +> [!div class="checklist"] +> +> * Containerizing applications through Azure Migrate +> * AKS with Load Balancer (Standard) and Virtual Machine Scale Sets +> * Existing attached Azure services +> * Ensuring valid quotas +> * High availability and business continuity +> * Considerations for stateless applications +> * Considerations for stateful applications +> * Deploying your cluster configuration + + +[region-availability]: https://azure.microsoft.com/global-infrastructure/services/?products=kubernetes-service +[az-group-create]: /cli/azure/group#az_group_create +[az-aks-create]: /cli/azure/aks#az_aks_create \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/auto-upgrade-cluster.md b/scenarios/azure-aks-docs/articles/aks/auto-upgrade-cluster.md new file mode 100644 index 000000000..98416b5bc --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/auto-upgrade-cluster.md @@ -0,0 +1,170 @@ +--- +title: Automatically upgrade an Azure Kubernetes Service (AKS) cluster +description: Learn how to automatically upgrade an Azure Kubernetes Service (AKS) cluster to get the latest features and security updates. +ms.topic: how-to +ms.author: nickoman +author: nickomang +ms.subservice: aks-upgrade +ms.date: 05/01/2023 +ms.custom: aks-upgrade, automation, innovation-engine +--- + +# Automatically upgrade an Azure Kubernetes Service (AKS) cluster + +Part of the AKS cluster lifecycle involves performing periodic upgrades to the latest Kubernetes version. It’s important you apply the latest security releases or upgrade to get the latest features. Before learning about auto-upgrade, make sure you understand the [AKS cluster upgrade fundamentals][upgrade-aks-cluster]. 
+ +> [!NOTE] +> Any upgrade operation, whether performed manually or automatically, upgrades the node image version if it's not already on the latest version. The latest version is contingent on a full AKS release and can be determined by visiting the [AKS release tracker][release-tracker]. +> +> Auto-upgrade first upgrades the control plane, and then upgrades agent pools one by one. + +## Why use cluster auto-upgrade + +Cluster auto-upgrade provides a "set once and forget" mechanism that yields tangible time and operational cost benefits. You don't need to stop your workloads, redeploy your workloads, or create a new AKS cluster. By enabling auto-upgrade, you can ensure your clusters are up to date and don't miss the latest features or patches from AKS and upstream Kubernetes. + +AKS follows a strict supportability versioning window. With properly selected auto-upgrade channels, you can avoid clusters falling into an unsupported version. For more on the AKS support window, see [Alias minor versions][supported-kubernetes-versions]. + +## Customer versus AKS-initiated auto-upgrades + +You can specify cluster auto-upgrade specifics using the following guidance. The upgrades occur based on your specified cadence and are recommended to remain on supported Kubernetes versions. + +AKS also initiates auto-upgrades for unsupported clusters. When a cluster in an n-3 version (where n is the latest supported AKS GA minor version) is about to drop to n-4, AKS automatically upgrades the cluster to n-2 to remain in an AKS support [policy][supported-kubernetes-versions]. Automatically upgrading a platform supported cluster to a supported version is enabled by default. Stopped node pools are upgraded during an auto-upgrade operation. The upgrade applies to nodes when the node pool is started. To minimize disruptions, set up [maintenance windows][planned-maintenance]. + +## Cluster auto-upgrade limitations + +If you’re using cluster auto-upgrade, you can no longer upgrade the control plane first, and then upgrade the individual node pools. Cluster auto-upgrade always upgrades the control plane and the node pools together. You can't upgrade the control plane only. Running the `az aks upgrade --control-plane-only` command raises the following error: `NotAllAgentPoolOrchestratorVersionSpecifiedAndUnchanged: Using managed cluster api, all Agent pools' OrchestratorVersion must be all specified or all unspecified. If all specified, they must be stay unchanged or the same with control plane.` + +If using the `node-image` (legacy and not to be used) cluster auto-upgrade channel or the `NodeImage` node image auto-upgrade channel, Linux [unattended upgrades][unattended-upgrades] are disabled by default. + +## Cluster auto-upgrade channels + +Automatically completed upgrades are functionally the same as manual upgrades. The [selected auto-upgrade channel][planned-maintenance] determines the timing of upgrades. When making changes to auto-upgrade, allow 24 hours for the changes to take effect. Automatically upgrading a cluster follows the same process as manually upgrading a cluster. For more information, see [Upgrade an AKS cluster][upgrade-aks-cluster]. 
+ +The following upgrade channels are available: + +|Channel| Action | Example +|---|---|---| +| `none`| disables auto-upgrades and keeps the cluster at its current version of Kubernetes.| Default setting if left unchanged.| +| `patch`| automatically upgrades the cluster to the latest supported patch version when it becomes available while keeping the minor version the same.| For example, if a cluster runs version *1.17.7*, and versions *1.17.9*, *1.18.4*, *1.18.6*, and *1.19.1* are available, the cluster upgrades to *1.17.9*.| +| `stable`| automatically upgrades the cluster to the latest supported patch release on minor version *N-1*, where *N* is the latest supported minor version.| For example, if a cluster runs version *1.17.7* and versions *1.17.9*, *1.18.4*, *1.18.6*, and *1.19.1* are available, the cluster upgrades to *1.18.6*.| +| `rapid`| automatically upgrades the cluster to the latest supported patch release on the latest supported minor version.| In cases where the cluster's Kubernetes version is an *N-2* minor version, where *N* is the latest supported minor version, the cluster first upgrades to the latest supported patch version on *N-1* minor version. For example, if a cluster runs version *1.17.7* and versions *1.17.9*, *1.18.4*, *1.18.6*, and *1.19.1* are available, the cluster first upgrades to *1.18.6*, then upgrades to *1.19.1*.| +| `node-image`(legacy)| automatically upgrades the node image to the latest version available.| Microsoft provides patches and new images for image nodes frequently (usually weekly), but your running nodes don't get the new images unless you do a node image upgrade. Turning on the node-image channel automatically updates your node images whenever a new version is available. If you use this channel, Linux [unattended upgrades] are disabled by default. Node image upgrades work on patch versions that are deprecated, so long as the minor Kubernetes version is still supported. This channel is no longer recommended and is planned for deprecation in future. For an option that can automatically upgrade node images, see the `NodeImage` channel in [node image auto-upgrade][node-image-auto-upgrade]. | + +> [!NOTE] +> +> Keep the following information in mind when using cluster auto-upgrade: +> +> * Cluster auto-upgrade only updates to GA versions of Kubernetes and doesn't update to preview versions. +> +> * With AKS, you can create a cluster without specifying the exact patch version. When you create a cluster without designating a patch, the cluster runs the minor version's latest GA patch. To learn more, see [AKS support window][supported-kubernetes-versions]. +> +> * Auto-upgrade requires the cluster's Kubernetes version to be within the [AKS support window][supported-kubernetes-versions], even if using the `node-image` channel. +> +> * If you're using the preview API `11-02-preview` or later, and you select the `node-image` cluster auto-upgrade channel, the [node image auto-upgrade channel][node-image-auto-upgrade] automatically sets to `NodeImage`. +> +> * Each cluster can only be associated with a single auto-upgrade channel. This is because your specified channel determines the Kubernetes version that runs on the cluster. +> +> * If your cluster has no auto-upgrade channel and you enable it for LTS *(Long-Term Support)*, it will default to a `patch` auto-upgrade channel. 
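+
+Before you move a cluster from one channel to another, you can check which channel (if any) is currently configured. The following is a quick sketch with placeholder resource group and cluster names:
+
+```text
+az aks show \
+  --resource-group myResourceGroup \
+  --name myAKSCluster \
+  --query autoUpgradeProfile.upgradeChannel \
+  --output tsv
+```
+
+An empty result means no channel is configured, which behaves like the `none` channel described in the table above.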
+ +## Use cluster auto-upgrade with a new AKS cluster + +### [Azure CLI](#tab/azure-cli) + +* Set the auto-upgrade channel when creating a new cluster using the [`az aks create`][az-aks-create] command and the `auto-upgrade-channel` parameter. + +```text +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export RESOURCE_GROUP="myResourceGroup$RANDOM_SUFFIX" +export AKS_CLUSTER_NAME="myAKSCluster" +az aks create --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER_NAME --auto-upgrade-channel stable --generate-ssh-keys +``` + +### [Azure portal](#tab/azure-portal) + +1. In the Azure portal, select **Create a resource** > **Containers** > **Azure Kubernetes Service (AKS)**. +2. In the **Basics** tab, under **Cluster details**, select the desired auto-upgrade channel from the **Automatic upgrade** dropdown. We recommend selecting the **Enabled with patch (recommended)** option. + + :::image type="content" source="./media/auto-upgrade-cluster/portal-autoupgrade-new-cluster.png" alt-text="The screenshot of the create blade for an AKS cluster in the Azure portal. The automatic upgrade field shows 'Enabled with patch (recommended)' selected."::: + +3. Complete the remaining steps to create the cluster. + +--- + +## Use cluster auto-upgrade with an existing AKS cluster + +### [Azure CLI](#tab/azure-cli) + +* Set the auto-upgrade channel on an existing cluster using the [`az aks update`][az-aks-update] command with the `auto-upgrade-channel` parameter. + +```azurecli-interactive +az aks update --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER_NAME --auto-upgrade-channel stable +``` + +Results: + + + +```JSON +{ + "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/myResourceGroupabc123/providers/Microsoft.ContainerService/managedClusters/myAKSCluster", + "properties": { + "autoUpgradeChannel": "stable", + "provisioningState": "Succeeded" + } +} +``` + +### [Azure portal](#tab/azure-portal) + +1. In the Azure portal, navigate to your AKS cluster. +2. In the service menu, under **Settings**, select **Cluster configuration**. +3. Under **Upgrade** > **Kubernetes version**, select **Upgrade version**. + + :::image type="content" source="./media/auto-upgrade-cluster/portal-autoupgrade-existing-cluster.png" alt-text="The screenshot of the upgrade blade for an AKS cluster in the Azure portal."::: + +4. On the **Upgrade Kubernetes version** page, select the desired auto-upgrade channel from the **Automatic upgrade** dropdown. We recommend selecting the **Enabled with patch (recommended)** option. + + :::image type="content" source="./media/auto-upgrade-cluster/portal-autoupgrade-upgrade-page-existing-cluster.png" alt-text="The screenshot of the Upgrade Kubernetes page for an AKS cluster in the Azure portal."::: + +5. Select **Save**. + +--- + +## Use auto-upgrade with Planned Maintenance + +If using Planned Maintenance and cluster auto-upgrade, your upgrade starts during your specified maintenance window. + +> [!NOTE] +> To ensure proper functionality, use a maintenance window of *four hours or more*. + +For more information on how to set a maintenance window with Planned Maintenance, see [Use Planned Maintenance to schedule maintenance windows for your Azure Kubernetes Service (AKS) cluster][planned-maintenance]. + +## Best practices for cluster auto-upgrade + +Use the following best practices to help maximize your success when using auto-upgrade: + +* To ensure your cluster is always in a supported version (i.e within the N-2 rule), choose either `stable` or `rapid` channels. 
+* If you're interested in getting the latest patches as soon as possible, use the `patch` channel. The `node-image` channel is a good fit if you want your agent pools to always run the most recent node images. +* To automatically upgrade node images while using a different cluster upgrade channel, consider using the [node image auto-upgrade][node-image-auto-upgrade] `NodeImage` channel. +* Follow [Operator best practices][operator-best-practices-scheduler]. +* Follow [PDB best practices][pdb-best-practices]. +* For upgrade troubleshooting information, see the [AKS troubleshooting documentation][aks-troubleshoot-docs]. + +For a detailed discussion of upgrade best practices and other considerations, see [AKS patch and upgrade guidance][upgrade-operators-guide]. + + +[supported-kubernetes-versions]: ./supported-kubernetes-versions.md +[upgrade-aks-cluster]: ./upgrade-cluster.md +[planned-maintenance]: ./planned-maintenance.md +[operator-best-practices-scheduler]: operator-best-practices-scheduler.md#plan-for-availability-using-pod-disruption-budgets +[node-image-auto-upgrade]: auto-upgrade-node-image.md +[az-aks-create]: /cli/azure/aks#az_aks_create +[az-aks-update]: /cli/azure/aks#az_aks_update +[aks-troubleshoot-docs]: /support/azure/azure-kubernetes/welcome-azure-kubernetes +[upgrade-operators-guide]: /azure/architecture/operator-guides/aks/aks-upgrade-practices + + +[pdb-best-practices]: https://kubernetes.io/docs/tasks/run-application/configure-pdb/ +[release-tracker]: release-tracker.md +[k8s-deprecation]: https://kubernetes.io/blog/2022/11/18/upcoming-changes-in-kubernetes-1-26/#:~:text=A%20deprecated%20API%20is%20one%20that%20has%20been,point%20you%20must%20migrate%20to%20using%20the%20replacement +[unattended-upgrades]: https://help.ubuntu.com/community/AutomaticSecurityUpdates \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/auto-upgrade-node-os-image.md b/scenarios/azure-aks-docs/articles/aks/auto-upgrade-node-os-image.md new file mode 100644 index 000000000..f0635a4ec --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/auto-upgrade-node-os-image.md @@ -0,0 +1,232 @@ +--- +title: autoupgrade Node OS Images +description: Learn how to choose an upgrade channel that best supports your needs for cluster's node OS security and maintenance. +ms.topic: how-to +ms.custom: build-2023, devx-track-azurecli, innovation-engine +ms.author: kaarthis +author: kaarthis +ms.subservice: aks-upgrade +ms.date: 05/10/2024 +--- + +# autoupgrade node OS images + +AKS provides multiple autoupgrade channels dedicated to timely node-level OS security updates. This channel is different from cluster-level Kubernetes version upgrades and supersedes it. + +## Interactions between node OS autoupgrade and cluster autoupgrade + +Node-level OS security updates are released at a faster rate than Kubernetes patch or minor version updates. The node OS autoupgrade channel grants you flexibility and enables a customized strategy for node-level OS security updates. Then, you can choose a separate plan for cluster-level Kubernetes version [autoupgrades][Autoupgrade]. +It's best to use both cluster-level [autoupgrades][Autoupgrade] and the node OS autoupgrade channel together. Scheduling can be fine-tuned by applying two separate sets of [maintenance windows][planned-maintenance] - `aksManagedAutoUpgradeSchedule` for the cluster [autoupgrade][Autoupgrade] channel and `aksManagedNodeOSUpgradeSchedule` for the node OS autoupgrade channel. 
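+
+For example, the two schedules can be created with `az aks maintenanceconfiguration add`, one for cluster auto-upgrades and one for node OS upgrades. The following is a minimal, non-executed sketch that assumes an existing cluster and uses example values for the day, start time, and duration; see [Planned Maintenance][planned-maintenance] for all available options.
+
+```text
+# Maintenance window for cluster (Kubernetes version) auto-upgrades
+az aks maintenanceconfiguration add \
+    --resource-group <resource-group-name> \
+    --cluster-name <cluster-name> \
+    --name aksManagedAutoUpgradeSchedule \
+    --schedule-type Weekly \
+    --day-of-week Sunday \
+    --interval-weeks 1 \
+    --start-time 00:00 \
+    --duration 4
+
+# Maintenance window for node OS auto-upgrades
+az aks maintenanceconfiguration add \
+    --resource-group <resource-group-name> \
+    --cluster-name <cluster-name> \
+    --name aksManagedNodeOSUpgradeSchedule \
+    --schedule-type Weekly \
+    --day-of-week Saturday \
+    --interval-weeks 1 \
+    --start-time 00:00 \
+    --duration 4
+```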
+ +## Channels for node OS image upgrades + +The selected channel determines the timing of upgrades. When making changes to node OS auto-upgrade channels, allow up to 24 hours for the changes to take effect. + +> [!NOTE] +> - Once you change from one channel to another channel, **a reimage is triggered leading to rolling nodes**. +> - Node OS image auto-upgrade won't affect the cluster's Kubernetes version. Starting with API version 2023-06-01, the default for any new cluster created is `NodeImage`. + +The following upgrade channels are available. You're allowed to choose one of these options: + +|Channel|Description|OS-specific behavior| +|---|---|---| +| `None`| Your nodes don't have security updates applied automatically. This means you're solely responsible for your security updates.|N/A| +| `Unmanaged`|OS updates are applied automatically through the OS built-in patching infrastructure. Newly allocated machines are unpatched initially. The OS's infrastructure patches them at some point.|Ubuntu and Azure Linux (CPU node pools) apply security patches through unattended upgrade/dnf-automatic roughly once per day around 06:00 UTC. Windows doesn't automatically apply security patches, so this option behaves equivalently to `None`. You need to manage the reboot process by using a tool like [kured][kured].| +| `SecurityPatch`|OS security patches, which are AKS-tested, fully managed, and applied with safe deployment practices. AKS regularly updates the node's virtual hard disk (VHD) with patches from the image maintainer labeled "security only." There might be disruptions when the security patches are applied to the nodes. However AKS is limiting disruptions by only reimaging your nodes only when necessary, such as for certain kernel security packages. When the patches are applied, the VHD is updated and existing machines are upgraded to that VHD, honoring maintenance windows and surge settings. If AKS decides that reimaging nodes isn't necessary, it patches nodes live without draining pods and performs no VHD update. This option incurs the extra cost of hosting the VHDs in your node resource group. If you use this channel, Linux [unattended upgrades][unattended-upgrades] are disabled by default.|Azure Linux doesn't support this channel on GPU-enabled VMs. `SecurityPatch` works on kubernetes patch versions that are deprecated, so long as the minor Kubernetes version is still supported.| +| `NodeImage`|AKS updates the nodes with a newly patched VHD containing security fixes and bug fixes on a weekly cadence. The update to the new VHD is disruptive, following maintenance windows and surge settings. No extra VHD cost is incurred when choosing this option. If you use this channel, Linux [unattended upgrades][unattended-upgrades] are disabled by default. Node image upgrades are supported as long as cluster k8s minor version is still in support. Node images are AKS-tested, fully managed, and applied with safe deployment practices.| + +## What to choose - SecurityPatch Channel or NodeImage Channel? + +There are two important considerations for you to choose between `SecurityPatch` or `NodeImage` channels. + +|Property|NodeImage Channel|SecurityPatch Channel|Recommended Channel| +|---|---|---|---| +| `Speed of shipping`|The typical build, test, release, and rollout timelines for a new VHD can take approximately 2 weeks following safe deployment practices. Although in the event of CVEs, accelerated rollouts can occur on a case by case basis. 
The exact timing when a new VHD hits a region can be monitored via [release-tracker]. | SecurityPatch releases are relatively faster than `NodeImage`, even with safe deployment practices. SecurityPatch has the advantage of 'Live-patching' in Linux environments, where patching leads to selective 'reimaging' and does not reimage every time a patch gets applied. Re-image if it happens is controlled by maintenance windows. |`SecurityPatch`| +| `Bugfixes`| Carries bug fixes in addition to security fixes.| Strictly carries only security fixes.| `NodeImage`| + +## Set the node OS autoupgrade channel on a new cluster + +### [Azure CLI](#tab/azure-cli) + +* Set the node OS autoupgrade channel on a new cluster using the [`az aks create`][az-aks-create] command with the `--node-os-upgrade-channel` parameter. The following example sets the node OS autoupgrade channel to `SecurityPatch`. + +```text +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export RESOURCE_GROUP="myResourceGroup$RANDOM_SUFFIX" +export AKS_CLUSTER="myAKSCluster$RANDOM_SUFFIX" +az aks create \ + --resource-group $RESOURCE_GROUP \ + --name $AKS_CLUSTER \ + --node-os-upgrade-channel SecurityPatch \ + --generate-ssh-keys +``` + +### [Azure portal](#tab/azure-portal) + +1. In the Azure portal, select **Create a resource** > **Containers** > **Azure Kubernetes Service (AKS)**. +2. In the **Basics** tab, under **Cluster details**, select the desired channel type from the **Node security channel type** dropdown. + + :::image type="content" source="./media/auto-upgrade-node-os-image/set-nodeimage-channel-portal.png" alt-text="A screenshot of the Azure portal showing the node security channel type option in the Basics tab of the AKS cluster creation page."::: + +3. Select **Security channel scheduler** and choose the desired maintenance window using the [Planned Maintenance feature](./planned-maintenance.md). We recommend selecting the default option **Every week on Sunday (recommended)**. + + :::image type="content" source="./media/auto-upgrade-node-os-image/set-nodeimage-maintenance-window-portal.png" alt-text="A screenshot of the Azure portal showing the security channel scheduler option in the Basics tab of the AKS cluster creation page."::: + +4. Complete the remaining steps to create the cluster. + +--- + +## Set the node OS autoupgrade channel on an existing cluster + +### [Azure CLI](#tab/azure-cli) + +* Set the node os autoupgrade channel on an existing cluster using the [`az aks update`][az-aks-update] command with the `--node-os-upgrade-channel` parameter. The following example sets the node OS autoupgrade channel to `SecurityPatch`. + +```azurecli-interactive +az aks update --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER --node-os-upgrade-channel SecurityPatch +``` + +Results: + + +```JSON +{ + "autoUpgradeProfile": { + "nodeOsUpgradeChannel": "SecurityPatch" + } +} +``` + +### [Azure portal](#tab/azure-portal) + +1. In the Azure portal, navigate to your AKS cluster. +2. In the **Settings** section, select **Cluster configuration**. +3. Under **Security updates**, select the desired channel type from the **Node security channel type** dropdown. + + :::image type="content" source="./media/auto-upgrade-node-os-image/set-nodeimage-channel-portal-existing.png" alt-text="A screenshot of the Azure portal showing the node security channel type option in the Cluster configuration page of an existing AKS cluster."::: + +4. For **Security channel scheduler**, select **Add schedule**. +5. 
On the **Add maintenance schedule** page, configure the following maintenance window settings using the [Planned Maintenance feature](./planned-maintenance.md): + + * **Repeats**: Select the desired frequency for the maintenance window. We recommend selecting **Weekly**. + * **Frequency**: Select the desired day of the week for the maintenance window. We recommend selecting **Sunday**. + * **Maintenance start date**: Select the desired start date for the maintenance window. + * **Maintenance start time**: Select the desired start time for the maintenance window. + * **UTC offset**: Select the desired UTC offset for the maintenance window. If not set, the default is **+00:00**. + + :::image type="content" source="./media/auto-upgrade-node-os-image/set-nodeimage-maintenance-window-portal-existing.png" alt-text="A screenshot of the Azure portal showing the maintenance schedule configuration options in the Add maintenance schedule page of an existing AKS cluster."::: + +6. Select **Save** > **Apply**. + +--- + +## Update ownership and schedule + +The default cadence means there's no planned maintenance window applied. + +|Channel|Updates Ownership|Default cadence| +|---|---|---| +| `Unmanaged`|OS driven security updates. AKS has no control over these updates.|Nightly around 6AM UTC for Ubuntu and Azure Linux. Monthly for Windows.| +| `SecurityPatch`|AKS-tested, fully managed, and applied with safe deployment practices. For more information, see [Increased security and resiliency of Canonical workloads on Azure][Blog].|Typically faster than weekly, AKS determined cadence.| +| `NodeImage`|AKS-tested, fully managed, and applied with safe deployment practices. For more real time information on releases, look up [AKS Node Images in Release tracker][release-tracker] |Weekly.| + +> [!NOTE] +> While Windows security updates are released on a monthly basis, using the `Unmanaged` channel will not automatically apply these updates to Windows nodes. If you choose the `Unmanaged` channel, you need to manage the reboot process for Windows nodes. + +## Node channel known limitations + +- Currently, when you set the [cluster autoupgrade channel][Autoupgrade] to `node-image`, it also automatically sets the node OS autoupgrade channel to `NodeImage`. You can't change node OS autoupgrade channel value if your cluster autoupgrade channel is `node-image`. In order to set the node OS autoupgrade channel value, check the [cluster autoupgrade channel][Autoupgrade] value isn't `node-image`. + +- The `SecurityPatch` channel isn't supported on Windows OS node pools. + + > [!NOTE] + > Use CLI version 2.61.0 or above for the `SecurityPatch` channel. + +## Node OS planned maintenance windows + +Planned maintenance for the node OS autoupgrade starts at your specified maintenance window. + +> [!NOTE] +> To ensure proper functionality, use a maintenance window of four hours or more. + +For more information on Planned Maintenance, see [Use Planned Maintenance to schedule maintenance windows for your Azure Kubernetes Service (AKS) cluster][planned-maintenance]. + +## Node OS autoupgrades FAQ + +### How can I check the current nodeOsUpgradeChannel value on a cluster? 
+ +Run the `az aks show` command and check the "autoUpgradeProfile" to determine what value the `nodeOsUpgradeChannel` is set to: + +```azurecli-interactive +az aks show --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER --query "autoUpgradeProfile" +``` + +Results: + + +```JSON +{ + "nodeOsUpgradeChannel": "SecurityPatch" +} +``` + +### How can I monitor the status of node OS autoupgrades? + +To view the status of your node OS auto upgrades, look up [activity logs][monitor-aks] on your cluster. You can also look up specific upgrade-related events as mentioned in [Upgrade an AKS cluster][aks-upgrade]. AKS also emits upgrade-related Event Grid events. To learn more, see [AKS as an Event Grid source][aks-eventgrid]. + +### Can I change the node OS autoupgrade channel value if my cluster autoupgrade channel is set to `node-image`? + + No. Currently, when you set the [cluster autoupgrade channel][Autoupgrade] to `node-image`, it also automatically sets the node OS autoupgrade channel to `NodeImage`. You can't change the node OS autoupgrade channel value if your cluster autoupgrade channel is `node-image`. In order to be able to change the node OS autoupgrade channel values, make sure the [cluster autoupgrade channel][Autoupgrade] isn't `node-image`. + +### Why is `SecurityPatch` recommended over `Unmanaged` channel? + +On the `Unmanaged` channel, AKS has no control over how and when the security updates are delivered. With `SecurityPatch`, the security updates are fully tested and follow safe deployment practices. `SecurityPatch` also honors maintenance windows. For more details, see [Increased security and resiliency of Canonical workloads on Azure][Blog]. + +### Does `SecurityPatch` always lead to a reimage of my nodes? + +AKS limits reimages to only when absolutely necessary, such as certain kernel packages that may require a reimage to get fully applied. `SecurityPatch` is designed to minimize disruptions as much as possible. If AKS decides reimaging nodes isn't necessary, it will patch nodes live without draining pods and no VHD update is performed in such cases. + +### Why does `SecurityPatch` channel requires to reach `snapshot.ubuntu.com` endpoint? + +With the `SecurityPatch` channel, the Linux cluster nodes have to download the required security patches and updates from ubuntu snapshot service described in [ubuntu-snapshots-on-azure-ensuring-predictability-and-consistency-in-cloud-deployments](https://ubuntu.com/blog/ubuntu-snapshots-on-azure-ensuring-predictability-and-consistency-in-cloud-deployments). + +### How do I know if a `SecurityPatch` or `NodeImage` upgrade is applied on my node? + +Run the `kubectl get nodes --show-labels` command to list the nodes in your cluster and their labels + +Among the returned labels, you should see a line similar to the following output: + +```output +kubernetes.azure.com/node-image-version=AKSUbuntu-2204gen2containerd-202410.27.0-2024.12.01 +``` + +Here, the base node image version is `AKSUbuntu-2204gen2containerd-202410.27.0`. If applicable, the security patch version typically follows. In the above example, it's `2024.12.01`. + +The same details also be looked up in the Azure portal under the node label view: + +:::image type="content" source="./media/auto-upgrade-node-os-image/nodeimage-securitypatch-inline.png" alt-text="A screenshot of the nodes page for an AKS cluster in the Azure portal. The label for node image version clearly shows the base node image and the latest applied security patch date." 
lightbox="./media/auto-upgrade-node-os-image/nodeimage-securitypatch.png"::: + +## Next steps + +For a detailed discussion of upgrade best practices and other considerations, see [AKS patch and upgrade guidance][upgrade-operators-guide]. + + +[planned-maintenance]: planned-maintenance.md +[release-tracker]: release-tracker.md +[az-provider-register]: /cli/azure/provider#az-provider-register +[az-feature-register]: /cli/azure/feature#az-feature-register +[az-feature-show]: /cli/azure/feature#az-feature-show +[upgrade-aks-cluster]: upgrade-cluster.md +[unattended-upgrades]: https://help.ubuntu.com/community/AutomaticSecurityUpdates +[Autoupgrade]: auto-upgrade-cluster.md +[kured]: node-updates-kured.md +[supported]: ./support-policies.md +[monitor-aks]: ./monitor-aks-reference.md +[aks-eventgrid]: ./quickstart-event-grid.md +[aks-upgrade]: ./upgrade-cluster.md +[upgrade-operators-guide]: /azure/architecture/operator-guides/aks/aks-upgrade-practices +[az-aks-create]: /cli/azure/aks#az-aks-create +[az-aks-update]: /cli/azure/aks#az-aks-update + + +[Blog]: https://techcommunity.microsoft.com/t5/linux-and-open-source-blog/increased-security-and-resiliency-of-canonical-workloads-on/ba-p/3970623 diff --git a/scenarios/azure-aks-docs/articles/aks/azure-cni-powered-by-cilium.md b/scenarios/azure-aks-docs/articles/aks/azure-cni-powered-by-cilium.md new file mode 100644 index 000000000..d1a7f8651 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/azure-cni-powered-by-cilium.md @@ -0,0 +1,229 @@ +--- +title: Configure Azure CNI Powered by Cilium in Azure Kubernetes Service (AKS) +description: Learn how to create an Azure Kubernetes Service (AKS) cluster with Azure CNI Powered by Cilium. +ms.topic: how-to +ms.date: 02/12/2024 +author: asudbring +ms.author: allensu +ms.subservice: aks-networking +ms.custom: references_regions, devx-track-azurecli, build-2023, innovation-engine +--- + +# Configure Azure CNI Powered by Cilium in Azure Kubernetes Service (AKS) + +Azure CNI Powered by Cilium combines the robust control plane of Azure CNI with the data plane of [Cilium](https://cilium.io/) to provide high-performance networking and security. + +By making use of eBPF programs loaded into the Linux kernel and a more efficient API object structure, Azure CNI Powered by Cilium provides the following benefits: + +- Functionality equivalent to existing Azure CNI and Azure CNI Overlay plugins + +- Improved Service routing + +- More efficient network policy enforcement + +- Better observability of cluster traffic + +- Support for larger clusters (more nodes, pods, and services) + +## IP Address Management (IPAM) with Azure CNI Powered by Cilium + +Azure CNI Powered by Cilium can be deployed using two different methods for assigning pod IPs: + +- Assign IP addresses from an overlay network (similar to Azure CNI Overlay mode) + +- Assign IP addresses from a virtual network (similar to existing Azure CNI with Dynamic Pod IP Assignment) + +If you aren't sure which option to select, read ["Choosing a network model to use."](./azure-cni-overlay.md#choosing-a-network-model-to-use) + +## Versions + +| Kubernetes Version | Cilium Version | +|--------------------|----------------| +| 1.27 (LTS) | 1.13.18 | +| 1.28 (End of Life) | 1.13.18 | +| 1.29 | 1.14.19 | +| 1.30 (LTS) | 1.14.19 | +| 1.31 | 1.16.6 | +| 1.32 | 1.17.0 | + +See [Supported Kubernetes Versions](./supported-kubernetes-versions.md) for more information on AKS versioning and release timelines. 
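+
+To verify which Cilium version a cluster is actually running, you can inspect the image used by the Cilium agent. The following is a minimal, non-executed sketch; it assumes the AKS-managed Cilium agent runs as a DaemonSet named `cilium` in the `kube-system` namespace and that you already have credentials for the cluster.
+
+```text
+kubectl --namespace kube-system get daemonset cilium \
+    --output jsonpath='{.spec.template.spec.containers[0].image}'
+```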
+ +## Network Policy Enforcement + +Cilium enforces [network policies to allow or deny traffic between pods](./operator-best-practices-network.md#control-traffic-flow-with-network-policies). With Cilium, you don't need to install a separate network policy engine such as Azure Network Policy Manager or Calico. + +## Limitations + +Azure CNI powered by Cilium currently has the following limitations: + +* Available only for Linux and not for Windows. + +* Cilium L7 policy enforcement is disabled. + +* Network policies can't use `ipBlock` to allow access to node or pod IPs. See [frequently asked questions](#frequently-asked-questions) for details and recommended workaround. + +* Multiple Kubernetes services can't use the same host port with different protocols (for example, TCP or UDP) ([Cilium issue #14287](https://github.com/cilium/cilium/issues/14287)). + +* Network policies may be enforced on reply packets when a pod connects to itself via service cluster IP ([Cilium issue #19406](https://github.com/cilium/cilium/issues/19406)). + +* Network policies aren't applied to pods using host networking (`spec.hostNetwork: true`) because these pods use the host identity instead of having individual identities. + +## Prerequisites + +* Azure CLI version 2.48.1 or later. Run `az --version` to see the currently installed version. If you need to install or upgrade, see [Install Azure CLI](/cli/azure/install-azure-cli). + +* If using ARM templates or the REST API, the AKS API version must be 2022-09-02-preview or later. + +> [!NOTE] +> Previous AKS API versions (2022-09-02preview to 2023-01-02preview) used the field [`networkProfile.ebpfDataplane=cilium`](https://github.com/Azure/azure-rest-api-specs/blob/06dbe269f7d9c709cc225c92358b38c3c2b74d60/specification/containerservice/resource-manager/Microsoft.ContainerService/aks/preview/2022-09-02-preview/managedClusters.json#L6939-L6955). AKS API versions since 2023-02-02preview use the field [`networkProfile.networkDataplane=cilium`](https://github.com/Azure/azure-rest-api-specs/blob/06dbe269f7d9c709cc225c92358b38c3c2b74d60/specification/containerservice/resource-manager/Microsoft.ContainerService/aks/preview/2023-02-02-preview/managedClusters.json#L7152-L7173) to enable Azure CNI Powered by Cilium. + +## Create a new AKS Cluster with Azure CNI Powered by Cilium + +### Create a Resource Group + +Use the following command to create a resource group. Environment variables are declared and used below to replace placeholders. + +```azurecli-interactive +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export RESOURCE_GROUP="myResourceGroup$RANDOM_SUFFIX" +export REGION="EastUS2" + +az group create \ + --name $RESOURCE_GROUP \ + --location $REGION +``` + +Result: + + +```JSON +{ + "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/myResourceGroupxxx", + "location": "WestUS2", + "name": "myResourceGroupxxx", + "provisioningState": "Succeeded" +} +``` + +### Assign IP addresses from an overlay network + +Use the following commands to create a cluster with an overlay network and Cilium. Environment variables are declared and used below to replace placeholders. 
+ +```azurecli-interactive +export CLUSTER_NAME="myAKSCluster$RANDOM_SUFFIX" + +az aks create \ + --name $CLUSTER_NAME \ + --resource-group $RESOURCE_GROUP \ + --location $REGION \ + --network-plugin azure \ + --network-plugin-mode overlay \ + --pod-cidr 192.168.0.0/16 \ + --network-dataplane cilium \ + --generate-ssh-keys +``` + + +```JSON +{ + "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.ContainerService/managedClusters/myAKSClusterxxx", + "location": "WestUS2", + "name": "myAKSClusterxxx", + "provisioningState": "Succeeded" +} +``` + +> [!NOTE] +> The `--network-dataplane cilium` flag replaces the deprecated `--enable-ebpf-dataplane` flag used in earlier versions of the aks-preview CLI extension. + +## Frequently asked questions + +- **Can I customize Cilium configuration?** + + No, AKS manages the Cilium configuration and it can't be modified. We recommend that customers who require more control use [AKS BYO CNI](./use-byo-cni.md) and install Cilium manually. + +- **Can I use `CiliumNetworkPolicy` custom resources instead of Kubernetes `NetworkPolicy` resources?** + + `CiliumNetworkPolicy` custom resources are partially supported. Customers may use FQDN filtering as part of the [Advanced Container Networking Services](./advanced-container-networking-services-overview.md) feature bundle. + + This `CiliumNetworkPolicy` example demonstrates a sample matching pattern for services that match the specified label. + + ```yaml + apiVersion: "cilium.io/v2" + kind: CiliumNetworkPolicy + metadata: + name: "example-fqdn" + spec: + endpointSelector: + matchLabels: + foo: bar + egress: + - toFQDNs: + - matchPattern: "*.example.com" + ``` + +- **Why is traffic being blocked when the `NetworkPolicy` has an `ipBlock` that allows the IP address?** + + A limitation of Azure CNI Powered by Cilium is that a `NetworkPolicy`'s `ipBlock` can't select pod or node IPs. + + For example, this `NetworkPolicy` has an `ipBlock` that allows all egress to `0.0.0.0/0`: + ```yaml + apiVersion: networking.k8s.io/v1 + kind: NetworkPolicy + metadata: + name: example-ipblock + spec: + podSelector: {} + policyTypes: + - Egress + egress: + - to: + - ipBlock: + cidr: 0.0.0.0/0 # This will still block pod and node IPs. + ``` + + However, when this `NetworkPolicy` is applied, Cilium blocks egress to pod and node IPs even though the IPs are within the `ipBlock` CIDR. + + As a workaround, you can add `namespaceSelector` and `podSelector` to select pods. This example selects all pods in all namespaces: + ```yaml + apiVersion: networking.k8s.io/v1 + kind: NetworkPolicy + metadata: + name: example-ipblock + spec: + podSelector: {} + policyTypes: + - Egress + egress: + - to: + - ipBlock: + cidr: 0.0.0.0/0 + - namespaceSelector: {} + - podSelector: {} + ``` + + > [!NOTE] + > It isn't currently possible to specify a `NetworkPolicy` with an `ipBlock` to allow traffic to node IPs. +- **Does AKS configure CPU or memory limits on the Cilium `daemonset`?** + + No, AKS doesn't configure CPU or memory limits on the Cilium `daemonset` because Cilium is a critical system component for pod networking and network policy enforcement. + +- **Does Azure CNI powered by Cilium use Kube-Proxy?** + + No, AKS clusters created with network dataplane as Cilium don't use Kube-Proxy. 
+ If the AKS clusters are on [Azure CNI Overlay](./azure-cni-overlay.md) or [Azure CNI with dynamic IP allocation](./configure-azure-cni-dynamic-ip-allocation.md) and are upgraded to AKS clusters running Azure CNI powered by Cilium, new nodes workloads are created without kube-proxy. Older workloads are also migrated to run without kube-proxy as a part of this upgrade process. + +## Next steps + +Learn more about networking in AKS in the following articles: + +* [Upgrade Azure CNI IPAM modes and Dataplane Technology](upgrade-azure-cni.md). + +* [Use a static IP address with the Azure Kubernetes Service (AKS) load balancer](static-ip.md) + +* [Use an internal load balancer with Azure Container Service (AKS)](internal-lb.md) + +* [Create a basic ingress controller with external network connectivity][aks-ingress-basic] + + +[aks-ingress-basic]: ingress-basic.md \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/concepts-network-azure-cni-pod-subnet.md b/scenarios/azure-aks-docs/articles/aks/concepts-network-azure-cni-pod-subnet.md new file mode 100644 index 000000000..ce27025a1 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/concepts-network-azure-cni-pod-subnet.md @@ -0,0 +1,133 @@ +--- +title: Concepts - Azure CNI Pod Subnet networking in AKS +description: Learn about Azure CNI Pod Subnet, dynamic IP allocation mode, and static block allocation mode in Azure Kubernetes Service (AKS). +ms.topic: concept-article +ms.date: 05/21/2024 +author: schaffererin +ms.author: schaffererin +ms.custom: references_regions, innovation-engine +--- + +# Azure Container Networking Interface (CNI) Pod Subnet + +Azure CNI Pod Subnet assigns IP addresses to pods from a separate subnet from your cluster Nodes. This feature is available in two modes: Dynamic IP Allocation and Static Block Allocation (Preview). + +## Prerequisites + +> [!NOTE] +> When using static block allocation of CIDRs, exposing an application as a Private Link Service using a Kubernetes Load Balancer Service isn't supported. + +- Review the [prerequisites][azure-cni-prereq] for configuring basic Azure CNI networking in AKS, as the same prerequisites apply to this article. +- Review the [deployment parameters][azure-cni-deployment-parameters] for configuring basic Azure CNI networking in AKS, as the same parameters apply. +- AKS Engine and DIY clusters aren't supported. +- Azure CLI version `2.37.0` or later and the `aks-preview` extension version `2.0.0b2` or later. +- Register the subscription-level feature flag for your subscription: 'Microsoft.ContainerService/AzureVnetScalePreview'. + +## Enable Container Insights (AKS monitoring) + +If you have an existing cluster, you can enable Container Insights (AKS monitoring) using the following command **only if your cluster was created with monitoring enabled or is associated with a valid Log Analytics Workspace in the same region**. Otherwise, refer to Microsoft Docs for additional workspace setup requirements. 
+ +```azurecli-interactive +az aks enable-addons --addons monitoring --name $CLUSTER_NAME --resource-group $RESOURCE_GROUP_NAME +``` + +Results: + + + +```output +{ + "addons": [ + { + "addonType": "Monitoring", + "enabled": true, + "identity": { + "clientId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "objectId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "resourceId": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/xxxxxxxx/providers/Microsoft.ManagedIdentity/userAssignedIdentities/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + }, + "name": "omsagent", + "config": { + ... + } + }, + ... + ], + "name": "my-aks-cluster", + "resourceGroup": "my-aks-rg", + ... +} +``` + +## Dynamic IP allocation mode + +Dynamic IP allocation helps mitigate pod IP address exhaustion issues by allocating pod IPs from a subnet that's separate from the subnet hosting the AKS cluster. + +The dynamic IP allocation mode offers the following benefits: + +- **Better IP utilization**: IPs are dynamically allocated to cluster Pods from the Pod subnet. This leads to better utilization of IPs in the cluster compared to the traditional CNI solution, which does static allocation of IPs for every node. +- **Scalable and flexible**: Node and pod subnets can be scaled independently. A single pod subnet can be shared across multiple node pools of a cluster or across multiple AKS clusters deployed in the same VNet. You can also configure a separate pod subnet for a node pool. +- **High performance**: Since pods are assigned VNet IPs, they have direct connectivity to other cluster pods and resources in the VNet. The solution supports very large clusters without any degradation in performance. +- **Separate VNet policies for pods**: Since pods have a separate subnet, you can configure separate VNet policies for them that are different from node policies. This enables many useful scenarios, such as allowing internet connectivity only for pods and not for nodes, fixing the source IP for pod in a node pool using an Azure NAT Gateway, and using network security groups (NSGs) to filter traffic between node pools. +- **Kubernetes network policies**: Both the Azure Network Policies and Calico work with this mode. + +### Plan IP addressing + +With dynamic IP allocation, nodes and pods scale independently, so you can plan their address spaces separately. Since pod subnets can be configured to the granularity of a node pool, you can always add a new subnet when you add a node pool. The system pods in a cluster/node pool also receive IPs from the pod subnet, so this behavior needs to be accounted for. + +IPs are allocated to nodes in batches of 16. Pod subnet IP allocation should be planned with a minimum of 16 IPs per node in the cluster, as the nodes request 16 IPs on startup and request another batch of 16 anytime there are <8 IPs unallocated in their allotment. + +IP address planning for Kubernetes services and Docker Bridge remain unchanged. + +## Static block allocation mode (Preview) + +Static block allocation helps mitigate potential pod subnet sizing and Azure address mapping limitations by assigning CIDR blocks to nodes rather than individual IPs. + +The static block allocation mode offers the following benefits: + +- **Better IP scalability**: CIDR blocks are statically allocated to the cluster nodes and are present for the lifetime of the node, as opposed to the traditional dynamic allocation of individual IPs with traditional CNI. 
This enables routing based on CIDR blocks and helps scale the cluster limit up to 1 million pods from the traditional 65K pods per cluster. Your Azure Virtual Network must be large enough to accommodate the scale of your cluster. +- **Flexibility**: Node and pod subnets can be scaled independently. A single pod subnet can be shared across multiple node pools of a cluster or across multiple AKS clusters deployed in the same VNet. You can also configure a separate pod subnet for a node pool. +- **High performance**: Since pods are assigned virtual network IPs, they have direct connectivity to other cluster pods and resources in the VNet. +- **Separate VNet policies for pods**: Since pods have a separate subnet, you can configure separate VNet policies for them that are different from node policies. This enables many useful scenarios such as allowing internet connectivity only for pods and not for nodes, fixing the source IP for pod in a node pool using an Azure NAT Gateway, and using NSGs to filter traffic between node pools. +- **Kubernetes network policies**: Cilium, Azure NPM, and Calico work with this solution. + +### Limitations + +Below are some of the limitations of using Azure CNI Static Block allocation: +- Minimum Kubernetes Version required is 1.28 +- Maximum subnet size supported is x.x.x.x/12 ~ 1 million IPs +- Only a single mode of operation can be used per subnet. If a subnet uses Static Block allocation mode, it cannot be use Dynamic IP allocation mode in a different cluster or node pool with the same subnet and vice versa. +- Only supported in new clusters or when adding node pools with a different subnet to existing clusters. Migrating or updating existing clusters or node pools is not supported. +- Across all the CIDR blocks assigned to a node in the node pool, one IP will be selected as the primary IP of the node. Thus, for network administrators selecting the `--max-pods` value try to use the calculation below to best serve your needs and have optimal usage of IPs in the subnet: + +`max_pods = (N * 16) - 1` where `N` is any positive integer and `N` > 0 + +### Plan IP addressing + +With static block allocation, nodes and pods scale independently, so you can plan their address spaces separately. Since pod subnets can be configured to the granularity of a node pool, you can always add a new subnet when you add a node pool. The system pods in a cluster/node pool also receive IPs from the pod subnet, so this behavior needs to be accounted for. + +CIDR blocks of /28 (16 IPs) are allocated to nodes based on your `--max-pods` configuration for your node pool, which defines the maximum number of pods per node. 1 IP is reserved on each node from all the available IPs on that node for internal purposes. + +While planning your IPs, it's important to define your `--max-pods` configuration using the following calculation: `max_pods_per_node = (16 * N) - 1`, where `N` is any positive integer greater than `0`. + +Ideal values with no IP wastage would require the max pods value to conform to the above expression. 
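+
+For reference, a node pool that uses static block allocation with a conforming `--max-pods` value might be added along the following lines. This is a minimal, non-executed sketch: the `--pod-ip-allocation-mode` parameter is part of the static block allocation preview (currently surfaced through the `aks-preview` Azure CLI extension), and the names and subnet IDs are placeholders.
+
+```text
+az aks nodepool add \
+    --resource-group <resource-group-name> \
+    --cluster-name <cluster-name> \
+    --name <nodepool-name> \
+    --max-pods 31 \
+    --vnet-subnet-id <node-subnet-resource-id> \
+    --pod-subnet-id <pod-subnet-resource-id> \
+    --pod-ip-allocation-mode StaticBlock
+```
+
+With `--max-pods 31`, each node is assigned two /28 CIDR blocks (32 IPs) and reserves one IP for internal use, so no pod IPs are wasted.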
+ +See the following example cases: + +| Example case | `max_pods` | CIDR Blocks allocated per node | Total IP available for pods | IP wastage for node | +| --- | --- | --- | --- | --- | +| Low wastage (acceptable) | 30 | 2 | (16 * 2) - 1 = 32 - 1 = 31 | 31 - 30 = 1 | +| Ideal case | 31 | 2 | (16 * 2) - 1 = 32 - 1 = 31 | 31 - 31 = 0 | +| High wastage (not recommended) | 32 | 3 | (16 * 3) - 1 = 48 - 1 = 47 | 47 - 32 = 15 | + +IP address planning for Kubernetes services remains unchanged. + +> [!NOTE] +> Ensure your VNet has a sufficiently large and contiguous address space to support your cluster's scale. + + + + +[azure-cni-prereq]: ./configure-azure-cni.md#prerequisites +[azure-cni-deployment-parameters]: ./azure-cni-overview.md#deployment-parameters +[az-aks-enable-addons]: /cli/azure/aks#az_aks_enable_addons \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/concepts-preview-api-life-cycle.md b/scenarios/azure-aks-docs/articles/aks/concepts-preview-api-life-cycle.md new file mode 100644 index 000000000..e3dc80cf1 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/concepts-preview-api-life-cycle.md @@ -0,0 +1,80 @@ +--- +title: AKS Preview API life cycle +description: Learn about the AKS preview API life cycle. +ms.custom: azure-kubernetes-service,innovation-engine +ms.topic: concept-article +ms.date: 06/06/2024 +author: matthchr +ms.author: matthchr + +--- + +# AKS Preview API life cycle + +The Azure Kubernetes Service (AKS) preview APIs (APIs that end in `-preview`) have a lifespan of ~one year from their release date. +This means that you can expect the 2023-01-02-preview API to be deprecated somewhere around January 1st, 2024. + +We love when people try our preview features and give us feedback, so we encourage you to use the preview APIs and the +tools built on them (such as the [AKS Preview CLI Extension](https://github.com/Azure/azure-cli-extensions/tree/main/src/aks-preview)). + +After an API version is deprecated, it will no longer function! We recommend you routinely: +- Update your ARM/BICEP templates using preview API versions to use the latest version of the preview API. +- Update your AKS preview CLI extension to the latest version. +- Update any preview SDKs or other tools built on the preview API to the latest version. + +You should perform these updates at a minimum every 6-9 months. If you fail to do so, you will be notified that you are using a soon-to-be deprecated +API version as deprecation approaches. + +## How to check what API versions you're using + +If you're unsure what client or tool is using this API version, check the [activity logs](/azure/azure-monitor/essentials/activity-log) +using the following command: + +Set the API version you want to inspect for recent usage in the activity log. In this example, we are checking for the `2022-04-01-preview` API version. + +```bash +export API_VERSION="2022-04-01-preview" +az monitor activity-log list --offset 30d --max-events 10000 --namespace microsoft.containerservice --query "[?eventName.value == 'EndRequest' && contains(not_null(httpRequest.uri,''), '$API_VERSION')]" +``` + +## How to update to a newer version of the API + +- For Azure SDKs: use a newer API version by updating to a [newer version of the SDK](https://azure.github.io/azure-sdk/releases/latest/index.html?search=containerservice). +- For Azure CLI: Update the CLI itself and the aks-preview extension (if used) to the latest version by running `az upgrade` and `az extension update --name "aks-preview"`. 
+- For Terraform: Update to the latest version of the AzureRM Terraform module. To find out what version of the API a particular Terraform release is using, + check the [Terraform release notes](/azure/developer/terraform/provider-version-history-azurerm) or + git log [this file](https://github.com/hashicorp/terraform-provider-azurerm/blob/main/internal/services/containers/client/client.go). +- For other tools: Update the tool to the latest version. + + +## Upcoming deprecations + +| API version | Announce Date | Deprecation Date | +|--------------------|-------------------|-------------------| +| 2022-09-02-preview | March 27, 2024 | June 20, 2024 | +| 2022-10-02-preview | March 27, 2024 | June 20, 2024 | +| 2023-01-02-preview | March 27, 2024 | June 20, 2024 | +| 2023-02-02-preview | March 27, 2024 | June 20, 2024 | +| 2023-03-02-preview | Oct 21, 2024 | February 3, 2025 | +| 2023-04-02-preview | Oct 21, 2024 | February 10, 2025 | +| 2023-05-02-preview | Oct 21, 2024 | February 17, 2025 | +| 2023-06-02-preview | Oct 21, 2024 | February 24, 2025 | +| 2023-07-02-preview | Oct 21, 2024 | March 3, 2025 | +| 2023-08-02-preview | Oct 21, 2024 | March 10, 2025 | + +## Completed deprecations + +| API version | Announce Date | Deprecation Date | +|--------------------|-------------------|-------------------| +| 2018-08-01-preview | March 7, 2023 | June 1, 2023 | +| 2021-11-01-preview | March 23, 2023 | July 1, 2023 | +| 2022-02-02-preview | April 27, 2023 | August 1, 2023 | +| 2022-01-02-preview | May 3, 2023 | Sept 1, 2023 | +| 2022-03-02-preview | May 3, 2023 | Sept 1, 2023 | +| 2022-04-02-preview | May 3, 2023 | Sept 1, 2023 | +| 2022-05-02-preview | May 3, 2023 | Sept 1, 2023 | +| 2022-06-02-preview | May 3, 2023 | Sept 1, 2023 | +| 2022-07-02-preview | November 20, 2023 | February 14, 2024 | +| 2022-08-02-preview | March 27, 2024 | June 20, 2024 | +| 2022-08-03-preview | March 27, 2024 | June 20, 2024 | +| 2022-11-02-preview | March 27, 2024 | June 20, 2024 | diff --git a/scenarios/azure-aks-docs/articles/aks/cost-analysis.md b/scenarios/azure-aks-docs/articles/aks/cost-analysis.md new file mode 100644 index 000000000..4c152e43b --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/cost-analysis.md @@ -0,0 +1,154 @@ +--- +title: Azure Kubernetes Service (AKS) cost analysis +description: Learn how to use cost analysis to surface granular cost allocation data for your Azure Kubernetes Service (AKS) cluster. +author: schaffererin +ms.author: schaffererin +ms.service: azure-kubernetes-service +ms.subservice: aks-monitoring +ms.topic: how-to +ms.date: 06/17/2024 +--- + +# Azure Kubernetes Service (AKS) cost analysis + +In this article, you learn how to enable cost analysis on Azure Kubernetes Service (AKS) to view detailed cost data for cluster resources. + +## About cost analysis + +AKS clusters rely on Azure resources, such as virtual machines (VMs), virtual disks, load balancers, and public IP addresses. Multiple applications can use these resources. The resource consumption patterns often differ for each application, so their contribution toward the total cluster resource cost might also vary. Some applications might have footprints across multiple clusters, which can pose a challenge when performing cost attribution and cost management. + +When you enable cost analysis on your AKS cluster, you can view detailed cost allocation scoped to Kubernetes constructs, such as clusters and namespaces, and Azure Compute, Network, and Storage resources. 
The add-on is built on top of [OpenCost](https://www.opencost.io/), an open-source Cloud Native Computing Foundation Incubating project for usage data collection. Usage data is reconciled with your Azure invoice data to provide a comprehensive view of your AKS cluster costs directly in the Azure portal Cost Management views. + +For more information on Microsoft Cost Management, see [Start analyzing costs in Azure](/azure/cost-management-billing/costs/quick-acm-cost-analysis). + +After enabling the cost analysis add-on and allowing time for data to be collected, you can use the information in [Understand AKS usage and costs](./understand-aks-costs.md) to help you understand your data. + +## Prerequisites + +* Your cluster must use the `Standard` or `Premium` tier, not the `Free` tier. +* To view cost analysis information, you must have one of the following roles on the subscription hosting the cluster: `Owner`, `Contributor`, `Reader`, `Cost Management Contributor`, or `Cost Management Reader`. +* [Microsoft Entra Workload ID](./workload-identity-overview.md) configured on your cluster. +* If using the Azure CLI, you need version `2.61.0` or later installed. +* Once you have enabled cost analysis, you can't downgrade your cluster to the `Free` tier without first disabling cost analysis. +* Access to the Azure API including Azure Resource Manager (ARM) API. For a list of fully qualified domain names (FQDNs) required, see [AKS Cost Analysis required FQDN](./outbound-rules-control-egress.md#aks-cost-analysis-add-on). + +## Limitations + +* Kubernetes cost views are only available for the *Enterprise Agreement* and *Microsoft Customer Agreement* Microsoft Azure offer types. For more information, see [Supported Microsoft Azure offers](/azure/cost-management-billing/costs/understand-cost-mgt-data#supported-microsoft-azure-offers). +* Currently, virtual nodes aren't supported. + +## Enable cost analysis on your AKS cluster + +You can enable the cost analysis with the `--enable-cost-analysis` flag during one of the following operations: + +* Creating a `Standard` or `Premium` tier AKS cluster. +* Updating an existing `Standard` or `Premium` tier AKS cluster. +* Upgrading a `Free` cluster to `Standard` or `Premium`. +* Upgrading a `Standard` cluster to `Premium`. +* Downgrading a `Premium` cluster to `Standard` tier. + +### Enable cost analysis on a new cluster + +Enable cost analysis on a new cluster using the [`az aks create`][az-aks-create] command with the `--enable-cost-analysis` flag. The following example creates a new AKS cluster in the `Standard` tier with cost analysis enabled: + +```text +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export RESOURCE_GROUP="AKSCostRG$RANDOM_SUFFIX" +export CLUSTER_NAME="AKSCostCluster$RANDOM_SUFFIX" +export LOCATION="WestUS2" +az aks create --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --location $LOCATION --enable-managed-identity --generate-ssh-keys --tier standard --enable-cost-analysis +``` + +Results: + +```JSON +{ + "id": "/subscriptions/xxxxx/resourceGroups/AKSCostRGxxxx", + "location": "WestUS2", + "name": "AKSCostClusterxxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.ContainerService/managedClusters" +} +``` + +### Enable cost analysis on an existing cluster + +Enable cost analysis on an existing cluster using the [`az aks update`][az-aks-update] command with the `--enable-cost-analysis` flag. 
The following example updates an existing AKS cluster in the `Standard` tier to enable cost analysis: + +```azurecli-interactive +az aks update --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --enable-cost-analysis +``` + +Results: + + + +```JSON +{ + "id": "/subscriptions/xxxxx/resourceGroups/AKSCostRGxxxx", + "name": "AKSCostClusterxxxx", + "properties": { + "provisioningState": "Succeeded" + } +} +``` + +> [!NOTE] +> An agent is deployed to the cluster when you enable the add-on. The agent consumes a small amount of CPU and Memory resources. + +> [!WARNING] +> The AKS cost analysis add-on Memory usage is dependent on the number of containers deployed. You can roughly approximate Memory consumption using *200 MB + 0.5 MB per container*. The current Memory limit is set to *4 GB*, which supports approximately *7000 containers per cluster*. These estimates are subject to change. + +## Disable cost analysis on your AKS cluster + +Disable cost analysis using the [`az aks update`][az-aks-update] command with the `--disable-cost-analysis` flag. + +```text +az aks update --name $CLUSTER_NAME --resource-group $RESOURCE_GROUP --disable-cost-analysis +``` + +Results: + +```JSON +{ + "id": "/subscriptions/xxxxx/resourceGroups/AKSCostRGxxxx", + "name": "AKSCostClusterxxxx", + "properties": { + "provisioningState": "Succeeded" + } +} +``` + +> [!NOTE] +> If you want to downgrade your cluster from the `Standard` or `Premium` tier to the `Free` tier while cost analysis is enabled, you must first disable cost analysis. + +## View the cost data + +You can view cost allocation data in the Azure portal. For more information, see [View AKS costs in Microsoft Cost Management](/azure/cost-management-billing/costs/view-kubernetes-costs). + +### Cost definitions + +In the Kubernetes namespaces and assets views, you might see any of the following charges: + +* **Idle charges** represent the cost of available resource capacity that isn't used by any workloads. +* **Service charges** represent the charges associated with the service, like Uptime SLA, Microsoft Defender for Containers, etc. +* **System charges** represent the cost of capacity reserved by AKS on each node to run system processes required by the cluster, including the kubelet and container runtime. [Learn more](./concepts-clusters-workloads.md#resource-reservations). +* **Unallocated charges** represent the cost of resources that couldn't be allocated to namespaces. + +> [!NOTE] +> It might take *up to one day* for data to finalize. After 24 hours, any fluctuations in costs for the previous day will have stabilized. + +## Troubleshooting + +If you're experiencing issues, such as the `cost-agent` pod getting `OOMKilled` or stuck in a `Pending` state, see [Troubleshoot AKS cost analysis add-on issues](/troubleshoot/azure/azure-kubernetes/aks-cost-analysis-add-on-issues). + +## Next steps + +For more information on cost in AKS, see [Understand Azure Kubernetes Service (AKS) usage and costs](./understand-aks-costs.md). 
+ + +[az-aks-create]: /cli/azure/aks#az-aks-create +[az-aks-update]: /cli/azure/aks#az-aks-update \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/create-postgresql-ha.md b/scenarios/azure-aks-docs/articles/aks/create-postgresql-ha.md new file mode 100644 index 000000000..13c346d4a --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/create-postgresql-ha.md @@ -0,0 +1,545 @@ +--- +title: 'Create infrastructure for deploying a highly available PostgreSQL database on AKS' +description: Create the infrastructure needed to deploy a highly available PostgreSQL database on AKS using the CloudNativePG operator. +ms.topic: how-to +ms.date: 06/07/2024 +author: kenkilty +ms.author: kkilty +ms.custom: innovation-engine, aks-related-content +--- + +# Create infrastructure for deploying a highly available PostgreSQL database on AKS + +In this article, you create the infrastructure needed to deploy a highly available PostgreSQL database on AKS using the [CloudNativePG (CNPG)](https://cloudnative-pg.io/) operator. + +[!INCLUDE [open source disclaimer](./includes/open-source-disclaimer.md)] + +## Before you begin + +* Review the deployment overview and make sure you meet all the prerequisites in [How to deploy a highly available PostgreSQL database on AKS with Azure CLI][postgresql-ha-deployment-overview]. +* [Set environment variables](#set-environment-variables) for use throughout this guide. +* [Install the required extensions](#install-required-extensions). + +## Install required extensions + +The `aks-preview`, `k8s-extension` and `amg` extensions provide more functionality for managing Kubernetes clusters and querying Azure resources. Install these extensions using the following [`az extension add`][az-extension-add] commands: + +```bash +az extension add --upgrade --name aks-preview --yes --allow-preview true +az extension add --upgrade --name k8s-extension --yes --allow-preview false +az extension add --upgrade --name amg --yes --allow-preview false +``` + +As a prerequisite for utilizing kubectl, it is essential to first install [Krew][install-krew], followed by the installation of the [CNPG plugin][cnpg-plugin]. This will enable the management of the PostgreSQL operator using the subsequent commands. + +```bash +( + set -x; cd "$(mktemp -d)" && + OS="$(uname | tr '[:upper:]' '[:lower:]')" && + ARCH="$(uname -m | sed -e 's/x86_64/amd64/' -e 's/\(arm\)\(64\)\?.*/\1\2/' -e 's/aarch64$/arm64/')" && + KREW="krew-${OS}_${ARCH}" && + curl -fsSLO "https://github.com/kubernetes-sigs/krew/releases/latest/download/${KREW}.tar.gz" && + tar zxvf "${KREW}.tar.gz" && + ./"${KREW}" install krew +) + +export PATH="${KREW_ROOT:-$HOME/.krew}/bin:$PATH" + +kubectl krew install cnpg +``` + +## Create a resource group + +Create a resource group to hold the resources you create in this guide using the [`az group create`][az-group-create] command. + +```bash +export TAGS="owner=user" +export LOCAL_NAME="cnpg" +export RESOURCE_GROUP_NAME="rg-${LOCAL_NAME}-${SUFFIX}" +export PRIMARY_CLUSTER_REGION="westus3" +az group create \ + --name $RESOURCE_GROUP_NAME \ + --location $PRIMARY_CLUSTER_REGION \ + --tags $TAGS \ + --query 'properties.provisioningState' \ + --output tsv +``` + +## Create a user-assigned managed identity + +In this section, you create a user-assigned managed identity (UAMI) to allow the CNPG PostgreSQL to use an AKS workload identity to access Azure Blob Storage. This configuration allows the PostgreSQL cluster on AKS to connect to Azure Blob Storage without a secret. 
+ +1. Create a user-assigned managed identity using the [`az identity create`][az-identity-create] command. + + ```bash + export SUFFIX=$(cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | fold -w 8 | head -n 1) + export AKS_UAMI_CLUSTER_IDENTITY_NAME="mi-aks-${LOCAL_NAME}-${SUFFIX}" + AKS_UAMI_WI_IDENTITY=$(az identity create \ + --name $AKS_UAMI_CLUSTER_IDENTITY_NAME \ + --resource-group $RESOURCE_GROUP_NAME \ + --location $PRIMARY_CLUSTER_REGION \ + --output json) + ``` + +1. Enable AKS workload identity and generate a service account to use later in this guide using the following commands: + + ```bash + export AKS_UAMI_WORKLOAD_OBJECTID=$( \ + echo "${AKS_UAMI_WI_IDENTITY}" | jq -r '.principalId') + export AKS_UAMI_WORKLOAD_RESOURCEID=$( \ + echo "${AKS_UAMI_WI_IDENTITY}" | jq -r '.id') + export AKS_UAMI_WORKLOAD_CLIENTID=$( \ + echo "${AKS_UAMI_WI_IDENTITY}" | jq -r '.clientId') + + echo "ObjectId: $AKS_UAMI_WORKLOAD_OBJECTID" + echo "ResourceId: $AKS_UAMI_WORKLOAD_RESOURCEID" + echo "ClientId: $AKS_UAMI_WORKLOAD_CLIENTID" + ``` + +The object ID is a unique identifier for the client ID (also known as the application ID) that uniquely identifies a security principal of type *Application* within the Microsoft Entra ID tenant. The resource ID is a unique identifier to manage and locate a resource in Azure. These values are required to enabled AKS workload identity. + +The CNPG operator automatically generates a service account called *postgres* that you use later in the guide to create a federated credential that enables OAuth access from PostgreSQL to Azure Storage. + +## Create a storage account in the primary region + +1. Create an object storage account to store PostgreSQL backups in the primary region using the [`az storage account create`][az-storage-account-create] command. + + ```bash + export PG_PRIMARY_STORAGE_ACCOUNT_NAME="hacnpgpsa${SUFFIX}" + + az storage account create \ + --name $PG_PRIMARY_STORAGE_ACCOUNT_NAME \ + --resource-group $RESOURCE_GROUP_NAME \ + --location $PRIMARY_CLUSTER_REGION \ + --sku Standard_ZRS \ + --kind StorageV2 \ + --query 'provisioningState' \ + --output tsv + ``` + +1. Create the storage container to store the Write Ahead Logs (WAL) and regular PostgreSQL on-demand and scheduled backups using the [`az storage container create`][az-storage-container-create] command. + + ```bash + export PG_STORAGE_BACKUP_CONTAINER_NAME="backups" + + az storage container create \ + --name $PG_STORAGE_BACKUP_CONTAINER_NAME \ + --account-name $PG_PRIMARY_STORAGE_ACCOUNT_NAME \ + --auth-mode login + ``` + + Example output: + + ```output + { + "created": true + } + ``` + + > [!NOTE] + > If you encounter the error message: `The request may be blocked by network rules of storage account. Please check network rule set using 'az storage account show -n accountname --query networkRuleSet'. If you want to change the default action to apply when no rule matches, please use 'az storage account update'`. Please verify user permissions for Azure Blob Storage and, if **necessary**, elevate your role to `Storage Blob Data Owner` using the commands provided below and after retry the [`az storage container create`][az-storage-container-create] command. 
+ + ```bash + export USER_ID=$(az ad signed-in-user show --query id --output tsv) + + export STORAGE_ACCOUNT_PRIMARY_RESOURCE_ID=$(az storage account show \ + --name $PG_PRIMARY_STORAGE_ACCOUNT_NAME \ + --resource-group $RESOURCE_GROUP_NAME \ + --query "id" \ + --output tsv) + + az role assignment list --scope $STORAGE_ACCOUNT_PRIMARY_RESOURCE_ID --output table + + az role assignment create \ + --role "Storage Blob Data Contributor" \ + --assignee-object-id $AKS_UAMI_WORKLOAD_OBJECTID \ + --assignee-principal-type ServicePrincipal \ + --scope $STORAGE_ACCOUNT_PRIMARY_RESOURCE_ID \ + --query "id" \ + --output tsv + ``` + +## Assign RBAC to storage accounts + +To enable backups, the PostgreSQL cluster needs to read and write to an object store. The PostgreSQL cluster running on AKS uses a workload identity to access the storage account via the CNPG operator configuration parameter [`inheritFromAzureAD`][inherit-from-azuread]. + +1. Get the primary resource ID for the storage account using the [`az storage account show`][az-storage-account-show] command. + + ```bash + export STORAGE_ACCOUNT_PRIMARY_RESOURCE_ID=$(az storage account show \ + --name $PG_PRIMARY_STORAGE_ACCOUNT_NAME \ + --resource-group $RESOURCE_GROUP_NAME \ + --query "id" \ + --output tsv) + + echo $STORAGE_ACCOUNT_PRIMARY_RESOURCE_ID + ```` + +1. Assign the "Storage Blob Data Contributor" Azure built-in role to the object ID with the storage account resource ID scope for the UAMI associated with the managed identity for each AKS cluster using the [`az role assignment create`][az-role-assignment-create] command. + + ```bash + az role assignment create \ + --role "Storage Blob Data Contributor" \ + --assignee-object-id $AKS_UAMI_WORKLOAD_OBJECTID \ + --assignee-principal-type ServicePrincipal \ + --scope $STORAGE_ACCOUNT_PRIMARY_RESOURCE_ID \ + --query "id" \ + --output tsv + ``` + +## Set up monitoring infrastructure + +In this section, you deploy an instance of Azure Managed Grafana, an Azure Monitor workspace, and an Azure Monitor Log Analytics workspace to enable monitoring of the PostgreSQL cluster. You also store references to the created monitoring infrastructure to use as input during the AKS cluster creation process later in the guide. This section might take some time to complete. + +> [!NOTE] +> Azure Managed Grafana instances and AKS clusters are billed independently. For more pricing information, see [Azure Managed Grafana pricing][azure-managed-grafana-pricing]. + +1. Create an Azure Managed Grafana instance using the [`az grafana create`][az-grafana-create] command. + + ```bash + export GRAFANA_PRIMARY="grafana-${LOCAL_NAME}-${SUFFIX}" + + export GRAFANA_RESOURCE_ID=$(az grafana create \ + --resource-group $RESOURCE_GROUP_NAME \ + --name $GRAFANA_PRIMARY \ + --location $PRIMARY_CLUSTER_REGION \ + --zone-redundancy Enabled \ + --tags $TAGS \ + --query "id" \ + --output tsv) + + echo $GRAFANA_RESOURCE_ID + ``` + +1. Create an Azure Monitor workspace using the [`az monitor account create`][az-monitor-account-create] command. + + ```bash + export AMW_PRIMARY="amw-${LOCAL_NAME}-${SUFFIX}" + + export AMW_RESOURCE_ID=$(az monitor account create \ + --name $AMW_PRIMARY \ + --resource-group $RESOURCE_GROUP_NAME \ + --location $PRIMARY_CLUSTER_REGION \ + --tags $TAGS \ + --query "id" \ + --output tsv) + + echo $AMW_RESOURCE_ID + ``` + +1. Create an Azure Monitor Log Analytics workspace using the [`az monitor log-analytics workspace create`][az-monitor-log-analytics-workspace-create] command. 
+ + ```bash + export ALA_PRIMARY="ala-${LOCAL_NAME}-${SUFFIX}" + + export ALA_RESOURCE_ID=$(az monitor log-analytics workspace create \ + --resource-group $RESOURCE_GROUP_NAME \ + --workspace-name $ALA_PRIMARY \ + --location $PRIMARY_CLUSTER_REGION \ + --query "id" \ + --output tsv) + + echo $ALA_RESOURCE_ID + ``` + +## Create the AKS cluster to host the PostgreSQL cluster + +In this section, you create a multizone AKS cluster with a system node pool. The AKS cluster hosts the PostgreSQL cluster primary replica and two standby replicas, each aligned to a different availability zone to enable zonal redundancy. + +You also add a user node pool to the AKS cluster to host the PostgreSQL cluster. Using a separate node pool allows for control over the Azure VM SKUs used for PostgreSQL and enables the AKS system pool to optimize performance and costs. You apply a label to the user node pool that you can reference for node selection when deploying the CNPG operator later in this guide. This section might take some time to complete. + +1. Create an AKS cluster using the [`az aks create`][az-aks-create] command. + + ```bash + export SYSTEM_NODE_POOL_VMSKU="standard_d2s_v3" + export USER_NODE_POOL_NAME="postgres" + export USER_NODE_POOL_VMSKU="standard_d4s_v3" + export AKS_PRIMARY_CLUSTER_NAME="aks-primary-${LOCAL_NAME}-${SUFFIX}" + export AKS_PRIMARY_MANAGED_RG_NAME="rg-${LOCAL_NAME}-primary-aksmanaged-${SUFFIX}" + export AKS_CLUSTER_VERSION="1.29" + export MY_PUBLIC_CLIENT_IP=$(dig +short myip.opendns.com @resolver3.opendns.com) + + az aks create \ + --name $AKS_PRIMARY_CLUSTER_NAME \ + --tags $TAGS \ + --resource-group $RESOURCE_GROUP_NAME \ + --location $PRIMARY_CLUSTER_REGION \ + --generate-ssh-keys \ + --node-resource-group $AKS_PRIMARY_MANAGED_RG_NAME \ + --enable-managed-identity \ + --assign-identity $AKS_UAMI_WORKLOAD_RESOURCEID \ + --network-plugin azure \ + --network-plugin-mode overlay \ + --network-dataplane cilium \ + --nodepool-name systempool \ + --enable-oidc-issuer \ + --enable-workload-identity \ + --enable-cluster-autoscaler \ + --min-count 2 \ + --max-count 3 \ + --node-vm-size $SYSTEM_NODE_POOL_VMSKU \ + --enable-azure-monitor-metrics \ + --azure-monitor-workspace-resource-id $AMW_RESOURCE_ID \ + --grafana-resource-id $GRAFANA_RESOURCE_ID \ + --api-server-authorized-ip-ranges $MY_PUBLIC_CLIENT_IP \ + --tier standard \ + --kubernetes-version $AKS_CLUSTER_VERSION \ + --zones 1 2 3 \ + --output table + ``` + +2. Add a user node pool to the AKS cluster using the [`az aks nodepool add`][az-aks-node-pool-add] command. + + ```bash + az aks nodepool add \ + --resource-group $RESOURCE_GROUP_NAME \ + --cluster-name $AKS_PRIMARY_CLUSTER_NAME \ + --name $USER_NODE_POOL_NAME \ + --enable-cluster-autoscaler \ + --min-count 3 \ + --max-count 6 \ + --node-vm-size $USER_NODE_POOL_VMSKU \ + --zones 1 2 3 \ + --labels workload=postgres \ + --output table + ``` + +> [!NOTE] +> If you receive the error message `"(OperationNotAllowed) Operation is not allowed: Another operation (Updating) is in progress, please wait for it to finish before starting a new operation."` when adding the AKS node pool, please wait a few minutes for the AKS cluster operations to complete and then run the `az aks nodepool add` command. + +## Connect to the AKS cluster and create namespaces + +In this section, you get the AKS cluster credentials, which serve as the keys that allow you to authenticate and interact with the cluster. 
Once connected, you create two namespaces: one for the CNPG controller manager services and one for the PostgreSQL cluster and its related services. + +1. Get the AKS cluster credentials using the [`az aks get-credentials`][az-aks-get-credentials] command. + + ```bash + az aks get-credentials \ + --resource-group $RESOURCE_GROUP_NAME \ + --name $AKS_PRIMARY_CLUSTER_NAME \ + --output none + ``` + +2. Create the namespace for the CNPG controller manager services, the PostgreSQL cluster, and its related services by using the [`kubectl create namespace`][kubectl-create-namespace] command. + + ```bash + export PG_NAMESPACE="cnpg-database" + export PG_SYSTEM_NAMESPACE="cnpg-system" + + kubectl create namespace $PG_NAMESPACE --context $AKS_PRIMARY_CLUSTER_NAME + kubectl create namespace $PG_SYSTEM_NAMESPACE --context $AKS_PRIMARY_CLUSTER_NAME + ``` + +## Update the monitoring infrastructure + +The Azure Monitor workspace for Managed Prometheus and Azure Managed Grafana are automatically linked to the AKS cluster for metrics and visualization during the cluster creation process. In this section, you enable log collection with AKS Container insights and validate that Managed Prometheus is scraping metrics and Container insights is ingesting logs. + +1. Enable Container insights monitoring on the AKS cluster using the [`az aks enable-addons`][az-aks-enable-addons] command. + + ```bash + az aks enable-addons \ + --addon monitoring \ + --name $AKS_PRIMARY_CLUSTER_NAME \ + --resource-group $RESOURCE_GROUP_NAME \ + --workspace-resource-id $ALA_RESOURCE_ID \ + --output table + ``` + +2. Validate that Managed Prometheus is scraping metrics and Container insights is ingesting logs from the AKS cluster by inspecting the DaemonSet using the [`kubectl get`][kubectl-get] command and the [`az aks show`][az-aks-show] command. + + ```bash + kubectl get ds ama-metrics-node \ + --context $AKS_PRIMARY_CLUSTER_NAME \ + --namespace=kube-system + + kubectl get ds ama-logs \ + --context $AKS_PRIMARY_CLUSTER_NAME \ + --namespace=kube-system + + az aks show \ + --resource-group $RESOURCE_GROUP_NAME \ + --name $AKS_PRIMARY_CLUSTER_NAME \ + --query addonProfiles + ``` + + Your output should resemble the following example output, with *six* nodes total (three for the system node pool and three for the PostgreSQL node pool) and the Container insights showing `"enabled": true`: + + ```output + NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR + ama-metrics-node 6 6 6 6 6 + + NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR + ama-logs 6 6 6 6 6 + + { + "omsagent": { + "config": { + "logAnalyticsWorkspaceResourceID": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/rg-cnpg-9vbin3p8/providers/Microsoft.OperationalInsights/workspaces/ala-cnpg-9vbin3p8", + "useAADAuth": "true" + }, + "enabled": true, + "identity": null + } + } + ``` + +## Create a public static IP for PostgreSQL cluster ingress + +To validate deployment of the PostgreSQL cluster and use client PostgreSQL tooling, such as *psql* and *PgAdmin*, you need to expose the primary and read-only replicas to ingress. In this section, you create an Azure public IP resource that you later supply to an Azure load balancer to expose PostgreSQL endpoints for query. + +1. Get the name of the AKS cluster node resource group using the [`az aks show`][az-aks-show] command. 
+ + ```bash + export AKS_PRIMARY_CLUSTER_NODERG_NAME=$(az aks show \ + --name $AKS_PRIMARY_CLUSTER_NAME \ + --resource-group $RESOURCE_GROUP_NAME \ + --query nodeResourceGroup \ + --output tsv) + + echo $AKS_PRIMARY_CLUSTER_NODERG_NAME + ``` + +2. Create the public IP address using the [`az network public-ip create`][az-network-public-ip-create] command. + + ```bash + export AKS_PRIMARY_CLUSTER_PUBLICIP_NAME="$AKS_PRIMARY_CLUSTER_NAME-pip" + + az network public-ip create \ + --resource-group $AKS_PRIMARY_CLUSTER_NODERG_NAME \ + --name $AKS_PRIMARY_CLUSTER_PUBLICIP_NAME \ + --location $PRIMARY_CLUSTER_REGION \ + --sku Standard \ + --zone 1 2 3 \ + --allocation-method static \ + --output table + ``` + +3. Get the newly created public IP address using the [`az network public-ip show`][az-network-public-ip-show] command. + + ```bash + export AKS_PRIMARY_CLUSTER_PUBLICIP_ADDRESS=$(az network public-ip show \ + --resource-group $AKS_PRIMARY_CLUSTER_NODERG_NAME \ + --name $AKS_PRIMARY_CLUSTER_PUBLICIP_NAME \ + --query ipAddress \ + --output tsv) + + echo $AKS_PRIMARY_CLUSTER_PUBLICIP_ADDRESS + ``` + +4. Get the resource ID of the node resource group using the [`az group show`][az-group-show] command. + + ```bash + export AKS_PRIMARY_CLUSTER_NODERG_NAME_SCOPE=$(az group show --name \ + $AKS_PRIMARY_CLUSTER_NODERG_NAME \ + --query id \ + --output tsv) + echo $AKS_PRIMARY_CLUSTER_NODERG_NAME_SCOPE + ``` + +5. Assign the "Network Contributor" role to the UAMI object ID using the node resource group scope using the [`az role assignment create`][az-role-assignment-create] command. + + ```bash + az role assignment create \ + --assignee-object-id ${AKS_UAMI_WORKLOAD_OBJECTID} \ + --assignee-principal-type ServicePrincipal \ + --role "Network Contributor" \ + --scope ${AKS_PRIMARY_CLUSTER_NODERG_NAME_SCOPE} + ``` + +## Install the CNPG operator in the AKS cluster + +In this section, you install the CNPG operator in the AKS cluster using Helm or a YAML manifest. + +### [Helm](#tab/helm) + +1. Add the CNPG Helm repo using the [`helm repo add`][helm-repo-add] command. + + ```bash + helm repo add cnpg https://cloudnative-pg.github.io/charts + ``` + +2. Upgrade the CNPG Helm repo and install it on the AKS cluster using the [`helm upgrade`][helm-upgrade] command with the `--install` flag. + + ```bash + helm upgrade --install cnpg \ + --namespace $PG_SYSTEM_NAMESPACE \ + --create-namespace \ + --kube-context=$AKS_PRIMARY_CLUSTER_NAME \ + cnpg/cloudnative-pg + ``` + +3. Verify the operator installation on the AKS cluster using the [`kubectl get`][kubectl-get] command. + + ```bash + kubectl get deployment \ + --context $AKS_PRIMARY_CLUSTER_NAME \ + --namespace $PG_SYSTEM_NAMESPACE cnpg-cloudnative-pg + ``` + +### [YAML](#tab/yaml) + +1. Install the CNPG operator on the AKS cluster using the [`kubectl apply`][kubectl-apply] command. + + ```bash + kubectl apply --context $AKS_PRIMARY_CLUSTER_NAME \ + --namespace $PG_SYSTEM_NAMESPACE \ + --server-side -f \ + https://raw.githubusercontent.com/cloudnative-pg/cloudnative-pg/release-1.23/releases/cnpg-1.23.1.yaml + ``` + +2. Verify the operator installation on the AKS cluster using the [`kubectl get`][kubectl-get] command. 
+ + ```bash + kubectl get deployment \ + --namespace $PG_SYSTEM_NAMESPACE cnpg-controller-manager \ + --context $AKS_PRIMARY_CLUSTER_NAME + ``` + +--- + +## Next steps + +> [!div class="nextstepaction"] +> [Deploy a highly available PostgreSQL database on the AKS cluster][deploy-postgresql] + +## Contributors + +*This article is maintained by Microsoft. It was originally written by the following contributors*: + +* Ken Kilty | Principal TPM +* Russell de Pina | Principal TPM +* Adrian Joian | Senior Customer Engineer +* Jenny Hayes | Senior Content Developer +* Carol Smith | Senior Content Developer +* Erin Schaffer | Content Developer 2 + + +[az-identity-create]: /cli/azure/identity#az-identity-create +[az-grafana-create]: /cli/azure/grafana#az-grafana-create +[postgresql-ha-deployment-overview]: ./postgresql-ha-overview.md +[az-extension-add]: /cli/azure/extension#az_extension_add +[az-group-create]: /cli/azure/group#az_group_create +[az-storage-account-create]: /cli/azure/storage/account#az_storage_account_create +[az-storage-container-create]: /cli/azure/storage/container#az_storage_container_create +[inherit-from-azuread]: https://cloudnative-pg.io/documentation/1.23/appendixes/object_stores/#azure-blob-storage +[az-storage-account-show]: /cli/azure/storage/account#az_storage_account_show +[az-role-assignment-create]: /cli/azure/role/assignment#az_role_assignment_create +[az-monitor-account-create]: /cli/azure/monitor/account#az_monitor_account_create +[az-monitor-log-analytics-workspace-create]: /cli/azure/monitor/log-analytics/workspace#az_monitor_log_analytics_workspace_create +[azure-managed-grafana-pricing]: https://azure.microsoft.com/pricing/details/managed-grafana/ +[az-aks-create]: /cli/azure/aks#az_aks_create +[az-aks-node-pool-add]: /cli/azure/aks/nodepool#az_aks_nodepool_add +[az-aks-get-credentials]: /cli/azure/aks#az_aks_get_credentials +[kubectl-create-namespace]: https://kubernetes.io/docs/reference/kubectl/generated/kubectl_create/kubectl_create_namespace/ +[az-aks-enable-addons]: /cli/azure/aks#az_aks_enable_addons +[kubectl-get]: https://kubernetes.io/docs/reference/kubectl/generated/kubectl_get/ +[az-aks-show]: /cli/azure/aks#az_aks_show +[az-network-public-ip-create]: /cli/azure/network/public-ip#az_network_public_ip_create +[az-network-public-ip-show]: /cli/azure/network/public-ip#az_network_public_ip_show +[az-group-show]: /cli/azure/group#az_group_show +[helm-repo-add]: https://helm.sh/docs/helm/helm_repo_add/ +[helm-upgrade]: https://helm.sh/docs/helm/helm_upgrade/ +[kubectl-apply]: https://kubernetes.io/docs/reference/kubectl/generated/kubectl_apply/ +[deploy-postgresql]: ./deploy-postgresql-ha.md +[install-krew]: https://krew.sigs.k8s.io/ +[cnpg-plugin]: https://cloudnative-pg.io/documentation/current/kubectl-plugin/#using-krew \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/delete-cluster.md b/scenarios/azure-aks-docs/articles/aks/delete-cluster.md new file mode 100644 index 000000000..559bf7a6d --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/delete-cluster.md @@ -0,0 +1,74 @@ +--- +title: Delete an Azure Kubernetes Service (AKS) cluster +description: Learn about deleting a cluster in Azure Kubernetes Service (AKS). 
+ms.topic: overview +ms.author: schaffererin +author: schaffererin +ms.date: 04/16/2024 +ms.custom: innovation-engine +--- + +# Delete an Azure Kubernetes Service (AKS) cluster + +This article outlines cluster deletion in Azure Kubernetes Service (AKS), including what happens when you delete a cluster, alternatives to deleting a cluster, and how to delete a cluster. + +## What happens when you delete a cluster? + +When you delete a cluster, the following resources are deleted: + +* The [node resource group][node-resource-group] and its resources, including: + * The virtual machine scale sets and virtual machines (VMs) for each node in the cluster + * The virtual network and its subnets for the cluster + * The storage for the cluster +* The control plane and its resources +* Any node instances in the cluster along with any pods running on those nodes + +## Alternatives to deleting a cluster + +Before you delete a cluster, consider **stopping the cluster**. Stopping an AKS cluster stops the control plane and agent nodes, allowing you to save on compute costs while maintaining all objects except standalone pods. When you stop a cluster, its state is saved and you can restart the cluster at any time. For more information, see [Stop an AKS cluster][stop-cluster]. + +If you want to delete a cluster to change its configuration, you can instead use the [AKS cluster upgrade][upgrade-cluster] feature to upgrade the cluster to a different Kubernetes version or change the node pool configuration. For more information, see [Upgrade an AKS cluster][upgrade-cluster]. + +## Delete a cluster + +> [!IMPORTANT] +> **You can't recover a cluster after it's deleted**. If you need to recover a cluster, you need to create a new cluster and redeploy your applications. + +### [Azure CLI](#tab/azure-cli) + +Delete a cluster using the [`az aks delete`][az-aks-delete] command. The following example deletes the `myAKSCluster` cluster in the `myResourceGroup` resource group. + +Declare environment variables with generic names and a random suffix to ensure uniqueness. This avoids conflicts and enables the commands to be re-used in different sessions or by different users. + +```azurecli-interactive +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export AKS_CLUSTER_NAME="myAKSCluster$RANDOM_SUFFIX" +export RESOURCE_GROUP_NAME="myResourceGroup$RANDOM_SUFFIX" +az aks delete --name $AKS_CLUSTER_NAME --resource-group $RESOURCE_GROUP_NAME --yes --no-wait +``` + +### [Azure PowerShell](#tab/azure-powershell) + +Delete a cluster using the [`Remove-AzAks`][remove-azaks] command. The following example deletes the `myAKSCluster` cluster in the `myResourceGroup` resource group: + +```azurepowershell-interactive +Remove-AzAksCluster -Name myAKSCluster -ResourceGroupName myResourceGroup +``` + +### [Azure portal](#tab/azure-portal) + +You can delete a cluster using the Azure portal. To delete a cluster, navigate to the **Overview** page for the cluster and select **Delete**. You can also delete a cluster from the **Resource group** page by selecting the cluster and then selecting **Delete**. + +--- + +## Next steps + +For more information about AKS, see [Core Kubernetes concepts for AKS][core-concepts]. 
+ + +[node-resource-group]: ./concepts-clusters-workloads.md#node-resource-group +[stop-cluster]: ./start-stop-cluster.md +[upgrade-cluster]: ./upgrade-cluster.md +[az-aks-delete]: /cli/azure/aks#az_aks_delete +[remove-azaks]: /powershell/module/az.aks/remove-azakscluster +[core-concepts]: ./concepts-clusters-workloads.md \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/deploy-postgresql-ha.md b/scenarios/azure-aks-docs/articles/aks/deploy-postgresql-ha.md new file mode 100644 index 000000000..9f2eb91bd --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/deploy-postgresql-ha.md @@ -0,0 +1,1000 @@ +--- +title: 'Deploy a highly available PostgreSQL database on AKS with Azure CLI' +description: In this article, you deploy a highly available PostgreSQL database on AKS using the CloudNativePG operator. +ms.topic: how-to +ms.date: 06/07/2024 +author: kenkilty +ms.author: kkilty +ms.custom: innovation-engine, aks-related-content +--- + +# Deploy a highly available PostgreSQL database on AKS + +In this article, you deploy a highly available PostgreSQL database on AKS. + +* If you haven't already created the required infrastructure for this deployment, follow the steps in [Create infrastructure for deploying a highly available PostgreSQL database on AKS][create-infrastructure] to get set up, and then you can return to this article. + +[!INCLUDE [open source disclaimer](./includes/open-source-disclaimer.md)] + +## Create secret for bootstrap app user + +1. Generate a secret to validate the PostgreSQL deployment by interactive login for a bootstrap app user using the [`kubectl create secret`][kubectl-create-secret] command. + + ```bash + export PG_DATABASE_APPUSER_SECRET=$(echo -n | openssl rand -base64 16) + + kubectl create secret generic db-user-pass \ + --from-literal=username=app \ + --from-literal=password="${PG_DATABASE_APPUSER_SECRET}" \ + --namespace $PG_NAMESPACE \ + --context $AKS_PRIMARY_CLUSTER_NAME + ``` + +1. Validate that the secret was successfully created using the [`kubectl get`][kubectl-get] command. + + ```bash + kubectl get secret db-user-pass --namespace $PG_NAMESPACE --context $AKS_PRIMARY_CLUSTER_NAME + ``` + +## Set environment variables for the PostgreSQL cluster + +* Deploy a ConfigMap to set environment variables for the PostgreSQL cluster using the following [`kubectl apply`][kubectl-apply] command: + + ```bash + export ENABLE_AZURE_PVC_UPDATES="true" + cat < 5432/TCP 3h57m + pg-primary-cnpg-sryti1qf-ro ClusterIP 10.0.237.19 5432/TCP 3h57m + pg-primary-cnpg-sryti1qf-rw ClusterIP 10.0.244.125 5432/TCP 3h57m + ``` + + > [!NOTE] + > There are three services: `namespace/cluster-name-ro` mapped to port 5433, `namespace/cluster-name-rw`, and `namespace/cluster-name-r` mapped to port 5433. It’s important to avoid using the same port as the read/write node of the PostgreSQL database cluster. If you want applications to access only the read-only replica of the PostgreSQL database cluster, direct them to port 5433. The final service is typically used for data backups but can also function as a read-only node. + +1. Get the service details using the [`kubectl get`][kubectl-get] command. 
+ + ```bash + export PG_PRIMARY_CLUSTER_RW_SERVICE=$(kubectl get services \ + --namespace $PG_NAMESPACE \ + --context $AKS_PRIMARY_CLUSTER_NAME \ + -l "cnpg.io/cluster" \ + --output json | jq -r '.items[] | select(.metadata.name | endswith("-rw")) | .metadata.name') + + echo $PG_PRIMARY_CLUSTER_RW_SERVICE + + export PG_PRIMARY_CLUSTER_RO_SERVICE=$(kubectl get services \ + --namespace $PG_NAMESPACE \ + --context $AKS_PRIMARY_CLUSTER_NAME \ + -l "cnpg.io/cluster" \ + --output json | jq -r '.items[] | select(.metadata.name | endswith("-ro")) | .metadata.name') + + echo $PG_PRIMARY_CLUSTER_RO_SERVICE + ``` + +1. Configure the load balancer service with the following YAML files using the [`kubectl apply`][kubectl-apply] command. + + ```bash + cat < [!NOTE] +> You need the value of the app user password for PostgreSQL basic auth that was generated earlier and stored in the `$PG_DATABASE_APPUSER_SECRET` environment variable. + +* Validate the public PostgreSQL endpoints using the following `psql` commands: + + ```bash + echo "Public endpoint for PostgreSQL cluster: $AKS_PRIMARY_CLUSTER_ALB_DNSNAME" + + # Query the primary, pg_is_in_recovery = false + + psql -h $AKS_PRIMARY_CLUSTER_ALB_DNSNAME \ + -p 5432 -U app -d appdb -W -c "SELECT pg_is_in_recovery();" + ``` + + Example output + + ```output + pg_is_in_recovery + ------------------- + f + (1 row) + ``` + + ```bash + echo "Query a replica, pg_is_in_recovery = true" + + psql -h $AKS_PRIMARY_CLUSTER_ALB_DNSNAME \ + -p 5433 -U app -d appdb -W -c "SELECT pg_is_in_recovery();" + ``` + + Example output + + ```output + # Example output + + pg_is_in_recovery + ------------------- + t + (1 row) + ``` + + When successfully connected to the primary read-write endpoint, the PostgreSQL function returns `f` for *false*, indicating that the current connection is writable. + + When connected to a replica, the function returns `t` for *true*, indicating the database is in recovery and read-only. + +## Simulate an unplanned failover + +In this section, you trigger a sudden failure by deleting the pod running the primary, which simulates a sudden crash or loss of network connectivity to the node hosting the PostgreSQL primary. + +1. Check the status of the running pod instances using the following command: + + ```bash + kubectl cnpg status $PG_PRIMARY_CLUSTER_NAME --namespace $PG_NAMESPACE + ``` + + Example output + + ```output + Name Current LSN Rep role Status Node + --------------------------- ----------- -------- ------- ----------- + pg-primary-cnpg-sryti1qf-1 0/9000060 Primary OK aks-postgres-32388626-vmss000000 + pg-primary-cnpg-sryti1qf-2 0/9000060 Standby (sync) OK aks-postgres-32388626-vmss000001 + pg-primary-cnpg-sryti1qf-3 0/9000060 Standby (sync) OK aks-postgres-32388626-vmss000002 + ``` + +1. Delete the primary pod using the [`kubectl delete`][kubectl-delete] command. + + ```bash + PRIMARY_POD=$(kubectl get pod \ + --namespace $PG_NAMESPACE \ + --no-headers \ + -o custom-columns=":metadata.name" \ + -l role=primary) + + kubectl delete pod $PRIMARY_POD --grace-period=1 --namespace $PG_NAMESPACE + ``` + +1. 
Validate that the `pg-primary-cnpg-sryti1qf-2` pod instance is now the primary using the following command: + + ```bash + kubectl cnpg status $PG_PRIMARY_CLUSTER_NAME --namespace $PG_NAMESPACE + ``` + + Example output + + ```output + pg-primary-cnpg-sryti1qf-2 0/9000060 Primary OK aks-postgres-32388626-vmss000001 + pg-primary-cnpg-sryti1qf-1 0/9000060 Standby (sync) OK aks-postgres-32388626-vmss000000 + pg-primary-cnpg-sryti1qf-3 0/9000060 Standby (sync) OK aks-postgres-32388626-vmss000002 + ``` + +1. Reset the `pg-primary-cnpg-sryti1qf-1` pod instance as the primary using the following command: + + ```bash + kubectl cnpg promote $PG_PRIMARY_CLUSTER_NAME 1 --namespace $PG_NAMESPACE + ``` + +1. Validate that the pod instances have returned to their original state before the unplanned failover test using the following command: + + ```bash + kubectl cnpg status $PG_PRIMARY_CLUSTER_NAME --namespace $PG_NAMESPACE + ``` + + Example output + + ```output + Name Current LSN Rep role Status Node + --------------------------- ----------- -------- ------- ----------- + pg-primary-cnpg-sryti1qf-1 0/9000060 Primary OK aks-postgres-32388626-vmss000000 + pg-primary-cnpg-sryti1qf-2 0/9000060 Standby (sync) OK aks-postgres-32388626-vmss000001 + pg-primary-cnpg-sryti1qf-3 0/9000060 Standby (sync) OK aks-postgres-32388626-vmss000002 + ``` + +## Clean up resources + +* Once you're finished reviewing your deployment, delete all the resources you created in this guide using the [`az group delete`][az-group-delete] command. + + ```bash + az group delete --resource-group $RESOURCE_GROUP_NAME --no-wait --yes + ``` + +## Next steps + +In this how-to guide, you learned how to: + +* Use Azure CLI to create a multi-zone AKS cluster. +* Deploy a highly available PostgreSQL cluster and database using the CNPG operator. +* Set up monitoring for PostgreSQL using Prometheus and Grafana. +* Deploy a sample dataset to the PostgreSQL database. +* Perform PostgreSQL and AKS cluster upgrades. +* Simulate a cluster interruption and PostgreSQL replica failover. +* Perform a backup and restore of the PostgreSQL database. + +To learn more about how you can leverage AKS for your workloads, see [What is Azure Kubernetes Service (AKS)?][what-is-aks] + +## Contributors + +*This article is maintained by Microsoft. 
It was originally written by the following contributors*: + +* Ken Kilty | Principal TPM +* Russell de Pina | Principal TPM +* Adrian Joian | Senior Customer Engineer +* Jenny Hayes | Senior Content Developer +* Carol Smith | Senior Content Developer +* Erin Schaffer | Content Developer 2 +* Adam Sharif | Customer Engineer 2 + + +[helm-upgrade]: https://helm.sh/docs/helm/helm_upgrade/ +[create-infrastructure]: ./create-postgresql-ha.md +[kubectl-create-secret]: https://kubernetes.io/docs/reference/kubectl/generated/kubectl_create/kubectl_create_secret/ +[kubectl-get]: https://kubernetes.io/docs/reference/kubectl/generated/kubectl_get/ +[kubectl-apply]: https://kubernetes.io/docs/reference/kubectl/generated/kubectl_apply/ +[helm-repo-add]: https://helm.sh/docs/helm/helm_repo_add/ +[az-aks-show]: /cli/azure/aks#az_aks_show +[az-identity-federated-credential-create]: /cli/azure/identity/federated-credential#az_identity_federated_credential_create +[cluster-crd]: https://cloudnative-pg.io/documentation/1.23/cloudnative-pg.v1/#postgresql-cnpg-io-v1-ClusterSpec +[kubectl-describe]: https://kubernetes.io/docs/reference/kubectl/generated/kubectl_describe/ +[az-storage-blob-list]: /cli/azure/storage/blob/#az_storage_blob_list +[az-identity-federated-credential-delete]: /cli/azure/identity/federated-credential#az_identity_federated_credential_delete +[kubectl-delete]: https://kubernetes.io/docs/reference/kubectl/generated/kubectl_delete/ +[az-group-delete]: /cli/azure/group#az_group_delete +[what-is-aks]: ./what-is-aks.md \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/enable-host-encryption.md b/scenarios/azure-aks-docs/articles/aks/enable-host-encryption.md new file mode 100644 index 000000000..6d9d4b8e2 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/enable-host-encryption.md @@ -0,0 +1,103 @@ +--- +title: Enable host-based encryption on Azure Kubernetes Service (AKS) +description: Learn how to configure a host-based encryption in an Azure Kubernetes Service (AKS) cluster. +ms.topic: how-to +ms.subservice: aks-security +ms.date: 07/17/2023 +author: nickomang +ms.author: nickoman + +ms.custom: devx-track-azurecli +ms.devlang: azurecli +--- + +# Host-based encryption on Azure Kubernetes Service (AKS) + +With host-based encryption, the data stored on the VM host of your AKS agent nodes' VMs is encrypted at rest and flows encrypted to the Storage service. This means the temp disks are encrypted at rest with platform-managed keys. The cache of OS and data disks is encrypted at rest with either platform-managed keys or customer-managed keys depending on the encryption type set on those disks. + +By default, when using AKS, OS and data disks use server-side encryption with platform-managed keys. The caches for these disks are encrypted at rest with platform-managed keys. You can specify your own managed keys following [Bring your own keys (BYOK) with Azure disks in Azure Kubernetes Service](azure-disk-customer-managed-keys.md). The caches for these disks are also encrypted using the key you specify. + +Host-based encryption is different than server-side encryption (SSE), which is used by Azure Storage. Azure-managed disks use Azure Storage to automatically encrypt data at rest when saving data. Host-based encryption uses the host of the VM to handle encryption before the data flows through Azure Storage. + +## Before you begin + +Before you begin, review the following prerequisites and limitations. 
+ +### Prerequisites + +- Ensure you have the CLI extension v2.23 or higher installed. + +### Limitations + +- This feature can only be set at cluster or node pool creation time. +- This feature can only be enabled in [Azure regions][supported-regions] that support server-side encryption of Azure managed disks and only with specific [supported VM sizes][supported-sizes]. +- This feature requires an AKS cluster and node pool based on Virtual Machine Scale Sets as *VM set type*. + +## Enable Encryption at Host for your AKS cluster + +Before adding a node pool with host-based encryption, ensure the EncryptionAtHost feature is enabled for your subscription: + +```azurecli +# Register the EncryptionAtHost feature +az feature register --namespace Microsoft.Compute --name EncryptionAtHost + +# Wait for registration to complete (this may take several minutes) +az feature show --namespace Microsoft.Compute --name EncryptionAtHost --query "properties.state" + +# Refresh the provider registration +az provider register --namespace Microsoft.Compute +``` + +## Use host-based encryption on new clusters + +- Create a new cluster and configure the cluster agent nodes to use host-based encryption using the [`az aks create`][az-aks-create] command with the `--enable-encryption-at-host` flag. + + ```shell + az aks create \ + --name myAKSCluster \ + --resource-group myResourceGroup \ + --node-vm-size Standard_DS2_v2 \ + --location westus2 \ + --enable-encryption-at-host \ + --generate-ssh-keys + ``` + +## Use host-based encryption on existing clusters + +- Enable host-based encryption on an existing cluster by adding a new node pool using the [`az aks nodepool add`][az-aks-nodepool-add] command with the `--enable-encryption-at-host` flag. + + ```azurecli + az aks nodepool add --name hostencrypt --cluster-name $MY_AKS_CLUSTER --resource-group $MY_RESOURCE_GROUP -s Standard_DS2_v2 --enable-encryption-at-host + ``` + + Results: + + + + ```output + { + "agentPoolProfile": { + "enableEncryptionAtHost": true, + "name": "hostencrypt", + "nodeCount": 1, + "osDiskSizeGB": 30, + "vmSize": "Standard_DS2_v2" + }, + ... + } + ``` + +## Next steps + +- Review [best practices for AKS cluster security][best-practices-security]. +- Read more about [host-based encryption](/azure/virtual-machines/disk-encryption#encryption-at-host---end-to-end-encryption-for-your-vm-data). + + + +[best-practices-security]: ./operator-best-practices-cluster-security.md +[supported-regions]: /azure/virtual-machines/disk-encryption#supported-regions +[supported-sizes]: /azure/virtual-machines/disk-encryption#supported-vm-sizes +[control-keys]: ../key-vault/general/best-practices.md#control-access-to-your-vault +[akv-built-in-roles]: ../key-vault/general/rbac-guide.md#azure-built-in-roles-for-key-vault-data-plane-operations +[az-aks-create]: /cli/azure/aks#az-aks-create +[az-aks-nodepool-add]: /cli/azure/aks/nodepool#az-aks-nodepool-add \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/events.md b/scenarios/azure-aks-docs/articles/aks/events.md new file mode 100644 index 000000000..38c81e4d4 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/events.md @@ -0,0 +1,131 @@ +--- +title: Use Kubernetes events for troubleshooting +description: Learn about Kubernetes events, which provide details on pods, nodes, and other Kubernetes objects. 
+ms.topic: how-to +ms.author: nickoman +author: nickomang +ms.subservice: aks-monitoring +ms.date: 06/13/2024 +ms.custom: innovation-engine +--- + +# Use Kubernetes events for troubleshooting in Azure Kubernetes Service (AKS) + +This article shows you how to use Kubernetes events to monitor and troubleshoot issues in your Azure Kubernetes Service (AKS) clusters. + +## What are Kubernetes events? + +Events are one of the most prominent sources for monitoring and troubleshooting issues in Kubernetes. They capture and record information about the lifecycle of various Kubernetes objects, such as pods, nodes, services, and deployments. By monitoring events, you can gain visibility into your cluster's activities, identify issues, and troubleshoot problems effectively. + +Kubernetes events don't persist throughout your cluster lifecycle, as there's no retention mechanism. Events are **only available for *one hour* after the event is generated**. To store events for a longer time period, enable [Container insights][container-insights]. + +## Kubernetes event objects + +The following table lists some key Kubernetes event objects: + +|Field name|Description| +|----------|------------| +|type |The type is based on the severity of the event:
**Warning** events signal potentially problematic situations, such as a pod repeatedly failing or a node running out of resources. They require attention, but might not result in immediate failure.
**Normal** events represent routine operations, such as a pod being scheduled or a deployment scaling up. They usually indicate healthy cluster behavior.| +|reason|The reason why the event was generated. For example, *FailedScheduling* or *CrashLoopBackoff*.| +|message|A human-readable message that describes the event.| +|namespace|The namespace of the Kubernetes object that the event is associated with.| +|firstSeen|Timestamp when the event was first observed.| +|lastSeen|Timestamp of when the event was last observed.| +|reportingController|The name of the controller that reported the event. For example, `kubernetes.io/kubelet`.| +|object|The name of the Kubernetes object that the event is associated with.| + +For more information, see the official [Kubernetes documentation][k8s-events]. + +## View Kubernetes events + +### [Azure CLI](#tab/azure-cli) + +List all events in your cluster using the `kubectl get events` command. + +Assuming your cluster is already created and available (per doc prerequisites), get credentials (note the `--overwrite-existing` flag is set to avoid kubeconfig errors): + +```bash +az aks get-credentials --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER --overwrite-existing +``` + +Now list all events in your cluster: + +```bash +kubectl get events +``` + +Results: + + + +```output +LAST SEEN TYPE REASON OBJECT MESSAGE +xxm Normal Scheduled pod/my-pod-xxxxx Successfully assigned default/my-pod-xxxxx to aks-nodepoolxx-xxxxxxx-vmss000000 +xxm Normal Pulled pod/my-pod-xxxxx Container image "nginx" already present on machine +xxm Normal Created pod/my-pod-xxxxx Created container nginx +xxm Normal Started pod/my-pod-xxxxx Started container nginx +... +``` + +Look at a specific pod's events by first finding the name of the pod and then using the `kubectl describe pod` command. + +List the pods in the current namespace: + +```bash +kubectl get pods +``` + +Results: + + + +```output +NAME READY STATUS RESTARTS AGE +my-pod-xxxxx 1/1 Running 0 xxm +nginx-deployment-xxxxx 1/1 Running 0 xxm +... +``` + +Replace `` below with your actual pod name. For automation, here's an example for the first pod in the list: + +```shell +POD_NAME=$(kubectl get pods -o jsonpath="{.items[0].metadata.name}") +kubectl describe pod $POD_NAME +``` + +### [Azure portal](#tab/azure-portal) + +1. Open the Azure portal and navigate to your AKS cluster resource. +1. From the service menu, under **Kubernetes resources**, select **Events**. +1. The **Events** page displays a list of events in your cluster. You can filter events by type, reason, source, object, or namespace. You can combine filters to narrow down the results. + +--- + +## Best practices for troubleshooting with events + +### Filtering events for relevance + +You might have various namespaces and services running in your AKS cluster. Filtering events based on object type, namespace, or reason can help narrow down the results to the most relevant information. + +For example, you can use the following command to filter events within the default namespace: + +```bash +kubectl get events --namespace default +``` + +### Automating event notifications + +To ensure timely response to critical events in your AKS cluster, set up automated notifications. Azure offers integration with monitoring and alerting services like [Azure Monitor][aks-azure-monitor]. You can configure alerts to trigger based on specific event patterns. This way, you're immediately informed about crucial issues that require attention. 
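+
+If you want a quick, ad-hoc view of only the problematic events while you investigate, you can also filter on the **Warning** type directly with `kubectl`. For example, the following command lists Warning events across all namespaces, sorted by when they were last seen:
+
+```bash
+kubectl get events --field-selector type=Warning --all-namespaces --sort-by='.lastTimestamp'
+```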
+ +### Regularly reviewing events + +Make a habit of regularly reviewing events in your AKS cluster. This proactive approach can help you identify trends, catch potential problems early, and prevent escalations. By staying on top of events, you can maintain the stability and performance of your applications. + +## Next steps + +Now that you understand Kubernetes events, you can continue your monitoring and observability journey by [enabling Container insights][container-insights]. + + +[aks-azure-monitor]: ./monitor-aks.md +[container-insights]: /azure/azure-monitor/containers/container-insights-enable-aks +[k8s-events]: https://kubernetes.io/docs/reference/kubernetes-api/cluster-resources/event-v1/ diff --git a/scenarios/azure-aks-docs/articles/aks/free-standard-pricing-tiers.md b/scenarios/azure-aks-docs/articles/aks/free-standard-pricing-tiers.md new file mode 100644 index 000000000..1977af5d8 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/free-standard-pricing-tiers.md @@ -0,0 +1,296 @@ +--- +title: Azure Kubernetes Service (AKS) Free, Standard, and Premium pricing tiers for cluster management +description: Learn about the Azure Kubernetes Service (AKS) Free, Standard, and Premium pricing plans and what features, deployment patterns, and recommendations to consider between each plan. +ms.topic: concept-article +ms.date: 06/07/2024 +author: schaffererin +ms.author: schaffererin +ms.custom: references_regions, devx-track-azurecli, innovation-engine +--- + +# Free, Standard, and Premium pricing tiers for Azure Kubernetes Service (AKS) cluster management + +Azure Kubernetes Service (AKS) offers three pricing tiers for cluster management: the **Free tier**, the **Standard tier**, and the **Premium tier**. All tiers are in the **Base** SKU. + +| |Free tier|Standard tier|Premium tier| +|------------------|---------|--------|--------| +|**When to use**|• You want to experiment with AKS at no extra cost
• You're new to AKS and Kubernetes|• You're running production or mission-critical workloads and need high availability and reliability
• You need a financially backed SLA
• Automatically selected for AKS Automatic clusters|• You're running production or mission-critical workloads and need high availability and reliability
• You need a financially backed SLA
• All mission-critical, at-scale, or production workloads requiring *two years* of support for one Kubernetes version|
• Clusters with fewer than 10 nodes|• Enterprise-grade or production workloads
• Clusters with up to 5,000 nodes| • Enterprise-grade or production workloads
• Clusters with up to 5,000 nodes | +|**Pricing**|• Free cluster management
• Pay-as-you-go for resources you consume|• Pay-as-you-go for resources you consume
• [Standard tier Cluster Management Pricing](https://azure.microsoft.com/pricing/details/kubernetes-service/) | • Pay-as-you-go for resources you consume
• [Premium tier Cluster Management Pricing](https://azure.microsoft.com/pricing/details/kubernetes-service/) | +|**Feature comparison**|• Recommended for clusters with fewer than 10 nodes, but can support up to 1,000 nodes
• Includes all current AKS features|• Uptime SLA is enabled by default
• Greater cluster reliability and resources
• Can support up to 5,000 nodes in a cluster
• Includes all current AKS features | • Includes all current AKS features from standard tier
• [Microsoft maintenance past community support][long-term-support] | + +For more information on pricing, see the [AKS pricing details](https://azure.microsoft.com/pricing/details/kubernetes-service/). + +## Uptime SLA terms and conditions + +In the Standard tier and Premium tier, the Uptime SLA feature is enabled by default per cluster. The Uptime SLA feature guarantees 99.95% availability of the Kubernetes API server endpoint for clusters using [Availability Zones][availability-zones], and 99.9% of availability for clusters that aren't using Availability Zones. For more information, see [SLA](https://azure.microsoft.com/support/legal/sla/kubernetes-service/v1_1/). + +## Region availability + +* Free tier, Standard tier, and Premium tier are available in public regions and Azure Government regions where [AKS is supported](https://azure.microsoft.com/global-infrastructure/services/?products=kubernetes-service). +* Free tier, Standard tier, and Premium tier are available for [private AKS clusters][private-clusters] in all public regions where AKS is supported. + +## Before you begin + +You need [Azure CLI](/cli/azure/install-azure-cli) version 2.47.0 or later. Run `az --version` to find your current version. If you need to install or upgrade, see [Install Azure CLI][install-azure-cli]. + +## Create a new cluster and select the pricing tier + +Use the Azure CLI to create a new cluster on an AKS pricing tier. You can create your cluster in an existing resource group or create a new one. To learn more about resource groups and working with them, see [managing resource groups using the Azure CLI][manage-resource-group-cli]. + +Use the [`az aks create`][az-aks-create] command to create an AKS cluster. The following commands show you how to create a new cluster in the Free, Standard, and Premium tiers. + +Below, we set up the required environment variables for the resource group, cluster name, and region. We generate a unique suffix for the resource names to avoid conflicts if run multiple times. + +```shell +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export REGION="eastus2" +export RESOURCE_GROUP="aks-rg-$RANDOM_SUFFIX" +export CLUSTER_NAME="aks-cluster-$RANDOM_SUFFIX" +az group create --name $RESOURCE_GROUP --location $REGION +``` + +Results: + +```output +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/aks-rg-xxx", + "location": "eastus2", + "managedBy": null, + "name": "aks-rg-xxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +### Create a new AKS cluster in the Free tier + +```shell +# Create a new AKS cluster in the Free tier + +az aks create \ + --resource-group $RESOURCE_GROUP \ + --name $CLUSTER_NAME \ + --tier free \ + --generate-ssh-keys +``` + +Results: + +```output +{ + ... + "sku": { + "name": "Base", + "tier": "Free" + }, + ... +} +``` + +### Create a new AKS cluster in the Standard tier + +```shell +# Create a new AKS cluster in the Standard tier + +az aks create \ + --resource-group $RESOURCE_GROUP \ + --name $CLUSTER_NAME \ + --tier standard \ + --generate-ssh-keys +``` + +Results: + +```output +{ + ... + "sku": { + "name": "Base", + "tier": "Standard" + }, + ... +} +``` + +### Create a new AKS cluster in the Premium tier + +LongTermSupport and Premium tier should be enabled/disabled together. 
+ +```shell +# Create a new AKS cluster in the Premium tier +# LongTermSupport and Premium tier should be enabled/disabled together + +az aks create \ + --resource-group $RESOURCE_GROUP \ + --name $CLUSTER_NAME \ + --tier premium \ + --k8s-support-plan AKSLongTermSupport \ + --generate-ssh-keys +``` + +Results: + +```output +{ + ... + "sku": { + "name": "Base", + "tier": "Premium" + }, + "supportPlan": "AKSLongTermSupport", + ... +} +``` + +Once the deployment completes, it returns JSON-formatted information about your cluster: + +```output +# Sample output for --tier free + + }, + "sku": { + "name": "Base", + "tier": "Free" + }, + +# Sample output for --tier standard + + }, + "sku": { + "name": "Base", + "tier": "Standard" + }, + +# Sample output for --tier premium + + "sku": { + "name": "Base", + "tier": "Premium" + }, + "supportPlan": "AKSLongTermSupport", +``` + +## Update the tier of an existing AKS cluster + +The following example uses the [`az aks update`](/cli/azure/aks#az_aks_update) command to update the existing cluster. + +### Update an existing cluster from the Standard tier to the Free tier + +```azurecli-interactive +# Update an existing cluster from the Standard tier to the Free tier + +az aks update --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --tier free +``` + + + +Results: + +```output +{ + ... + "sku": { + "name": "Base", + "tier": "Free" + }, + ... +} +``` + +### Update an existing cluster from the Free tier to the Standard tier + +```azurecli-interactive +# Update an existing cluster from the Free tier to the Standard tier + +az aks update --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --tier standard +``` + + + +Results: + +```output +{ + ... + "sku": { + "name": "Base", + "tier": "Standard" + }, + ... +} +``` + +[Updating existing clusters from and to the Premium tier][long-term-support-update] requires changing the support plan. + +### Update an existing cluster to the Premium tier + +```azurecli-interactive +# Update an existing cluster to the Premium tier +az aks update --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --tier premium --k8s-support-plan AKSLongTermSupport +``` + + + +Results: + +```output +{ + ... + "sku": { + "name": "Base", + "tier": "Premium" + }, + "supportPlan": "AKSLongTermSupport", + ... +} +``` + +### Update an existing cluster to from Premium tier to Free or Standard tier + +```shell +# Update an existing cluster to from Premium tier to Free or Standard tier +az aks update --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --tier free --k8s-support-plan KubernetesOfficial +# or +az aks update --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --tier standard --k8s-support-plan KubernetesOfficial +``` + +Results: + +```output +{ + ... + "sku": { + "name": "Base", + "tier": "Free" # or "Standard" + }, + "supportPlan": "KubernetesOfficial", + ... +} +``` + +This process takes several minutes to complete. You shouldn't experience any downtime while your cluster tier is being updated. When finished, the following example JSON snippet shows updating the existing cluster to the Standard tier in the Base SKU. + +```output + }, + "sku": { + "name": "Base", + "tier": "Standard" + }, +``` + +## Next steps + +* Use [Availability Zones][availability-zones] to increase high availability with your AKS cluster workloads. +* Configure your cluster to [limit egress traffic](limit-egress-traffic.md). 
+ +[manage-resource-group-cli]: /azure/azure-resource-manager/management/manage-resource-groups-cli +[availability-zones]: ./availability-zones.md +[az-aks-create]: /cli/azure/aks?#az_aks_create +[private-clusters]: private-clusters.md +[long-term-support]: long-term-support.md +[long-term-support-update]: long-term-support.md#enable-lts-on-an-existing-cluster +[install-azure-cli]: /cli/azure/install-azure-cli \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/istio-deploy-addon.md b/scenarios/azure-aks-docs/articles/aks/istio-deploy-addon.md new file mode 100644 index 000000000..41d2e9b1a --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/istio-deploy-addon.md @@ -0,0 +1,237 @@ +--- +title: Deploy Istio-based service mesh add-on for Azure Kubernetes Service +description: Deploy Istio-based service mesh add-on for Azure Kubernetes Service +ms.topic: how-to +ms.custom: devx-track-azurecli, innovation-engine +ms.service: azure-kubernetes-service +ms.date: 03/28/2024 +ms.author: shasb +author: shashankbarsin +--- + +# Deploy Istio-based service mesh add-on for Azure Kubernetes Service + +This article shows you how to install the Istio-based service mesh add-on for Azure Kubernetes Service (AKS) cluster. + +For more information on Istio and the service mesh add-on, see [Istio-based service mesh add-on for Azure Kubernetes Service][istio-about]. + +## Before you begin + +* The add-on requires Azure CLI version 2.57.0 or later installed. You can run `az --version` to verify version. To install or upgrade, see [Install Azure CLI][azure-cli-install]. +* To find information about which Istio add-on revisions are available in a region and their compatibility with AKS cluster versions, use the command [`az aks mesh get-revisions`][az-aks-mesh-get-revisions]: + + ```azurecli-interactive + az aks mesh get-revisions --location EastUS2 -o table + ``` +* In some cases, Istio CRDs from previous installations may not be automatically cleaned up on uninstall. Ensure existing Istio CRDs are deleted: + + ```text + kubectl delete crd $(kubectl get crd -A | grep "istio.io" | awk '{print $1}') + ``` + It is recommend to also clean up other resources from self-managed installations of Istio such as ClusterRoles, MutatingWebhookConfigurations and ValidatingWebhookConfigurations. + +* Note that if you choose to use any `istioctl` CLI commands, you will need to include a flag to point to the add-on installation of Istio: `--istioNamespace aks-istio-system` + +## Install Istio add-on + +This section includes steps to install the Istio add-on during cluster creation or enable for an existing cluster using the Azure CLI. If you want to install the add-on using Bicep, see the guide for [installing an AKS cluster with the Istio service mesh add-on using Bicep][install-aks-cluster-istio-bicep]. To learn more about the Bicep resource definition for an AKS cluster, see [Bicep managedCluster reference][bicep-aks-resource-definition]. + +### Revision selection + +If you enable the add-on without specifying a revision, a default supported revision is installed for you. + +To specify a revision, perform the following steps. + +1. Use the [`az aks mesh get-revisions`][az-aks-mesh-get-revisions] command to check which revisions are available for different AKS cluster versions in a region. +1. Based on the available revisions, you can include the `--revision asm-X-Y` (ex: `--revision asm-1-20`) flag in the enable command you use for mesh installation. 
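+
+For example, to pin a specific revision when enabling the add-on on an existing cluster, you can pass the flag directly to the enable command. The `asm-1-20` value below is only an illustration — use a revision that `az aks mesh get-revisions` reports as available for your region and cluster version:
+
+```bash
+az aks mesh enable \
+    --resource-group ${RESOURCE_GROUP} \
+    --name ${CLUSTER} \
+    --revision asm-1-20
+```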
+ +### Install mesh during cluster creation + +To install the Istio add-on when creating the cluster, use the `--enable-azure-service-mesh` or `--enable-asm` parameter. + +```text +az group create --name ${RESOURCE_GROUP} --location ${LOCATION} +``` + +```text +az aks create \ + --resource-group ${RESOURCE_GROUP} \ + --name ${CLUSTER} \ + --enable-asm \ + --generate-ssh-keys +``` + +### Install mesh for existing cluster + +The following example enables Istio add-on for an existing AKS cluster: + +> [!IMPORTANT] +> You can't enable the Istio add-on on an existing cluster if an OSM add-on is already on your cluster. Uninstall the OSM add-on before installing the Istio add-on. +> For more information, see [uninstall the OSM add-on from your AKS cluster][uninstall-osm-addon]. +> Istio add-on can only be enabled on AKS clusters of version >= 1.23. + +```bash +az aks mesh enable --resource-group ${RESOURCE_GROUP} --name ${CLUSTER} +``` + +## Verify successful installation + +To verify the Istio add-on is installed on your cluster, run the following command: + +```azurecli-interactive +az aks show --resource-group ${RESOURCE_GROUP} --name ${CLUSTER} --query 'serviceMeshProfile.mode' +``` + +Confirm the output shows `Istio`. + +Use `az aks get-credentials` to retrieve the credentials for your AKS cluster: + +```azurecli-interactive +az aks get-credentials --resource-group ${RESOURCE_GROUP} --name ${CLUSTER} +``` + +Use `kubectl` to verify that `istiod` (Istio control plane) pods are running successfully: + +```bash +kubectl get pods -n aks-istio-system +``` + +Confirm the `istiod` pod has a status of `Running`. For example: + +``` +NAME READY STATUS RESTARTS AGE +istiod-asm-1-18-74f7f7c46c-xfdtl 1/1 Running 0 2m +istiod-asm-1-18-74f7f7c46c-4nt2v 1/1 Running 0 2m +``` + +## Enable sidecar injection + +To automatically install sidecar to any new pods, you need to annotate your namespaces with the revision label corresponding to the control plane revision currently installed. + +If you're unsure which revision is installed, use: + +```azurecli-interactive +az aks show --resource-group ${RESOURCE_GROUP} --name ${CLUSTER} --query 'serviceMeshProfile.istio.revisions' +``` + +Apply the revision label: + +```bash +kubectl label namespace default istio.io/rev=asm-X-Y +``` + +> [!IMPORTANT] +> The default `istio-injection=enabled` labeling doesn't work. Explicit versioning matching the control plane revision (ex: `istio.io/rev=asm-1-18`) is required. + +For manual injection of sidecar using `istioctl kube-inject`, you need to specify extra parameters for `istioNamespace` (`-i`) and `revision` (`-r`). For example: + +```text +kubectl apply -f <(istioctl kube-inject -f sample.yaml -i aks-istio-system -r asm-X-Y) -n foo +``` + +## Trigger sidecar injection + +You can either deploy the sample application provided for testing, or trigger sidecar injection for existing workloads. + +### Existing applications + +If you have existing applications to be added to the mesh, ensure their namespaces are labeled as in the previous step, and then restart their deployments to trigger sidecar injection: + +```text +kubectl rollout restart -n +``` + +Verify that sidecar injection succeeded by ensuring all containers are ready and looking for the `istio-proxy` container in the `kubectl describe` output, for example: + +```text +kubectl describe pod -n namespace +``` + +The `istio-proxy` container is the Envoy sidecar. Your application is now part of the data plane. 
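+
+As a quick check across a namespace, you can also list the containers of every pod and confirm that `istio-proxy` appears alongside your application containers. The command below is a simple sketch; replace `default` with the namespace you labeled:
+
+```bash
+kubectl get pods -n default -o custom-columns='NAME:.metadata.name,CONTAINERS:.spec.containers[*].name'
+```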
+ +### Deploy sample application + +Use `kubectl apply` to deploy the sample application on the cluster: + +```bash +kubectl apply -f https://raw.githubusercontent.com/istio/istio/release-1.18/samples/bookinfo/platform/kube/bookinfo.yaml +``` +> [!NOTE] +> Clusters using an HTTP proxy for outbound internet access will need to set up a Service Entry. For setup instructions see [HTTP proxy support in Azure Kubernetes Service](./http-proxy.md#istio-add-on-http-proxy-for-external-services) + +Confirm several deployments and services are created on your cluster. For example: + +```output +service/details created +serviceaccount/bookinfo-details created +deployment.apps/details-v1 created +service/ratings created +serviceaccount/bookinfo-ratings created +deployment.apps/ratings-v1 created +service/reviews created +serviceaccount/bookinfo-reviews created +deployment.apps/reviews-v1 created +deployment.apps/reviews-v2 created +deployment.apps/reviews-v3 created +service/productpage created +serviceaccount/bookinfo-productpage created +deployment.apps/productpage-v1 created +``` + +Use `kubectl get services` to verify that the services were created successfully: + +```bash +kubectl get services +``` + +Confirm the following services were deployed: + +```output +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +details ClusterIP 10.0.180.193 9080/TCP 87s +kubernetes ClusterIP 10.0.0.1 443/TCP 15m +productpage ClusterIP 10.0.112.238 9080/TCP 86s +ratings ClusterIP 10.0.15.201 9080/TCP 86s +reviews ClusterIP 10.0.73.95 9080/TCP 86s +``` + +```bash +kubectl get pods +``` + +```output +NAME READY STATUS RESTARTS AGE +details-v1-558b8b4b76-2llld 2/2 Running 0 2m41s +productpage-v1-6987489c74-lpkgl 2/2 Running 0 2m40s +ratings-v1-7dc98c7588-vzftc 2/2 Running 0 2m41s +reviews-v1-7f99cc4496-gdxfn 2/2 Running 0 2m41s +reviews-v2-7d79d5bd5d-8zzqd 2/2 Running 0 2m41s +reviews-v3-7dbcdcbc56-m8dph 2/2 Running 0 2m41s +``` + +Confirm that all the pods have status of `Running` with two containers in the `READY` column. The second container (`istio-proxy`) added to each pod is the Envoy sidecar injected by Istio, and the other is the application container. + +To test this sample application against ingress, check out [next-steps](#next-steps). 
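+If you want a quick in-cluster check before setting up ingress, you can adapt the upstream Bookinfo verification step, which curls the `productpage` service from inside one of the application pods. This sketch assumes the sample above deployed successfully and that the `ratings` image includes `curl`, as it does in the upstream Istio sample:
+
+```text
+kubectl exec "$(kubectl get pod -l app=ratings -o jsonpath='{.items[0].metadata.name}')" \
+    -c ratings -- curl -sS productpage:9080/productpage | grep -o "<title>.*</title>"
+```
+
+The command should print `<title>Simple Bookstore App</title>` if the application and its sidecars are serving traffic.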
+ +## Next steps + +* [Deploy external or internal ingresses for Istio service mesh add-on][istio-deploy-ingress] +* [Scale istiod and ingress gateway HPA][istio-scaling-guide] +* [Collect metrics for Istio service mesh add-on workloads in Azure Managed Prometheus][istio-metrics-managed-prometheus] + + +[install-aks-cluster-istio-bicep]: https://github.com/Azure-Samples/aks-istio-addon-bicep +[uninstall-istio-oss]: https://istio.io/latest/docs/setup/install/istioctl/#uninstall-istio + + +[istio-about]: istio-about.md +[azure-cli-install]: /cli/azure/install-azure-cli +[az-feature-register]: /cli/azure/feature#az-feature-register +[az-feature-show]: /cli/azure/feature#az-feature-show +[az-provider-register]: /cli/azure/provider#az-provider-register +[uninstall-osm-addon]: open-service-mesh-uninstall-add-on.md +[istio-deploy-ingress]: istio-deploy-ingress.md +[az-aks-mesh-get-revisions]: /cli/azure/aks/mesh#az-aks-mesh-get-revisions(aks-preview) +[bicep-aks-resource-definition]: /azure/templates/microsoft.containerservice/managedclusters +[istio-scaling-guide]: istio-scale.md#scaling +[istio-metrics-managed-prometheus]: istio-metrics-managed-prometheus.md \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/istio-meshconfig.md b/scenarios/azure-aks-docs/articles/aks/istio-meshconfig.md new file mode 100644 index 000000000..0d40f84b1 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/istio-meshconfig.md @@ -0,0 +1,184 @@ +--- +title: Configure Istio-based service mesh add-on for Azure Kubernetes Service +description: Configure Istio-based service mesh add-on for Azure Kubernetes Service +ms.topic: how-to +ms.custom: innovation-engine +ms.service: azure-kubernetes-service +ms.date: 06/13/2024 +ms.author: shasb +author: shashankbarsin +--- + +# Configure Istio-based service mesh add-on for Azure Kubernetes Service + +Open-source Istio uses [MeshConfig][istio-meshconfig] to define mesh-wide settings for the Istio service mesh. Istio-based service mesh add-on for AKS builds on top of MeshConfig and classifies different properties as supported, allowed, and blocked. + +This article walks through how to configure Istio-based service mesh add-on for Azure Kubernetes Service and the support policy applicable for such configuration. + +## Prerequisites + +This guide assumes you followed the [documentation][istio-deploy-add-on] to enable the Istio add-on on an AKS cluster. + +## Set up configuration on cluster + +1. Find out which revision of Istio is deployed on the cluster: + + ```bash + export RANDOM_SUFFIX=$(head -c 3 /dev/urandom | xxd -p) + export CLUSTER="my-aks-cluster" + export RESOURCE_GROUP="my-aks-rg$RANDOM_SUFFIX" + az aks show --name $CLUSTER --resource-group $RESOURCE_GROUP --query 'serviceMeshProfile' --output json + ``` + + Results: + + + + ```output + { + "istio": { + "certificateAuthority": null, + "components": { + "egressGateways": null, + "ingressGateways": null + }, + "revisions": [ + "asm-1-24" + ] + }, + "mode": "Istio" + } + ``` + + This command shows the Istio service mesh profile, including the revision(s) currently deployed on your AKS cluster. + +2. Create a ConfigMap with the name `istio-shared-configmap-` in the `aks-istio-system` namespace. For example, if your cluster is running asm-1-24 revision of mesh, then the ConfigMap needs to be named as `istio-shared-configmap-asm-1-24`. Mesh configuration has to be provided within the data section under mesh. 
+
+    Example:
+
+    ```bash
+    cat <<EOF > istio-shared-configmap-asm-1-24.yaml
+    apiVersion: v1
+    kind: ConfigMap
+    metadata:
+      name: istio-shared-configmap-asm-1-24
+      namespace: aks-istio-system
+    data:
+      mesh: |-
+        accessLogFile: /dev/stdout
+        defaultConfig:
+          holdApplicationUntilProxyStarts: true
+    EOF
+    kubectl apply -f istio-shared-configmap-asm-1-24.yaml
+    ```
+
+    Results:
+
+    ```output
+    configmap/istio-shared-configmap-asm-1-24 created
+    ```
+
+    The values under `defaultConfig` are mesh-wide settings applied to the Envoy sidecar proxy.
+
+> [!CAUTION]
+> A default ConfigMap (for example, `istio-asm-1-24` for revision asm-1-24) is created in the `aks-istio-system` namespace on the cluster when the Istio add-on is enabled. However, this default ConfigMap gets reconciled by the managed Istio add-on, so users should NOT directly edit this ConfigMap. Instead, users should create a revision-specific Istio shared ConfigMap (for example `istio-shared-configmap-asm-1-24` for revision asm-1-24) in the aks-istio-system namespace. The Istio control plane then merges this with the default ConfigMap, with the default settings taking precedence.
+
+### Mesh configuration and upgrades
+
+When you're performing a [canary upgrade for Istio](./istio-upgrade.md), you need to create a separate ConfigMap for the new revision in the `aks-istio-system` namespace **before initiating the canary upgrade**. This way the configuration is available when the new revision's control plane is deployed on the cluster. For example, if you're upgrading the mesh from asm-1-24 to asm-1-25, you need to copy changes over from `istio-shared-configmap-asm-1-24` to create a new ConfigMap called `istio-shared-configmap-asm-1-25` in the `aks-istio-system` namespace.
+
+After the upgrade is completed or rolled back, you can delete the ConfigMap of the revision that was removed from the cluster.
+
+## Allowed, supported, and blocked MeshConfig values
+
+Fields in `MeshConfig` are classified as `allowed`, `supported`, or `blocked`. To learn more about these categories, see the [support policy][istio-support-policy] for Istio add-on features and configuration options.
+
+Mesh configuration and the list of allowed/supported fields are revision specific to account for fields being added or removed across revisions. The full list of allowed fields, and the supported/unsupported ones within the allowed list, is provided in the table below. When a new mesh revision is made available, any changes to the allowed and supported classification of fields are noted in this table.
+
+### MeshConfig
+
+Fields present in [open source MeshConfig reference documentation][istio-meshconfig] that are not covered in the following table are blocked. For example, `configSources` is blocked.
+ +| **Field** | **Supported/Allowed** | **Notes** | +|-----------|---------------|-----------| +| proxyListenPort | Allowed | - | +| proxyInboundListenPort | Allowed | - | +| proxyHttpPort | Allowed | - | +| connectTimeout | Allowed | Configurable in [DestinationRule](https://istio.io/latest/docs/reference/config/networking/destination-rule/#ConnectionPoolSettings-TCPSettings) | +| tcpKeepalive | Allowed | Configurable in [DestinationRule](https://istio.io/latest/docs/reference/config/networking/destination-rule/#ConnectionPoolSettings-TCPSettings) | +| defaultConfig | Supported | Used to configure [ProxyConfig](https://istio.io/latest/docs/reference/config/istio.mesh.v1alpha1/#ProxyConfig) | +| outboundTrafficPolicy | Supported | Also configurable in [Sidecar CR](https://istio.io/latest/docs/reference/config/networking/sidecar/#OutboundTrafficPolicy) | +| extensionProviders | Allowed | - | +| defaultProviders | Allowed | - | +| accessLogFile | Supported | This field addresses the generation of access logs. For a managed experience on collection and querying of logs, refer to [Azure Monitor Container Insights on AKS][container-insights-docs]. It is encouraged to configure access logging via the [Telemetry API][istio-telemetry]. | +| accessLogFormat | Supported | This field addresses the generation of access logs. For a managed experience on collection and querying of logs, refer to [Azure Monitor Container Insights on AKS][container-insights-docs] | +| accessLogEncoding | Supported | This field addresses the generation of access logs. For a managed experience on collection and querying of logs, refer to [Azure Monitor Container Insights on AKS][container-insights-docs] | +| enableTracing | Allowed | It is encouraged to configure tracing via the [Telemetry API][istio-telemetry]. | +| enableEnvoyAccessLogService | Supported | This field addresses the generation of access logs. For a managed experience on collection and querying of logs, refer to [Azure Monitor Container Insights on AKS][container-insights-docs] | +| disableEnvoyListenerLog | Supported | This field addresses the generation of access logs. 
For a managed experience on collection and querying of logs, refer to [Azure Monitor Container Insights on AKS][container-insights-docs] | +| trustDomain | Allowed | - | +| trustDomainAliases | Allowed | - | +| caCertificates | Allowed | Configurable in [DestinationRule](https://istio.io/latest/docs/reference/config/networking/destination-rule/#ClientTLSSettings) | +| defaultServiceExportTo | Allowed | Configurable in [ServiceEntry](https://istio.io/latest/docs/reference/config/networking/service-entry/#ServiceEntry) | +| defaultVirtualServiceExportTo | Allowed | Configurable in [VirtualService](https://istio.io/latest/docs/reference/config/networking/virtual-service/#VirtualService) | +| defaultDestinationRuleExportTo | Allowed | Configurable in [DestinationRule](https://istio.io/latest/docs/reference/config/networking/destination-rule/#DestinationRule) | +| localityLbSetting | Allowed | Configurable in [DestinationRule](https://istio.io/latest/docs/reference/config/networking/destination-rule/#LoadBalancerSettings) | +| dnsRefreshRate | Allowed | - | +| h2UpgradePolicy | Allowed | Configurable in [DestinationRule](https://istio.io/latest/docs/reference/config/networking/destination-rule/#ConnectionPoolSettings-HTTPSettings) | +| enablePrometheusMerge | Allowed | - | +| discoverySelectors | Supported | - | +| pathNormalization | Allowed | - | +| defaultHttpRetryPolicy | Allowed | Configurable in [VirtualService](https://istio.io/latest/docs/reference/config/networking/virtual-service/#HTTPRetry) | +| serviceSettings | Allowed | - | +| meshMTLS | Allowed | - | +| tlsDefaults | Allowed | - | +| ingressService | Allowed | Name of the Kubernetes service used for the istio ingress controller. | +| ingressSelector | Allowed | Defines which gateway deployment to use as the Ingress controller. This field corresponds to the Gateway.selector field, and will be set as istio: INGRESS_SELECTOR. | + +### ProxyConfig (meshConfig.defaultConfig) + +Fields present in [open source MeshConfig reference documentation](https://istio.io/latest/docs/reference/config/istio.mesh.v1alpha1/#ProxyConfig) that are not covered in the following table are blocked. + +| **Field** | **Supported/Allowed** | **Notes** | +|-----------|-----------------------|-----------| +| tracingServiceName | Allowed | It is encouraged to configure tracing via the [Telemetry API][istio-telemetry]. | +| drainDuration | Supported | - | +| statsUdpAddress | Allowed | - | +| proxyAdminPort | Allowed | - | +| tracing | Allowed | It is encouraged to configure tracing via the [Telemetry API][istio-telemetry]. | +| concurrency | Supported | - | +| envoyAccessLogService | Allowed | It is encouraged to configure tracing via the [Telemetry API][istio-telemetry]. | +| envoyMetricsService | Allowed | It is encouraged to configure metrics collection via the [Telemetry API][istio-telemetry]. +| proxyMetadata | Allowed | - | +| statusPort | Allowed | - | +| extraStatTags | Allowed | - | +| gatewayTopology | Allowed | - | +| proxyStatsMatcher | Allowed | - | +| terminationDrainDuration | Supported | - | +| meshId | Allowed | - | +| holdApplicationUntilProxyStarts | Supported | - | +| caCertificatesPem | Allowed | - | +| privateKeyProvider | Allowed | - | + +> [!CAUTION] +> **Support scope of configurations:** Mesh configuration allows for extension providers such as self-managed instances of Zipkin or Apache Skywalking to be configured with the Istio add-on. However, these extension providers are outside the support scope of the Istio add-on. 
Any issues associated with extension tools are outside the support boundary of the Istio add-on.
+
+## Common errors and troubleshooting tips
+
+- Ensure that the MeshConfig is indented with spaces instead of tabs.
+- Ensure that you're only editing the revision-specific shared ConfigMap (for example `istio-shared-configmap-asm-1-24`) and not trying to edit the default ConfigMap (for example `istio-asm-1-24`).
+- The ConfigMap must follow the naming convention `istio-shared-configmap-<asm-revision>` and be in the `aks-istio-system` namespace.
+- Ensure that all MeshConfig fields are spelled correctly. If they're unrecognized or if they aren't part of the allowed list, admission control denies such configurations.
+- When performing canary upgrades, [check your revision-specific ConfigMaps](#mesh-configuration-and-upgrades) to ensure configurations exist for the revisions deployed on your cluster.
+- Certain `MeshConfig` options such as accessLogging may increase Envoy's resource consumption, and disabling some of these settings may mitigate Istio data plane resource utilization. It's also advisable to use the `discoverySelectors` field in the MeshConfig to help alleviate memory consumption for Istiod and Envoy.
+- If the `concurrency` field in the MeshConfig is misconfigured and set to zero, it causes Envoy to use up all CPU cores. If this field is left unset instead, the number of worker threads to run is determined automatically based on CPU requests/limits.
+- [Pod and sidecar race conditions][istio-sidecar-race-condition] in which the application starts before Envoy can be mitigated using the `holdApplicationUntilProxyStarts` field in the MeshConfig.
+
+[istio-meshconfig]: https://istio.io/latest/docs/reference/config/istio.mesh.v1alpha1/
+[istio-sidecar-race-condition]: https://istio.io/latest/docs/ops/common-problems/injection/#pod-or-containers-start-with-network-issues-if-istio-proxy-is-not-ready
+[istio-deploy-add-on]: istio-deploy-addon.md
+[container-insights-docs]: /azure/azure-monitor/containers/container-insights-overview
+[istio-support-policy]: ./istio-support-policy.md#allowed-supported-and-blocked-customizations
+[istio-telemetry]: ./istio-telemetry.md
diff --git a/scenarios/azure-aks-docs/articles/aks/istio-scale.md b/scenarios/azure-aks-docs/articles/aks/istio-scale.md
new file mode 100644
index 000000000..a3fa97c18
--- /dev/null
+++ b/scenarios/azure-aks-docs/articles/aks/istio-scale.md
@@ -0,0 +1,167 @@
+---
+title: Istio service mesh Azure Kubernetes Service add-on performance and scaling
+description: Istio service mesh Azure Kubernetes Service add-on performance and scaling
+ms.topic: concept-article
+ms.custom: innovation-engine
+ms.service: azure-kubernetes-service
+ms.date: 06/13/2024
+ms.author: shalierxia
+---
+
+# Istio service mesh add-on performance and scaling
+The Istio-based service mesh add-on is logically split into a control plane (`istiod`) and a data plane. The data plane is composed of Envoy sidecar proxies inside workload pods. Istiod manages and configures these Envoy proxies. This article presents the performance of both the control and data plane for revision asm-1-19, including resource consumption, sidecar capacity, and latency overhead. Additionally, it provides suggestions for addressing potential strain on resources during periods of heavy load. This article also covers how to customize scaling for the control plane and gateways.
+ +## Control plane performance +[Istiod’s CPU and memory requirements][control-plane-performance] correlate with the rate of deployment and configuration changes and the number of proxies connected. The scenarios tested were: + +- Pod churn: examines the impact of pod churning on `istiod`. To reduce variables, only one service is used for all sidecars. +- Multiple services: examines the impact of multiple services on the maximum sidecars Istiod can manage (sidecar capacity), where each service has `N` sidecars, totaling the overall maximum. + +#### Test specifications +- One `istiod` instance with default settings +- Horizontal pod autoscaling disabled +- Tested with two network plugins: Azure Container Networking Interface (CNI) Overlay and Azure CNI Overlay with Cilium [ (recommended network plugins for large scale clusters) ](/azure/aks/azure-cni-overlay?tabs=kubectl#choosing-a-network-model-to-use) +- Node SKU: Standard D16 v3 (16 vCPU, 64-GB memory) +- Kubernetes version: 1.28.5 +- Istio revision: asm-1-19 + +### Pod churn +The [ClusterLoader2 framework][clusterloader2] was used to determine the maximum number of sidecars Istiod can manage when there's sidecar churning. The churn percent is defined as the percent of sidecars churned down/up during the test. For example, 50% churn for 10,000 sidecars would mean that 5,000 sidecars were churned down, then 5,000 sidecars were churned up. The churn percents tested were determined from the typical churn percentage during deployment rollouts (`maxUnavailable`). The churn rate was calculated by determining the total number of sidecars churned (up and down) over the actual time taken to complete the churning process. + +#### Sidecar capacity and Istiod CPU and memory + +**Azure CNI overlay** + +| Churn (%) | Churn Rate (sidecars/sec) | Sidecar Capacity | Istiod Memory (GB) | Istiod CPU | +|-------------|-----------------------------|--------------------|----------------------|--------------| +| 0 | -- | 25000 | 32.1 | 15 | +| 25 | 31.2 | 15000 | 22.2 | 15 | +| 50 | 31.2 | 15000 | 25.4 | 15 | + + +**Azure CNI overlay with Cilium** + +| Churn (%) | Churn Rate (sidecars/sec) | Sidecar Capacity | Istiod Memory (GB) | Istiod CPU | +|-------------|-----------------------------|--------------------|----------------------|--------------| +| 0 |-- | 30000 | 41.2 | 15 | +| 25 | 41.7 | 25000 | 36.1 | 16 | +| 50 | 37.9 | 25000 | 42.7 | 16 | + + +### Multiple services +The [ClusterLoader2 framework][clusterloader2] was used to determine the maximum number of sidecars `istiod` can manage with 1,000 services. The results can be compared to the 0% churn test (one service) in the pod churn scenario. Each service had `N` sidecars contributing to the overall maximum sidecar count. The API Server resource usage was observed to determine if there was any significant stress from the add-on. + +**Sidecar capacity** + +| Azure CNI Overlay | Azure CNI Overlay with Cilium | +|---------------------|---------------------------------| +| 20000 | 20000 | + +**CPU and memory** + +| Resource | Azure CNI Overlay | Azure CNI Overlay with Cilium | +|------------------------|--------------------|---------------------------------| +| API Server Memory (GB) | 38.9 | 9.7 | +| API Server CPU | 6.1 | 4.7 | +| Istiod Memory (GB) | 40.4 | 42.6 | +| Istiod CPU | 15 | 16 | + + +## Data plane performance +Various factors impact [sidecar performance][data-plane-performance] such as request size, number of proxy worker threads, and number of client connections. 
Additionally, any request flowing through the mesh traverses the client-side proxy and then the server-side proxy. Therefore, latency and resource consumption are measured to determine the data plane performance. + +[`Fortio`][fortio] was used to create the load. The test was conducted with the [Istio benchmark repository][istio-benchmark] that was modified for use with the add-on. + +#### Test specifications +- Tested with two network plugins: Azure CNI Overlay and Azure CNI Overlay with Cilium [ (recommended network plugins for large scale clusters) ](/azure/aks/azure-cni-overlay?tabs=kubectl#choosing-a-network-model-to-use) +- Node SKU: Standard D16 v5 (16 vCPU, 64-GB memory) +- Kubernetes version: 1.28.5 +- Two proxy workers +- 1-KB payload +- 1,000 Queries per second (QPS) at varying client connections +- `http/1.1` protocol and mutual Transport Layer Security (TLS) enabled +- 26 data points collected + +#### CPU and memory +The memory and CPU usage for both the client and server proxy for 16 client connections and 1,000 QPS across all network plugin scenarios is roughly 0.4 vCPU and 72 MB. + +#### Latency +The sidecar Envoy proxy collects raw telemetry data after responding to a client, which doesn't directly affect the request's total processing time. However, this process delays the start of handling the next request, contributing to queue wait times and influencing average and tail latencies. Depending on the traffic pattern, the actual tail latency varies. + +The following results evaluate the impact of adding sidecar proxies to the data path, showcasing the P90 and P99 latency. +- Sidecar traffic path: client --> client-sidecar --> server-sidecar --> server +- Baseline traffic path: client --> server + +A comparison of data plane latency performance across Istio add-on and AKS versions can be found [here](./istio-latency.md). + +| Azure CNI Overlay |Azure CNI Overlay with Cilium | +|:-------------------------:|:-------------------------:| +[ ![Diagram that compares P99 latency for Azure CNI Overlay.](./media/aks-istio-addon/latency-box-plot/overlay-azure-p99.png) ](./media/aks-istio-addon/latency-box-plot/overlay-azure-p99.png#lightbox) | [ ![Diagram that compares P99 latency for Azure CNI Overlay with Cilium.](./media/aks-istio-addon/latency-box-plot/overlay-cilium-p99.png) ](./media/aks-istio-addon/latency-box-plot/overlay-cilium-p99.png#lightbox) +[ ![Diagram that compares P90 latency for Azure CNI Overlay.](./media/aks-istio-addon/latency-box-plot/overlay-azure-p90.png) ](./media/aks-istio-addon/latency-box-plot/overlay-azure-p90.png#lightbox) | [ ![Diagram that compares P90 latency for Azure CNI Overlay with Cilium.](./media/aks-istio-addon/latency-box-plot/overlay-cilium-p90.png) ](./media/aks-istio-addon/latency-box-plot/overlay-cilium-p90.png#lightbox) + +## Scaling + +### Horizontal pod autoscaling customization + +[Horizontal pod autoscaling (HPA)][hpa] is enabled for the `istiod` and ingress gateway pods. The default configurations for `istiod` and the gateways are: +- Min Replicas: 2 +- Max Replicas: 5 +- CPU Utilization: 80% + +> [!NOTE] +> To prevent conflicts with the `PodDisruptionBudget`, the add-on does not allow setting the `minReplicas` below the initial default of `2`. 
+
+The following are the `istiod` and ingress gateway HPA resources:
+```console
+NAMESPACE           NAME                                          REFERENCE
+aks-istio-ingress   aks-istio-ingressgateway-external-asm-1-19   Deployment/aks-istio-ingressgateway-external-asm-1-19
+
+aks-istio-ingress   aks-istio-ingressgateway-internal-asm-1-19   Deployment/aks-istio-ingressgateway-internal-asm-1-19
+
+aks-istio-system    istiod-asm-1-19                               Deployment/istiod-asm-1-19
+```
+
+The HPA configuration can be modified through patches and direct edits.
+
+First, connect to your AKS cluster using the Azure CLI:
+
+```bash
+az aks get-credentials --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER --overwrite-existing
+```
+
+Then, you can patch the HPA resources to customize the scaling settings. Modify the minimum and maximum number of replicas for the external ingress gateway HPA resource to scale as needed. Replace the variable values according to your AKS cluster setup if needed.
+
+```bash
+# Get the external ingress gateway HPA name dynamically.
+# kubectl's JSONPath filter syntax has no "contains" operator, so grep is used to match the name.
+EXTERNAL_HPA_NAME=$(kubectl get hpa -n aks-istio-ingress -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep external)
+
+kubectl patch hpa $EXTERNAL_HPA_NAME -n aks-istio-ingress --type merge --patch '{"spec": {"minReplicas": 3, "maxReplicas": 6}}'
+```
+
+Results:
+
+```output
+horizontalpodautoscaler.autoscaling/aks-istio-ingressgateway-external-asm-1-19 patched
+```
+
+> [!NOTE]
+> See the [Istio add-on upgrade documentation][istio-upgrade-hpa] for details on how HPA settings are applied across both revisions during a canary upgrade.
+
+## Service entry
+Istio's ServiceEntry custom resource definition enables adding other services into Istio's internal service registry. A [ServiceEntry][serviceentry] allows services already in the mesh to route or access the services specified. However, the configuration of multiple ServiceEntries with the `resolution` field set to DNS can cause a [heavy load on Domain Name System (DNS) servers][understanding-dns]. The following suggestions can help reduce the load:
+
+- Switch to `resolution: NONE` to avoid proxy DNS lookups entirely. Suitable for most use cases.
+- Increase TTL (Time To Live) if you control the domains being resolved.
+- Limit the ServiceEntry scope with `exportTo`.
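+For illustration, a minimal ServiceEntry that follows these suggestions might look like the following sketch. The `httpbin.org` host, the `httpbin-ext` name, and the namespace-scoped `exportTo` value are example choices, not requirements:
+
+```text
+cat <<EOF | kubectl apply -f -
+apiVersion: networking.istio.io/v1beta1
+kind: ServiceEntry
+metadata:
+  name: httpbin-ext
+  namespace: default
+spec:
+  hosts:
+  - httpbin.org
+  location: MESH_EXTERNAL
+  ports:
+  - number: 443
+    name: https
+    protocol: TLS
+  resolution: NONE   # avoids proxy DNS lookups for this entry
+  exportTo:
+  - "."              # limits visibility to the namespace that owns the entry
+EOF
+```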
+ +[control-plane-performance]: https://istio.io/latest/docs/ops/deployment/performance-and-scalability/#control-plane-performance +[data-plane-performance]: https://istio.io/latest/docs/ops/deployment/performance-and-scalability/#data-plane-performance +[clusterloader2]: https://github.com/kubernetes/perf-tests/tree/master/clusterloader2#clusterloader +[fortio]: https://fortio.org/ +[istio-benchmark]: https://github.com/istio/tools/tree/master/perf/benchmark#istio-performance-benchmarking +[serviceentry]: https://istio.io/latest/docs/reference/config/networking/service-entry/ +[understanding-dns]: https://preliminary.istio.io/latest/docs/ops/configuration/traffic-management/dns/#proxy-dns-resolution +[hpa]: https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/ +[istio-upgrade-hpa]: ./istio-upgrade.md#minor-revision-upgrades-with-horizontal-pod-autoscaling-customizations \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/kubelet-logs.md b/scenarios/azure-aks-docs/articles/aks/kubelet-logs.md new file mode 100644 index 000000000..249aacf34 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/kubelet-logs.md @@ -0,0 +1,111 @@ +--- +title: View kubelet logs in Azure Kubernetes Service (AKS) +description: Learn how to view troubleshooting information in the kubelet logs from Azure Kubernetes Service (AKS) nodes +ms.topic: how-to +ms.subservice: aks-monitoring +ms.date: 06/08/2024 +author: nickoman +ms.author: nickoman +ms.custom: innovation-engine,aks,logs,troubleshooting +--- + +# Get kubelet logs from Azure Kubernetes Service (AKS) cluster nodes + +When operating an Azure Kubernetes Service (AKS) cluster, you may need to review logs to troubleshoot a problem. Azure portal has a built-in capability that allows you to view logs for AKS [main components][aks-main-logs] and [cluster containers][azure-container-logs]. Occasionally, you may need to get *kubelet* logs from AKS nodes for troubleshooting purposes. + +This article shows you how you can use `journalctl` to view *kubelet* logs on an AKS node. +Alternatively, customers can collect kubelet logs using the [syslog collection feature in Azure Monitor - Container Insights](https://aka.ms/CISyslog). + +## Before you begin + +This article assumes you have an existing AKS cluster. If you need an AKS cluster, create one using [Azure CLI][aks-quickstart-cli], [Azure PowerShell][aks-quickstart-powershell], or [Azure portal][aks-quickstart-portal]. + +## Connect to your AKS cluster + +To interact with your AKS cluster, first get the cluster credentials using the Azure CLI: + +```bash +export RESOURCE_GROUP_NAME="" +export AKS_CLUSTER_NAME="" +az aks get-credentials --resource-group $RESOURCE_GROUP_NAME --name $AKS_CLUSTER_NAME +``` +This command configures `kubectl` to use the credentials for your AKS cluster. 
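+Optionally, you can confirm that the credentials work and capture a node name for the following sections. This check is illustrative; any node name from the output can be used where a node name is required later:
+
+```text
+kubectl get nodes -o wide
+```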
+ +## Using kubectl raw + +You can quickly view any node kubelet logs by using the following command: + +```bash +export NODE_NAME="aks-agentpool-xxxxxxx-0" +kubectl get --raw "/api/v1/nodes/$NODE_NAME/proxy/logs/messages" | grep kubelet +``` + +Results: + + + +```output +I0508 12:26:17.905042 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:26:27.943494 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:26:28.920125 8672 server.go:796] GET /stats/summary: (10.370874ms) 200 [[Ruby] 10.244.0.x:52492] +I0508 12:26:37.964650 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +... +``` + +## Create an SSH connection + +First, you need to create an SSH connection with the node you need to view *kubelet* logs for. To create this connection, follow the steps in [SSH into AKS cluster nodes][aks-ssh]. + +## Get kubelet logs + +Once you connect to the node using `kubectl debug`, run the following command to pull the *kubelet* logs: + +```console +chroot /host +journalctl -u kubelet -o cat +``` + +> [!NOTE] +> For Windows nodes, the log data is in `C:\k` and can be viewed using the *more* command: +> +> ```console +> more C:\k\kubelet.log +> ``` + +The following example output shows *kubelet* log data: + +```output +I0508 12:26:17.905042 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:26:27.943494 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:26:28.920125 8672 server.go:796] GET /stats/summary: (10.370874ms) 200 [[Ruby] 10.244.0.x:52292] +I0508 12:26:37.964650 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:26:47.996449 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:26:58.019746 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:27:05.107680 8672 server.go:796] GET /stats/summary/: (24.853838ms) 200 [[Go-http-client/1.1] 10.244.0.x:44660] +I0508 12:27:08.041736 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:27:18.068505 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:27:28.094889 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:27:38.121346 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:27:44.015205 8672 server.go:796] GET /stats/summary: (30.236824ms) 200 [[Ruby] 10.244.0.x:52588] +I0508 12:27:48.145640 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:27:58.178534 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:28:05.040375 8672 server.go:796] GET /stats/summary/: (27.78503ms) 200 [[Go-http-client/1.1] 10.244.0.x:44660] +I0508 12:28:08.214158 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:28:18.242160 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:28:28.274408 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" 
+I0508 12:28:38.296074 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:28:48.321952 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +I0508 12:28:58.344656 8672 kubelet_node_status.go:497] Using Node Hostname from cloudprovider: "aks-agentpool-xxxxxxx-0" +``` + +## Next steps + +If you need more troubleshooting information for the Kubernetes main, see [view Kubernetes main node logs in AKS][aks-main-logs]. + + +[aks-ssh]: ssh.md +[aks-main-logs]: monitor-aks-reference.md#resource-logs +[aks-quickstart-cli]: ./learn/quick-kubernetes-deploy-cli.md +[aks-quickstart-portal]: ./learn/quick-kubernetes-deploy-portal.md +[aks-quickstart-powershell]: ./learn/quick-kubernetes-deploy-powershell.md +[azure-container-logs]: /azure/azure-monitor/containers/container-insights-overview \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/learn/quick-windows-container-deploy-cli.md b/scenarios/azure-aks-docs/articles/aks/learn/quick-windows-container-deploy-cli.md new file mode 100644 index 000000000..bb683c6b4 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/learn/quick-windows-container-deploy-cli.md @@ -0,0 +1,354 @@ +--- +title: Deploy a Windows Server container on an Azure Kubernetes Service (AKS) cluster using Azure CLI +description: Learn how to quickly deploy a Kubernetes cluster and deploy an application in a Windows Server container in Azure Kubernetes Service (AKS) using Azure CLI. +ms.topic: quickstart +ms.custom: devx-track-azurecli, innovation-engine +ms.date: 01/11/2024 +author: schaffererin +ms.author: schaffererin +--- + +# Deploy a Windows Server container on an Azure Kubernetes Service (AKS) cluster using Azure CLI + +Azure Kubernetes Service (AKS) is a managed Kubernetes service that lets you quickly deploy and manage clusters. In this article, you use Azure CLI to deploy an AKS cluster that runs Windows Server containers. You also deploy an ASP.NET sample application in a Windows Server container to the cluster. + +> [!NOTE] +> To get started with quickly provisioning an AKS cluster, this article includes steps to deploy a cluster with default settings for evaluation purposes only. Before deploying a production-ready cluster, we recommend that you familiarize yourself with our [baseline reference architecture][baseline-reference-architecture] to consider how it aligns with your business requirements. + +## Before you begin + +This quickstart assumes a basic understanding of Kubernetes concepts. For more information, see [Kubernetes core concepts for Azure Kubernetes Service (AKS)](../concepts-clusters-workloads.md). + +- [!INCLUDE [quickstarts-free-trial-note](~/reusable-content/ce-skilling/azure/includes/quickstarts-free-trial-note.md)] + +[!INCLUDE [azure-cli-prepare-your-environment-no-header.md](~/reusable-content/azure-cli/azure-cli-prepare-your-environment-no-header.md)] + +- This article requires version 2.0.64 or later of the Azure CLI. If you're using Azure Cloud Shell, the latest version is already installed there. +- Make sure that the identity you're using to create your cluster has the appropriate minimum permissions. For more details on access and identity for AKS, see [Access and identity options for Azure Kubernetes Service (AKS)](../concepts-identity.md). 
- If you have multiple Azure subscriptions, select the appropriate subscription ID in which the resources should be billed using the [az account set](/cli/azure/account#az-account-set) command. For more information, see [How to manage Azure subscriptions – Azure CLI](/cli/azure/manage-azure-subscriptions-azure-cli?tabs=bash#change-the-active-subscription).
+
+## Create a resource group
+
+An [Azure resource group](/azure/azure-resource-manager/management/overview) is a logical group in which Azure resources are deployed and managed. When you create a resource group, you're asked to specify a location. This location is where resource group metadata is stored and where your resources run in Azure if you don't specify another region during resource creation.
+
+- Create a resource group using the [az group create][az-group-create] command. The following example creates a resource group with a randomized name based on *myAKSResourceGroup* in the region set in the `REGION` variable (*canadacentral* in this example). Enter this command and other commands in this article into a BASH shell:
+
+```bash
+export RANDOM_SUFFIX=$(openssl rand -hex 3)
+export REGION="canadacentral"
+export MY_RESOURCE_GROUP_NAME="myAKSResourceGroup$RANDOM_SUFFIX"
+az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION
+```
+
+Results:
+
+```JSON
+{
+  "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/myAKSResourceGroupxxxxx",
+  "location": "canadacentral",
+  "managedBy": null,
+  "name": "myAKSResourceGroupxxxxx",
+  "properties": {
+    "provisioningState": "Succeeded"
+  },
+  "tags": null,
+  "type": "Microsoft.Resources/resourceGroups"
+}
+```
+
+## Create an AKS cluster
+
+In this section, we create an AKS cluster with the following configuration:
+
+- The cluster is configured with two nodes to ensure it operates reliably. A [node](../concepts-clusters-workloads.md#nodes) is an Azure virtual machine (VM) that runs the Kubernetes node components and container runtime.
+- The `--windows-admin-password` and `--windows-admin-username` parameters set the administrator credentials for any Windows Server nodes on the cluster and must meet [Windows Server password requirements][windows-server-password].
+- The node pool uses `VirtualMachineScaleSets`.
+
+To create the AKS cluster with Azure CLI, follow these steps:
+
+1. Create a username to use as administrator credentials for the Windows Server nodes on your cluster. (The original example prompted for input; in this Exec Doc, the environment variable is set non-interactively.)
+
+```bash
+export WINDOWS_USERNAME="winadmin"
+```
+
+2. Create a password for the administrator username you created in the previous step. The password must be a minimum of 14 characters and meet the [Windows Server password complexity requirements][windows-server-password].
+
+```bash
+export WINDOWS_PASSWORD=$(echo "P@ssw0rd$(openssl rand -base64 10 | tr -dc 'A-Za-z0-9!@#$%^&*()' | cut -c1-6)")
+```
+
+3. Create your cluster using the [az aks create][az-aks-create] command and specify the `--windows-admin-username` and `--windows-admin-password` parameters. The following example command creates a cluster using the values from *WINDOWS_USERNAME* and *WINDOWS_PASSWORD* you set in the previous commands. A random suffix is appended to the cluster name for uniqueness.
+ +```bash +export MY_AKS_CLUSTER="myAKSCluster$RANDOM_SUFFIX" +az aks create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_AKS_CLUSTER \ + --node-count 2 \ + --enable-addons monitoring \ + --generate-ssh-keys \ + --windows-admin-username $WINDOWS_USERNAME \ + --windows-admin-password $WINDOWS_PASSWORD \ + --vm-set-type VirtualMachineScaleSets \ + --network-plugin azure +``` + +After a few minutes, the command completes and returns JSON-formatted information about the cluster. Occasionally, the cluster can take longer than a few minutes to provision. Allow up to 10 minutes for provisioning. + +If you get a password validation error, and the password that you set meets the length and complexity requirements, try creating your resource group in another region. Then try creating the cluster with the new resource group. + +If you don't specify an administrator username and password when creating the node pool, the username is set to *azureuser* and the password is set to a random value. For more information, see the [Windows Server FAQ](../windows-faq.yml) + +The administrator username can't be changed, but you can change the administrator password that your AKS cluster uses for Windows Server nodes using `az aks update`. For more information, see [Windows Server FAQ](../windows-faq.yml). + +To run an AKS cluster that supports node pools for Windows Server containers, your cluster needs to use a network policy that uses [Azure CNI (advanced)][azure-cni] network plugin. The `--network-plugin azure` parameter specifies Azure CNI. + +## Add a node pool + +By default, an AKS cluster is created with a node pool that can run Linux containers. You must add another node pool that can run Windows Server containers alongside the Linux node pool. + +Windows Server 2022 is the default operating system for Kubernetes versions 1.25.0 and higher. Windows Server 2019 is the default OS for earlier versions. If you don't specify a particular OS SKU, Azure creates the new node pool with the default SKU for the version of Kubernetes used by the cluster. + +### [Windows node pool (default SKU)](#tab/add-windows-node-pool) + +To use the default OS SKU, create the node pool without specifying an OS SKU. The node pool is configured for the default operating system based on the Kubernetes version of the cluster. + +Add a Windows node pool using the `az aks nodepool add` command. The following command creates a new node pool named *npwin* and adds it to *myAKSCluster*. The command also uses the default subnet in the default virtual network created when running `az aks create`. An OS SKU isn't specified, so the node pool is set to the default operating system based on the Kubernetes version of the cluster: + +```text +az aks nodepool add \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --cluster-name $MY_AKS_CLUSTER \ + --os-type Windows \ + --name npwin \ + --node-count 1 +``` + +### [Windows Server 2022 node pool](#tab/add-windows-server-2022-node-pool) + +To use Windows Server 2022, specify the following parameters: + +- `os-type` set to `Windows` +- `os-sku` set to `Windows2022` + +> [!NOTE] +> Windows Server 2022 requires Kubernetes version 1.23.0 or higher. Windows Server 2022 is being retired after Kubernetes version 1.34 reaches its end of support. Windows Server 2022 will not be supported in Kubernetes version 1.35 and above. For more information about this retirement, see the [AKS release notes][aks-release-notes]. 
+ +Add a Windows Server 2022 node pool using the `az aks nodepool add` command: + +```text +az aks nodepool add \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --cluster-name $MY_AKS_CLUSTER \ + --os-type Windows \ + --os-sku Windows2022 \ + --name npwin \ + --node-count 1 +``` + +### [Windows Server 2019 node pool](#tab/add-windows-server-2019-node-pool) + +To use Windows Server 2019, specify the following parameters: + +- `os-type` set to `Windows` +- `os-sku` set to `Windows2019` + +> [!NOTE] +> Windows Server 2019 is being retired after Kubernetes version 1.32 reaches end of support. Windows Server 2019 will not be supported in Kubernetes version 1.33 and above. For more information about this retirement, see the [AKS release notes][aks-release-notes]. + +Add a Windows Server 2019 node pool using the `az aks nodepool add` command: + +```text +az aks nodepool add \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --cluster-name $MY_AKS_CLUSTER \ + --os-type Windows \ + --os-sku Windows2019 \ + --name npwin \ + --node-count 1 +``` + +## Connect to the cluster + +You use [kubectl][kubectl], the Kubernetes command-line client, to manage your Kubernetes clusters. If you use Azure Cloud Shell, `kubectl` is already installed. If you want to install and run `kubectl` locally, call the [az aks install-cli][az-aks-install-cli] command. + +1. Configure `kubectl` to connect to your Kubernetes cluster using the [az aks get-credentials][az-aks-get-credentials] command. This command downloads credentials and configures the Kubernetes CLI to use them. + +```bash +az aks get-credentials --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_AKS_CLUSTER +``` + +2. Verify the connection to your cluster using the [kubectl get][kubectl-get] command, which returns a list of the cluster nodes. + +```bash +kubectl get nodes -o wide +``` + +The following sample output shows all nodes in the cluster. Make sure the status of all nodes is *Ready*: + + + +```text +NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME +aks-nodepool1-20786768-vmss000000 Ready agent 22h v1.27.7 10.224.0.4 Ubuntu 22.04.3 LTS 5.15.0-1052-azure containerd://1.7.5-1 +aks-nodepool1-20786768-vmss000001 Ready agent 22h v1.27.7 10.224.0.33 Ubuntu 22.04.3 LTS 5.15.0-1052-azure containerd://1.7.5-1 +aksnpwin000000 Ready agent 20h v1.27.7 10.224.0.62 Windows Server 2022 Datacenter 10.0.20348.2159 containerd://1.6.21+azure +``` + +> [!NOTE] +> The container runtime for each node pool is shown under *CONTAINER-RUNTIME*. The container runtime values begin with `containerd://`, which means that they each use `containerd` for the container runtime. + +## Deploy the application + +A Kubernetes manifest file defines a desired state for the cluster, such as what container images to run. In this article, you use a manifest to create all objects needed to run the ASP.NET sample application in a Windows Server container. This manifest includes a [Kubernetes deployment][kubernetes-deployment] for the ASP.NET sample application and an external [Kubernetes service][kubernetes-service] to access the application from the internet. + +The ASP.NET sample application is provided as part of the [.NET Framework Samples][dotnet-samples] and runs in a Windows Server container. AKS requires Windows Server containers to be based on images of *Windows Server 2019* or greater. 
The Kubernetes manifest file must also define a [node selector][node-selector] to tell your AKS cluster to run your ASP.NET sample application's pod on a node that can run Windows Server containers. + +1. Create a file named `sample.yaml` and copy in the following YAML definition. + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sample + labels: + app: sample +spec: + replicas: 1 + template: + metadata: + name: sample + labels: + app: sample + spec: + nodeSelector: + "kubernetes.io/os": windows + containers: + - name: sample + image: mcr.microsoft.com/dotnet/framework/samples:aspnetapp + resources: + limits: + cpu: 1 + memory: 800M + ports: + - containerPort: 80 + selector: + matchLabels: + app: sample +--- +apiVersion: v1 +kind: Service +metadata: + name: sample +spec: + type: LoadBalancer + ports: + - protocol: TCP + port: 80 + selector: + app: sample +``` + +For a breakdown of YAML manifest files, see [Deployments and YAML manifests](../concepts-clusters-workloads.md#deployments-and-yaml-manifests). + +If you create and save the YAML file locally, then you can upload the manifest file to your default directory in CloudShell by selecting the **Upload/Download files** button and selecting the file from your local file system. + +2. Deploy the application using the [kubectl apply][kubectl-apply] command and specify the name of your YAML manifest. + +```bash +kubectl apply -f sample.yaml +``` + +The following sample output shows the deployment and service created successfully: + + + +```text +{ + "deployment.apps/sample": "created", + "service/sample": "created" +} +``` + +## Test the application + +When the application runs, a Kubernetes service exposes the application front end to the internet. This process can take a few minutes to complete. Occasionally, the service can take longer than a few minutes to provision. Allow up to 10 minutes for provisioning. + +1. Check the status of the deployed pods using the [kubectl get pods][kubectl-get] command. Make sure all pods are `Running` before proceeding. + +```bash +kubectl get pods +``` + +2. Monitor progress using the [kubectl get service][kubectl-get] command with the `--watch` argument. + +```bash +while true; do + export EXTERNAL_IP=$(kubectl get service sample -o jsonpath="{.status.loadBalancer.ingress[0].ip}" 2>/dev/null) + if [[ -n "$EXTERNAL_IP" && "$EXTERNAL_IP" != "" ]]; then + kubectl get service sample + break + fi + echo "Still waiting for external IP assignment..." + sleep 5 +done +``` + +Initially, the output shows the *EXTERNAL-IP* for the sample service as *pending*: + + + +```text +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +sample LoadBalancer xx.xx.xx.xx pending xx:xxxx/TCP 2m +``` + +When the *EXTERNAL-IP* address changes from *pending* to an actual public IP address, use `CTRL-C` to stop the `kubectl` watch process. The following sample output shows a valid public IP address assigned to the service: + +```JSON +{ + "NAME": "sample", + "TYPE": "LoadBalancer", + "CLUSTER-IP": "10.0.37.27", + "EXTERNAL-IP": "52.179.23.131", + "PORT(S)": "80:30572/TCP", + "AGE": "2m" +} +``` + +See the sample app in action by opening a web browser to the external IP address of your service after a few minutes. + +:::image type="content" source="media/quick-windows-container-deploy-cli/asp-net-sample-app.png" alt-text="Screenshot of browsing to ASP.NET sample application." 
lightbox="media/quick-windows-container-deploy-cli/asp-net-sample-app.png"::: + +## Next steps + +In this quickstart, you deployed a Kubernetes cluster and then deployed an ASP.NET sample application in a Windows Server container to it. This sample application is for demo purposes only and doesn't represent all the best practices for Kubernetes applications. For guidance on creating full solutions with AKS for production, see [AKS solution guidance][aks-solution-guidance]. + +To learn more about AKS, and to walk through a complete code-to-deployment example, continue to the Kubernetes cluster tutorial. + +> [!div class="nextstepaction"] +> [AKS tutorial][aks-tutorial] + + +[kubectl]: https://kubernetes.io/docs/reference/kubectl/ +[kubectl-apply]: https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#apply +[kubectl-get]: https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#get +[node-selector]: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ +[dotnet-samples]: https://hub.docker.com/_/microsoft-dotnet-framework-samples/ +[azure-cni]: https://github.com/Azure/azure-container-networking/blob/master/docs/cni.md +[aks-release-notes]: https://github.com/Azure/AKS/releases + + +[aks-tutorial]: ../tutorial-kubernetes-prepare-app.md +[az-aks-create]: /cli/azure/aks#az_aks_create +[az-aks-get-credentials]: /cli/azure/aks#az_aks_get_credentials +[az-aks-install-cli]: /cli/azure/aks#az_aks_install_cli +[az-group-create]: /cli/azure/group#az_group_create +[aks-solution-guidance]: /azure/architecture/reference-architectures/containers/aks-start-here?toc=/azure/aks/toc.json&bc=/azure/aks/breadcrumb/toc.json +[kubernetes-deployment]: ../concepts-clusters-workloads.md#deployments-and-yaml-manifests +[kubernetes-service]: ../concepts-network-services.md +[windows-server-password]: /windows/security/threat-protection/security-policy-settings/password-must-meet-complexity-requirements#reference +[baseline-reference-architecture]: /azure/architecture/reference-architectures/containers/aks/baseline-aks?toc=/azure/aks/toc.json&bc=/azure/aks/breadcrumb/toc.json \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/learn/sample.yaml b/scenarios/azure-aks-docs/articles/aks/learn/sample.yaml new file mode 100644 index 000000000..926ff7496 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/learn/sample.yaml @@ -0,0 +1,40 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sample + labels: + app: sample +spec: + replicas: 1 + template: + metadata: + name: sample + labels: + app: sample + spec: + nodeSelector: + "kubernetes.io/os": windows + containers: + - name: sample + image: mcr.microsoft.com/dotnet/framework/samples:aspnetapp + resources: + limits: + cpu: 1 + memory: 800M + ports: + - containerPort: 80 + selector: + matchLabels: + app: sample +--- +apiVersion: v1 +kind: Service +metadata: + name: sample +spec: + type: LoadBalancer + ports: + - protocol: TCP + port: 80 + selector: + app: sample \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/nat-gateway.md b/scenarios/azure-aks-docs/articles/aks/nat-gateway.md new file mode 100644 index 000000000..16629380c --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/nat-gateway.md @@ -0,0 +1,431 @@ +--- +title: Create a managed or user-assigned NAT gateway for your Azure Kubernetes Service (AKS) cluster +description: Learn how to create an AKS cluster with managed NAT integration and user-assigned NAT gateway. 
+ms.topic: how-to +ms.date: 06/03/2024 +author: asudbring +ms.author: allensu +ms.custom: devx-track-azurecli, innovation-engine +--- + +# Create a managed or user-assigned NAT gateway for your Azure Kubernetes Service (AKS) cluster + +While you can route egress traffic through an Azure Load Balancer, there are limitations on the number of outbound flows of traffic you can have. Azure NAT Gateway allows up to 64,512 outbound UDP and TCP traffic flows per IP address with a maximum of 16 IP addresses. + +This article shows you how to create an Azure Kubernetes Service (AKS) cluster with a managed NAT gateway and a user-assigned NAT gateway for egress traffic. It also shows you how to disable OutboundNAT on Windows. + +## Before you begin + +* Make sure you're using the latest version of [Azure CLI][az-cli]. +* Make sure you're using Kubernetes version 1.20.x or above. +* Managed NAT gateway is incompatible with custom virtual networks. + +> [!IMPORTANT] +> In non-private clusters, API server cluster traffic is routed and processed through the clusters outbound type. To prevent API server traffic from being processed as public traffic, consider using a [private cluster][private-cluster], or check out the [API Server VNet Integration][api-server-vnet-integration] feature. + +## Create an AKS cluster with a managed NAT gateway + +* Create an AKS cluster with a new managed NAT gateway using the [`az aks create`][az-aks-create] command with the `--outbound-type managedNATGateway`, `--nat-gateway-managed-outbound-ip-count`, and `--nat-gateway-idle-timeout` parameters. If you want the NAT gateway to operate out of a specific availability zone, specify the zone using `--zones`. +* If no zone is specified when creating a managed NAT gateway, then NAT gateway is deployed to "no zone" by default. When NAT gateway is placed in **no zone**, Azure places the resource in a zone for you. For more information on non-zonal deployment model, see [non-zonal NAT gateway](/azure/nat-gateway/nat-availability-zones#non-zonal). +* A managed NAT gateway resource can't be used across multiple availability zones. + +The following commands first create the required resource group, then the AKS cluster with a managed NAT gateway. + +```azurecli-interactive +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export MY_RG="myResourceGroup$RANDOM_SUFFIX" +export MY_AKS="myNatCluster$RANDOM_SUFFIX" +az group create --name $MY_RG --location "eastus2" +``` + +Results: + +```output +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxx", + "location": "eastus2", + "managedBy": null, + "name": "myResourceGroupxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +```azurecli-interactive +az aks create \ + --resource-group $MY_RG \ + --name $MY_AKS \ + --node-count 3 \ + --outbound-type managedNATGateway \ + --nat-gateway-managed-outbound-ip-count 2 \ + --nat-gateway-idle-timeout 4 \ + --generate-ssh-keys +``` + +Results: + + + +```output +{ + "aadProfile": null, + "agentPoolProfiles": [ + { + ... + "name": "nodepool1", + ... + "provisioningState": "Succeeded", + ... + } + ], + "dnsPrefix": "myNatClusterxxx-dns-xxx", + "fqdn": "myNatClusterxxx-dns-xxx.xxxxx.xxxxxx.cloudapp.azure.com", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourcegroups/myResourceGroupxxx/providers/Microsoft.ContainerService/managedClusters/myNatClusterxxx", + "name": "myNatClusterxxx", + ... 
+ "resourceGroup": "myResourceGroupxxx", + ... + "provisioningState": "Succeeded", + ... + "type": "Microsoft.ContainerService/ManagedClusters" +} +``` + +* Update the outbound IP address or idle timeout using the [`az aks update`][az-aks-update] command with the `--nat-gateway-managed-outbound-ip-count` or `--nat-gateway-idle-timeout` parameter. + +The following example updates the NAT gateway managed outbound IP count for the AKS cluster to 5. + +```azurecli-interactive +az aks update \ + --resource-group $MY_RG \ + --name $MY_AKS \ + --nat-gateway-managed-outbound-ip-count 5 +``` + +Results: + + + +```output +{ + "aadProfile": null, + "agentPoolProfiles": [ + { + ... + "name": "nodepool1", + ... + "provisioningState": "Succeeded", + ... + } + ], + "dnsPrefix": "myNatClusterxxx-dns-xxx", + "fqdn": "myNatClusterxxx-dns-xxx.xxxxx.xxxxxx.cloudapp.azure.com", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourcegroups/myResourceGroupxxx/providers/Microsoft.ContainerService/managedClusters/myNatClusterxxx", + "name": "myNatClusterxxx", + ... + "resourceGroup": "myResourceGroupxxx", + ... + "provisioningState": "Succeeded", + ... + "type": "Microsoft.ContainerService/ManagedClusters" +} +``` + +## Create an AKS cluster with a user-assigned NAT gateway + +This configuration requires bring-your-own networking (via [Kubenet][byo-vnet-kubenet] or [Azure CNI][byo-vnet-azure-cni]) and that the NAT gateway is preconfigured on the subnet. The following commands create the required resources for this scenario. + +1. Create a resource group using the [`az group create`][az-group-create] command. + + ```shell + export RANDOM_SUFFIX=$(openssl rand -hex 3) + export MY_RG="myResourceGroup$RANDOM_SUFFIX" + az group create --name $MY_RG --location southcentralus + ``` + + Results: + + ```output + { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxx", + "location": "southcentralus", + "managedBy": null, + "name": "myResourceGroupxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" + } + ``` + +2. Create a managed identity for network permissions and store the ID to `$IDENTITY_ID` for later use. + + ```shell + export IDENTITY_NAME="myNatClusterId$RANDOM_SUFFIX" + export IDENTITY_ID=$(az identity create \ + --resource-group $MY_RG \ + --name $IDENTITY_NAME \ + --location southcentralus \ + --query id \ + --output tsv) + ``` + + Results: + + ```output + /xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.ManagedIdentity/userAssignedIdentities/myNatClusterIdxxx + ``` + +3. Create a public IP for the NAT gateway using the [`az network public-ip create`][az-network-public-ip-create] command. + + ```shell + export PIP_NAME="myNatGatewayPip$RANDOM_SUFFIX" + az network public-ip create \ + --resource-group $MY_RG \ + --name $PIP_NAME \ + --location southcentralus \ + --sku standard + ``` + + Results: + + ```output + { + "publicIp": { + "ddosSettings": null, + "dnsSettings": null, + "etag": "W/\"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\"", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Network/publicIPAddresses/myNatGatewayPipxxx", + "ipAddress": null, + "ipTags": [], + "location": "southcentralus", + "name": "myNatGatewayPipxxx", + ... + "provisioningState": "Succeeded", + ... 
+ "sku": { + "name": "Standard", + "tier": "Regional" + }, + "type": "Microsoft.Network/publicIPAddresses", + ... + } + } + ``` + +4. Create the NAT gateway using the [`az network nat gateway create`][az-network-nat-gateway-create] command. + + ```shell + export NATGATEWAY_NAME="myNatGateway$RANDOM_SUFFIX" + az network nat gateway create \ + --resource-group $MY_RG \ + --name $NATGATEWAY_NAME \ + --location southcentralus \ + --public-ip-addresses $PIP_NAME + ``` + + Results: + + ```output + { + "etag": "W/\"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\"", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Network/natGateways/myNatGatewayxxx", + "location": "southcentralus", + "name": "myNatGatewayxxx", + "provisioningState": "Succeeded", + "publicIpAddresses": [ + { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Network/publicIPAddresses/myNatGatewayPipxxx" + } + ], + ... + "type": "Microsoft.Network/natGateways" + } + ``` + + > [!Important] + > A single NAT gateway resource can't be used across multiple availability zones. To ensure zone-resiliency, it is recommended to deploy a NAT gateway resource to each availability zone and assign to subnets containing AKS clusters in each zone. For more information on this deployment model, see [NAT gateway for each zone](/azure/nat-gateway/nat-availability-zones#zonal-nat-gateway-resource-for-each-zone-in-a-region-to-create-zone-resiliency). + > If no zone is configured for NAT gateway, the default zone placement is "no zone", in which Azure places NAT gateway into a zone for you. + +5. Create a virtual network using the [`az network vnet create`][az-network-vnet-create] command. + + ```shell + export VNET_NAME="myVnet$RANDOM_SUFFIX" + az network vnet create \ + --resource-group $MY_RG \ + --name $VNET_NAME \ + --location southcentralus \ + --address-prefixes 172.16.0.0/20 + ``` + + Results: + + ```output + { + "newVNet": { + "addressSpace": { + "addressPrefixes": [ + "172.16.0.0/20" + ] + }, + ... + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Network/virtualNetworks/myVnetxxx", + "location": "southcentralus", + "name": "myVnetxxx", + "provisioningState": "Succeeded", + ... + "type": "Microsoft.Network/virtualNetworks", + ... + } + } + ``` + +6. Create a subnet in the virtual network using the NAT gateway and store the ID to `$SUBNET_ID` for later use. + + ```shell + export SUBNET_NAME="myNatCluster$RANDOM_SUFFIX" + export SUBNET_ID=$(az network vnet subnet create \ + --resource-group $MY_RG \ + --vnet-name $VNET_NAME \ + --name $SUBNET_NAME \ + --address-prefixes 172.16.0.0/22 \ + --nat-gateway $NATGATEWAY_NAME \ + --query id \ + --output tsv) + ``` + + Results: + + ```output + /xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Network/virtualNetworks/myVnetxxx/subnets/myNatClusterxxx + ``` + +7. Create an AKS cluster using the subnet with the NAT gateway and the managed identity using the [`az aks create`][az-aks-create] command. 
+ + ```shell + export AKS_NAME="myNatCluster$RANDOM_SUFFIX" + az aks create \ + --resource-group $MY_RG \ + --name $AKS_NAME \ + --location southcentralus \ + --network-plugin azure \ + --vnet-subnet-id $SUBNET_ID \ + --outbound-type userAssignedNATGateway \ + --assign-identity $IDENTITY_ID \ + --generate-ssh-keys + ``` + + Results: + + ```output + { + "aadProfile": null, + "agentPoolProfiles": [ + { + ... + "name": "nodepool1", + ... + "provisioningState": "Succeeded", + ... + } + ], + "dnsPrefix": "myNatClusterxxx-dns-xxx", + "fqdn": "myNatClusterxxx-dns-xxx.xxxxx.xxxxxx.cloudapp.azure.com", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourcegroups/myResourceGroupxxx/providers/Microsoft.ContainerService/managedClusters/myNatClusterxxx", + "name": "myNatClusterxxx", + ... + "resourceGroup": "myResourceGroupxxx", + ... + "provisioningState": "Succeeded", + ... + "type": "Microsoft.ContainerService/ManagedClusters" + } + ``` + +## Disable OutboundNAT for Windows + +Windows OutboundNAT can cause certain connection and communication issues with your AKS pods. An example issue is node port reuse. In this example, Windows OutboundNAT uses ports to translate your pod IP to your Windows node host IP, which can cause an unstable connection to the external service due to a port exhaustion issue. + +Windows enables OutboundNAT by default. You can now manually disable OutboundNAT when creating new Windows agent pools. + +### Prerequisites + +* Existing AKS cluster with v1.26 or above. If you're using Kubernetes version 1.25 or older, you need to [update your deployment configuration][upgrade-kubernetes]. + +### Limitations + +* You can't set cluster outbound type to LoadBalancer. You can set it to Nat Gateway or UDR: + * [NAT Gateway](./nat-gateway.md): NAT Gateway can automatically handle NAT connection and is more powerful than Standard Load Balancer. You might incur extra charges with this option. + * [UDR (UserDefinedRouting)](./limit-egress-traffic.md): You must keep port limitations in mind when configuring routing rules. + * If you need to switch from a load balancer to NAT Gateway, you can either add a NAT gateway into the VNet or run [`az aks upgrade`][aks-upgrade] to update the outbound type. + +> [!NOTE] +> UserDefinedRouting has the following limitations: +> +> * SNAT by Load Balancer (must use the default OutboundNAT) has "64 ports on the host IP". +> * SNAT by Azure Firewall (disable OutboundNAT) has 2496 ports per public IP. +> * SNAT by NAT Gateway (disable OutboundNAT) has 64512 ports per public IP. +> * If the Azure Firewall port range isn't enough for your application, you need to use NAT Gateway. +> * Azure Firewall doesn't SNAT with Network rules when the destination IP address is in a private IP address range per [IANA RFC 1918 or shared address space per IANA RFC 6598](/azure/firewall/snat-private-range). + +### Manually disable OutboundNAT for Windows + +* Manually disable OutboundNAT for Windows when creating new Windows agent pools using the [`az aks nodepool add`][az-aks-nodepool-add] command with the `--disable-windows-outbound-nat` flag. + + > [!NOTE] + > You can use an existing AKS cluster, but you might need to update the outbound type and add a node pool to enable `--disable-windows-outbound-nat`. + + The following command adds a Windows node pool to an existing AKS cluster, disabling OutboundNAT. 
+ + ```shell + export WIN_NODEPOOL_NAME="win$(head -c 1 /dev/urandom | xxd -p)" + az aks nodepool add \ + --resource-group $MY_RG \ + --cluster-name $MY_AKS \ + --name $WIN_NODEPOOL_NAME \ + --node-count 3 \ + --os-type Windows \ + --disable-windows-outbound-nat + ``` + + Results: + + + + ```output + { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.ContainerService/managedClusters/myNatClusterxxx/agentPools/mynpxxx", + "name": "mynpxxx", + "osType": "Windows", + "provisioningState": "Succeeded", + "resourceGroup": "myResourceGroupxxx", + "type": "Microsoft.ContainerService/managedClusters/agentPools" + } + ``` + +## Next steps + +For more information on Azure NAT Gateway, see [Azure NAT Gateway][nat-docs]. + + +[api-server-vnet-integration]: api-server-vnet-integration.md +[byo-vnet-azure-cni]: configure-azure-cni.md +[byo-vnet-kubenet]: configure-kubenet.md +[private-cluster]: private-clusters.md +[upgrade-kubernetes]:tutorial-kubernetes-upgrade-cluster.md + + +[nat-docs]: /azure/virtual-network/nat-gateway/nat-overview +[az-cli]: /cli/azure/install-azure-cli +[aks-upgrade]: /cli/azure/aks#az-aks-update +[az-aks-create]: /cli/azure/aks#az-aks-create +[az-aks-update]: /cli/azure/aks#az-aks-update +[az-group-create]: /cli/azure/group#az_group_create +[az-network-public-ip-create]: /cli/azure/network/public-ip#az_network_public_ip_create +[az-network-nat-gateway-create]: /cli/azure/network/nat/gateway#az_network_nat_gateway_create +[az-network-vnet-create]: /cli/azure/network/vnet#az_network_vnet_create +[az-aks-nodepool-add]: /cli/azure/aks/nodepool#az_aks_nodepool_add diff --git a/scenarios/azure-aks-docs/articles/aks/node-image-upgrade.md b/scenarios/azure-aks-docs/articles/aks/node-image-upgrade.md new file mode 100644 index 000000000..b92230640 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/node-image-upgrade.md @@ -0,0 +1,173 @@ +--- +title: Upgrade Azure Kubernetes Service (AKS) node images +description: Learn how to upgrade the images on AKS cluster nodes and node pools. +ms.topic: how-to +ms.custom: devx-track-azurecli, innovation-engine +ms.subservice: aks-upgrade +ms.service: azure-kubernetes-service +ms.date: 09/20/2024 +author: schaffererin +ms.author: schaffererin +--- + +# Upgrade Azure Kubernetes Service (AKS) node images + +Azure Kubernetes Service (AKS) regularly provides new node images, so it's beneficial to upgrade your node images frequently to use the latest AKS features. Linux node images are updated weekly, and Windows node images are updated monthly. Image upgrade announcements are included in the [AKS release notes](https://github.com/Azure/AKS/releases), and it can take up to a week for these updates to be rolled out across all regions. You can also perform node image upgrades automatically and schedule them using planned maintenance. For more information, see [Automatically upgrade node images][auto-upgrade-node-image]. + +This article shows you how to upgrade AKS cluster node images and how to update node pool images without upgrading the Kubernetes version. For information on upgrading the Kubernetes version for your cluster, see [Upgrade an AKS cluster][upgrade-cluster]. + +> [!NOTE] +> The AKS cluster must use virtual machine scale sets for the nodes. +> +> It's not possible to downgrade a node image version (for example *AKSUbuntu-2204 to AKSUbuntu-1804*, or *AKSUbuntu-2204-202308.01.0 to AKSUbuntu-2204-202307.27.0*). + + +## Connect to your AKS cluster + +1. 
Connect to your AKS cluster using the [`az aks get-credentials`][az-aks-get-credentials] command. + + ```azurecli-interactive + az aks get-credentials \ + --resource-group $AKS_RESOURCE_GROUP \ + --name $AKS_CLUSTER + ``` +## Check for available node image upgrades + +1. Check for available node image upgrades using the [`az aks nodepool get-upgrades`][az-aks-nodepool-get-upgrades] command. + + ```azurecli-interactive + az aks nodepool get-upgrades \ + --nodepool-name $AKS_NODEPOOL \ + --cluster-name $AKS_CLUSTER \ + --resource-group $AKS_RESOURCE_GROUP + ``` + +1. In the output, find and make note of the `latestNodeImageVersion` value. This value is the latest node image version available for your node pool. +1. Check your current node image version to compare with the latest version using the [`az aks nodepool show`][az-aks-nodepool-show] command. + + ```azurecli-interactive + az aks nodepool show \ + --resource-group $AKS_RESOURCE_GROUP \ + --cluster-name $AKS_CLUSTER \ + --name $AKS_NODEPOOL \ + --query nodeImageVersion + ``` + +1. If the `nodeImageVersion` value is different from the `latestNodeImageVersion`, you can upgrade your node image. + +## Upgrade all node images in all node pools + +1. Upgrade all node images in all node pools in your cluster using the [`az aks upgrade`][az-aks-upgrade] command with the `--node-image-only` flag. + + ```text + az aks upgrade \ + --resource-group $AKS_RESOURCE_GROUP \ + --name $AKS_CLUSTER \ + --node-image-only \ + --yes + ``` + +1. You can check the status of the node images using the `kubectl get nodes` command. + + > [!NOTE] + > This command might differ slightly depending on the shell you use. For more information on Windows and PowerShell environments, see the [Kubernetes JSONPath documentation][kubernetes-json-path]. + + ```bash + kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.metadata.labels.kubernetes\.azure\.com\/node-image-version}{"\n"}{end}' + ``` + +1. When the upgrade completes, use the [`az aks show`][az-aks-show] command to get the updated node pool details. The current node image is shown in the `nodeImageVersion` property. + + ```azurecli-interactive + az aks show \ + --resource-group $AKS_RESOURCE_GROUP \ + --name $AKS_CLUSTER + ``` + +## Upgrade a specific node pool + +1. Update the OS image of a node pool without doing a Kubernetes cluster upgrade using the [`az aks nodepool upgrade`][az-aks-nodepool-upgrade] command with the `--node-image-only` flag. + + ```azurecli-interactive + az aks nodepool upgrade \ + --resource-group $AKS_RESOURCE_GROUP \ + --cluster-name $AKS_CLUSTER \ + --name $AKS_NODEPOOL \ + --node-image-only + ``` + +1. You can check the status of the node images with the `kubectl get nodes` command. + + > [!NOTE] + > This command may differ slightly depending on the shell you use. For more information on Windows and PowerShell environments, see the [Kubernetes JSONPath documentation][kubernetes-json-path]. + + ```bash + kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.metadata.labels.kubernetes\.azure\.com\/node-image-version}{"\n"}{end}' + ``` + +1. When the upgrade completes, use the [`az aks nodepool show`][az-aks-nodepool-show] command to get the updated node pool details. The current node image is shown in the `nodeImageVersion` property. 
+ + ```azurecli-interactive + az aks nodepool show \ + --resource-group $AKS_RESOURCE_GROUP \ + --cluster-name $AKS_CLUSTER \ + --name $AKS_NODEPOOL + ``` + +## Upgrade node images with node surge + +To speed up the node image upgrade process, you can upgrade your node images using a customizable node surge value. By default, AKS uses one extra node to configure upgrades. + +1. Upgrade node images with node surge using the [`az aks nodepool update`][az-aks-nodepool-update] command with the `--max-surge` flag to configure the number of nodes used for upgrades. + + > [!NOTE] + > To learn more about the trade-offs of various `--max-surge` settings, see [Customize node surge upgrade][max-surge]. + + ```azurecli-interactive + az aks nodepool update \ + --resource-group $AKS_RESOURCE_GROUP \ + --cluster-name $AKS_CLUSTER \ + --name $AKS_NODEPOOL \ + --max-surge 33% \ + --no-wait + ``` + +1. You can check the status of the node images with the `kubectl get nodes` command. + + ```bash + kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.metadata.labels.kubernetes\.azure\.com\/node-image-version}{"\n"}{end}' + ``` + +1. Get the updated node pool details using the [`az aks nodepool show`][az-aks-nodepool-show] command. The current node image is shown in the `nodeImageVersion` property. + + ```azurecli-interactive + az aks nodepool show \ + --resource-group $AKS_RESOURCE_GROUP \ + --cluster-name $AKS_CLUSTER \ + --name $AKS_NODEPOOL + ``` + +## Next steps + +- For information about the latest node images, see the [AKS release notes](https://github.com/Azure/AKS/releases). +- Learn how to upgrade the Kubernetes version with [Upgrade an AKS cluster][upgrade-cluster]. +- [Automatically apply cluster and node pool upgrades with GitHub Actions][github-schedule]. +- Learn more about multiple node pools with [Create multiple node pools][use-multiple-node-pools]. +- Learn about upgrading best practices with [AKS patch and upgrade guidance][upgrade-operators-guide]. + + +[kubernetes-json-path]: https://kubernetes.io/docs/reference/kubectl/jsonpath/ + + +[upgrade-cluster]: upgrade-aks-cluster.md +[github-schedule]: node-upgrade-github-actions.md +[use-multiple-node-pools]: create-node-pools.md +[max-surge]: upgrade-aks-cluster.md#customize-node-surge-upgrade +[auto-upgrade-node-image]: auto-upgrade-node-image.md +[az-aks-nodepool-get-upgrades]: /cli/azure/aks/nodepool#az_aks_nodepool_get_upgrades +[az-aks-nodepool-show]: /cli/azure/aks/nodepool#az_aks_nodepool_show +[az-aks-nodepool-upgrade]: /cli/azure/aks/nodepool#az_aks_nodepool_upgrade +[az-aks-nodepool-update]: /cli/azure/aks/nodepool#az_aks_nodepool_update +[az-aks-upgrade]: /cli/azure/aks#az_aks_upgrade +[az-aks-show]: /cli/azure/aks#az_aks_show +[upgrade-operators-guide]: /azure/architecture/operator-guides/aks/aks-upgrade-practices \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/resize-cluster.md b/scenarios/azure-aks-docs/articles/aks/resize-cluster.md new file mode 100644 index 000000000..828ffa0ec --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/resize-cluster.md @@ -0,0 +1,132 @@ +--- +title: Resize Azure Kubernetes Service (AKS) clusters +description: In this article, you learn about the importance of right-sizing your AKS clusters and how you can right-size them to optimize costs and performance. 
+ms.topic: how-to +ms.date: 06/13/2024 +author: schaffererin +ms.author: schaffererin +ms.service: azure-kubernetes-service +# Customer intent: As a cluster operator, I want to resize my cluster so I can scale my workloads based on demand. +ms.custom: innovation-engine, devx-track-azurecli, aks, scaling, cluster-management +--- + +# Resize Azure Kubernetes Service (AKS) clusters + +In this article, you learn how to resize an Azure Kubernetes Service (AKS) cluster. It's important to right-size your clusters to optimize costs and performance. You can manually resize a cluster by adding or removing the nodes to meet the needs of your applications. You can also autoscale your cluster to automatically adjust the number of nodes in response to changing demands. + +## Cluster right-sizing + +When you create an AKS cluster, you specify the number of nodes and the size of the nodes, which determines the compute capacity of the cluster. Oversized clusters can lead to unnecessary costs, while undersized clusters can lead to performance issues. You can adjust the number and size of the nodes in the cluster to right-size the cluster to meet the needs of your applications. + +Consider the following factors when right-sizing your cluster: + +* **Resource requirements**: Understand the resource requirements of your applications to determine the number of nodes and the size of the nodes needed to run your workloads. +* **Performance requirements**: Determine the performance requirements of your applications to ensure that the cluster can meet the demands of your workloads. +* **Cost considerations**: Optimize costs by right-sizing your cluster to avoid unnecessary costs associated with oversized clusters. +* **Application demands**: Monitor the demands of your applications to adjust the size of the cluster in response to changing demands. +* **Infrastructure constraints**: Consider the infrastructure constraints of your environment, such as capacity or reserved instance limiting to specific SKUs, to ensure that the cluster can be right-sized within the limits of your environment. + +## Monitor cluster performance and cost + +Closely monitor the performance and cost of your clusters to ensure they're right-sized to meet the needs of your application and make adjustments as needed. You can use the following resources for monitoring: + +* [Identify high CPU usage in Azure Kubernetes Service (AKS) clusters][identify-high-cpu-usage] +* [Troubleshoot memory saturation in Azure Kubernetes Service (AKS) clusters][troubleshoot-memory-saturation] +* [Cost analysis add-on for Azure Kubernetes Service (AKS)](./cost-analysis.md) +* [Configure the Metrics Server Vertical Pod Autoscaler (VPA) in Azure Kubernetes Service (AKS)](./use-metrics-server-vertical-pod-autoscaler.md) + +## When to resize a cluster + +You might want to resize a cluster in scenarios such as the following: + +* If you see that CPU and memory usage is consistently low, consider *downsizing* the cluster. If usage is consistently high, make sure you have [autoscaling enabled](#automatically-resize-an-aks-cluster) and increase the maximum node count if necessary. +* The [cost analysis add-on for AKS](./cost-analysis.md) shows you details about node usage and cost that indicate you might benefit from cluster resizing. For example, if you see that your nodes have a *high idle cost* with a *low usage cost*, you might consider resizing your cluster to reduce costs. 
+* The [Metrics Server VPA](./use-metrics-server-vertical-pod-autoscaler.md) shows you that your requests and/or limits are too high or low based on historical usage. You can use this information to adjust your cluster size to better match your workload. +* You experience performance issues such as resource starvation. This might be a result of the cluster being undersized for the demands of your applications. + +## What happens when I resize a cluster? + +### Increasing cluster size + +You can increase the size of an AKS cluster by adding nodes to the cluster. You can [add nodes to the cluster manually][manually-scale] or [configure autoscaling to automatically adjust the number of nodes](#automatically-resize-an-aks-cluster) in response to changing demands. + +When you increase the size of a cluster, the following changes occur: + +* New node instances are created using the same configuration as the existing nodes in the cluster. +* New pods might be scheduled on the new nodes to distribute the workload across the cluster. +* Existing pods don't move to the new nodes unless they are rescheduled due to node failures or other reasons. + +### Decreasing cluster size + +You can decrease the size of an AKS cluster by removing nodes from the cluster. When you remove nodes from the cluster, the nodes are automatically drained and removed from the cluster. You can remove nodes from the cluster manually or configure autoscaling to automatically adjust the number of nodes in response to changing demands. + +When you decrease the size of a cluster, the following changes occur: + +* AKS gracefully terminates the nodes and drains the pods running on the nodes before removing the nodes from the cluster. +* Any pods managed by a replication controller are rescheduled on other node instances in the cluster. +* Any pods that aren't managed by a replication controller aren't restarted. + +## Manually resize an AKS cluster + +### [Azure CLI](#tab/azure-cli) + +* Resize an AKS cluster using the [`az aks scale`][az-aks-scale] command with the `--node-count` and `--nodepool-name` parameters. + +Before running the resize command, set the required environment variables with your own values. The example values should be substituted with your actual resource group, cluster, desired node count, and node pool name. + +```azurecli-interactive +az aks scale --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --node-count $NUM_NODES --nodepool-name $NODE_POOL_NAME +``` + +Results: + + + +```output +{ + "agentPoolProfiles": [ + { + "count": 4, + "maxCount": null, + "minCount": null, + "name": "nodepool1", + ... + } + ], + "dnsPrefix": "xxxxx", + "fqdn": "xxxxx.xxxxx.xxxxxx.cloudapp.azure.com", + ... +} +``` + +Repeat this command for each node pool in the cluster that you want to resize. If your cluster has only one node pool, you can omit the `--nodepool-name` parameter. + +### [Azure portal](#tab/azure-portal) + +1. In the Azure portal, go to the AKS cluster that you want to resize. +2. Under **Settings**, select **Node pools**. +3. Select the node pool that you want to resize > **Scale node pool**. +4. On the **Scale node pool** page, enter the new **Node count** value. +5. Select **Apply** and repeat the steps for each node pool in the cluster that you want to resize. + +--- + +## Automatically resize an AKS cluster + +Use the [cluster autoscaler](./cluster-autoscaler-overview.md) to automatically resize your node pools in response to changing demands. 
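+
+As a minimal sketch, assuming the same `$RESOURCE_GROUP`, `$CLUSTER_NAME`, and `$NODE_POOL_NAME` variables as the manual resize example (the `--min-count` and `--max-count` values below are illustrative), you can enable the cluster autoscaler on an existing node pool with `az aks nodepool update`:
+
+```azurecli-interactive
+# Let AKS scale this node pool between 1 and 5 nodes based on pending workloads (illustrative bounds)
+az aks nodepool update \
+  --resource-group $RESOURCE_GROUP \
+  --cluster-name $CLUSTER_NAME \
+  --name $NODE_POOL_NAME \
+  --enable-cluster-autoscaler \
+  --min-count 1 \
+  --max-count 5
+```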
+ +For more information, see the [Cluster autoscaling in Azure Kubernetes Service (AKS) overview](./cluster-autoscaler-overview.md). To configure cluster autoscaling in AKS, see [Use the cluster autoscaler in Azure Kubernetes Service (AKS)](./cluster-autoscaler.md). + +## Next steps + +In this article, you learned how to right-size an AKS cluster. To learn more about managing AKS clusters, see the following articles: + +* [Stop and start an AKS cluster](./start-stop-cluster.md) +* [Configure a private AKS cluster](./private-clusters.md) +* [Use AKS cluster extensions](./cluster-extensions.md) + + +[az-aks-scale]: /cli/azure/aks#az-aks-scale +[manually-scale]: ./scale-cluster.md +[identify-high-cpu-usage]: /troubleshoot/azure/azure-kubernetes/availability-performance/identify-high-cpu-consuming-containers-aks +[troubleshoot-memory-saturation]: /troubleshoot/azure/azure-kubernetes/availability-performance/identify-memory-saturation-aks \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/spot-node-pool.md b/scenarios/azure-aks-docs/articles/aks/spot-node-pool.md new file mode 100644 index 000000000..e093dcd16 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/spot-node-pool.md @@ -0,0 +1,240 @@ +--- +title: Add an Azure Spot node pool to an Azure Kubernetes Service (AKS) cluster +description: Learn how to add an Azure Spot node pool to an Azure Kubernetes Service (AKS) cluster. +ms.topic: how-to +ms.date: 03/29/2023 +author: schaffererin +ms.author: schaffererin +ms.subservice: aks-nodes +--- + +# Add an Azure Spot node pool to an Azure Kubernetes Service (AKS) cluster + +In this article, you add a secondary Spot node pool to an existing Azure Kubernetes Service (AKS) cluster. + +A Spot node pool is a node pool backed by an [Azure Spot Virtual Machine scale set][vmss-spot]. With Spot VMs in your AKS cluster, you can take advantage of unutilized Azure capacity with significant cost savings. The amount of available unutilized capacity varies based on many factors, such as node size, region, and time of day. + +When you deploy a Spot node pool, Azure allocates the Spot nodes if there's capacity available and deploys a Spot scale set that backs the Spot node pool in a single default domain. There's no SLA for the Spot nodes. There are no high availability guarantees. If Azure needs capacity back, the Azure infrastructure evicts the Spot nodes. + +Spot nodes are great for workloads that can handle interruptions, early terminations, or evictions. For example, workloads such as batch processing jobs, development and testing environments, and large compute workloads might be good candidates to schedule on a Spot node pool. + +## Before you begin + +* This article assumes a basic understanding of Kubernetes and Azure Load Balancer concepts. For more information, see [Kubernetes core concepts for Azure Kubernetes Service (AKS)][kubernetes-concepts]. +* If you don't have an Azure subscription, create a [free account](https://azure.microsoft.com/free/?WT.mc_id=A261C142F) before you begin. +* When you create a cluster to use a Spot node pool, the cluster must use Virtual Machine Scale Sets for node pools and the *Standard* SKU load balancer. You must also add another node pool after you create your cluster, which is covered in this tutorial. +* This article requires that you're running the Azure CLI version 2.14 or later. Run `az --version` to find the version. If you need to install or upgrade, see [Install Azure CLI][azure-cli-install]. 
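+
+The node pool commands later in this article reference an existing cluster through the `$RESOURCE_GROUP` and `$AKS_CLUSTER` environment variables. The following is a minimal sketch with placeholder names; substitute the resource group and cluster you want to use:
+
+```azurecli-interactive
+# Placeholder names for an existing resource group and AKS cluster
+export RESOURCE_GROUP="myResourceGroup"
+export AKS_CLUSTER="myAKSCluster"
+```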
+ +### Limitations + +The following limitations apply when you create and manage AKS clusters with a Spot node pool: + +* A Spot node pool can't be a default node pool, it can only be used as a secondary pool. +* You can't upgrade the control plane and node pools at the same time. You must upgrade them separately or remove the Spot node pool to upgrade the control plane and remaining node pools at the same time. +* A Spot node pool must use Virtual Machine Scale Sets. +* You can't change `ScaleSetPriority` or `SpotMaxPrice` after creation. +* When setting `SpotMaxPrice`, the value must be *-1* or a *positive value with up to five decimal places*. +* A Spot node pool has the `kubernetes.azure.com/scalesetpriority:spot` label, the `kubernetes.azure.com/scalesetpriority=spot:NoSchedule` taint, and the system pods have anti-affinity. +* You must add a [corresponding toleration][spot-toleration] and affinity to schedule workloads on a Spot node pool. + +## Add a Spot node pool to an AKS cluster + +When adding a Spot node pool to an existing cluster, it must be a cluster with multiple node pools enabled. When you create an AKS cluster with multiple node pools enabled, you create a node pool with a `priority` of `Regular` by default. To add a Spot node pool, you must specify `Spot` as the value for `priority`. For more details on creating an AKS cluster with multiple node pools, see [use multiple node pools][use-multiple-node-pools]. + +* Create a node pool with a `priority` of `Spot` using the [`az aks nodepool add`][az-aks-nodepool-add] command. + +```azurecli-interactive +export SPOT_NODEPOOL="spotnodepool" + +az aks nodepool add \ + --resource-group $RESOURCE_GROUP \ + --cluster-name $AKS_CLUSTER \ + --name $SPOT_NODEPOOL \ + --priority Spot \ + --eviction-policy Delete \ + --spot-max-price -1 \ + --enable-cluster-autoscaler \ + --min-count 1 \ + --max-count 3 \ + --no-wait +``` + +In the previous command, the `priority` of `Spot` makes the node pool a Spot node pool. The `eviction-policy` parameter is set to `Delete`, which is the default value. When you set the [eviction policy][eviction-policy] to `Delete`, nodes in the underlying scale set of the node pool are deleted when they're evicted. + +You can also set the eviction policy to `Deallocate`, which means that the nodes in the underlying scale set are set to the *stopped-deallocated* state upon eviction. Nodes in the *stopped-deallocated* state count against your compute quota and can cause issues with cluster scaling or upgrading. The `priority` and `eviction-policy` values can only be set during node pool creation. Those values can't be updated later. + +The previous command also enables the [cluster autoscaler][cluster-autoscaler], which we recommend using with Spot node pools. Based on the workloads running in your cluster, the cluster autoscaler scales the number of nodes up and down. For Spot node pools, the cluster autoscaler will scale up the number of nodes after an eviction if more nodes are still needed. If you change the maximum number of nodes a node pool can have, you also need to adjust the `maxCount` value associated with the cluster autoscaler. If you don't use a cluster autoscaler, upon eviction, the Spot pool will eventually decrease to *0* and require manual operation to receive any additional Spot nodes. + +> [!IMPORTANT] +> Only schedule workloads on Spot node pools that can handle interruptions, such as batch processing jobs and testing environments. 
We recommend you set up [taints and tolerations][taints-tolerations] on your Spot node pool to ensure that only workloads that can handle node evictions are scheduled on a Spot node pool. For example, the above command adds a taint of `kubernetes.azure.com/scalesetpriority=spot:NoSchedule`, so only pods with a corresponding toleration are scheduled on this node. + +## Verify the Spot node pool + +* Verify your node pool was added using the [`az aks nodepool show`][az-aks-nodepool-show] command and confirming the `scaleSetPriority` is `Spot`. + +```azurecli-interactive +az aks nodepool show --resource-group $RESOURCE_GROUP --cluster-name $AKS_CLUSTER --name $SPOT_NODEPOOL +``` + +Results: + + + +```JSON +{ + "artifactStreamingProfile": null, + "availabilityZones": null, + "capacityReservationGroupId": null, + "count": 3, + "creationData": null, + "currentOrchestratorVersion": "1.30.10", + "eTag": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "enableAutoScaling": true, + "enableCustomCaTrust": false, + "enableEncryptionAtHost": false, + "enableFips": false, + "enableNodePublicIp": false, + "enableUltraSsd": false, + "gatewayProfile": null, + "gpuInstanceProfile": null, + "gpuProfile": null, + "hostGroupId": null, + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourcegroups/xxxxxxxxxxxxxxxx/providers/Microsoft.ContainerService/managedClusters/xxxxxxxxxxxxxxxx/agentPools/xxxxxxxxxxxx", + "kubeletConfig": null, + "kubeletDiskType": "OS", + "linuxOsConfig": null, + "maxCount": 3, + "maxPods": 30, + "messageOfTheDay": null, + "minCount": 1, + "mode": "User", + "name": "xxxxxxxxxxxx", + "networkProfile": { + "allowedHostPorts": null, + "applicationSecurityGroups": null, + "nodePublicIpTags": null + }, + "nodeImageVersion": "AKSUbuntu-2204gen2containerd-xxxxxxxx.xx.x", + "nodeInitializationTaints": null, + "nodeLabels": { + "kubernetes.azure.com/scalesetpriority": "spot" + }, + "nodePublicIpPrefixId": null, + "nodeTaints": [ + "kubernetes.azure.com/scalesetpriority=spot:NoSchedule" + ], + "orchestratorVersion": "x.xx.xx", + "osDiskSizeGb": 128, + "osDiskType": "Managed", + "osSku": "Ubuntu", + "osType": "Linux", + "podIpAllocationMode": null, + "podSubnetId": null, + "powerState": { + "code": "Running" + }, + "provisioningState": "Creating", + "proximityPlacementGroupId": null, + "resourceGroup": "xxxxxxxxxxxxxxxx", + "scaleDownMode": "Delete", + "scaleSetEvictionPolicy": "Delete", + "scaleSetPriority": "Spot", + "securityProfile": { + "enableSecureBoot": false, + "enableVtpm": false, + "sshAccess": "LocalUser" + }, + "spotMaxPrice": -1.0, + "status": null, + "tags": null, + "type": "Microsoft.ContainerService/managedClusters/agentPools", + "typePropertiesType": "VirtualMachineScaleSets", + "upgradeSettings": { + "drainTimeoutInMinutes": null, + "maxSurge": null, + "maxUnavailable": null, + "nodeSoakDurationInMinutes": null, + "undrainableNodeBehavior": null + }, + "virtualMachineNodesStatus": null, + "virtualMachinesProfile": null, + "vmSize": "Standard_DS2_v2", + "vnetSubnetId": null, + "windowsProfile": null, + "workloadRuntime": "OCIContainer" +} +``` + +## Schedule a pod to run on the Spot node + +To schedule a pod to run on a Spot node, you can add a toleration and node affinity that corresponds to the taint applied to your Spot node. 
+ +The following example shows a portion of a YAML file that defines a toleration corresponding to the `kubernetes.azure.com/scalesetpriority=spot:NoSchedule` taint and a node affinity corresponding to the `kubernetes.azure.com/scalesetpriority=spot` label used in the previous step with `requiredDuringSchedulingIgnoredDuringExecution` and `preferredDuringSchedulingIgnoredDuringExecution` node affinity rules: + +```yaml +spec: + containers: + - name: spot-example + tolerations: + - key: "kubernetes.azure.com/scalesetpriority" + operator: "Equal" + value: "spot" + effect: "NoSchedule" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "kubernetes.azure.com/scalesetpriority" + operator: In + values: + - "spot" + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: another-node-label-key + operator: In + values: + - another-node-label-value +``` + +When you deploy a pod with this toleration and node affinity, Kubernetes successfully schedules the pod on the nodes with the taint and label applied. In this example, the following rules apply: + +* The node *must* have a label with the key `kubernetes.azure.com/scalesetpriority`, and the value of that label *must* be `spot`. +* The node *preferably* has a label with the key `another-node-label-key`, and the value of that label *must* be `another-node-label-value`. + +For more information, see [Assigning pods to nodes](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity). + +## Upgrade a Spot node pool + +When you upgrade a Spot node pool, AKS internally issues a cordon and an eviction notice, but no drain is applied. There are no surge nodes available for Spot node pool upgrades. Outside of these changes, the behavior when upgrading Spot node pools is consistent with that of other node pool types. + +For more information on upgrading, see [Upgrade an AKS cluster][upgrade-cluster]. + +## Max price for a Spot pool + +[Pricing for Spot instances is variable][pricing-spot], based on region and SKU. For more information, see pricing information for [Linux][pricing-linux] and [Windows][pricing-windows]. + +With variable pricing, you have the option to set a max price, in US dollars (USD) using up to five decimal places. For example, the value *0.98765* would be a max price of *$0.98765 USD per hour*. If you set the max price to *-1*, the instance won't be evicted based on price. As long as there's capacity and quota available, the price for the instance will be the lower price of either the current price for a Spot instance or for a standard instance. + +## Next steps + +In this article, you learned how to add a Spot node pool to an AKS cluster. For more information about how to control pods across node pools, see [Best practices for advanced scheduler features in AKS][operator-best-practices-advanced-scheduler]. 
+ + +[azure-cli-install]: /cli/azure/install-azure-cli +[az-aks-nodepool-add]: /cli/azure/aks/nodepool#az-aks-nodepool-add +[az-aks-nodepool-show]: /cli/azure/aks/nodepool#az_aks_nodepool_show +[cluster-autoscaler]: cluster-autoscaler.md +[eviction-policy]: /azure/virtual-machine-scale-sets/use-spot#eviction-policy +[kubernetes-concepts]: concepts-clusters-workloads.md +[operator-best-practices-advanced-scheduler]: operator-best-practices-advanced-scheduler.md +[pricing-linux]: https://azure.microsoft.com/pricing/details/virtual-machine-scale-sets/linux/ +[pricing-spot]: /azure/virtual-machine-scale-sets/use-spot#pricing +[pricing-windows]: https://azure.microsoft.com/pricing/details/virtual-machine-scale-sets/windows/ +[spot-toleration]: #verify-the-spot-node-pool +[taints-tolerations]: operator-best-practices-advanced-scheduler.md#provide-dedicated-nodes-using-taints-and-tolerations +[use-multiple-node-pools]: create-node-pools.md +[vmss-spot]: /azure/virtual-machine-scale-sets/use-spot +[upgrade-cluster]: upgrade-cluster.md \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/trusted-access-feature.md b/scenarios/azure-aks-docs/articles/aks/trusted-access-feature.md new file mode 100644 index 000000000..9922a8ad4 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/trusted-access-feature.md @@ -0,0 +1,130 @@ +--- +title: Get secure resource access to Azure Kubernetes Service (AKS) using Trusted Access +description: Learn how to use the Trusted Access feature to give Azure resources access to Azure Kubernetes Service (AKS) clusters. +author: schaffererin +ms.topic: how-to +ms.custom: devx-track-azurecli, innovation-engine +ms.date: 11/05/2024 +ms.author: schaffererin +--- + +# Get secure access for Azure resources in Azure Kubernetes Service by using Trusted Access + +This article shows you how to get secure access for your Azure services to your Kubernetes API server in Azure Kubernetes Service (AKS) using Trusted Access. + +The Trusted Access feature gives services secure access to AKS API server by using the Azure back end without requiring a private endpoint. Instead of relying on identities that have [Microsoft Entra](/azure/active-directory/fundamentals/active-directory-whatis) permissions, this feature can use your system-assigned managed identity to authenticate with the managed services and applications that you want to use with your AKS clusters. + +> [!NOTE] +> The Trusted Access API is generally available. We provide general availability (GA) support for the Azure CLI, but it's still in preview and requires using the aks-preview extension. + +## Trusted Access feature overview + +Trusted Access addresses the following scenarios: + +* If an authorized IP range is set or in a private cluster, Azure services might not be able to access the Kubernetes API server unless you implement a private endpoint access model. +* Giving an Azure service admin access to the Kubernetes API doesn't follow the least privilege access best practice and can lead to privilege escalations or risk of credentials leakage. For example, you might have to implement high-privileged service-to-service permissions, and they aren't ideal in an audit review. + +You can use Trusted Access to give explicit consent to your system-assigned managed identity of allowed resources to access your AKS clusters by using an Azure resource called a *role binding*. Your Azure resources access AKS clusters through the AKS regional gateway via system-assigned managed identity authentication. 
The appropriate Kubernetes permissions are assigned via an Azure resource called a *role*. Through Trusted Access, you can access AKS clusters with different configurations including but not limited to [private clusters](private-clusters.md), [clusters that have local accounts turned off](manage-local-accounts-managed-azure-ad.md#disable-local-accounts), [Microsoft Entra clusters](azure-ad-integration-cli.md), and [authorized IP range clusters](api-server-authorized-ip-ranges.md). + +## Prerequisites + +* An Azure account with an active subscription. [Create an account for free](https://azure.microsoft.com/free/?WT.mc_id=A261C142F). +* Resource types that support [system-assigned managed identity](/azure/active-directory/managed-identities-azure-resources/overview). +* Azure CLI version 2.53.0 or later. Run `az --version` to find your version. If you need to install or upgrade, see [Install Azure CLI][azure-cli-install]. +* To learn what roles to use in different scenarios, see these articles: + * [Azure Machine Learning access to AKS clusters with special configurations](https://github.com/Azure/AML-Kubernetes/blob/master/docs/azureml-aks-ta-support.md) + * [What is Azure Kubernetes Service backup?][aks-azure-backup] + * [Turn on an agentless container posture](/azure/defender-for-cloud/concept-agentless-containers) +* In the same subscription as the Azure resource that you want to access the cluster, [create an AKS cluster](tutorial-kubernetes-deploy-cluster.md). + +## Connect to your cluster + +Configure `kubectl` to connect to your cluster using the [`az aks get-credentials`][az-aks-get-credentials] command. + +```azurecli-interactive +export RESOURCE_GROUP_NAME="myAKSResourceGroup0b090b" +export CLUSTER_NAME="myAKSCluster0b090b" + +az aks get-credentials --resource-group ${RESOURCE_GROUP_NAME} --name ${CLUSTER_NAME} --overwrite-existing +``` + +Verify the connection to your cluster using the `kubectl get` command. + +```bash +kubectl get nodes +``` + +## Select the required Trusted Access roles + +The roles that you select depend on the Azure services that you want to access the AKS cluster. Azure services help create roles and role bindings that build the connection from the Azure service to AKS. + +To find the roles that you need, see the documentation for the Azure service that you want to connect to AKS. You can also use the Azure CLI to list the roles that are available for the Azure service using the `az aks trustedaccess role list --location ` command. + +## Create a Trusted Access role binding + +After you confirm which role to use, use the Azure CLI to create a Trusted Access role binding in the AKS cluster. 
The role binding associates your selected role with the Azure service + +```azurecli-interactive +export RESOURCE_GROUP_NAME="myAKSResourceGroup0b090b" +export CLUSTER_NAME="myAKSCluster0b090b" +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export ROLE_BINDING_NAME="myRoleBindingName${RANDOM_SUFFIX}" +export SOURCE_RESOURCE_ID=$(az aks show --resource-group $RESOURCE_GROUP_NAME --name $CLUSTER_NAME --query id --output tsv) +export ROLE_NAME_1="Microsoft.ContainerService/managedClusters/roleName1" +export ROLE_NAME_2="Microsoft.ContainerService/managedClusters/roleName2" + +az aks trustedaccess rolebinding create --resource-group ${RESOURCE_GROUP_NAME} --cluster-name ${CLUSTER_NAME} --name ${ROLE_BINDING_NAME} --source-resource-id ${SOURCE_RESOURCE_ID} --roles ${ROLE_NAME_1},${ROLE_NAME_2} +``` + +Results: + + + +```json +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/${RESOURCE_GROUP_NAME}/providers/Microsoft.ContainerService/managedClusters/${CLUSTER_NAME}/trustedAccessRoleBindings/${ROLE_BINDING_NAME}", + "name": "${ROLE_BINDING_NAME}", + "provisioningState": "Succeeded", + "resourceGroup": "${RESOURCE_GROUP_NAME}", + "roles": [ + "${ROLE_NAME_1}", + "${ROLE_NAME_2}" + ], + "sourceResourceId": "${SOURCE_RESOURCE_ID}", + "systemData": null, + "type": "Microsoft.ContainerService/managedClusters/trustedAccessRoleBindings" +} +``` + +## Update an existing Trusted Access role binding + +For an existing role binding that has an associated source service, you can update the role binding with new roles using the `az aks trustedaccess rolebinding update --resource-group $RESOURCE_GROUP_NAME --cluster-name $CLUSTER_NAME --name $ROLE_BINDING_NAME --roles $ROLE_NAME_3,$ROLE_NAME_4` command. This command updates the role binding with the new roles that you specify. + +> [!NOTE] +> The add-on manager updates clusters every five minutes, so the new role binding might take up to five minutes to take effect. Before the new role binding takes effect, the existing role binding still works. +> +> You can use the `az aks trusted access rolebinding list` command to check the current role binding. + +## Show a Trusted Access role binding + +Show a specific Trusted Access role binding using the `az aks trustedaccess rolebinding show --name $ROLE_BINDING_NAME --resource-group $RESOURCE_GROUP_NAME --cluster-name $CLUSTER_NAME` command. + +## List all the Trusted Access role bindings for a cluster + +List all the Trusted Access role bindings for a cluster using the `az aks trustedaccess rolebinding list --resource-group $RESOURCE_GROUP_NAME --cluster-name $CLUSTER_NAME` command. 
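+
+For example, the show and list commands from the preceding sections can be run as-is with the environment variables defined earlier in this article:
+
+```azurecli-interactive
+# Show a single Trusted Access role binding
+az aks trustedaccess rolebinding show --name ${ROLE_BINDING_NAME} --resource-group ${RESOURCE_GROUP_NAME} --cluster-name ${CLUSTER_NAME}
+
+# List all Trusted Access role bindings on the cluster
+az aks trustedaccess rolebinding list --resource-group ${RESOURCE_GROUP_NAME} --cluster-name ${CLUSTER_NAME}
+```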
+ +## Related content + +* [Deploy and manage cluster extensions for AKS](cluster-extensions.md) +* [Deploy the Azure Machine Learning extension on an AKS or Azure Arc–enabled Kubernetes cluster](/azure/machine-learning/how-to-deploy-kubernetes-extension) +* [Deploy Azure Backup on an AKS cluster](/azure/backup/azure-kubernetes-service-backup-overview) +* [Set agentless container posture in Microsoft Defender for Cloud for an AKS cluster](/azure/defender-for-cloud/concept-agentless-containers) + + + +[az-feature-register]: /cli/azure/feature#az-feature-register +[az-feature-show]: /cli/azure/feature#az-feature-show +[az-provider-register]: /cli/azure/provider#az-provider-register +[aks-azure-backup]: /azure/backup/azure-kubernetes-service-backup-overview +[azure-cli-install]: /cli/azure/install-azure-cli +[az-aks-get-credentials]: /cli/azure/aks#az-aks-get-credentials \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/use-etags.md b/scenarios/azure-aks-docs/articles/aks/use-etags.md new file mode 100644 index 000000000..03d7e1af8 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/use-etags.md @@ -0,0 +1,137 @@ +--- +title: Enhancing Concurrency Control with Entity Tags (eTags) in Azure Kubernetes Service +description: Learn how to use eTags (Entity Tags) to enable concurrency control and avoid racing conditions or overwriting scenarios. +ms.topic: how-to +ms.date: 06/10/2024 +author: reginalin +ms.author: reginalin +ms.custom: innovation-engine, aks, etag, concurrency-control +ms.subservice: aks-nodes +--- + +# Enhance concurrency control with entity tags (eTags) in Azure Kubernetes Service + +To prevent conflicting requests in Azure Kubernetes Service (AKS), eTags (Entity Tags) serve as unique identifiers that enable concurrency control. When a request to the cluster is made, the system checks whether the provided eTag matches the latest version stored in the database. If there is a mismatch, the request fails early, ensuring that no unintended overwrites occur. + +## Utilizing eTag Headers + +There are two options for applying eTags through headers: + +**`–-if-match`** Header: Ensures that the operation is performed only if the existing eTag matches the value provided in this header. + +**`–-if-none-match`** Header: Ensures that the operation is performed only if none of the eTags matches the value provided in this header. This header type can only be empty or a `*`. + +### Find existing ETags + +You can do either a `LIST` or a `GET` call to your cluster or node pool to see the existing ETag. An ETag looks something like the following example: +``` +"agentPoolProfiles": [ + {"eTag": "5e5ffdce-356b-431b-b050-81b45eef2a12"} +] +``` + +### What would modify existing ETags + +ETags can exist at both the cluster and agent pool levels. Depending on the scope of the operations you are performing, you can pass in the corresponding eTag. When you perform a cluster-level operation, both the cluster-level eTag and agent pool eTag are updated. When you perform an agent pool operation, only the agent pool eTag is updated. + +### Include ETags in operation headers + +Headers are optional to use. The following examples show how to use `–-if-match` and `-–if-none-match` headers. + +**Example 1**: The CLI command below deletes an existing cluster `MyManagedCluster` if the eTag matches with `yvjvt` + +Suppose you want to delete an AKS cluster using its eTag. (For illustration, replace `"yvjvt"` with the actual eTag value you retrieved from the resource.) 
+ +```shell +az aks delete -g $RG_NAME -n $CLUSTER_NAME --if-match "yvjvt" +``` + +**Example 2**: The CLI command below creates a new cluster. If `*` is provided in the `–if-none-match` header, that means to validate the resource does not exist. + +First, create a resource group: + +```azurecli +export RANDOM_SUFFIX=$(head -c 3 /dev/urandom | xxd -p) +export RG_NAME="my-resource-group$RANDOM_SUFFIX" +export REGION="eastus2" + +az group create --name $RG_NAME --location $REGION +``` + +Then, create a new AKS cluster with a random suffix to ensure uniqueness: + +```azurecli +export CLUSTER_NAME="my-managed-cluster$RANDOM_SUFFIX" + +az aks create -g $RG_NAME -n $CLUSTER_NAME --location $REGION --if-none-match "*" +``` + +Results: + + + +```output +{ + "aadProfile": null, + "addonProfiles": null, + "agentPoolProfiles": [ + { + "eTag": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + ... + } + ], + "apiServerAccessProfile": null, + "autoScalerProfile": null, + ... + "name": "my-managed-clusterxxx", + ... + "provisioningState": "Succeeded", + ... + "resourceGroup": "my-resource-groupxxx", + ... +} +``` + +### Configurations and Expected Behavior + +The table below outlines the expected behavior of HTTP operations (PUT, PATCH, and DELETE) based on different eTag configurations and resource existence. They show how the presence of `--if-match` or `--if-none-match` headers affects the response status codes, ensuring concurrency control and preventing unintended modifications. + + +**PUT** | **Resource does not exist** | **Resource exists** +--- | --- | --- +**`--if-match = ""`** | 201 – Created | 200 - Ok +**`--if-match = "*"`** | 412 - Precondition Failed | 200 - OK +**`--if-match = "xyz"`** | 412 - Precondition Failed | 200 - OK OR 412 - Precondition Failed +**`--if-none-match = "*"`** | 201 - Created | 412 - Precondition Failed + + +**PATCH** | **Resource does not exist** | **Resource exists** +--- | --- | --- +**`--if-match = ""`** | 404 - Not Found | 200 - OK +**`--if-match = "*"`** | 404 - Not Found | 200 - OK +**`--if-match = "xyz"`** | 404 - Not Found | 200 - OK OR 412 - Precondition Failed + + +**DELETE** | **Resource does not exist** | **Resource exists** +--- | --- | --- +**`--if-match = ""`** | 204 - No Content | 200 - OK +**`--if-match = "*"`** | 204 - No Content | 200 - OK +**`--if-match = "xyz"`** | 204 - No Content | 200 - OK OR 412 - Precondition Failed + +## Common Issues and Recommended Mitigations + +### **Scenario 1**: `BadRequest` – `--if-none-match` header is not empty or not set to `*` + +This fails the prevalidation checks. The `--if-none-match` header can only be empty or take a value of `*`. + +### **Scenario 2**: `BadRequest` - `--if-match` header is not empty AND `--if-none-match` header is `*` + +This fails the prevalidation checks. Both headers cannot be used at the same time. + +### **Scenario 3**: `PreConditionFailed` - `--if-none-match` is `*` and the given resource already exists + +The request is rejected if a `*` (wildcard of any) value is passed into `--if-none-match` header and the resource already exists. + +### **Scenario 4**: `PreConditionFailed` - The value of `--if-match` header does not match the latest eTag value of the resource + +The request is rejected if the header provided does not match with the eTag value. A new GET operation is needed to get the latest eTag on the resource and update the header value in the request. 
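+
+As a minimal sketch of that GET, assuming the `$RG_NAME` and `$CLUSTER_NAME` variables from the earlier examples and that the eTag is surfaced under `agentPoolProfiles` as shown in the "Find existing ETags" section, you can read the current agent pool eTag values with a JMESPath query:
+
+```azurecli
+# Fetch the latest agent pool eTag(s) before retrying the request with an updated --if-match header
+az aks show -g $RG_NAME -n $CLUSTER_NAME --query "agentPoolProfiles[].eTag" -o tsv
+```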
diff --git a/scenarios/azure-aks-docs/articles/aks/use-labels.md b/scenarios/azure-aks-docs/articles/aks/use-labels.md new file mode 100644 index 000000000..1eb6ef75b --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/use-labels.md @@ -0,0 +1,254 @@ +--- +title: Use labels in an Azure Kubernetes Service (AKS) cluster +description: Learn how to use labels in an Azure Kubernetes Service (AKS) cluster. +author: rayoef +ms.author: rayoflores +ms.topic: how-to +ms.date: 06/10/2024 +ms.custom: innovation-engine, devx-track-azurecli, linux-related-content, kubernetes, aks +--- + +# Use labels in an Azure Kubernetes Service (AKS) cluster + +If you have multiple node pools, you may want to add a label during node pool creation. [Kubernetes labels][kubernetes-labels] handle the scheduling rules for nodes. You can add labels to a node pool anytime and apply them to all nodes in the node pool. + +In this how-to guide, you learn how to use labels in an Azure Kubernetes Service (AKS) cluster. + +## Prerequisites + +You need the Azure CLI version 2.2.0 or later installed and configured. Run `az --version` to find the version. If you need to install or upgrade, see [Install Azure CLI][install-azure-cli]. + +## Create an AKS cluster with a label + +You can create an AKS cluster with node labels to set key/value metadata for workload scheduling. + +```bash +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export RESOURCE_GROUP="myResourceGroup$RANDOM_SUFFIX" +export AKS_CLUSTER_NAME="myAKSCluster$RANDOM_SUFFIX" +az group create --name $RESOURCE_GROUP --location $REGION +``` + +Results: + + + +```output +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxx", + "location": "eastus2", + "managedBy": null, + "name": "myResourceGroupxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +Create the AKS cluster specifying node labels (e.g., dept=IT, costcenter=9000): + +```azurecli-interactive +az aks create \ + --resource-group $RESOURCE_GROUP \ + --name $AKS_CLUSTER_NAME \ + --node-count 2 \ + --nodepool-labels dept=IT costcenter=9000 \ + --generate-ssh-keys --location $REGION +``` + +Results: + + + +```output +{ + "aadProfile": null, + "addonProfiles": {}, + "agentPoolProfiles": [ + { + "count": 2, + "enableAutoScaling": null, + "mode": "System", + "name": "nodepool1", + "nodeLabels": { + "costcenter": "9000", + "dept": "IT" + } + } + ], + "dnsPrefix": "myaksclusterxxx-dns", + "fqdn": "myaksclusterxxx-xxxxxxxx.hcp.eastus2.azmk8s.io", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.ContainerService/managedClusters/myAKSClusterxxx", + "location": "eastus2", + "name": "myAKSClusterxxx", + "resourceGroup": "myResourceGroupxxx" +} +``` + +Verify the labels were set: + +```bash +az aks get-credentials --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER_NAME --overwrite-existing +kubectl get nodes --show-labels | grep -e "costcenter=9000" -e "dept=IT" +``` + +## Create a node pool with a label + +You can create an additional node pool with labels for specific scheduling needs. 
+ +```bash +export NODEPOOL_NAME="labelnp" +az aks nodepool add \ + --resource-group $RESOURCE_GROUP \ + --cluster-name $AKS_CLUSTER_NAME \ + --name $NODEPOOL_NAME \ + --node-count 1 \ + --labels dept=HR costcenter=5000 \ +``` + +The following is example output from the [`az aks nodepool list`][az-aks-nodepool-list] command showing the *labelnp* node pool is *Creating* nodes with the specified *nodeLabels*: + +```bash +az aks nodepool list --resource-group $RESOURCE_GROUP --cluster-name $AKS_CLUSTER_NAME +``` + +Results: + + + +```output +[ + { + "count": 2, + "name": "nodepool1", + "nodeLabels": { + "costcenter": "9000", + "dept": "IT" + } + }, + { + "count": 1, + "name": "labelnp", + "nodeLabels": { + "costcenter": "5000", + "dept": "HR" + }, + "provisioningState": "Creating" + } +] +``` + +Verify the labels were set: + +```bash +kubectl get nodes --show-labels | grep -e "costcenter=5000" -e "dept=HR" +``` + +## Updating labels on existing node pools + +You can update the labels on an existing node pool. Note: updating labels will overwrite the old labels. + +```bash +az aks nodepool update \ + --resource-group $RESOURCE_GROUP \ + --cluster-name $AKS_CLUSTER_NAME \ + --name $NODEPOOL_NAME \ + --labels dept=ACCT costcenter=6000 \ +``` + +Verify the new labels are set: + +```bash +kubectl get nodes --show-labels | grep -e "costcenter=6000" -e "dept=ACCT" +``` + +## Unavailable labels + +### Reserved system labels + +Since the [2021-08-19 AKS release][aks-release-2021-gh], AKS stopped the ability to make changes to AKS reserved labels. Attempting to change these labels results in an error message. + +The following labels are AKS reserved labels. *Virtual node usage* specifies if these labels could be a supported system feature on virtual nodes. Some properties that these system features change aren't available on the virtual nodes because they require modifying the host. 
+ +| Label | Value | Example/Options | Virtual node usage | +| ---- | --- | --- | --- | +| kubernetes.azure.com/agentpool | \ | nodepool1 | Same | +| kubernetes.io/arch | amd64 | runtime.GOARCH | N/A | +| kubernetes.io/os | \ | Linux/Windows | Same | +| node.kubernetes.io/instance-type | \ | Standard_NC6s_v3 | Virtual | +| topology.kubernetes.io/region | \ | westus2 | Same | +| topology.kubernetes.io/zone | \ | 0 | Same | +| kubernetes.azure.com/cluster | \ | MC_aks_myAKSCluster_westus2 | Same | +| kubernetes.azure.com/managedby | aks | aks | N/A | +| kubernetes.azure.com/mode | \ | User or system | User | +| kubernetes.azure.com/role | agent | Agent | Same | +| kubernetes.azure.com/scalesetpriority | \ | Spot or regular | N/A | +| kubernetes.io/hostname | \ | aks-nodepool-00000000-vmss000000 | Same | +| kubernetes.azure.com/storageprofile | \ | Managed | N/A | +| kubernetes.azure.com/storagetier | \ | Premium_LRS | N/A | +| kubernetes.azure.com/instance-sku | \ | Standard_N | Virtual | +| kubernetes.azure.com/node-image-version | \ | AKSUbuntu-1804-2020.03.05 | Virtual node version | +| kubernetes.azure.com/subnet | \ | subnetName | Virtual node subnet name | +| kubernetes.azure.com/vnet | \ | vnetName | Virtual node virtual network | +| kubernetes.azure.com/ppg | \ | ppgName | N/A | +| kubernetes.azure.com/encrypted-set | \ | encrypted-set-name | N/A | +| kubernetes.azure.com/accelerator | \ | nvidia | N/A | +| kubernetes.azure.com/fips_enabled | \ | true | N/A | +| kubernetes.azure.com/os-sku | \ | [Create or update OS SKU][create-or-update-os-sku] | Linux | + +* *Same* is included in places where the expected values for the labels don't differ between a standard node pool and a virtual node pool. As virtual node pods don't expose any underlying virtual machine (VM), the VM SKU values are replaced with the SKU *Virtual*. +* *Virtual node version* refers to the current version of the [virtual Kubelet-ACI connector release][virtual-kubelet-release]. +* *Virtual node subnet name* is the name of the subnet where virtual node pods are deployed into Azure Container Instance (ACI). +* *Virtual node virtual network* is the name of the virtual network, which contains the subnet where virtual node pods are deployed on ACI. + +### Reserved prefixes + +The following prefixes are AKS reserved prefixes and can't be used for any node: + +* kubernetes.azure.com/ +* kubernetes.io/ + +For more information on reserved prefixes, see [Kubernetes well-known labels, annotations, and taints][kubernetes-well-known-labels]. + +### Deprecated labels + +The following labels are planned for deprecation with the release of [Kubernetes v1.24][aks-release-calendar]. You should change any label references to the recommended substitute. 
+ +| Label | Recommended substitute | Maintainer | +| --- | --- | --- | +| failure-domain.beta.kubernetes.io/region | topology.kubernetes.io/region | [Kubernetes][kubernetes-labels] +| failure-domain.beta.kubernetes.io/zone | topology.kubernetes.io/zone | [Kubernetes][kubernetes-labels] +| beta.kubernetes.io/arch | kubernetes.io/arch | [Kubernetes][kubernetes-labels] +| beta.kubernetes.io/instance-type | node.kubernetes.io/instance-type | [Kubernetes][kubernetes-labels] +| beta.kubernetes.io/os | kubernetes.io/os | [Kubernetes][kubernetes-labels] +| node-role.kubernetes.io/agent* | kubernetes.azure.com/role=agent | Azure Kubernetes Service +| kubernetes.io/role* | kubernetes.azure.com/role=agent | Azure Kubernetes Service +| Agentpool* | kubernetes.azure.com/agentpool | Azure Kubernetes Service +| Storageprofile* | kubernetes.azure.com/storageprofile | Azure Kubernetes Service +| Storagetier* | kubernetes.azure.com/storagetier | Azure Kubernetes Service +| Accelerator* | kubernetes.azure.com/accelerator | Azure Kubernetes Service + +*Newly deprecated. For more information, see the [Release Notes][aks-release-notes-gh]. + +## Next steps + +Learn more about Kubernetes labels in the [Kubernetes labels documentation][kubernetes-labels]. + + +[aks-release-2021-gh]: https://github.com/Azure/AKS/releases/tag/2021-08-19 +[aks-release-notes-gh]: https://github.com/Azure/AKS/releases +[kubernetes-labels]: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ +[kubernetes-label-syntax]: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set +[kubernetes-well-known-labels]: https://kubernetes.io/docs/reference/labels-annotations-taints/ +[virtual-kubelet-release]: https://github.com/virtual-kubelet/azure-aci/releases + + +[aks-release-calendar]: ./supported-kubernetes-versions.md#aks-kubernetes-release-calendar +[az-aks-create]: /cli/azure/aks#az-aks-create +[az-aks-nodepool-add]: /cli/azure/aks#az-aks-nodepool-add +[az-aks-nodepool-list]: /cli/azure/aks/nodepool#az-aks-nodepool-list +[az-aks-nodepool-update]: /cli/azure/aks/nodepool#az-aks-nodepool-update +[create-or-update-os-sku]: /rest/api/aks/agent-pools/create-or-update#ossku +[install-azure-cli]: /cli/azure/install-azure-cli \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/workload-identity-deploy-cluster.md b/scenarios/azure-aks-docs/articles/aks/workload-identity-deploy-cluster.md new file mode 100644 index 000000000..df6635ce0 --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/workload-identity-deploy-cluster.md @@ -0,0 +1,398 @@ +--- +title: Deploy and configure an AKS cluster with workload identity +description: In this Azure Kubernetes Service (AKS) article, you deploy an Azure Kubernetes Service cluster and configure it with a Microsoft Entra Workload ID. +author: tamram +ms.topic: how-to +ms.subservice: aks-security +ms.custom: devx-track-azurecli, innovation-engine +ms.date: 05/28/2024 +ms.author: tamram +--- + +# Deploy and configure workload identity on an Azure Kubernetes Service (AKS) cluster + +Azure Kubernetes Service (AKS) is a managed Kubernetes service that lets you quickly deploy and manage Kubernetes clusters. This article shows you how to: + +* Deploy an AKS cluster using the Azure CLI with the OpenID Connect issuer and a Microsoft Entra Workload ID. +* Create a Microsoft Entra Workload ID and Kubernetes service account. +* Configure the managed identity for token federation. 
+* Deploy the workload and verify authentication with the workload identity. +* Optionally grant a pod in the cluster access to secrets in an Azure key vault. + +This article assumes you have a basic understanding of Kubernetes concepts. For more information, see [Kubernetes core concepts for Azure Kubernetes Service (AKS)][kubernetes-concepts]. If you aren't familiar with Microsoft Entra Workload ID, see the following [Overview][workload-identity-overview] article. + +## Prerequisites + +* [!INCLUDE [quickstarts-free-trial-note](~/reusable-content/ce-skilling/azure/includes/quickstarts-free-trial-note.md)] +* This article requires version 2.47.0 or later of the Azure CLI. If using Azure Cloud Shell, the latest version is already installed. +* Make sure that the identity that you're using to create your cluster has the appropriate minimum permissions. For more information about access and identity for AKS, see [Access and identity options for Azure Kubernetes Service (AKS)][aks-identity-concepts]. +* If you have multiple Azure subscriptions, select the appropriate subscription ID in which the resources should be billed using the [az account set][az-account-set] command. + +> [!NOTE] +> You can use _Service Connector_ to help you configure some steps automatically. See also: [Tutorial: Connect to Azure storage account in Azure Kubernetes Service (AKS) with Service Connector using workload identity][tutorial-python-aks-storage-workload-identity]. + +## Create a resource group + +An [Azure resource group][azure-resource-group] is a logical group in which Azure resources are deployed and managed. When you create a resource group, you're prompted to specify a location. This location is the storage location of your resource group metadata and where your resources run in Azure if you don't specify another region during resource creation. + +Create a resource group by calling the [az group create][az-group-create] command: + +```azurecli-interactive +export RANDOM_ID="$(openssl rand -hex 3)" +export RESOURCE_GROUP="myResourceGroup$RANDOM_ID" +export REGION="centralindia" +az group create --name "${RESOURCE_GROUP}" --location "${REGION}" +``` + +The following output example shows successful creation of a resource group: + +Results: + +```json +{ + "id": "/subscriptions//resourceGroups/myResourceGroup", + "location": "eastus", + "managedBy": null, + "name": "myResourceGroup", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Create an AKS cluster + +Create an AKS cluster using the [az aks create][az-aks-create] command with the `--enable-oidc-issuer` parameter to enable the OIDC issuer. The following example creates a cluster with a single node: + +```azurecli-interactive +export CLUSTER_NAME="myAKSCluster$RANDOM_ID" +az aks create \ + --resource-group "${RESOURCE_GROUP}" \ + --name "${CLUSTER_NAME}" \ + --enable-oidc-issuer \ + --enable-workload-identity \ + --generate-ssh-keys +``` + +After a few minutes, the command completes and returns JSON-formatted information about the cluster. + +> [!NOTE] +> When you create an AKS cluster, a second resource group is automatically created to store the AKS resources. For more information, see [Why are two resource groups created with AKS?][aks-two-resource-groups]. 
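
Optionally, you can confirm that both features were enabled on the new cluster before continuing. This is a quick sanity check; the `oidcIssuerProfile.enabled` and `securityProfile.workloadIdentity.enabled` property paths are assumptions based on the current managed cluster schema and may vary across CLI and API versions.

```azurecli-interactive
# Verify that the OIDC issuer and workload identity are enabled (property paths assumed; inspect the full JSON if the query returns empty values)
az aks show \
    --resource-group "${RESOURCE_GROUP}" \
    --name "${CLUSTER_NAME}" \
    --query "{oidcIssuerEnabled: oidcIssuerProfile.enabled, workloadIdentityEnabled: securityProfile.workloadIdentity.enabled}" \
    --output table
```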
+ +## Update an existing AKS cluster + +You can update an AKS cluster to use the OIDC issuer and enable workload identity by calling the [az aks update][az aks update] command with the `--enable-oidc-issuer` and the `--enable-workload-identity` parameters. + +## Retrieve the OIDC issuer URL + +To get the OIDC issuer URL and save it to an environmental variable, run the following command: + +```azurecli-interactive +export AKS_OIDC_ISSUER="$(az aks show --name "${CLUSTER_NAME}" \ + --resource-group "${RESOURCE_GROUP}" \ + --query "oidcIssuerProfile.issuerUrl" \ + --output tsv)" +``` + +The environment variable should contain the issuer URL, similar to the following example: + +```output +https://eastus.oic.prod-aks.azure.com/00000000-0000-0000-0000-000000000000/11111111-1111-1111-1111-111111111111/ +``` + +By default, the issuer is set to use the base URL `https://{region}.oic.prod-aks.azure.com/{tenant_id}/{uuid}`, where the value for `{region}` matches the location to which the AKS cluster is deployed. The value `{uuid}` represents the OIDC key, which is a randomly generated guid for each cluster that is immutable. + +## Create a managed identity + +Call the [az identity create][az-identity-create] command to create a managed identity. + +```azurecli-interactive +export SUBSCRIPTION="$(az account show --query id --output tsv)" +export USER_ASSIGNED_IDENTITY_NAME="myIdentity$RANDOM_ID" +az identity create \ + --name "${USER_ASSIGNED_IDENTITY_NAME}" \ + --resource-group "${RESOURCE_GROUP}" \ + --location "${REGION}" \ + --subscription "${SUBSCRIPTION}" +``` + +The following output example shows successful creation of a managed identity: + +Results: + +```output +{ + "clientId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourcegroups/myResourceGroupxxxxxx/providers/Microsoft.ManagedIdentity/userAssignedIdentities/myIdentityxxxxxx", + "location": "centralindia", + "name": "myIdentityxxxxxx", + "principalId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "resourceGroup": "myResourceGroupxxxxxx", + "systemData": null, + "tags": {}, + "tenantId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "type": "Microsoft.ManagedIdentity/userAssignedIdentities" +} +``` + +Next, create a variable for the managed identity's client ID. + +```azurecli-interactive +export USER_ASSIGNED_CLIENT_ID="$(az identity show \ + --resource-group "${RESOURCE_GROUP}" \ + --name "${USER_ASSIGNED_IDENTITY_NAME}" \ + --query 'clientId' \ + --output tsv)" +``` + +## Create a Kubernetes service account + +Create a Kubernetes service account and annotate it with the client ID of the managed identity created in the previous step. Use the [az aks get-credentials][az-aks-get-credentials] command and replace the values for the cluster name and the resource group name. + +```azurecli-interactive +az aks get-credentials --name "${CLUSTER_NAME}" --resource-group "${RESOURCE_GROUP}" +``` + +Copy and paste the following multi-line input in the Azure CLI. + +```azurecli-interactive +export SERVICE_ACCOUNT_NAMESPACE="default" +export SERVICE_ACCOUNT_NAME="workload-identity-sa$RANDOM_ID" +cat < [!NOTE] +> It takes a few seconds for the federated identity credential to propagate after it is added. If a token request is made immediately after adding the federated identity credential, the request might fail until the cache is refreshed. To avoid this issue, you can add a slight delay after adding the federated identity credential. 
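
Before deploying your application, you can optionally list the federated identity credentials attached to the managed identity to confirm that the trust relationship exists. This sketch relies only on the variables defined earlier in this article.

```azurecli-interactive
# List the federated identity credentials configured on the user-assigned managed identity
az identity federated-credential list \
    --identity-name "${USER_ASSIGNED_IDENTITY_NAME}" \
    --resource-group "${RESOURCE_GROUP}" \
    --output table
```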
+ +## Deploy your application + +When you deploy your application pods, the manifest should reference the service account created in the **Create Kubernetes service account** step. The following manifest shows how to reference the account, specifically the _metadata\namespace_ and _spec\serviceAccountName_ properties. Make sure to specify an image for `` and a container name for ``: + +```bash +cat < [!IMPORTANT] +> Ensure that the application pods using workload identity include the label `azure.workload.identity/use: "true"` in the pod spec. Otherwise the pods will fail after they are restarted. + +## Grant permissions to access Azure Key Vault + +The instructions in this step show how to access secrets, keys, or certificates in an Azure key vault from the pod. The examples in this section configure access to secrets in the key vault for the workload identity, but you can perform similar steps to configure access to keys or certificates. + +The following example shows how to use the Azure role-based access control (Azure RBAC) permission model to grant the pod access to the key vault. For more information about the Azure RBAC permission model for Azure Key Vault, see [Grant permission to applications to access an Azure key vault using Azure RBAC](/azure/key-vault/general/rbac-guide). + +1. Create a key vault with purge protection and RBAC authorization enabled. You can also use an existing key vault if it is configured for both purge protection and RBAC authorization: + + ```azurecli-interactive + export KEYVAULT_NAME="keyvault-workload-id$RANDOM_ID" + # Ensure the key vault name is between 3-24 characters + if [ ${#KEYVAULT_NAME} -gt 24 ]; then + KEYVAULT_NAME="${KEYVAULT_NAME:0:24}" + fi + az keyvault create \ + --name "${KEYVAULT_NAME}" \ + --resource-group "${RESOURCE_GROUP}" \ + --location "${REGION}" \ + --enable-purge-protection \ + --enable-rbac-authorization + ``` + +1. Assign yourself the RBAC [Key Vault Secrets Officer](/azure/role-based-access-control/built-in-roles/security#key-vault-secrets-officer) role so that you can create a secret in the new key vault: + + ```azurecli-interactive + export KEYVAULT_RESOURCE_ID=$(az keyvault show --resource-group "${KEYVAULT_RESOURCE_GROUP}" \ + --name "${KEYVAULT_NAME}" \ + --query id \ + --output tsv) + + export CALLER_OBJECT_ID=$(az ad signed-in-user show --query id -o tsv) + + az role assignment create --assignee "${CALLER_OBJECT_ID}" \ + --role "Key Vault Secrets Officer" \ + --scope "${KEYVAULT_RESOURCE_ID}" + ``` + +1. Create a secret in the key vault: + + ```azurecli-interactive + export KEYVAULT_SECRET_NAME="my-secret$RANDOM_ID" + az keyvault secret set \ + --vault-name "${KEYVAULT_NAME}" \ + --name "${KEYVAULT_SECRET_NAME}" \ + --value "Hello\!" + ``` + +1. Assign the [Key Vault Secrets User](/azure/role-based-access-control/built-in-roles/security#key-vault-secrets-user) role to the user-assigned managed identity that you created previously. This step gives the managed identity permission to read secrets from the key vault: + + ```azurecli-interactive + export IDENTITY_PRINCIPAL_ID=$(az identity show \ + --name "${USER_ASSIGNED_IDENTITY_NAME}" \ + --resource-group "${RESOURCE_GROUP}" \ + --query principalId \ + --output tsv) + + az role assignment create \ + --assignee-object-id "${IDENTITY_PRINCIPAL_ID}" \ + --role "Key Vault Secrets User" \ + --scope "${KEYVAULT_RESOURCE_ID}" \ + --assignee-principal-type ServicePrincipal + ``` + +1. 
Create an environment variable for the key vault URL: + + ```azurecli-interactive + export KEYVAULT_URL="$(az keyvault show \ + --resource-group ${RESOURCE_GROUP} \ + --name ${KEYVAULT_NAME} \ + --query properties.vaultUri \ + --output tsv)" + ``` + +1. Deploy a pod that references the service account and key vault URL: + + ```bash + kubectl apply -f - < [!IMPORTANT] +> Azure RBAC role assignments can take up to ten minutes to propagate. If the pod is unable to access the secret, you may need to wait for the role assignment to propagate. For more information, see [Troubleshoot Azure RBAC](/azure/role-based-access-control/troubleshooting#). + +## Disable workload identity + +To disable the Microsoft Entra Workload ID on the AKS cluster where it's been enabled and configured, update the AKS cluster by setting the `--disable-workload-identity` parameter using the `az aks update` command. + +## Next steps + +In this article, you deployed a Kubernetes cluster and configured it to use a workload identity in preparation for application workloads to authenticate with that credential. Now you're ready to deploy your application and configure it to use the workload identity with the latest version of the [Azure Identity][azure-identity-libraries] client library. If you can't rewrite your application to use the latest client library version, you can [set up your application pod][workload-identity-migration] to authenticate using managed identity with workload identity as a short-term migration solution. + +The [Service Connector](/azure/service-connector/overview) integration helps simplify the connection configuration for AKS workloads and Azure backing services. It securely handles authentication and network configurations and follows best practices for connecting to Azure services. For more information, see [Connect to Azure OpenAI Service in AKS using Workload Identity](/azure/service-connector/tutorial-python-aks-openai-workload-identity) and the [Service Connector introduction](https://azure.github.io/AKS/2024/05/23/service-connector-intro). 
+ + +[kubectl-describe]: https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#describe + + +[kubernetes-concepts]: concepts-clusters-workloads.md +[workload-identity-overview]: workload-identity-overview.md +[azure-resource-group]: /azure/azure-resource-manager/management/overview +[az-group-create]: /cli/azure/group#az-group-create +[aks-identity-concepts]: concepts-identity.md +[federated-identity-credential]: /graph/api/resources/federatedidentitycredentials-overview +[tutorial-python-aks-storage-workload-identity]: /azure/service-connector/tutorial-python-aks-storage-workload-identity +[az-aks-create]: /cli/azure/aks#az-aks-create +[az aks update]: /cli/azure/aks#az-aks-update +[aks-two-resource-groups]: faq.yml +[az-account-set]: /cli/azure/account#az-account-set +[az-identity-create]: /cli/azure/identity#az-identity-create +[az-aks-get-credentials]: /cli/azure/aks#az-aks-get-credentials +[az-identity-federated-credential-create]: /cli/azure/identity/federated-credential#az-identity-federated-credential-create +[workload-identity-migration]: workload-identity-migrate-from-pod-identity.md +[azure-identity-libraries]: /azure/active-directory/develop/reference-v2-libraries \ No newline at end of file diff --git a/scenarios/azure-aks-docs/articles/aks/workload-identity-migrate-from-pod-identity.md b/scenarios/azure-aks-docs/articles/aks/workload-identity-migrate-from-pod-identity.md new file mode 100644 index 000000000..43c5fd88e --- /dev/null +++ b/scenarios/azure-aks-docs/articles/aks/workload-identity-migrate-from-pod-identity.md @@ -0,0 +1,181 @@ +--- +title: Migrate your Azure Kubernetes Service (AKS) pod to use workload identity +description: In this Azure Kubernetes Service (AKS) article, you learn how to configure your Azure Kubernetes Service pod to authenticate with workload identity. +ms.topic: how-to +ms.subservice: aks-security +ms.custom: devx-track-azurecli, innovation-engine +ms.date: 07/31/2023 +author: nickomang +ms.author: nickoman +--- + +# Migrate from pod managed-identity to workload identity + +## Create resource group +Set your subscription to be the current active subscription using the `az account set` command. Then, create a random suffix to ensure unique resource names. + +```bash +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export RESOURCE_GROUP_NAME="myResourceGroup$RANDOM_SUFFIX" +export LOCATION="WestUS2" +az group create --name "$RESOURCE_GROUP_NAME" --location "$LOCATION" +``` + +Results: + + + +```json +{ + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx", + "location": "", + "managedBy": null, + "name": "myResourceGroupxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Create a managed identity. + +```bash +export IDENTITY_NAME="userAssignedIdentity$RANDOM_SUFFIX" +az identity create --name "$IDENTITY_NAME" --resource-group "$RESOURCE_GROUP_NAME" --location "$LOCATION" +``` + +Results: + + + +```json +{ + "clientId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.ManagedIdentity/userAssignedIdentities/userAssignedIdentityxxx", + "location": "", + "name": "userAssignedIdentityxxx", + "principalId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "resourceGroup": "myResourceGroupxxx", + "tags": {}, + "type": "Microsoft.ManagedIdentity/userAssignedIdentities" +} +``` + +## Get Client ID + +Save the client ID of the managed identity to an environment variable. 
+ +```bash +export USER_ASSIGNED_CLIENT_ID="$(az identity show --resource-group "$RESOURCE_GROUP_NAME" --name "$IDENTITY_NAME" --query 'clientId' -o tsv)" +``` + +## Save OIDC Issuer URL +Get the OIDC Issuer URL and save it to an environment variable.By default, the Issuer is set to use the base URL `https://{region}.oic.prod-aks.azure.com/{uuid}`, where the value for `{region}` matches the location the AKS cluster is deployed in. The value `{uuid}` represents the OIDC key. + +```bash +export AKS_CLUSTER_NAME=$MY_AKS_CLUSTER_NAME +export AKS_RESOURCE_GROUP=$MY_AKS_RESOURCE_GROUP +export AKS_OIDC_ISSUER="$(az aks show --name "$AKS_CLUSTER_NAME" --resource-group "$AKS_RESOURCE_GROUP" --query "oidcIssuerProfile.issuerUrl" -o tsv)" +``` + +## Load credentials + +Get the Kubernetes credentials for your cluster. + +```bash +az aks get-credentials --name "$AKS_CLUSTER_NAME" --resource-group "$AKS_RESOURCE_GROUP" +``` + +## Create Namespace + +Create a namespace. + +```bash +export SERVICE_ACCOUNT_NAMESPACE="mynamespace$RANDOM_SUFFIX" +kubectl create namespace "$SERVICE_ACCOUNT_NAMESPACE" +``` + +## Create Service Account +Create the service account and annotate it with the client ID of the managed identity. + +```bash +export SERVICE_ACCOUNT_NAME="myserviceaccount$RANDOM_SUFFIX" +kubectl create serviceaccount "$SERVICE_ACCOUNT_NAME" -n "$SERVICE_ACCOUNT_NAMESPACE" +kubectl annotate serviceaccount "$SERVICE_ACCOUNT_NAME" --namespace "$SERVICE_ACCOUNT_NAMESPACE" azure.workload.identity/client-id="$USER_ASSIGNED_CLIENT_ID" +``` + +## Establish federated identity credential trust + +Establish a federated identity credential between the managed identity, the service account issuer, and the subject. + +```bash +export FEDERATED_CREDENTIAL_NAME="myFederatedCredentialName$RANDOM_SUFFIX" +az identity federated-credential create --name "$FEDERATED_CREDENTIAL_NAME" --identity-name "$IDENTITY_NAME" --resource-group "$RESOURCE_GROUP_NAME" --issuer "$AKS_OIDC_ISSUER" --subject "system:serviceaccount:$SERVICE_ACCOUNT_NAMESPACE:$SERVICE_ACCOUNT_NAME" --audience "api://AzureADTokenExchange" +``` + +## Deploy the workload with migration sidecar + +```bash +export POD_NAME="httpbin-pod" + +cat < pod.yaml +apiVersion: v1 +kind: Pod +metadata: + name: $POD_NAME + namespace: $SERVICE_ACCOUNT_NAMESPACE + labels: + app: httpbin + annotations: + azure.workload.identity/inject-proxy-sidecar: "true" + azure.workload.identity/proxy-sidecar-port: "8000" +spec: + serviceAccountName: $SERVICE_ACCOUNT_NAME + containers: + - name: httpbin + image: docker.io/kennethreitz/httpbin + env: + - name: IDENTITY_ENDPOINT + value: "http://localhost:8000/metadata/identity/oauth2/token" + - name: IDENTITY_HEADER + value: "true" + - name: IMDS_ENDPOINT + value: "http://169.254.169.254" +EOF +kubectl apply -f pod.yaml +kubectl wait --for=condition=Ready pod/httpbin-pod -n "$SERVICE_ACCOUNT_NAMESPACE" --timeout=120s +kubectl describe pods $POD_NAME -n "$SERVICE_ACCOUNT_NAMESPACE" +kubectl logs $POD_NAME -n "$SERVICE_ACCOUNT_NAMESPACE" +``` + +## Remove pod-managed identity + +After you've completed your testing and the application is successfully able to get a token using the proxy sidecar, you can remove the Microsoft Entra pod-managed identity mapping for the pod from your cluster, and then remove the identity. + +```bash +az aks pod-identity delete $IDENTITY_NAME +``` + +## Next steps + +This article showed you how to set up your pod to authenticate using a workload identity as a migration option. 
For more information about Microsoft Entra Workload ID, see the [Overview][workload-identity-overview] article. + + +[pod-annotations]: workload-identity-overview.md#pod-annotations +[az-identity-create]: /cli/azure/identity#az-identity-create +[az-account-set]: /cli/azure/account#az-account-set +[az-aks-get-credentials]: /cli/azure/aks#az-aks-get-credentials +[workload-identity-overview]: workload-identity-overview.md +[az-identity-federated-credential-create]: /cli/azure/identity/federated-credential#az-identity-federated-credential-create +[az-aks-pod-identity-delete]: /cli/azure/aks/pod-identity#az-aks-pod-identity-delete +[azure-identity-supported-versions]: workload-identity-overview.md#dependencies +[azure-identity-libraries]: ../active-directory/develop/reference-v2-libraries.md +[openid-connect-overview]: /azure/active-directory/develop/v2-protocols-oidc +[install-azure-cli]: /cli/azure/install-azure-cli +[assign-rbac-managed-identity]: /azure/role-based-access-control/role-assignments-portal-managed-identity + + +[kubectl-describe]: https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#describe +[kubelet-logs]: https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#logs \ No newline at end of file diff --git a/scenarios/azure-compute-docs/articles/container-instances/container-instances-vnet.md b/scenarios/azure-compute-docs/articles/container-instances/container-instances-vnet.md new file mode 100644 index 000000000..ec4f35ee9 --- /dev/null +++ b/scenarios/azure-compute-docs/articles/container-instances/container-instances-vnet.md @@ -0,0 +1,410 @@ +--- +title: Deploy container group to Azure virtual network +description: Learn how to deploy a container group to a new or existing Azure virtual network via the Azure CLI. +ms.topic: how-to +ms.author: tomcassidy +author: tomvcassidy +ms.service: azure-container-instances +services: container-instances +ms.date: 09/09/2024 +ms.custom: devx-track-azurecli, innovation-engine +--- + +# Deploy container instances into an Azure virtual network + +[Azure Virtual Network](/azure/virtual-network/virtual-networks-overview) provides secure, private networking for your Azure and on-premises resources. By deploying container groups into an Azure virtual network, your containers can communicate securely with other resources in the virtual network. + +This article shows how to use the [az container create][az-container-create] command in the Azure CLI to deploy container groups to either a new virtual network or an existing virtual network. + +> [!IMPORTANT] +> * Subnets must be delegated before using a virtual network +> * Before deploying container groups in virtual networks, we suggest checking the limitation first. For networking scenarios and limitations, see [Virtual network scenarios and resources for Azure Container Instances](container-instances-virtual-network-concepts.md). +> * Container group deployment to a virtual network is generally available for Linux and Windows containers, in most regions where Azure Container Instances is available. For details, see [available-regions][available-regions]. + +[!INCLUDE [network profile callout](./includes/network-profile-callout.md)] + +Examples in this article are formatted for the Bash shell. If you prefer another shell such as PowerShell or Command Prompt, adjust the line continuation characters accordingly. 
+ +## Prerequisites + +### Define environment variables + +The automated deployment pathway uses the following environment variables and resource names throughout this guide. Users proceeding through the guide manually can use their own variables and names as preferred. + +```azurecli-interactive +export RANDOM_ID="$(openssl rand -hex 3)" +export MY_RESOURCE_GROUP_NAME="myACIResourceGroup$RANDOM_ID" +export MY_VNET_NAME="aci-vnet" +export MY_SUBNET_NAME="aci-subnet" +export MY_SUBNET_ID="/subscriptions/$(az account show --query id --output tsv)/resourceGroups/$MY_RESOURCE_GROUP_NAME/providers/Microsoft.Network/virtualNetworks/$MY_VNET_NAME/subnets/$MY_SUBNET_NAME" +export MY_APP_CONTAINER_NAME="appcontainer" +export MY_COMM_CHECKER_NAME="commchecker" +export MY_YAML_APP_CONTAINER_NAME="appcontaineryaml" +export MY_REGION="eastus2" +``` + +### Create a resource group + +You need a resource group to manage all the resources used in the following examples. To create a resource group, use [az group create][az-group-create]: + +```azurecli-interactive +az group create --name $MY_RESOURCE_GROUP_NAME --location $MY_REGION +``` + +A successful operation should produce output similar to the following JSON: + +Results: + + + +```json +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxx/resourceGroups/myACIResourceGroup123abc", + "location": "abcdef", + "managedBy": null, + "name": "myACIResourceGroup123", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Deploy to new virtual network + +> [!NOTE] +> If you are using subnet IP range /29 to have only 3 IP addresses. we recommend always to go one range above (never below). For example, use subnet IP range /28 so you can have at least 1 or more IP buffer per container group. By doing this, you can avoid containers in stuck, not able to start, restart or even not able to stop states. + +To deploy to a new virtual network and have Azure create the network resources for you automatically, specify the following when you execute [az container create][az-container-create]: + +* Virtual network name +* Virtual network address prefix in CIDR format +* Subnet name +* Subnet address prefix in CIDR format + +The virtual network and subnet address prefixes specify the address spaces for the virtual network and subnet, respectively. These values are represented in Classless Inter-Domain Routing (CIDR) notation, for example `10.0.0.0/16`. For more information about working with subnets, see [Add, change, or delete a virtual network subnet](/azure/virtual-network/virtual-network-manage-subnet). + +Once you deploy your first container group with this method, you can deploy to the same subnet by specifying the virtual network and subnet names, or the network profile that Azure automatically creates for you. Because Azure delegates the subnet to Azure Container Instances, you can deploy *only* container groups to the subnet. + +### Example + +The following [az container create][az-container-create] command specifies settings for a new virtual network and subnet. Provide the name of a resource group that was created in a region where container group deployments in a virtual network are [available](container-instances-region-availability.md). This command deploys the public Microsoft aci-helloworld container that runs a small Node.js webserver serving a static web page. 
In the next section, you'll deploy a second container group to the same subnet, and test communication between the two container instances. + +```azurecli-interactive +az container create \ + --name $MY_APP_CONTAINER_NAME \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --image mcr.microsoft.com/azuredocs/aci-helloworld \ + --vnet $MY_VNET_NAME \ + --vnet-address-prefix 10.0.0.0/16 \ + --subnet $MY_SUBNET_NAME \ + --subnet-address-prefix 10.0.0.0/24 \ + --os-type Linux \ + --cpu 1.0 \ + --memory 1.5 +``` + +A successful operation should produce output similar to the following JSON: + +Results: + + + +```json +{ + "confidentialComputeProperties": null, + "containers": [ + { + "command": null, + "environmentVariables": [], + "image": "mcr.microsoft.com/azuredocs/aci-helloworld", + "instanceView": { + "currentState": { + "detailStatus": "", + "exitCode": null, + "finishTime": null, + "startTime": "0000-00-00T00:00:00.000000+00:00", + "state": "Running" + }, + "events": [ + { + "count": 1, + "firstTimestamp": "0000-00-00T00:00:00+00:00", + "lastTimestamp": "0000-00-00T00:00:00+00:00", + "message": "Successfully pulled image \"mcr.microsoft.com/azuredocs/aci-helloworld@sha256:0000000000000000000000000000000000000000000000000000000000000000\"", + "name": "Pulled", + "type": "Normal" + }, + { + "count": 1, + "firstTimestamp": "0000-00-00T00:00:00+00:00", + "lastTimestamp": "0000-00-00T00:00:00+00:00", + "message": "pulling image \"mcr.microsoft.com/azuredocs/aci-helloworld@sha256:0000000000000000000000000000000000000000000000000000000000000000\"", + "name": "Pulling", + "type": "Normal" + }, + { + "count": 1, + "firstTimestamp": "0000-00-00T00:00:00+00:00", + "lastTimestamp": "0000-00-00T00:00:00+00:00", + "message": "Started container", + "name": "Started", + "type": "Normal" + } + ], + "previousState": null, + "restartCount": 0 + }, + "livenessProbe": null, + "name": "appcontainer", + "ports": [ + { + "port": 80, + "protocol": "TCP" + } + ], + "readinessProbe": null, + "resources": { + "limits": null, + "requests": { + "cpu": 1.0, + "gpu": null, + "memoryInGb": 1.5 + } + }, + "securityContext": null, + "volumeMounts": null + } + ], + "diagnostics": null, + "dnsConfig": null, + "encryptionProperties": null, + "extensions": null, + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxx/resourceGroups/myACIResourceGroup123/providers/Microsoft.ContainerInstance/containerGroups/appcontainer", + "identity": null, + "imageRegistryCredentials": null, + "initContainers": [], + "instanceView": { + "events": [], + "state": "Running" + }, + "ipAddress": { + "autoGeneratedDomainNameLabelScope": null, + "dnsNameLabel": null, + "fqdn": null, + "ip": "10.0.0.4", + "ports": [ + { + "port": 80, + "protocol": "TCP" + } + ], + "type": "Private" + }, + "location": "eastus", + "name": "appcontainer", + "osType": "Linux", + "priority": null, + "provisioningState": "Succeeded", + "resourceGroup": "myACIResourceGroup123abc", + "restartPolicy": "Always", + "sku": "Standard", + "subnetIds": [ + { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxx/resourceGroups/myACIResourceGroup123/providers/Microsoft.Network/virtualNetworks/aci-vnet/subnets/aci-subnet", + "name": null, + "resourceGroup": "myACIResourceGroup123abc" + } + ], + "tags": {}, + "type": "Microsoft.ContainerInstance/containerGroups", + "volumes": null, + "zones": null +} +``` + +When you deploy to a new virtual network by using this method, the deployment can take a few minutes while the network resources are created. 
After the initial deployment, further container group deployments to the same subnet complete more quickly. + +## Deploy to existing virtual network + +To deploy a container group to an existing virtual network: + +1. Create a subnet within your existing virtual network, use an existing subnet in which a container group is already deployed, or use an existing subnet emptied of *all* other resources and configuration. The subnet that you use for container groups can contain only container groups. Before you deploy a container group to a subnet, you must explicitly delegate the subnet before provisioning. Once delegated, the subnet can be used only for container groups. If you attempt to deploy resources other than container groups to a delegated subnet, the operation fails. +1. Deploy a container group with [az container create][az-container-create] and specify one of the following: + * Virtual network name and subnet name + * Virtual network resource ID and subnet resource ID, which allows using a virtual network from a different resource group + +### Deploy using a YAML file + +You can also deploy a container group to an existing virtual network by using a YAML file, a [Resource Manager template](https://github.com/Azure/azure-quickstart-templates/tree/master/quickstarts/microsoft.containerinstance/aci-vnet), or another programmatic method such as with the Python SDK. + +For example, when using a YAML file, you can deploy to a virtual network with a subnet delegated to Azure Container Instances. Specify the following properties: + +* `ipAddress`: The private IP address settings for the container group. + * `ports`: The ports to open, if any. + * `protocol`: The protocol (TCP or UDP) for the opened port. +* `subnetIds`: The resource IDs of the subnets to be deployed to + * `id`: The resource ID of the subnet + * `name`: The name of the subnet + +This YAML creates a container group in your virtual network. Enter your container group name in the name fields and your subnet ID in the subnet ID field. We use *appcontaineryaml* for the name. If you need to find your subnet ID and no longer have access to previous outputs, you can use the [az container show][az-container-show] command to view it. Look for the `id` field under `subnetIds`. + +```YAML +apiVersion: '2021-07-01' +location: eastus +name: appcontaineryaml +properties: + containers: + - name: appcontaineryaml + properties: + image: mcr.microsoft.com/azuredocs/aci-helloworld + ports: + - port: 80 + protocol: TCP + resources: + requests: + cpu: 1.0 + memoryInGB: 1.5 + ipAddress: + type: Private + ports: + - protocol: tcp + port: '80' + osType: Linux + restartPolicy: Always + subnetIds: + - id: + name: default +tags: null +type: Microsoft.ContainerInstance/containerGroups +``` + +The following Bash command is for the automated deployment pathway. 
+ +```bash +echo -e "apiVersion: '2021-07-01'\nlocation: $MY_REGION\nname: $MY_YAML_APP_CONTAINER_NAME\nproperties:\n containers:\n - name: $MY_YAML_APP_CONTAINER_NAME\n properties:\n image: mcr.microsoft.com/azuredocs/aci-helloworld\n ports:\n - port: 80\n protocol: TCP\n resources:\n requests:\n cpu: 1.0\n memoryInGB: 1.5\n ipAddress:\n type: Private\n ports:\n - protocol: tcp\n port: '80'\n osType: Linux\n restartPolicy: Always\n subnetIds:\n - id: $MY_SUBNET_ID\n name: default\ntags: null\ntype: Microsoft.ContainerInstance/containerGroups" > container-instances-vnet.yaml +``` + +Deploy the container group with the [az container create][az-container-create] command, specifying the YAML file name for the `--file` parameter: + +```azurecli-interactive +az container create --resource-group $MY_RESOURCE_GROUP_NAME \ + --file container-instances-vnet.yaml \ + --os-type Linux +``` + +The following Bash command is for the automated deployment pathway. + +```bash +rm container-instances-vnet.yaml +``` + +Once the deployment completes, run the [az container show][az-container-show] command to display its status: + +```azurecli-interactive +az container list --resource-group $MY_RESOURCE_GROUP_NAME --output table +``` + +The output should resemble the sample below: + +Results: + + + +```output +Name ResourceGroup Status Image IP:ports Network CPU/Memory OsType Location +---------------- ------------------------ --------- ------------------------------------------ -------------- --------- --------------- -------- ---------- +appcontainer myACIResourceGroup123abc Succeeded mcr.microsoft.com/azuredocs/aci-helloworld 10.0.0.4:80,80 Private 1.0 core/1.5 gb Linux abcdef +appcontaineryaml myACIResourceGroup123abc Succeeded mcr.microsoft.com/azuredocs/aci-helloworld 10.0.0.5:80,80 Private 1.0 core/1.5 gb Linux abcdef +``` + +### Demonstrate communication between container instances + +The following example deploys a third container group to the same subnet created previously. Using an Alpine Linux image, it verifies communication between itself and the first container instance. + +> [!NOTE] +> Due to rate limiting in effect for pulling public Docker images like the Alpine Linux one used here, you may receive an error in the form: +> +> (RegistryErrorResponse) An error response is received from the docker registry 'index.docker.io'. Please retry later. +> Code: RegistryErrorResponse +> Message: An error response is received from the docker registry 'index.docker.io'. Please retry later. + +The following Bash command is for the automated deployment pathway. + +```bash +echo -e "Due to rate limiting in effect for pulling public Docker images like the Alpine Linux one used here, you may receive an error in the form:\n\n(RegistryErrorResponse) An error response is received from the docker registry 'index.docker.io'. Please retry later.\nCode: RegistryErrorResponse\nMessage: An error response is received from the docker registry 'index.docker.io'. Please retry later.\n\nIf this occurs, the automated deployment will exit. You can try again or go to the end of the guide to see instructions for cleaning up your resources." +``` + +First, get the IP address of the first container group you deployed, the *appcontainer*: + +```azurecli-interactive +az container show --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_APP_CONTAINER_NAME \ + --query ipAddress.ip --output tsv +``` + +The output displays the IP address of the container group in the private subnet. 
For example: + +Results: + + + +```output +10.0.0.4 +``` + +Now, set `CONTAINER_GROUP_IP` to the IP you retrieved with the `az container show` command, and execute the following `az container create` command. This second container, *commchecker*, runs an Alpine Linux-based image and executes `wget` against the first container group's private subnet IP address. + +```azurecli-interactive +az container create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_COMM_CHECKER_NAME \ + --image mcr.microsoft.com/devcontainers/base:alpine \ + --command-line "wget 10.0.0.4" \ + --restart-policy never \ + --vnet $MY_VNET_NAME \ + --subnet $MY_SUBNET_NAME \ + --os-type Linux \ + --cpu 1.0 \ + --memory 1.5 +``` + +After this second container deployment completes, pull its logs so you can see the output of the `wget` command it executed: + +```azurecli-interactive +az container logs --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_COMM_CHECKER_NAME +``` + +If the second container communicated successfully with the first, output is similar to: + +```output +Connecting to 10.0.0.4 (10.0.0.4:80) +index.html 100% |*******************************| 1663 0:00:00 ETA +``` + +The log output should show that `wget` was able to connect and download the index file from the first container using its private IP address on the local subnet. Network traffic between the two container groups remained within the virtual network. + +## Clean up resources + +If you don't plan to continue using these resources, you can delete them to avoid Azure charges. You can clean up all the resources you used in this guide by deleting the resource group with the [az group delete][az-group-delete] command. Once deleted, **these resources are unrecoverable**. + +## Next steps + +* To deploy a new virtual network, subnet, network profile, and container group using a Resource Manager template, see [Create an Azure container group with virtual network](https://github.com/Azure/azure-quickstart-templates/tree/master/quickstarts/microsoft.containerinstance/aci-vnet). + +* To deploy Azure Container Instances that can pull images from an Azure Container Registry through a private endpoint, see [Deploy to Azure Container Instances from Azure Container Registry using a managed identity](../container-instances/using-azure-container-registry-mi.md). 
+ + +[aci-vnet-01]: ./media/container-instances-vnet/aci-vnet-01.png + + +[aci-helloworld]: https://hub.docker.com/_/microsoft-azuredocs-aci-helloworld + + +[az-group-create]: /cli/azure/group#az-group-create +[az-container-create]: /cli/azure/container#az_container_create +[az-container-show]: /cli/azure/container#az_container_show +[az-network-vnet-create]: /cli/azure/network/vnet#az_network_vnet_create +[az-group-delete]: /cli/azure/group#az-group-delete +[available-regions]: https://azure.microsoft.com/explore/global-infrastructure/products-by-region/?products=container-instances \ No newline at end of file diff --git a/scenarios/azure-docs/articles/virtual-machine-scale-sets/.openpublishing.redirection.virtual-machine-scale-sets.json b/scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/.openpublishing.redirection.virtual-machine-scale-sets.json similarity index 100% rename from scenarios/azure-docs/articles/virtual-machine-scale-sets/.openpublishing.redirection.virtual-machine-scale-sets.json rename to scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/.openpublishing.redirection.virtual-machine-scale-sets.json diff --git a/scenarios/azure-docs/articles/virtual-machine-scale-sets/TOC.yml b/scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/TOC.yml similarity index 100% rename from scenarios/azure-docs/articles/virtual-machine-scale-sets/TOC.yml rename to scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/TOC.yml diff --git a/scenarios/azure-docs/articles/virtual-machine-scale-sets/breadcrumb/toc.yml b/scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/breadcrumb/toc.yml similarity index 100% rename from scenarios/azure-docs/articles/virtual-machine-scale-sets/breadcrumb/toc.yml rename to scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/breadcrumb/toc.yml diff --git a/scenarios/azure-docs/articles/virtual-machine-scale-sets/index.yml b/scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/index.yml similarity index 100% rename from scenarios/azure-docs/articles/virtual-machine-scale-sets/index.yml rename to scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/index.yml diff --git a/scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/tutorial-autoscale-cli.md b/scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/tutorial-autoscale-cli.md new file mode 100644 index 000000000..f3cc966c0 --- /dev/null +++ b/scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/tutorial-autoscale-cli.md @@ -0,0 +1,147 @@ +--- +title: Tutorial - Autoscale a scale set with the Azure CLI +description: Learn how to use the Azure CLI to automatically scale a Virtual Machine Scale Set as CPU demands increases and decreases +author: ju-shim +ms.author: jushiman +ms.topic: tutorial +ms.service: azure-virtual-machine-scale-sets +ms.subservice: autoscale +ms.date: 06/14/2024 +ms.reviewer: mimckitt +ms.custom: avverma, devx-track-azurecli, linux-related-content, innovation-engine +--- + +# Tutorial: Automatically scale a Virtual Machine Scale Set with the Azure CLI + +When you create a scale set, you define the number of VM instances that you wish to run. As your application demand changes, you can automatically increase or decrease the number of VM instances. The ability to autoscale lets you keep up with customer demand or respond to application performance changes throughout the lifecycle of your app. 
In this tutorial you learn how to: + +> [!div class="checklist"] +> * Use autoscale with a scale set +> * Create and use autoscale rules +> * Simulate CPU load to trigger autoscale rules +> * Monitor autoscale actions as demand changes + +[!INCLUDE [quickstarts-free-trial-note](~/reusable-content/ce-skilling/azure/includes/quickstarts-free-trial-note.md)] + +[!INCLUDE [azure-cli-prepare-your-environment.md](~/reusable-content/azure-cli/azure-cli-prepare-your-environment.md)] + +- This tutorial requires version 2.0.32 or later of the Azure CLI. If using Azure Cloud Shell, the latest version is already installed. + +## Create a scale set +Create a resource group with [az group create](/cli/azure/group). + +```azurecli-interactive +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export REGION="WestUS2" +export MY_RESOURCE_GROUP_NAME="myResourceGroup$RANDOM_SUFFIX" +az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION +``` + +Now create a Virtual Machine Scale Set with [az vmss create](/cli/azure/vmss). The following example creates a scale set with an instance count of 2, generates SSH keys if they don't exist, and uses a valid image *Ubuntu2204*. + +```azurecli-interactive +export MY_SCALE_SET_NAME="myScaleSet$RANDOM_SUFFIX" +az vmss create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_SCALE_SET_NAME \ + --image Ubuntu2204 \ + --orchestration-mode Flexible \ + --instance-count 2 \ + --admin-username azureuser \ + --generate-ssh-keys +``` + +## Define an autoscale profile +To enable autoscale on a scale set, you first define an autoscale profile. This profile defines the default, minimum, and maximum scale set capacity. These limits let you control cost by not continually creating VM instances, and balance acceptable performance with a minimum number of instances that remain in a scale-in event. Create an autoscale profile with [az monitor autoscale create](/cli/azure/monitor/autoscale#az-monitor-autoscale-create). The following example sets the default and minimum capacity of 2 VM instances, and a maximum of 10: + +```azurecli-interactive +az monitor autoscale create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --resource $MY_SCALE_SET_NAME \ + --resource-type Microsoft.Compute/virtualMachineScaleSets \ + --name autoscale \ + --min-count 2 \ + --max-count 10 \ + --count 2 +``` + +## Create a rule to autoscale out +If your application demand increases, the load on the VM instances in your scale set increases. If this increased load is consistent, rather than just a brief demand, you can configure autoscale rules to increase the number of VM instances. When these instances are created and your application is deployed, the scale set starts to distribute traffic to them through the load balancer. You control which metrics to monitor, how long the load must meet a given threshold, and how many VM instances to add. + +Create a rule with [az monitor autoscale rule create](/cli/azure/monitor/autoscale/rule#az-monitor-autoscale-rule-create) that increases the number of VM instances when the average CPU load is greater than 70% over a 5-minute period. When the rule triggers, the number of VM instances is increased by three. + +```azurecli-interactive +az monitor autoscale rule create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --autoscale-name autoscale \ + --condition "Percentage CPU > 70 avg 5m" \ + --scale out 3 +``` + +## Create a rule to autoscale in +When application demand decreases, the load on the VM instances drops. 
If this decreased load persists over a period of time, you can configure autoscale rules to decrease the number of VM instances in the scale set. This scale-in action helps reduce costs by running only the number of instances required to meet current demand.

Create another rule with [az monitor autoscale rule create](/cli/azure/monitor/autoscale/rule#az-monitor-autoscale-rule-create) that decreases the number of VM instances when the average CPU load drops below 30% over a 5-minute period. The following example scales in the number of VM instances by one.

```azurecli-interactive
az monitor autoscale rule create \
    --resource-group $MY_RESOURCE_GROUP_NAME \
    --autoscale-name autoscale \
    --condition "Percentage CPU < 30 avg 5m" \
    --scale in 1
```

## Simulate CPU load on scale set
To test the autoscale rules, generate sustained CPU load. This minimalist approach uses the built-in `yes` command, so no additional packages need to be installed. The following commands start three background `yes` processes that write continuously to `/dev/null`, let them run for 60 seconds, and then terminate them.

```bash
for i in {1..3}; do
  yes > /dev/null &
done
sleep 60
pkill yes
```

This generates CPU load without requiring any package installation.

## Monitor the active autoscale rules
To monitor the number of VM instances in your scale set, run the following command periodically, or wrap it in `watch` and exit with *Ctrl+C* when you're done. It may take up to 5 minutes for the autoscale rules to begin the scale-out process in response to the CPU load.

Once that happens, the scale set automatically increases the number of VM instances to meet the demand. The following command shows the list of VM instances in the scale set:

```azurecli-interactive
az vmss list-instances \
    --resource-group $MY_RESOURCE_GROUP_NAME \
    --name $MY_SCALE_SET_NAME \
    --output table
```

Once the CPU threshold has been met, the autoscale rules increase the number of VM instances in the scale set. The output shows the list of VM instances as new ones are created.

```output
  InstanceId  LatestModelApplied    Location    Name             ProvisioningState    ResourceGroup         VmId
------------  --------------------  ----------  ---------------  -------------------  --------------------  ------------------------------------
           1  True                  WestUS2     myScaleSet_1     Succeeded            myResourceGroupxxxxx  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
           2  True                  WestUS2     myScaleSet_2     Succeeded            myResourceGroupxxxxx  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
           4  True                  WestUS2     myScaleSet_4     Creating             myResourceGroupxxxxx  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
           5  True                  WestUS2     myScaleSet_5     Creating             myResourceGroupxxxxx  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
           6  True                  WestUS2     myScaleSet_6     Creating             myResourceGroupxxxxx  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
```

Once the CPU load subsides, the average CPU load returns to normal. After another 5 minutes, the autoscale rules scale in the number of VM instances. Scale-in actions remove VM instances with the highest IDs first. When a scale set uses Availability Sets or Availability Zones, scale-in actions are evenly distributed across the VM instances.
The following sample output shows one VM instance being deleted as the scale set autoscales in: + +```output +6 True WestUS2 myScaleSet_6 Deleting myResourceGroupxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +``` + +## Clean up resources +To remove your scale set and associated resources, please manually delete the resource group using your preferred method. + +## Next steps +In this tutorial, you learned how to automatically scale in or out a scale set with the Azure CLI: + +> [!div class="checklist"] +> * Use autoscale with a scale set +> * Create and use autoscale rules +> * Simulate CPU load to trigger autoscale rules +> * Monitor autoscale actions as demand changes \ No newline at end of file diff --git a/scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/tutorial-modify-scale-sets-cli.md b/scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/tutorial-modify-scale-sets-cli.md new file mode 100644 index 000000000..94c5a5c89 --- /dev/null +++ b/scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/tutorial-modify-scale-sets-cli.md @@ -0,0 +1,438 @@ +--- +title: Modify an Azure Virtual Machine Scale Set using Azure CLI +description: Learn how to modify and update an Azure Virtual Machine Scale Set using Azure CLI +author: ju-shim +ms.author: jushiman +ms.topic: how-to +ms.service: azure-virtual-machine-scale-sets +ms.date: 06/14/2024 +ms.reviewer: mimckitt +ms.custom: mimckitt, devx-track-azurecli, linux-related-content, innovation-engine +--- + +# Tutorial: Modify a Virtual Machine Scale Set using Azure CLI +Throughout the lifecycle of your applications, you may need to modify or update your Virtual Machine Scale Set. These updates may include how to update the configuration of the scale set, or change the application configuration. This article describes how to modify an existing scale set using the Azure CLI. + +Below, we declare environment variables that will be used throughout this document. A random suffix is appended to resource names that need to be unique for each deployment. The `REGION` is set to *WestUS2*. + +## Setup Resource Group +Before proceeding, ensure the resource group exists. This step creates the resource group if it does not already exist. + +```bash +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export MY_RESOURCE_GROUP_NAME="myResourceGroup$RANDOM_SUFFIX" +export REGION="WestUS2" +az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION +``` + + +```JSON +{ + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx", + "location": "WestUS2", + "managedBy": null, + "name": "myResourceGroupxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Create the Virtual Machine Scale Set +To ensure that subsequent update and query commands have a valid resource to work on, create a Virtual Machine Scale Set. In this step, we deploy a basic scale set using a valid image (*Ubuntu2204*) and set the instance count to 5 so that instance-specific updates can target an existing instance ID. 
+ +```azurecli-interactive +export SCALE_SET_NAME="myScaleSet$RANDOM_SUFFIX" +az vmss create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $SCALE_SET_NAME \ + --image Ubuntu2204 \ + --upgrade-policy-mode manual \ + --instance-count 5 \ + --admin-username azureuser \ + --generate-ssh-keys +``` + + +```JSON +{ + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Compute/virtualMachineScaleSets/myScaleSetxxx", + "location": "WestUS2", + "name": "myScaleSetxxx", + "provisioningState": "Succeeded" +} +``` + +## Update the scale set model +A scale set has a "scale set model" that captures the *desired* state of the scale set as a whole. To query the model for a scale set, you can use [az vmss show](/cli/azure/vmss#az-vmss-show): + +```azurecli +az vmss show --resource-group $MY_RESOURCE_GROUP_NAME --name $SCALE_SET_NAME +``` + +The exact presentation of the output depends on the options you provide to the command. The following example shows condensed sample output from the Azure CLI: + +```output +{ + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Compute/virtualMachineScaleSets/myScaleSetxxx", + "location": "WestUS2", + "name": "myScaleSetxxx", + "orchestrationMode": "Flexible", + "platformFaultDomainCount": 1, + "resourceGroup": "myResourceGroupxxx", + "sku": { + "capacity": 5, + "name": "Standard_DS1_v2", + "tier": "Standard" + }, + "timeCreated": "2022-11-29T22:16:43.250912+00:00", + "type": "Microsoft.Compute/virtualMachineScaleSets", + "networkProfile": { + "networkApiVersion": "2020-11-01", + "networkInterfaceConfigurations": [ + { + "deleteOption": "Delete", + "disableTcpStateTracking": false, + "dnsSettings": { + "dnsServers": [] + }, + "enableIpForwarding": false, + "ipConfigurations": [ + { + "applicationGatewayBackendAddressPools": [], + "applicationSecurityGroups": [], + "loadBalancerBackendAddressPools": [ + { + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Network/loadBalancers/myScaleSetLB/backendAddressPools/myScaleSetLBBEPool", + "resourceGroup": "myResourceGroupxxx" + } + ], + "name": "mysca2215IPConfig", + "privateIpAddressVersion": "IPv4", + "subnet": { + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Network/virtualNetworks/myScaleSetVNET/subnets/myScaleSetSubnet", + "resourceGroup": "myResourceGroupxxx" + } + } + ], + "name": "mysca2215Nic", + "networkSecurityGroup": { + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Network/networkSecurityGroups/myScaleSetNSG", + "resourceGroup": "myResourceGroupxxx" + }, + "primary": true + } + ] + }, + "osProfile": { + "allowExtensionOperations": true, + "computerNamePrefix": "myScaleS", + "linuxConfiguration": { + "disablePasswordAuthentication": true, + "enableVmAgentPlatformUpdates": false, + "patchSettings": { + "assessmentMode": "ImageDefault", + "patchMode": "ImageDefault" + }, + "provisionVmAgent": true + } + }, + "storageProfile": { + "imageReference": { + "offer": "UbuntuServer", + "publisher": "Canonical", + "sku": "22_04-lts", + "version": "latest" + }, + "osDisk": { + "caching": "ReadWrite", + "createOption": "FromImage", + "deleteOption": "Delete", + "diskSizeGb": 30, + "managedDisk": { + "storageAccountType": "Premium_LRS" + }, + "osType": "Linux" + } + } +} +``` + +You can use [az vmss update](/cli/azure/vmss#az-vmss-update) to update various properties of your scale set. 
For example, updating your license type or a VM's instance protection policy. Note that the allowed license type value is *RHEL_BYOS* rather than *Windows_Server*. + +```azurecli-interactive +az vmss update --name $SCALE_SET_NAME --resource-group $MY_RESOURCE_GROUP_NAME --license-type RHEL_BYOS +``` + +```azurecli-interactive +export INSTANCE_ID=$(az vmss list-instances \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $SCALE_SET_NAME \ + --query "[0].instanceId" \ + -o tsv) + +az vmss update \ + --name $SCALE_SET_NAME \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --instance-id "$INSTANCE_ID" \ + --protect-from-scale-set-actions False \ + --protect-from-scale-in +``` + +Additionally, if you previously deployed the scale set with the `az vmss create` command, you can run the `az vmss create` command again to update the scale set. Make sure that all properties in the `az vmss create` command are the same as before, except for the properties that you wish to modify. For example, below we're increasing the instance count to five. + +> [!IMPORTANT] +>Starting November 2023, VM scale sets created using PowerShell and Azure CLI will default to Flexible Orchestration Mode if no orchestration mode is specified. For more information about this change and what actions you should take, go to [Breaking Change for VMSS PowerShell/CLI Customers - Microsoft Community Hub](https://techcommunity.microsoft.com/t5/azure-compute-blog/breaking-change-for-vmss-powershell-cli-customers/ba-p/3818295) + +```azurecli-interactive +az vmss create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $SCALE_SET_NAME \ + --orchestration-mode flexible \ + --image RHELRaw8LVMGen2 \ + --admin-username azureuser \ + --generate-ssh-keys \ + --instance-count 5 \ + --os-disk-size-gb 64 +``` + +## Updating individual VM instances in a scale set +Similar to how a scale set has a model view, each VM instance in the scale set has its own model view. To query the model view for a particular VM instance in a scale set, you can use [az vm show](/cli/azure/vm#az-vm-show). + +```azurecli +export INSTANCE_NAME=$(az vmss list-instances \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $SCALE_SET_NAME \ + --query "[0].name" \ + -o tsv) + +az vm show --resource-group $MY_RESOURCE_GROUP_NAME --name $INSTANCE_NAME +``` + +The exact presentation of the output depends on the options you provide to the command. 
The following example shows condensed sample output from the Azure CLI: + +```output +{ + "hardwareProfile": { + "vmSize": "Standard_DS1_v2" + }, + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Compute/virtualMachines/myScaleSet_Instance1", + "location": "WestUS2", + "name": "myScaleSet_Instance1", + "networkProfile": { + "networkInterfaces": [ + { + "deleteOption": "Delete", + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Network/networkInterfaces/mysca2215Nic-5cf164f7", + "primary": true, + "resourceGroup": "myResourceGroupxxx" + } + ] + }, + "osProfile": { + "allowExtensionOperations": true, + "computerName": "myScaleset_Computer1", + "linuxConfiguration": { + "disablePasswordAuthentication": true, + "enableVmAgentPlatformUpdates": false, + "patchSettings": { + "assessmentMode": "ImageDefault", + "patchMode": "ImageDefault" + }, + "provisionVmAgent": true + } + }, + "provisioningState": "Succeeded", + "resourceGroup": "myResourceGroupxxx", + "storageProfile": { + "dataDisks": [], + "imageReference": { + "exactVersion": "22.04.202204200", + "offer": "0001-com-ubuntu-server-jammy", + "publisher": "Canonical", + "sku": "22_04-lts", + "version": "latest" + }, + "osDisk": { + "caching": "ReadWrite", + "createOption": "FromImage", + "deleteOption": "Delete", + "diskSizeGb": 30, + "managedDisk": { + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Compute/disks/myScaleSet_Instance1_disk1_xxx", + "resourceGroup": "myResourceGroupxxx", + "storageAccountType": "Premium_LRS" + }, + "name": "myScaleSet_Instance1_disk1_xxx", + "osType": "Linux" + } + }, + "timeCreated": "2022-11-29T22:16:44.500895+00:00", + "type": "Microsoft.Compute/virtualMachines", + "virtualMachineScaleSet": { + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Compute/virtualMachineScaleSets/myScaleSetxxx", + "resourceGroup": "myResourceGroupxxx" + } +} +``` + +These properties describe the configuration of a VM instance within a scale set, not the configuration of the scale set as a whole. + +You can perform updates to individual VM instances in a scale set just like you would a standalone VM. For example, attaching a new data disk to instance 1: + +```azurecli-interactive +az vm disk attach --resource-group $MY_RESOURCE_GROUP_NAME --vm-name $INSTANCE_NAME --name disk_name1 --new +``` + +Running [az vm show](/cli/azure/vm#az-vm-show) again, we now will see that the VM instance has the new disk attached. + +```output +{ + "storageProfile": { + "dataDisks": [ + { + "caching": "None", + "createOption": "Empty", + "deleteOption": "Detach", + "diskSizeGb": 1023, + "lun": 0, + "managedDisk": { + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Compute/disks/disk_name1", + "resourceGroup": "myResourceGroupxxx", + "storageAccountType": "Premium_LRS" + }, + "name": "disk_name1", + "toBeDetached": false + } + ] + } +} +``` + +## Add an Instance to your scale set +There are times where you might want to add a new VM to your scale set but want different configuration options than those listed in the scale set model. VMs can be added to a scale set during creation by using the [az vm create](/cli/azure/vmss#az-vmss-create) command and specifying the scale set name you want the instance added to. 
+ +```azurecli-interactive +export NEW_INSTANCE_NAME="myNewInstance$RANDOM_SUFFIX" +az vm create --name $NEW_INSTANCE_NAME --resource-group $MY_RESOURCE_GROUP_NAME --vmss $SCALE_SET_NAME --image RHELRaw8LVMGen2 +``` + +```output +{ + "fqdns": "", + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Compute/virtualMachines/myNewInstancexxx", + "location": "WestUS2", + "macAddress": "60-45-BD-D7-13-DD", + "powerState": "VM running", + "privateIpAddress": "10.0.0.6", + "publicIpAddress": "20.172.144.96", + "resourceGroup": "myResourceGroupxxx", + "zones": "" +} +``` + +If we then check our scale set, we'll see the new instance added. + +```azurecli-interactive +az vm list --resource-group $MY_RESOURCE_GROUP_NAME --output table +``` + +```output +Name ResourceGroup Location +-------------------- --------------- ---------- +myNewInstancexxx myResourceGroupxxx WestUS2 +myScaleSet_Instance1 myResourceGroupxxx WestUS2 +myScaleSet_Instance1 myResourceGroupxxx WestUS2 +``` + +## Bring VMs up-to-date with the latest scale set model + +> [!NOTE] +> Upgrade modes are not currently supported on Virtual Machine Scale Sets using Flexible orchestration mode. + +Scale sets have an "upgrade policy" that determine how VMs are brought up-to-date with the latest scale set model. The three modes for the upgrade policy are: + +- **Automatic** - In this mode, the scale set makes no guarantees about the order of VMs being brought down. The scale set may take down all VMs at the same time. +- **Rolling** - In this mode, the scale set rolls out the update in batches with an optional pause time between batches. +- **Manual** - In this mode, when you update the scale set model, nothing happens to existing VMs until a manual update is triggered. + +If your scale set is set to manual upgrades, you can trigger a manual upgrade using [az vmss update](/cli/azure/vmss#az-vmss-update). + +```azurecli +az vmss update --resource-group $MY_RESOURCE_GROUP_NAME --name $SCALE_SET_NAME +``` + +>[!NOTE] +> Service Fabric clusters can only use *Automatic* mode, but the update is handled differently. For more information, see [Service Fabric application upgrades](../service-fabric/service-fabric-application-upgrade.md). + +## Reimage a scale set +Virtual Machine Scale Sets will generate a unique name for each VM in the scale set. The naming convention differs by orchestration mode: + +- Flexible orchestration Mode: {scale-set-name}_{8-char-guid} +- Uniform orchestration mode: {scale-set-name}_{instance-id} + +In the cases where you need to reimage a specific instance, use [az vmss reimage](/cli/azure/vmss#az-vmss-reimage) and specify the instance id. Another option is to use [az vm redeploy](/cli/azure/vm#az-vm-redeploy) to reimage the VM directly. This command is useful if you want to reimage a VM without having to specify the instance ID. + +```azurecli +# Get the VM name first +VM_NAME=$(az vmss list-instances \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $SCALE_SET_NAME \ + --query "[0].name" \ + -o tsv) + +# Reimage the VM directly +az vm redeploy \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $VM_NAME +``` + +## Update the OS image for your scale set +You may have a scale set that runs an old version of Ubuntu. You want to update to a newer version of Ubuntu, such as the latest version. The image reference version property isn't part of a list, so you can directly modify these properties using [az vmss update](/cli/azure/vmss#az-vmss-update). 
+ +```azurecli +az vmss update --resource-group $MY_RESOURCE_GROUP_NAME --name $SCALE_SET_NAME --set virtualMachineProfile.storageProfile.imageReference.version=latest +``` + +Alternatively, you may want to change the image your scale set uses. For example, you may want to update or change a custom image used by your scale set. You can change the image your scale set uses by updating the image reference ID property. The image reference ID property isn't part of a list, so you can directly modify this property using [az vmss update](/cli/azure/vmss#az-vmss-update). + +If you use Azure platform images, you can update the image by modifying the *imageReference* (more information, see the [REST API documentation](/rest/api/compute/virtualmachinescalesets/createorupdate)). + +>[!NOTE] +> With platform images, it is common to specify "latest" for the image reference version. When you create, scale out, and reimage, VMs are created with the latest available version. However, it **does not** mean that the OS image is automatically updated over time as new image versions are released. A separate feature provides automatic OS upgrades. For more information, see the [Automatic OS Upgrades documentation](virtual-machine-scale-sets-automatic-upgrade.md). + +If you use custom images, you can update the image by updating the *imageReference* ID (more information, see the [REST API documentation](/rest/api/compute/virtualmachinescalesets/createorupdate)). + +## Update the load balancer for your scale set +Let's say you have a scale set with an Azure Load Balancer, and you want to replace the Azure Load Balancer with an Azure Application Gateway. The load balancer and Application Gateway properties for a scale set are part of a list, so you can use the commands to remove or add list elements instead of modifying the properties directly. + +```text +# Remove the load balancer backend pool from the scale set model +az vmss update --resource-group $MY_RESOURCE_GROUP_NAME --name $SCALE_SET_NAME --remove virtualMachineProfile.networkProfile.networkInterfaceConfigurations[0].ipConfigurations[0].loadBalancerBackendAddressPools 0 + +# Remove the load balancer backend pool from the scale set model; only necessary if you have NAT pools configured on the scale set +az vmss update --resource-group $MY_RESOURCE_GROUP_NAME --name $SCALE_SET_NAME --remove virtualMachineProfile.networkProfile.networkInterfaceConfigurations[0].ipConfigurations[0].loadBalancerInboundNatPools 0 + +# Add the application gateway backend pool to the scale set model +az vmss update --resource-group $MY_RESOURCE_GROUP_NAME --name $SCALE_SET_NAME --add virtualMachineProfile.networkProfile.networkInterfaceConfigurations[0].ipConfigurations[0].ApplicationGatewayBackendAddressPools '{"id": "/subscriptions/xxxxx/resourceGroups/'$MY_RESOURCE_GROUP_NAME'/providers/Microsoft.Network/applicationGateways/{applicationGatewayName}/backendAddressPools/{applicationGatewayBackendPoolName}"}' +``` + +>[!NOTE] +> These commands assume there is only one IP configuration and load balancer on the scale set. If there are multiple, you may need to use a list index other than *0*. + +## Next steps +In this tutorial, you learned how to modify various aspects of your scale set and individual instances. 
+ +> [!div class="checklist"] +> * Update the scale set model +> * Update an individual VM instance in a scale set +> * Add an instance to your scale set +> * Bring VMs up-to-date with the latest scale set model +> * Reimage a scale set +> * Update the OS image for your scale set +> * Update the load balancer for your scale set + +> [!div class="nextstepaction"] +> [Use data disks with scale sets](tutorial-use-disks-powershell.md) \ No newline at end of file diff --git a/scenarios/azure-docs/articles/virtual-machine-scale-sets/virtual-machine-scale-sets-faq.yml b/scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/virtual-machine-scale-sets-faq.yml similarity index 100% rename from scenarios/azure-docs/articles/virtual-machine-scale-sets/virtual-machine-scale-sets-faq.yml rename to scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/virtual-machine-scale-sets-faq.yml diff --git a/scenarios/azure-compute-docs/articles/virtual-machines/disks-enable-performance.md b/scenarios/azure-compute-docs/articles/virtual-machines/disks-enable-performance.md new file mode 100644 index 000000000..cb1c2373e --- /dev/null +++ b/scenarios/azure-compute-docs/articles/virtual-machines/disks-enable-performance.md @@ -0,0 +1,241 @@ +--- +title: Preview - Increase performance of Premium SSDs and Standard SSD/HDDs +description: Increase the performance of Azure Premium SSDs and Standard SSD/HDDs using performance plus. +author: roygara +ms.service: azure-disk-storage +ms.topic: how-to +ms.date: 12/09/2024 +ms.author: rogarana +ms.custom: devx-track-azurepowershell +--- + +# Preview - Increase IOPS and throughput limits for Azure Premium SSDs and Standard SSD/HDDs + +The Input/Output Operations Per Second (IOPS) and throughput limits for Azure Premium solid-state drives (SSD), Standard SSDs, and Standard hard disk drives (HDD) that are 513 GiB and larger can be increased by enabling performance plus. Enabling performance plus (preview) improves the experience for workloads that require high IOPS and throughput, such as database and transactional workloads. There's no extra charge for enabling performance plus on a disk. + +Once enabled, the IOPS and throughput limits for an eligible disk increase to the higher maximum limits. To see the new IOPS and throughput limits for eligible disks, consult the columns that begin with "*Expanded" in the [Scalability and performance targets for VM disks](disks-scalability-targets.md) article. + +## Limitations + +- Can only be enabled on Standard HDD, Standard SSD, and Premium SSD managed disks that are 513 GiB or larger. +- Can only be enabled on new disks. + - To work around this, create a snapshot of your disk, then create a new disk from the snapshot. +- Not supported for disks recovered with Azure Site Recovery or Azure Backup. +- Can't be enabled in the Azure portal. + +## Prerequisites + +Either use the Azure Cloud Shell to run your commands or install a version of the [Azure PowerShell module](/powershell/azure/install-azure-powershell) 9.5 or newer, or a version of the [Azure CLI](/cli/azure/install-azure-cli) that is 2.44.0 or newer. + +## Enable performance plus + +You need to create a new disk to use performance plus. The following script creates a disk that has performance plus enabled and attach it to a VM: + +# [Azure CLI](#tab/azure-cli) + +### Create a resource group + +This step creates a resource group with a unique name. 
+ +```azurecli +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export MY_RG="PerfPlusRG$RANDOM_SUFFIX" +export REGION="WestUS2" +az group create -g $MY_RG -l $REGION +``` + +Results: + + +```JSON +{ + "id": "/subscriptions/xxxxx/resourceGroups/PerfPlusRGxxx", + "location": "WestUS2", + "name": "PerfPlusRGxxx", + "properties": { + "provisioningState": "Succeeded" + } +} +``` + +### Create a new disk with performance plus enabled + +This step creates a new disk of 513 GiB (or larger) with performance plus enabled using a valid SKU value. + +```azurecli +export MY_DISK="PerfPlusDisk$RANDOM_SUFFIX" +export SKU="Premium_LRS" +export DISK_SIZE=513 +az disk create -g $MY_RG -n $MY_DISK --size-gb $DISK_SIZE --sku $SKU -l $REGION --performance-plus true +``` + +Results: + + +```JSON +{ + "id": "/subscriptions/xxxxx/resourceGroups/PerfPlusRGxxx/providers/Microsoft.Compute/disks/PerfPlusDiskxxx", + "location": "WestUS2", + "name": "PerfPlusDiskxxx", + "properties": { + "provisioningState": "Succeeded", + "diskSizeGb": 513, + "sku": "Premium_LRS", + "performancePlus": true + }, + "type": "Microsoft.Compute/disks" +} +``` + +### Attempt to attach the disk to a VM + +This optional step attempts to attach the disk to an existing VM. It first checks if the VM exists and then proceeds accordingly. + +```azurecli +export MY_VM="NonExistentVM" +if az vm show -g $MY_RG -n $MY_VM --query "name" --output tsv >/dev/null 2>&1; then + az vm disk attach --vm-name $MY_VM --name $MY_DISK --resource-group $MY_RG +else + echo "VM $MY_VM not found. Skipping disk attachment." +fi +``` + +Results: + + +```text +VM NonExistentVM not found. Skipping disk attachment. +``` + +### Create a new disk from an existing disk or snapshot with performance plus enabled + +This series of steps creates a separate resource group and then creates a new disk from an existing disk or snapshot. Replace the SOURCE_URI with a valid source blob URI that belongs to the same region (WestUS2) as the disk. + +#### Create a resource group for migration + +```azurecli +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export MY_MIG_RG="PerfPlusMigrRG$RANDOM_SUFFIX" +export REGION="WestUS2" +az group create -g $MY_MIG_RG -l $REGION +``` + +Results: + + +```JSON +{ + "id": "/subscriptions/xxxxx/resourceGroups/PerfPlusMigrRGxxx", + "location": "WestUS2", + "name": "PerfPlusMigrRGxxx", + "properties": { + "provisioningState": "Succeeded" + } +} +``` + +#### Create the disk from an existing snapshot or disk + +```azurecli +# Create a snapshot from the original disk +export MY_SNAPSHOT_NAME="PerfPlusSnapshot$RANDOM_SUFFIX" +echo "Creating snapshot from original disk..." 
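# The snapshot lands in the source resource group ($MY_RG) next to the original disk;
# its resource ID is captured below so the new performance plus disk in $MY_MIG_RG can be created from it.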
+az snapshot create \ + --name $MY_SNAPSHOT_NAME \ + --resource-group $MY_RG \ + --source $MY_DISK + +# Get the snapshot ID for use as source +SNAPSHOT_ID=$(az snapshot show \ + --name $MY_SNAPSHOT_NAME \ + --resource-group $MY_RG \ + --query id \ + --output tsv) + +echo "Using snapshot ID: $SNAPSHOT_ID" + +# Create the new disk using the snapshot as source +export MY_MIG_DISK="PerfPlusMigrDisk$RANDOM_SUFFIX" +export SKU="Premium_LRS" +export DISK_SIZE=513 + +az disk create \ + --name $MY_MIG_DISK \ + --resource-group $MY_MIG_RG \ + --size-gb $DISK_SIZE \ + --performance-plus true \ + --sku $SKU \ + --source $SNAPSHOT_ID \ + --location $REGION +``` + +Results: + + +```JSON +{ + "id": "/subscriptions/xxxxx/resourceGroups/PerfPlusMigrRGxxx/providers/Microsoft.Compute/disks/PerfPlusMigrDiskxxx", + "location": "WestUS2", + "name": "PerfPlusMigrDiskxxx", + "properties": { + "provisioningState": "Succeeded", + "diskSizeGb": 513, + "sku": "Premium_LRS", + "performancePlus": true, + "source": "https://examplestorageaccount.blob.core.windows.net/snapshots/sample-westus2.vhd" + }, + "type": "Microsoft.Compute/disks" +} +``` + +# [Azure PowerShell](#tab/azure-powershell) + +You need to create a new disk to use performance plus. The following script creates a disk that has performance plus enabled and attach it to a VM: + +```azurepowershell +$myRG=yourResourceGroupName +$myDisk=yourDiskName +$myVM=yourVMName +$region=desiredRegion +# Valid values are Premium_LRS, Premium_ZRS, StandardSSD_LRS, StandardSSD_ZRS, or Standard_LRS +$sku=desiredSKU +#Size must be 513 or larger +$size=513 +$lun=desiredLun + +Set-AzContext -SubscriptionName + +$diskConfig = New-AzDiskConfig -Location $region -CreateOption Empty -DiskSizeGB $size -SkuName $sku -PerformancePlus $true + +$dataDisk = New-AzDisk -ResourceGroupName $myRG -DiskName $myDisk -Disk $diskConfig + +Add-AzVMDataDisk -VMName $myVM -ResourceGroupName $myRG -DiskName $myDisk -Lun $lun -CreateOption Empty -ManagedDiskId $dataDisk.Id +``` + +To migrate data from an existing disk or snapshot to a new disk with performance plus enabled, use the following script: + +```azurepowershell +$myDisk=yourDiskOrSnapshotName +$myVM=yourVMName +$region=desiredRegion +# Valid values are Premium_LRS, Premium_ZRS, StandardSSD_LRS, StandardSSD_ZRS, or Standard_LRS +$sku=desiredSKU +#Size must be 513 or larger +$size=513 +$sourceURI=diskOrSnapshotURI +$lun=desiredLun + +Set-AzContext -SubscriptionName <> + +$diskConfig = New-AzDiskConfig -Location $region -CreateOption Copy -DiskSizeGB $size -SkuName $sku -PerformancePlus $true -SourceResourceID $sourceURI + +$dataDisk = New-AzDisk -ResourceGroupName $myRG -DiskName $myDisk -Disk $diskconfig +Add-AzVMDataDisk -VMName $myVM -ResourceGroupName $myRG -DiskName $myDisk -Lun $lun -CreateOption Empty -ManagedDiskId $dataDisk.Id +``` +--- + +## Next steps + +- [Create an incremental snapshot for managed disks](disks-incremental-snapshots.md) +- [Expand virtual hard disks on a Linux VM](linux/expand-disks.md) +- [How to expand virtual hard disks attached to a Windows virtual machine](windows/expand-os-disk.md) \ No newline at end of file diff --git a/scenarios/azure-compute-docs/articles/virtual-machines/linux/cloud-init.txt b/scenarios/azure-compute-docs/articles/virtual-machines/linux/cloud-init.txt new file mode 100644 index 000000000..6f0566319 --- /dev/null +++ b/scenarios/azure-compute-docs/articles/virtual-machines/linux/cloud-init.txt @@ -0,0 +1,41 @@ +#cloud-config +package_upgrade: true +packages: + - nginx + - nodejs + - npm 
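# The files below use defer: true so they're written after the packages above are installed —
# /etc/nginx/sites-available only exists once the nginx package is in place.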
+write_files: + - owner: www-data:www-data + path: /etc/nginx/sites-available/default + defer: true + content: | + server { + listen 80; + location / { + proxy_pass http://localhost:3000; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection keep-alive; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + } + } + - owner: azureuser:azureuser + path: /home/azureuser/myapp/index.js + defer: true + content: | + var express = require('express') + var app = express() + var os = require('os'); + app.get('/', function (req, res) { + res.send('Hello World from host ' + os.hostname() + '!') + }) + app.listen(3000, function () { + console.log('Hello world app listening on port 3000!') + }) +runcmd: + - service nginx restart + - cd "/home/azureuser/myapp" + - npm init + - npm install express -y + - nodejs index.js \ No newline at end of file diff --git a/scenarios/azure-compute-docs/articles/virtual-machines/linux/multiple-nics.md b/scenarios/azure-compute-docs/articles/virtual-machines/linux/multiple-nics.md new file mode 100644 index 000000000..8f02ee1a8 --- /dev/null +++ b/scenarios/azure-compute-docs/articles/virtual-machines/linux/multiple-nics.md @@ -0,0 +1,268 @@ +--- +title: Create a Linux VM in Azure with multiple NICs +description: Learn how to create a Linux VM with multiple NICs attached to it using the Azure CLI or Resource Manager templates. +author: mattmcinnes +ms.service: azure-virtual-machines +ms.subservice: networking +ms.topic: how-to +ms.custom: devx-track-azurecli, linux-related-content, innovation-engine +ms.date: 04/06/2023 +ms.author: mattmcinnes +ms.reviewer: cynthn +--- + +# How to create a Linux virtual machine in Azure with multiple network interface cards + +**Applies to:** :heavy_check_mark: Linux VMs :heavy_check_mark: Flexible scale sets + +This article details how to create a VM with multiple NICs with the Azure CLI. + +## Create supporting resources +Install the latest [Azure CLI](/cli/azure/install-az-cli2) and log in to an Azure account using [az login](/cli/azure/reference-index). + +In the following examples, replace example parameter names with your own values. Example parameter names included *myResourceGroup*, *mystorageaccount*, and *myVM*. + +First, create a resource group with [az group create](/cli/azure/group). The following example creates a resource group named *myResourceGroup* in the *eastus* location. In these examples, we declare environment variables as they are used and add a random suffix to unique resource names. + +```azurecli +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export MY_RESOURCE_GROUP_NAME="myResourceGroup$RANDOM_SUFFIX" +export REGION="WestUS2" +az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION +``` + +```JSON +{ + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx", + "location": "WestUS2", + "managedBy": null, + "name": "myResourceGroupxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +Create the virtual network with [az network vnet create](/cli/azure/network/vnet). 
The following example creates a virtual network named *myVnet* and subnet named *mySubnetFrontEnd*: + +```azurecli +export VNET_NAME="myVnet" +export FRONTEND_SUBNET="mySubnetFrontEnd" +az network vnet create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $VNET_NAME \ + --address-prefix 10.0.0.0/16 \ + --subnet-name $FRONTEND_SUBNET \ + --subnet-prefix 10.0.1.0/24 +``` + +Create a subnet for the back-end traffic with [az network vnet subnet create](/cli/azure/network/vnet/subnet). The following example creates a subnet named *mySubnetBackEnd*: + +```azurecli +export BACKEND_SUBNET="mySubnetBackEnd" +az network vnet subnet create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --vnet-name $VNET_NAME \ + --name $BACKEND_SUBNET \ + --address-prefix 10.0.2.0/24 +``` + +Create a network security group with [az network nsg create](/cli/azure/network/nsg). The following example creates a network security group named *myNetworkSecurityGroup*: + +```azurecli +export NSG_NAME="myNetworkSecurityGroup" +az network nsg create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $NSG_NAME +``` + +## Create and configure multiple NICs +Create two NICs with [az network nic create](/cli/azure/network/nic). The following example creates two NICs, named *myNic1* and *myNic2*, connected to the network security group, with one NIC connecting to each subnet: + +```azurecli +export NIC1="myNic1" +export NIC2="myNic2" +az network nic create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $NIC1 \ + --vnet-name $VNET_NAME \ + --subnet $FRONTEND_SUBNET \ + --network-security-group $NSG_NAME +az network nic create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $NIC2 \ + --vnet-name $VNET_NAME \ + --subnet $BACKEND_SUBNET \ + --network-security-group $NSG_NAME +``` + +## Create a VM and attach the NICs +When you create the VM, specify the NICs you created with --nics. You also need to take care when you select the VM size. There are limits for the total number of NICs that you can add to a VM. Read more about [Linux VM sizes](../sizes.md). + +Create a VM with [az vm create](/cli/azure/vm). The following example creates a VM named *myVM*: + +```azurecli +export VM_NAME="myVM" +az vm create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $VM_NAME \ + --image Ubuntu2204 \ + --size Standard_DS3_v2 \ + --admin-username azureuser \ + --generate-ssh-keys \ + --nics $NIC1 $NIC2 +``` + +Add routing tables to the guest OS by completing the steps in [Configure the guest OS for multiple NICs](#configure-guest-os-for-multiple-nics). + +## Add a NIC to a VM +The previous steps created a VM with multiple NICs. You can also add NICs to an existing VM with the Azure CLI. Different [VM sizes](../sizes.md) support a varying number of NICs, so size your VM accordingly. If needed, you can [resize a VM](../resize-vm.md). + +Create another NIC with [az network nic create](/cli/azure/network/nic). The following example creates a NIC named *myNic3* connected to the back-end subnet and network security group created in the previous steps: + +```azurecli +export NIC3="myNic3" +az network nic create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $NIC3 \ + --vnet-name $VNET_NAME \ + --subnet $BACKEND_SUBNET \ + --network-security-group $NSG_NAME +``` + +To add a NIC to an existing VM, first deallocate the VM with [az vm deallocate](/cli/azure/vm). 
The following example deallocates the VM named *myVM*: + +```azurecli +az vm deallocate --resource-group $MY_RESOURCE_GROUP_NAME --name $VM_NAME +``` + +Add the NIC with [az vm nic add](/cli/azure/vm/nic). The following example adds *myNic3* to *myVM*: + +```azurecli +az vm nic add \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --vm-name $VM_NAME \ + --nics $NIC3 +``` + +Start the VM with [az vm start](/cli/azure/vm): + +```azurecli +az vm start --resource-group $MY_RESOURCE_GROUP_NAME --name $VM_NAME +``` + +Add routing tables to the guest OS by completing the steps in [Configure the guest OS for multiple NICs](#configure-guest-os-for-multiple-nics). + +## Remove a NIC from a VM +To remove a NIC from an existing VM, first deallocate the VM with [az vm deallocate](/cli/azure/vm). The following example deallocates the VM named *myVM*: + +```azurecli +az vm deallocate --resource-group $MY_RESOURCE_GROUP_NAME --name $VM_NAME +``` + +Remove the NIC with [az vm nic remove](/cli/azure/vm/nic). The following example removes *myNic3* from *myVM*: + +```azurecli +az vm nic remove \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --vm-name $VM_NAME \ + --nics $NIC3 +``` + +Start the VM with [az vm start](/cli/azure/vm): + +```azurecli +az vm start --resource-group $MY_RESOURCE_GROUP_NAME --name $VM_NAME +``` + +## Create multiple NICs using Resource Manager templates +Azure Resource Manager templates use declarative JSON files to define your environment. You can read an [overview of Azure Resource Manager](/azure/azure-resource-manager/management/overview). Resource Manager templates provide a way to create multiple instances of a resource during deployment, such as creating multiple NICs. You use *copy* to specify the number of instances to create: + +```json +"copy": { + "name": "multiplenics" + "count": "[parameters('count')]" +} +``` + +Read more about [creating multiple instances using *copy*](/azure/azure-resource-manager/templates/copy-resources). + +You can also use a copyIndex() to then append a number to a resource name, which allows you to create myNic1, myNic2, etc. The following shows an example of appending the index value: + +```json +"name": "[concat('myNic', copyIndex())]", +``` + +You can read a complete example of [creating multiple NICs using Resource Manager templates](/azure/virtual-network/template-samples). + +Add routing tables to the guest OS by completing the steps in [Configure the guest OS for multiple NICs](#configure-guest-os-for-multiple-nics). + +## Configure guest OS for multiple NICs + +The previous steps created a virtual network and subnet, attached NICs, then created a VM. A public IP address and network security group rules that allow SSH traffic were not created. To configure the guest OS for multiple NICs, you need to allow remote connections and run commands locally on the VM. 
+ +To allow SSH traffic, create a network security group rule with [az network nsg rule create](/cli/azure/network/nsg/rule#az-network-nsg-rule-create) as follows: + +```azurecli +az network nsg rule create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --nsg-name $NSG_NAME \ + --name allow_ssh \ + --priority 101 \ + --destination-port-ranges 22 +``` + +Create a public IP address with [az network public-ip create](/cli/azure/network/public-ip#az-network-public-ip-create) and assign it to the first NIC with [az network nic ip-config update](/cli/azure/network/nic/ip-config#az-network-nic-ip-config-update): + +```azurecli +export PUBLIC_IP_NAME="myPublicIP" +az network public-ip create --resource-group $MY_RESOURCE_GROUP_NAME --name $PUBLIC_IP_NAME + +az network nic ip-config update \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --nic-name $NIC1 \ + --name ipconfig1 \ + --public-ip $PUBLIC_IP_NAME +``` + +To view the public IP address of the VM, use [az vm show](/cli/azure/vm#az-vm-show) as follows: + +```azurecli +az vm show --resource-group $MY_RESOURCE_GROUP_NAME --name $VM_NAME -d --query publicIps -o tsv +``` + +```TEXT +x.x.x.x +``` + +Now SSH to the public IP address of your VM. The default username provided in a previous step was *azureuser*. Provide your own username and public IP address: + +```bash +export IP_ADDRESS=$(az vm show --resource-group $MY_RESOURCE_GROUP_NAME --name $VM_NAME -d --query publicIps -o tsv) +ssh -o StrictHostKeyChecking=no azureuser@$IP_ADDRESS +``` +To send to or from a secondary network interface, you have to manually add persistent routes to the operating system for each secondary network interface. In this article, *eth1* is the secondary interface. Instructions for adding persistent routes to the operating system vary by distro. See documentation for your distro for instructions. + +When adding the route to the operating system, the gateway address is the first address of the subnet the network interface is in. For example, if the subnet has been assigned the range 10.0.2.0/24, the gateway you specify for the route is 10.0.2.1 or if the subnet has been assigned the range 10.0.2.128/25, the gateway you specify for the route is 10.0.2.129. You can define a specific network for the route's destination, or specify a destination of 0.0.0.0, if you want all traffic for the interface to go through the specified gateway. The gateway for each subnet is managed by the virtual network. + +Once you've added the route for a secondary interface, verify that the route is in your route table with `route -n`. The following example output is for the route table that has the two network interfaces added to the VM in this article: + +```output +Kernel IP routing table +Destination Gateway Genmask Flags Metric Ref Use Iface +0.0.0.0 10.0.1.1 0.0.0.0 UG 0 0 0 eth0 +0.0.0.0 10.0.2.1 0.0.0.0 UG 0 0 0 eth1 +10.0.1.0 0.0.0.0 255.255.255.0 U 0 0 0 eth0 +10.0.2.0 0.0.0.0 255.255.255.0 U 0 0 0 eth1 +168.63.129.16 10.0.1.1 255.255.255.255 UGH 0 0 0 eth0 +169.254.169.254 10.0.1.1 255.255.255.255 UGH 0 0 0 eth0 +``` + +Confirm that the route you added persists across reboots by checking your route table again after a reboot. To test connectivity, you can enter the following command, for example, where *eth1* is the name of a secondary network interface: `ping bing.com -c 4 -I eth1` + +## Next steps +Review [Linux VM sizes](../sizes.md) when trying to creating a VM with multiple NICs. Pay attention to the maximum number of NICs each VM size supports. 
+ +To further secure your VMs, use just in time VM access. This feature opens network security group rules for SSH traffic when needed, and for a defined period of time. For more information, see [Manage virtual machine access using just in time](/azure/security-center/security-center-just-in-time). \ No newline at end of file diff --git a/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/main.tf b/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/main.tf new file mode 100644 index 000000000..9482a95fa --- /dev/null +++ b/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/main.tf @@ -0,0 +1,124 @@ +resource "random_pet" "rg_name" { + prefix = var.resource_group_name_prefix +} + +resource "azurerm_resource_group" "rg" { + location = var.resource_group_location + name = random_pet.rg_name.id +} + +# Create virtual network +resource "azurerm_virtual_network" "my_terraform_network" { + name = "myVnet" + address_space = ["10.0.0.0/16"] + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name +} + +# Create subnet +resource "azurerm_subnet" "my_terraform_subnet" { + name = "mySubnet" + resource_group_name = azurerm_resource_group.rg.name + virtual_network_name = azurerm_virtual_network.my_terraform_network.name + address_prefixes = ["10.0.1.0/24"] +} + +# Create public IPs +resource "azurerm_public_ip" "my_terraform_public_ip" { + name = "myPublicIP" + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + allocation_method = "Dynamic" +} + +# Create Network Security Group and rule +resource "azurerm_network_security_group" "my_terraform_nsg" { + name = "myNetworkSecurityGroup" + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + + security_rule { + name = "SSH" + priority = 1001 + direction = "Inbound" + access = "Allow" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "22" + source_address_prefix = "*" + destination_address_prefix = "*" + } +} + +# Create network interface +resource "azurerm_network_interface" "my_terraform_nic" { + name = "myNIC" + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + + ip_configuration { + name = "my_nic_configuration" + subnet_id = azurerm_subnet.my_terraform_subnet.id + private_ip_address_allocation = "Dynamic" + public_ip_address_id = azurerm_public_ip.my_terraform_public_ip.id + } +} + +# Connect the security group to the network interface +resource "azurerm_network_interface_security_group_association" "example" { + network_interface_id = azurerm_network_interface.my_terraform_nic.id + network_security_group_id = azurerm_network_security_group.my_terraform_nsg.id +} + +# Generate random text for a unique storage account name +resource "random_id" "random_id" { + keepers = { + # Generate a new ID only when a new resource group is defined + resource_group = azurerm_resource_group.rg.name + } + + byte_length = 8 +} + +# Create storage account for boot diagnostics +resource "azurerm_storage_account" "my_storage_account" { + name = "diag${random_id.random_id.hex}" + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + account_tier = "Standard" + account_replication_type = "LRS" +} + +# Create virtual machine +resource "azurerm_linux_virtual_machine" "my_terraform_vm" { + name = "myVM" + location 
= azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + network_interface_ids = [azurerm_network_interface.my_terraform_nic.id] + size = "Standard_DS1_v2" + + os_disk { + name = "myOsDisk" + caching = "ReadWrite" + storage_account_type = "Premium_LRS" + } + + source_image_reference { + publisher = "Canonical" + offer = "0001-com-ubuntu-server-jammy" + sku = "22_04-lts-gen2" + version = "latest" + } + + computer_name = "hostname" + admin_username = var.username + + admin_ssh_key { + username = var.username + public_key = azapi_resource_action.ssh_public_key_gen.output.publicKey + } + + boot_diagnostics { + storage_account_uri = azurerm_storage_account.my_storage_account.primary_blob_endpoint + } +} \ No newline at end of file diff --git a/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/outputs.tf b/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/outputs.tf new file mode 100644 index 000000000..f7d0c3184 --- /dev/null +++ b/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/outputs.tf @@ -0,0 +1,7 @@ +output "resource_group_name" { + value = azurerm_resource_group.rg.name +} + +output "public_ip_address" { + value = azurerm_linux_virtual_machine.my_terraform_vm.public_ip_address +} \ No newline at end of file diff --git a/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/providers.tf b/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/providers.tf new file mode 100644 index 000000000..158b40408 --- /dev/null +++ b/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/providers.tf @@ -0,0 +1,22 @@ +terraform { + required_version = ">=0.12" + + required_providers { + azapi = { + source = "azure/azapi" + version = "~>1.5" + } + azurerm = { + source = "hashicorp/azurerm" + version = "~>3.0" + } + random = { + source = "hashicorp/random" + version = "~>3.0" + } + } +} + +provider "azurerm" { + features {} +} \ No newline at end of file diff --git a/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/quick-create-terraform.md b/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/quick-create-terraform.md new file mode 100644 index 000000000..d6e92dc62 --- /dev/null +++ b/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/quick-create-terraform.md @@ -0,0 +1,367 @@ +--- +title: 'Quickstart: Use Terraform to create a Linux VM' +description: In this quickstart, you learn how to use Terraform to create a Linux virtual machine +author: tomarchermsft +ms.service: azure-virtual-machines +ms.collection: linux +ms.topic: quickstart +ms.date: 07/24/2023 +ms.author: tarcher +ms.custom: devx-track-terraform, linux-related-content, innovation-engine +ai-usage: ai-assisted +--- + +# Quickstart: Use Terraform to create a Linux VM + +**Applies to:** :heavy_check_mark: Linux VMs + +Article tested with the following Terraform and Terraform provider versions: + +This article shows you how to create a complete Linux environment and supporting resources with Terraform. Those resources include a virtual network, subnet, public IP address, and more. 
+ +[!INCLUDE [Terraform abstract](~/azure-dev-docs-pr/articles/terraform/includes/abstract.md)] + +In this article, you learn how to: +> [!div class="checklist"] +> * Create a random value for the Azure resource group name using [random_pet](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/pet). +> * Create an Azure resource group using [azurerm_resource_group](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/resource_group). +> * Create a virtual network (VNET) using [azurerm_virtual_network](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/virtual_network). +> * Create a subnet using [azurerm_subnet](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/subnet). +> * Create a public IP using [azurerm_public_ip](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/public_ip). +> * Create a network security group using [azurerm_network_security_group](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/network_security_group). +> * Create a network interface using [azurerm_network_interface](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/network_interface). +> * Create an association between the network security group and the network interface using [azurerm_network_interface_security_group_association](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/network_interface_security_group_association). +> * Generate a random value for a unique storage account name using [random_id](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/id). +> * Create a storage account for boot diagnostics using [azurerm_storage_account](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/storage_account). +> * Create a Linux VM using [azurerm_linux_virtual_machine](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/linux_virtual_machine). +> * Create an AzAPI resource using [azapi_resource](https://registry.terraform.io/providers/Azure/azapi/latest/docs/resources/azapi_resource). +> * Create an AzAPI resource to generate an SSH key pair using [azapi_resource_action](https://registry.terraform.io/providers/Azure/azapi/latest/docs/resources/azapi_resource_action). + +## Prerequisites + +- [Install and configure Terraform](/azure/developer/terraform/quickstart-configure) + +## Implement the Terraform code + +> [!NOTE] +> The sample code for this article is located in the [Azure Terraform GitHub repo](https://github.com/Azure/terraform/tree/master/quickstart/101-vm-with-infrastructure). You can view the log file containing the [test results from current and previous versions of Terraform](https://github.com/Azure/terraform/tree/master/quickstart/101-vm-with-infrastructure/TestRecord.md). +> +> See more [articles and sample code showing how to use Terraform to manage Azure resources](/azure/terraform) + +1. Create a directory in which to test the sample Terraform code and make it the current directory. + +1. Create a file named providers.tf and insert the following code: + +```text +terraform { + required_version = ">=0.12" + + required_providers { + azapi = { + source = "azure/azapi" + version = "~>1.5" + } + azurerm = { + source = "hashicorp/azurerm" + version = "~>3.0" + } + random = { + source = "hashicorp/random" + version = "~>3.0" + } + } +} + +provider "azurerm" { + features {} +} +``` + +1. 
Create a file named ssh.tf and insert the following code: + +```text +resource "random_pet" "ssh_key_name" { + prefix = "ssh" + separator = "" +} + +resource "azapi_resource_action" "ssh_public_key_gen" { + type = "Microsoft.Compute/sshPublicKeys@2022-11-01" + resource_id = azapi_resource.ssh_public_key.id + action = "generateKeyPair" + method = "POST" + + response_export_values = ["publicKey", "privateKey"] +} + +resource "azapi_resource" "ssh_public_key" { + type = "Microsoft.Compute/sshPublicKeys@2022-11-01" + name = random_pet.ssh_key_name.id + location = azurerm_resource_group.rg.location + parent_id = azurerm_resource_group.rg.id +} + +output "key_data" { + value = azapi_resource_action.ssh_public_key_gen.output.publicKey +} +``` + +1. Create a file named main.tf and insert the following code: + +```text +resource "random_pet" "rg_name" { + prefix = var.resource_group_name_prefix +} + +resource "azurerm_resource_group" "rg" { + location = var.resource_group_location + name = random_pet.rg_name.id +} + +# Create virtual network +resource "azurerm_virtual_network" "my_terraform_network" { + name = "myVnet" + address_space = ["10.0.0.0/16"] + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name +} + +# Create subnet +resource "azurerm_subnet" "my_terraform_subnet" { + name = "mySubnet" + resource_group_name = azurerm_resource_group.rg.name + virtual_network_name = azurerm_virtual_network.my_terraform_network.name + address_prefixes = ["10.0.1.0/24"] +} + +# Create public IPs +resource "azurerm_public_ip" "my_terraform_public_ip" { + name = "myPublicIP" + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + allocation_method = "Dynamic" +} + +# Create Network Security Group and rule +resource "azurerm_network_security_group" "my_terraform_nsg" { + name = "myNetworkSecurityGroup" + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + + security_rule { + name = "SSH" + priority = 1001 + direction = "Inbound" + access = "Allow" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "22" + source_address_prefix = "*" + destination_address_prefix = "*" + } +} + +# Create network interface +resource "azurerm_network_interface" "my_terraform_nic" { + name = "myNIC" + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + + ip_configuration { + name = "my_nic_configuration" + subnet_id = azurerm_subnet.my_terraform_subnet.id + private_ip_address_allocation = "Dynamic" + public_ip_address_id = azurerm_public_ip.my_terraform_public_ip.id + } +} + +# Connect the security group to the network interface +resource "azurerm_network_interface_security_group_association" "example" { + network_interface_id = azurerm_network_interface.my_terraform_nic.id + network_security_group_id = azurerm_network_security_group.my_terraform_nsg.id +} + +# Generate random text for a unique storage account name +resource "random_id" "random_id" { + keepers = { + # Generate a new ID only when a new resource group is defined + resource_group = azurerm_resource_group.rg.name + } + + byte_length = 8 +} + +# Create storage account for boot diagnostics +resource "azurerm_storage_account" "my_storage_account" { + name = "diag${random_id.random_id.hex}" + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + account_tier = "Standard" + account_replication_type = "LRS" +} + 
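# The storage account above is referenced by the VM's boot_diagnostics block further down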
+# Create virtual machine +resource "azurerm_linux_virtual_machine" "my_terraform_vm" { + name = "myVM" + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + network_interface_ids = [azurerm_network_interface.my_terraform_nic.id] + size = "Standard_DS1_v2" + + os_disk { + name = "myOsDisk" + caching = "ReadWrite" + storage_account_type = "Premium_LRS" + } + + source_image_reference { + publisher = "Canonical" + offer = "0001-com-ubuntu-server-jammy" + sku = "22_04-lts-gen2" + version = "latest" + } + + computer_name = "hostname" + admin_username = var.username + + admin_ssh_key { + username = var.username + public_key = azapi_resource_action.ssh_public_key_gen.output.publicKey + } + + boot_diagnostics { + storage_account_uri = azurerm_storage_account.my_storage_account.primary_blob_endpoint + } +} +``` + +1. Create a file named variables.tf and insert the following code: + +```text +variable "resource_group_location" { + type = string + default = "eastus2" + description = "Location of the resource group." +} + +variable "resource_group_name_prefix" { + type = string + default = "rg" + description = "Prefix of the resource group name that's combined with a random ID so name is unique in your Azure subscription." +} + +variable "username" { + type = string + description = "The username for the local account that will be created on the new VM." + default = "azureadmin" +} +``` + +1. Create a file named outputs.tf and insert the following code: + +```text +output "resource_group_name" { + value = azurerm_resource_group.rg.name +} + +output "public_ip_address" { + value = azurerm_linux_virtual_machine.my_terraform_vm.public_ip_address +} +``` + +## Initialize Terraform + +In this section, Terraform is initialized; this command downloads the Azure provider required to manage your Azure resources. Before running the command, ensure you are in the directory where you created the Terraform files. You can set any necessary environment variables here. + +```bash +# Set your preferred Azure region (defaults to eastus2 if not specified) +export TF_VAR_resource_group_location="eastus2" +export TERRAFORM_DIR=$(pwd) +terraform init -upgrade +``` + +Key points: + +- The -upgrade parameter upgrades the necessary provider plugins to the newest version that complies with the configuration's version constraints. + +## Create a Terraform execution plan + +This step creates an execution plan but does not execute it. It shows what actions are necessary to create the configuration specified in your files. + +```bash +terraform plan -out main.tfplan +``` + +Key points: + +- The terraform plan command creates an execution plan, allowing you to verify whether it matches your expectations before applying any changes. +- The optional -out parameter writes the plan to a file so that the exact plan can be applied later. + +## Apply a Terraform execution plan + +Apply the previously created execution plan to deploy the infrastructure to your cloud. + +```bash +terraform apply main.tfplan +``` + +Key points: + +- This command applies the plan created with terraform plan -out main.tfplan. +- If you used a different filename for the -out parameter, use that same filename with terraform apply. +- If the -out parameter wasn’t used, run terraform apply without any parameters. + +Cost information isn't presented during the virtual machine creation process for Terraform like it is for the [Azure portal](quick-create-portal.md). 
If you want to learn more about how cost works for virtual machines, see the [Cost optimization Overview page](../plan-to-manage-costs.md). + +## Verify the results + +#### [Azure CLI](#tab/azure-cli) + +1. Get the Azure resource group name. + +```bash +export RESOURCE_GROUP_NAME=$(terraform output -raw resource_group_name) +``` + +1. Run az vm list with a JMESPath query to display the names of the virtual machines created in the resource group. + +```azurecli +az vm list \ + --resource-group $RESOURCE_GROUP_NAME \ + --query "[].{\"VM Name\":name}" -o table +``` + +Results: + + + +```console +VM Name +----------- +myVM +``` + +#### [Azure PowerShell](#tab/azure-powershell) + +1. Get the Azure resource group name. + +```console +$resource_group_name=$(terraform output -raw resource_group_name) +``` + +1. Run Get-AzVm to display the names of all the virtual machines in the resource group. + +```azurepowershell +Get-AzVm -ResourceGroupName $resource_group_name +``` + +## Troubleshoot Terraform on Azure + +[Troubleshoot common problems when using Terraform on Azure](/azure/developer/terraform/troubleshoot) + +## Next steps + +In this quickstart, you deployed a simple virtual machine using Terraform. To learn more about Azure virtual machines, continue to the tutorial for Linux VMs. + +> [!div class="nextstepaction"] +> [Azure Linux virtual machine tutorials](./tutorial-manage-vm.md) \ No newline at end of file diff --git a/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/ssh.tf b/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/ssh.tf new file mode 100644 index 000000000..11de7c0a4 --- /dev/null +++ b/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/ssh.tf @@ -0,0 +1,25 @@ +resource "random_pet" "ssh_key_name" { + prefix = "ssh" + separator = "" +} + +resource "azapi_resource_action" "ssh_public_key_gen" { + type = "Microsoft.Compute/sshPublicKeys@2022-11-01" + resource_id = azapi_resource.ssh_public_key.id + action = "generateKeyPair" + method = "POST" + + response_export_values = ["publicKey", "privateKey"] +} + +resource "azapi_resource" "ssh_public_key" { + type = "Microsoft.Compute/sshPublicKeys@2022-11-01" + name = random_pet.ssh_key_name.id + location = azurerm_resource_group.rg.location + parent_id = azurerm_resource_group.rg.id +} + +output "key_data" { + value = azapi_resource_action.ssh_public_key_gen.output.publicKey + sensitive = true +} \ No newline at end of file diff --git a/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/variables.tf b/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/variables.tf new file mode 100644 index 000000000..37a12b1f4 --- /dev/null +++ b/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/variables.tf @@ -0,0 +1,17 @@ +variable "resource_group_location" { + type = string + default = "eastus2" + description = "Location of the resource group." +} + +variable "resource_group_name_prefix" { + type = string + default = "rg" + description = "Prefix of the resource group name that's combined with a random ID so name is unique in your Azure subscription." +} + +variable "username" { + type = string + description = "The username for the local account that will be created on the new VM." 
+ default = "azureadmin" +} \ No newline at end of file diff --git a/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-automate-vm-deployment.md b/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-automate-vm-deployment.md new file mode 100644 index 000000000..5b46a9fd8 --- /dev/null +++ b/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-automate-vm-deployment.md @@ -0,0 +1,193 @@ +--- +title: Tutorial - Customize a Linux VM with cloud-init in Azure +description: In this tutorial, you learn how to use cloud-init and Key Vault to customize Linux VMs the first time they boot in Azure +author: ju-shim +ms.service: azure-virtual-machines +ms.collection: linux +ms.topic: tutorial +ms.date: 10/18/2023 +ms.author: jushiman +ms.reviewer: mattmcinnes +ms.custom: mvc, devx-track-azurecli, linux-related-content, innovation-engine +--- + +# Tutorial - How to use cloud-init to customize a Linux virtual machine in Azure on first boot + +**Applies to:** :heavy_check_mark: Linux VMs :heavy_check_mark: Flexible scale sets + +In a previous tutorial, you learned how to SSH to a virtual machine (VM) and manually install NGINX. To create VMs in a quick and consistent manner, some form of automation is typically desired. A common approach to customize a VM on first boot is to use [cloud-init](https://cloudinit.readthedocs.io). In this tutorial you learn how to: + +> [!div class="checklist"] +> * Create a cloud-init config file +> * Create a VM that uses a cloud-init file +> * View a running Node.js app after the VM is created +> * Use Key Vault to securely store certificates +> * Automate secure deployments of NGINX with cloud-init + +If you choose to install and use the CLI locally, this tutorial requires that you are running the Azure CLI version 2.0.30 or later. Run `az --version` to find the version. If you need to install or upgrade, see [Install Azure CLI]( /cli/azure/install-azure-cli). + +## Cloud-init overview + +[Cloud-init](https://cloudinit.readthedocs.io) is a widely used approach to customize a Linux VM as it boots for the first time. You can use cloud-init to install packages and write files, or to configure users and security. As cloud-init runs during the initial boot process, there are no additional steps or required agents to apply your configuration. + +Cloud-init also works across distributions. For example, you don't use **apt-get install** or **yum install** to install a package. Instead you can define a list of packages to install. Cloud-init automatically uses the native package management tool for the distro you select. + +We are working with our partners to get cloud-init included and working in the images that they provide to Azure. For detailed information cloud-init support for each distribution, see [Cloud-init support for VMs in Azure](using-cloud-init.md). + +## Create cloud-init config file + +To see cloud-init in action, create a VM that installs NGINX and runs a simple 'Hello World' Node.js app. The following cloud-init configuration installs the required packages, creates a Node.js app, then initializes and starts the app. + +At your bash prompt or in the Cloud Shell, create a file named *cloud-init.txt* and paste the following configuration. For example, type `sensible-editor cloud-init.txt` to create the file and see a list of available editors. 
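+If you're scripting this step rather than using an interactive editor, one option is to write the file with a quoted heredoc. The following is a minimal sketch of that approach (paste the full configuration shown below between the markers):
+
+```bash
+# Hypothetical non-interactive alternative to sensible-editor: a quoted
+# heredoc ('EOF') keeps any $variables in the cloud-init content from expanding.
+cat > cloud-init.txt <<'EOF'
+#cloud-config
+# ...rest of the configuration shown below...
+EOF
+```
+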
Make sure that the whole cloud-init file is copied correctly, especially the first line: + +```yaml +#cloud-config +package_upgrade: true +packages: + - nginx + - nodejs + - npm +write_files: + - owner: www-data:www-data + path: /etc/nginx/sites-available/default + defer: true + content: | + server { + listen 80; + location / { + proxy_pass http://localhost:3000; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection keep-alive; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + } + } + - owner: azureuser:azureuser + path: /home/azureuser/myapp/index.js + defer: true + content: | + var express = require('express') + var app = express() + var os = require('os'); + app.get('/', function (req, res) { + res.send('Hello World from host ' + os.hostname() + '!') + }) + app.listen(3000, function () { + console.log('Hello world app listening on port 3000!') + }) +runcmd: + - service nginx restart + - cd "/home/azureuser/myapp" + - npm init + - npm install express -y + - nodejs index.js +``` + +For more information about cloud-init configuration options, see [cloud-init config examples](https://cloudinit.readthedocs.io/en/latest/topics/examples.html). + +## Create virtual machine + +Before you can create a VM, create a resource group with [az group create](/cli/azure/group#az-group-create). The following example creates a resource group. In these commands, a random suffix is appended to the resource group and VM names to prevent name collisions during repeated deployments. + +```bash +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export RESOURCE_GROUP="myResourceGroupAutomate$RANDOM_SUFFIX" +export REGION="eastus2" +az group create --name $RESOURCE_GROUP --location $REGION +``` + +Results: + + +```JSON +{ + "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/myResourceGroupAutomatexxx", + "location": "eastus", + "managedBy": null, + "name": "myResourceGroupAutomatexxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +Now create a VM with [az vm create](/cli/azure/vm#az-vm-create). Use the `--custom-data` parameter to pass in your cloud-init config file. Provide the full path to the *cloud-init.txt* config if you saved the file outside of your present working directory. The following example creates a VM; note that the VM name is also appended with the random suffix. + +```bash +export VM_NAME="myAutomatedVM$RANDOM_SUFFIX" +az vm create \ + --resource-group $RESOURCE_GROUP \ + --name $VM_NAME \ + --image Ubuntu2204 \ + --admin-username azureuser \ + --generate-ssh-keys \ + --custom-data cloud-init.txt +``` + +Results: + + +```JSON +{ + "fqdns": "", + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupAutomatexxx/providers/Microsoft.Compute/virtualMachines/myAutomatedVMxxx", + "location": "eastus", + "name": "myAutomatedVMxxx", + "powerState": "VM running", + "publicIpAddress": "x.x.x.x", + "resourceGroup": "myResourceGroupAutomatexxx", + "zones": "" +} +``` + +It takes a few minutes for the VM to be created, the packages to install, and the app to start. There are background tasks that continue to run after the Azure CLI returns you to the prompt. It may be another couple of minutes before you can access the app. When the VM has been created, take note of the `publicIpAddress` displayed by the Azure CLI. This address is used to access the Node.js app via a web browser. 
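+If you're running these steps as a script, you can capture that address instead of copying it by hand. The following is a minimal sketch using `az vm show` with the `--show-details` flag; the `PUBLIC_IP_ADDRESS` variable name is our own:
+
+```bash
+# Query the public IP address of the VM and store it for the steps that follow.
+export PUBLIC_IP_ADDRESS=$(az vm show \
+    --resource-group $RESOURCE_GROUP \
+    --name $VM_NAME \
+    --show-details \
+    --query publicIps \
+    --output tsv)
+echo "The app will be reachable at http://$PUBLIC_IP_ADDRESS"
+```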
+ +To allow web traffic to reach your VM, open port 80 from the Internet with [az vm open-port](/cli/azure/vm#az-vm-open-port): + +```bash +az vm open-port --port 80 --resource-group $RESOURCE_GROUP --name $VM_NAME +``` + +Results: + + +```JSON +{ + "endpoints": [ + { + "name": "80", + "protocol": "tcp", + "publicPort": 80, + "privatePort": 80 + } + ], + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupAutomatexxx/providers/Microsoft.Compute/virtualMachines/myAutomatedVMxxx", + "location": "eastus", + "name": "myAutomatedVMxxx" +} +``` + +## Test web app + +Now you can open a web browser and enter *http://* in the address bar. Provide your own public IP address from the VM create process. Your Node.js app is displayed as shown in the following example: + +![View running NGINX site](./media/tutorial-automate-vm-deployment/nginx.png) + +## Next steps + +In this tutorial, you configured VMs on first boot with cloud-init. You learned how to: + +> [!div class="checklist"] +> * Create a cloud-init config file +> * Create a VM that uses a cloud-init file +> * View a running Node.js app after the VM is created +> * Use Key Vault to securely store certificates +> * Automate secure deployments of NGINX with cloud-init + +Advance to the next tutorial to learn how to create custom VM images. + +> [!div class="nextstepaction"] +> [Create custom VM images](./tutorial-custom-images.md) \ No newline at end of file diff --git a/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-elasticsearch.md b/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-elasticsearch.md new file mode 100644 index 000000000..1bcd70639 --- /dev/null +++ b/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-elasticsearch.md @@ -0,0 +1,304 @@ +--- +title: Deploy ElasticSearch on a development virtual machine in Azure +description: Install the Elastic Stack (ELK) onto a development Linux VM in Azure +services: virtual-machines +author: rloutlaw +manager: justhe +ms.service: azure-virtual-machines +ms.collection: linux +ms.devlang: azurecli +ms.custom: devx-track-azurecli, linux-related-content, innovation-engine +ms.topic: how-to +ms.date: 10/11/2017 +ms.author: routlaw +--- + +# Install the Elastic Stack (ELK) on an Azure VM + +**Applies to:** :heavy_check_mark: Linux VMs :heavy_check_mark: Flexible scale sets + +This article walks you through how to deploy [Elasticsearch](https://www.elastic.co/products/elasticsearch), [Logstash](https://www.elastic.co/products/logstash), and [Kibana](https://www.elastic.co/products/kibana), on an Ubuntu VM in Azure. To see the Elastic Stack in action, you can optionally connect to Kibana and work with some sample logging data. + +Additionally, you can follow the [Deploy Elastic on Azure Virtual Machines](/training/modules/deploy-elastic-azure-virtual-machines/) module for a more guided tutorial on deploying Elastic on Azure Virtual Machines. + +In this tutorial you learn how to: + +> [!div class="checklist"] +> * Create an Ubuntu VM in an Azure resource group +> * Install Elasticsearch, Logstash, and Kibana on the VM +> * Send sample data to Elasticsearch with Logstash +> * Open ports and work with data in the Kibana console + +This deployment is suitable for basic development with the Elastic Stack. For more on the Elastic Stack, including recommendations for a production environment, see the [Elastic documentation](https://www.elastic.co/guide/index.html) and the [Azure Architecture Center](/azure/architecture/elasticsearch/). 
+ +[!INCLUDE [azure-cli-prepare-your-environment.md](~/reusable-content/azure-cli/azure-cli-prepare-your-environment.md)] + +- This article requires version 2.0.4 or later of the Azure CLI. If using Azure Cloud Shell, the latest version is already installed. + +## Create a resource group + +In this section, environment variables are declared for use in subsequent commands. A random suffix is appended to resource names for uniqueness. + +```bash +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export RESOURCE_GROUP="myResourceGroup$RANDOM_SUFFIX" +export REGION="eastus2" +az group create --name $RESOURCE_GROUP --location $REGION +``` + +Results: + + +```JSON +{ + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxxxxx", + "location": "eastus", + "managedBy": null, + "name": "myResourceGroupxxxxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Create a virtual machine + +This section creates a VM with a unique name, while also generating SSH keys if they do not already exist. A random suffix is appended to ensure uniqueness. + +```bash +export VM_NAME="myVM$RANDOM_SUFFIX" +az vm create \ + --resource-group $RESOURCE_GROUP \ + --name $VM_NAME \ + --image Ubuntu2204 \ + --admin-username azureuser \ + --generate-ssh-keys +``` + +When the VM has been created, the Azure CLI shows information similar to the following example. Take note of the publicIpAddress. This address is used to access the VM. + +Results: + + +```JSON +{ + "fqdns": "", + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxxxxx/providers/Microsoft.Compute/virtualMachines/myVMxxxxxx", + "location": "eastus", + "macAddress": "xx:xx:xx:xx:xx:xx", + "powerState": "VM running", + "privateIpAddress": "10.0.0.4", + "publicIpAddress": "x.x.x.x", + "resourceGroup": "$RESOURCE_GROUP" +} +``` + +## SSH into your VM + +If you don't already know the public IP address of your VM, run the following command to list it: + +```azurecli-interactive +az network public-ip list --resource-group $RESOURCE_GROUP --query [].ipAddress +``` + +Use the following command to create an SSH session with the virtual machine. Substitute the correct public IP address of your virtual machine. In this example, the IP address is *40.68.254.142*. + +```bash +export PUBLIC_IP_ADDRESS=$(az network public-ip list --resource-group $RESOURCE_GROUP --query [].ipAddress -o tsv) +``` + +## Install the Elastic Stack + +In this section, you import the Elasticsearch signing key and update your APT sources list to include the Elastic package repository. This is followed by installing the Java runtime environment which is required for the Elastic Stack components. + +```bash +ssh azureuser@$PUBLIC_IP_ADDRESS -o StrictHostKeyChecking=no " +wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add - +echo "deb https://artifacts.elastic.co/packages/5.x/apt stable main" | sudo tee -a /etc/apt/sources.list.d/elastic-5.x.list +" +``` + +Install the Java Virtual Machine on the VM and configure the JAVA_HOME variable: + +```bash +ssh azureuser@$PUBLIC_IP_ADDRESS -o StrictHostKeyChecking=no " +sudo apt install -y openjdk-8-jre-headless +export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 +" +``` + +Run the following command to update Ubuntu package sources and install Elasticsearch, Kibana, and Logstash. 
+ +```bash +ssh azureuser@$PUBLIC_IP_ADDRESS -o StrictHostKeyChecking=no " + wget -qO elasticsearch.gpg https://artifacts.elastic.co/GPG-KEY-elasticsearch + sudo mv elasticsearch.gpg /etc/apt/trusted.gpg.d/ + + echo "deb https://artifacts.elastic.co/packages/7.x/apt stable main" | sudo tee /etc/apt/sources.list.d/elastic-7.x.list + + sudo apt update + + # Now install the ELK stack + sudo apt install -y elasticsearch kibana logstash +" +``` + +> [!NOTE] +> Detailed installation instructions, including directory layouts and initial configuration, are maintained in [Elastic's documentation](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html) + +## Start Elasticsearch + +Start Elasticsearch on your VM with the following command: + +```bash +ssh azureuser@$PUBLIC_IP_ADDRESS -o StrictHostKeyChecking=no " +sudo systemctl start elasticsearch.service +" +``` + +This command produces no output, so verify that Elasticsearch is running on the VM with this curl command: + +```bash +ssh azureuser@$PUBLIC_IP_ADDRESS -o StrictHostKeyChecking=no " +sleep 11 +sudo curl -XGET 'localhost:9200/' +" +``` + +If Elasticsearch is running, you see output like the following: + +Results: + + +```json +{ + "name" : "w6Z4NwR", + "cluster_name" : "elasticsearch", + "cluster_uuid" : "SDzCajBoSK2EkXmHvJVaDQ", + "version" : { + "number" : "5.6.3", + "build_hash" : "1a2f265", + "build_date" : "2017-10-06T20:33:39.012Z", + "build_snapshot" : false, + "lucene_version" : "6.6.1" + }, + "tagline" : "You Know, for Search" +} +``` + +## Start Logstash and add data to Elasticsearch + +Start Logstash with the following command: + +```bash +ssh azureuser@$PUBLIC_IP_ADDRESS -o StrictHostKeyChecking=no " +sudo systemctl start logstash.service +" +``` + +Test Logstash to make sure it's working correctly: + +```bash +ssh azureuser@$PUBLIC_IP_ADDRESS -o StrictHostKeyChecking=no " +# Time-limited test with file input instead of stdin +sudo timeout 11s /usr/share/logstash/bin/logstash -e 'input { file { path => "/var/log/syslog" start_position => "end" sincedb_path => "/dev/null" stat_interval => "1 second" } } output { stdout { codec => json } }' || echo "Logstash test completed" +" +``` + +This is a basic Logstash [pipeline](https://www.elastic.co/guide/en/logstash/5.6/pipeline.html) that echoes standard input to standard output. + +Set up Logstash to forward the kernel messages from this VM to Elasticsearch. To create the Logstash configuration file, run the following command which writes the configuration to a new file called vm-syslog-logstash.conf: + +```bash +ssh azureuser@$PUBLIC_IP_ADDRESS -o StrictHostKeyChecking=no " +cat << 'EOF' > vm-syslog-logstash.conf +input { + stdin { + type => "stdin-type" + } + + file { + type => "syslog" + path => [ "/var/log/*.log", "/var/log/*/*.log", "/var/log/messages", "/var/log/syslog" ] + start_position => "beginning" + } +} + +output { + + stdout { + codec => rubydebug + } + elasticsearch { + hosts => "localhost:9200" + } +} +EOF +" +``` + +Test this configuration and send the syslog data to Elasticsearch: + +```bash +# Run Logstash with the configuration for 60 seconds +sudo timeout 60s /usr/share/logstash/bin/logstash -f vm-syslog-logstash.conf & +LOGSTASH_PID=$! + +# Wait for data to be processed +echo "Processing logs for 60 seconds..." +sleep 65 + +# Verify data was sent to Elasticsearch with proper error handling +echo "Verifying data in Elasticsearch..." 
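+# Descriptive note: _cat/count?v prints a header row plus one data row
+# ("epoch timestamp count"); the pipeline below keeps only the third column
+# (the document count) and falls back to "0" if the query fails.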
+ES_COUNT=$(sudo curl -s -XGET 'localhost:9200/_cat/count?v' | tail -n 1 | awk '{print $3}' 2>/dev/null || echo "0") + +# Make sure ES_COUNT is a number or default to 0 +if ! [[ "$ES_COUNT" =~ ^[0-9]+$ ]]; then + ES_COUNT=0 + echo "Warning: Could not get valid document count from Elasticsearch" +fi + +echo "Found $ES_COUNT documents in Elasticsearch" + +if [ "$ES_COUNT" -gt 0 ]; then + echo "✅ Logstash successfully sent data to Elasticsearch" +else + echo "❌ No data found in Elasticsearch, there might be an issue with Logstash configuration" +fi +``` + +You see the syslog entries in your terminal echoed as they are sent to Elasticsearch. Use CTRL+C to exit out of Logstash once you've sent some data. + +## Start Kibana and visualize the data in Elasticsearch + +Edit the Kibana configuration file (/etc/kibana/kibana.yml) and change the IP address Kibana listens on so you can access it from your web browser: + +```text +server.host: "0.0.0.0" +``` + +Start Kibana with the following command: + +```bash +ssh azureuser@$PUBLIC_IP_ADDRESS -o StrictHostKeyChecking=no " +sudo systemctl start kibana.service +" +``` + +Open port 5601 from the Azure CLI to allow remote access to the Kibana console: + +```azurecli-interactive +az vm open-port --port 5601 --resource-group $RESOURCE_GROUP --name $VM_NAME +``` + +## Next steps + +In this tutorial, you deployed the Elastic Stack into a development VM in Azure. You learned how to: + +> [!div class="checklist"] +> * Create an Ubuntu VM in an Azure resource group +> * Install Elasticsearch, Logstash, and Kibana on the VM +> * Send sample data to Elasticsearch from Logstash +> * Open ports and work with data in the Kibana console \ No newline at end of file diff --git a/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-lamp-stack.md b/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-lamp-stack.md new file mode 100644 index 000000000..a318871e8 --- /dev/null +++ b/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-lamp-stack.md @@ -0,0 +1,186 @@ +--- +title: Tutorial - Deploy LAMP and WordPress on a VM +description: In this tutorial, you learn how to install the LAMP stack, and WordPress, on a Linux virtual machine in Azure. +author: ju-shim +ms.collection: linux +ms.service: azure-virtual-machines +ms.devlang: azurecli +ms.custom: linux-related-content, innovation-engine +ms.topic: tutorial +ms.date: 4/4/2023 +ms.author: mattmcinnes +ms.reviewer: cynthn +#Customer intent: As an IT administrator, I want to learn how to install the LAMP stack so that I can quickly prepare a Linux VM to run web applications. +--- + +# Tutorial: Install a LAMP stack on an Azure Linux VM + +**Applies to:** :heavy_check_mark: Linux VMs + +This article walks you through how to deploy an Apache web server, MySQL, and PHP (the LAMP stack) on an Ubuntu VM in Azure. To see the LAMP server in action, you can optionally install and configure a WordPress site. In this tutorial you learn how to: + +> [!div class="checklist"] +> * Create an Ubuntu VM +> * Open port 80 for web traffic +> * Install Apache, MySQL, and PHP +> * Verify installation and configuration +> * Install WordPress + +This setup is for quick tests or proof of concept. For more on the LAMP stack, including recommendations for a production environment, see the [Ubuntu documentation](https://help.ubuntu.com/community/ApacheMySQLPHP). 
+ +This tutorial uses the CLI within the [Azure Cloud Shell](/azure/cloud-shell/overview), which is constantly updated to the latest version. To open the Cloud Shell, select **Try it** from the top of any code block. + +If you choose to install and use the CLI locally, this tutorial requires that you're running the Azure CLI version 2.0.30 or later. Run `az --version` to find the version. If you need to install or upgrade, see [Install Azure CLI]( /cli/azure/install-azure-cli). + +## Create a resource group + +Create a resource group with the [az group create](/cli/azure/group) command. An Azure resource group is a logical container into which Azure resources are deployed and managed. + +The following example creates a resource group using environment variables and appends a random suffix to ensure uniqueness. + +```azurecli-interactive +export REGION="eastus2" +export RANDOM_SUFFIX="$(openssl rand -hex 3)" +export MY_RESOURCE_GROUP_NAME="myResourceGroup${RANDOM_SUFFIX}" +az group create --name "${MY_RESOURCE_GROUP_NAME}" --location $REGION +``` + +Results: + + + +```JSON +{ + "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/myResourceGroupxxxxx", + "location": "eastus", + "name": "myResourceGroupxxxxx", + "properties": { + "provisioningState": "Succeeded" + } +} +``` + +## Create a virtual machine + +Create a VM with the [az vm create](/cli/azure/vm) command. + +The following example creates a VM using environment variables. It creates a VM named *myVM* and creates SSH keys if they don't already exist in a default key location. To use a specific set of keys, use the `--ssh-key-value` option. The command also sets *azureuser* as an administrator user name. You use this name later to connect to the VM. + +```azurecli-interactive +export MY_VM_NAME="myVM${RANDOM_SUFFIX}" +export IMAGE="Ubuntu2204" +export ADMIN_USERNAME="azureuser" +az vm create \ + --resource-group "${MY_RESOURCE_GROUP_NAME}" \ + --name $MY_VM_NAME \ + --image $IMAGE \ + --admin-username $ADMIN_USERNAME \ + --generate-ssh-keys +``` + +When the VM has been created, the Azure CLI shows information similar to the following example. Take note of the `publicIpAddress`. This address is used to access the VM in later steps. + +```output +{ + "fqdns": "", + "id": "/subscriptions//resourceGroups/myResourceGroup/providers/Microsoft.Compute/virtualMachines/myVM", + "location": "eastus", + "macAddress": "00-0D-3A-23-9A-49", + "powerState": "VM running", + "privateIpAddress": "10.0.0.4", + "publicIpAddress": "40.68.254.142", + "resourceGroup": "myResourceGroup" +} +``` + +## Open port 80 for web traffic + +By default, only SSH connections are allowed into Linux VMs deployed in Azure. Because this VM is going to be a web server, you need to open port 80 from the internet. Use the [az vm open-port](/cli/azure/vm) command to open the desired port. + +```azurecli-interactive +az vm open-port --port 80 --resource-group "${MY_RESOURCE_GROUP_NAME}" --name $MY_VM_NAME +``` + +For more information about opening ports to your VM, see [Open ports](nsg-quickstart.md). + +## SSH into your VM + +If you don't already know the public IP address of your VM, run the [az network public-ip list](/cli/azure/network/public-ip) command. You need this IP address for several later steps. + +```azurecli-interactive +export PUBLIC_IP=$(az network public-ip list --resource-group "${MY_RESOURCE_GROUP_NAME}" --query [].ipAddress -o tsv) +``` + +Use the `ssh` command to create an SSH session with the virtual machine. 
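+The following is a minimal sketch of an interactive session, using the `$PUBLIC_IP` value captured above:
+
+```bash
+# Open an interactive SSH session on the VM as the azureuser account.
+ssh -o StrictHostKeyChecking=no azureuser@$PUBLIC_IP
+```
+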
The remaining steps in this tutorial run each command on the VM over SSH in the same way, so you don't need to substitute an IP address by hand.
+
+## Install Apache, MySQL, and PHP
+
+Run the following command to update Ubuntu package sources and install Apache, MySQL, and PHP. Note the caret (^) at the end of the command, which is part of the `lamp-server^` package name.
+
+```bash
+ssh -o StrictHostKeyChecking=no azureuser@$PUBLIC_IP "sudo apt-get update && sudo DEBIAN_FRONTEND=noninteractive apt-get -y install lamp-server^"
+```
+
+Because the command runs with the `-y` flag and a noninteractive frontend, the packages and their dependencies install without prompting. This process installs the minimum required PHP extensions needed to use PHP with MySQL.
+
+## Verify Apache
+
+Check the version of Apache with the following command:
+```bash
+ssh -o StrictHostKeyChecking=no azureuser@$PUBLIC_IP "apache2 -v"
+```
+
+With Apache installed and port 80 open to your VM, the web server can now be accessed from the internet. To view the Apache2 Ubuntu default page, open a web browser and enter the public IP address of the VM (the same address you used to SSH to the VM):
+
+![Apache default page][3]
+
+## Verify and secure MySQL
+
+Check the version of MySQL with the following command (note the capital `V` parameter):
+
+```bash
+ssh -o StrictHostKeyChecking=no azureuser@$PUBLIC_IP "mysql -V"
+```
+
+To help secure the installation of MySQL, including setting a root password, you can run the `sudo mysql_secure_installation` command. This command prompts you to answer several questions that help secure your MySQL installation.
+
+You can optionally set up the Validate Password Plugin (recommended). Then, set a password for the MySQL root user and configure the remaining security settings for your environment. We recommend that you answer "Y" (yes) to all questions.
+
+If you want to try MySQL features (create a MySQL database, add users, or change configuration settings), log in to MySQL. This step isn't required to complete this tutorial. To do so, run `sudo mysql -u root -p` from an SSH session on the VM and enter your root password when prompted. This launches the MySQL command-line client as the root user.
+
+When done, exit the mysql prompt by typing `\q`.
+
+## Verify PHP
+
+Check the version of PHP with the following command:
+
+```bash
+ssh -o StrictHostKeyChecking=no azureuser@$PUBLIC_IP "php -v"
+```
+
+If you want to test further, you can create a quick PHP info page to view in a browser. The following command creates the page on the VM: `ssh -o StrictHostKeyChecking=no azureuser@$PUBLIC_IP "sudo sh -c 'echo \"<?php phpinfo(); ?>\" > /var/www/html/info.php'"`
+
+Now you can check the PHP info page you created. Open a browser and go to `http://yourPublicIPAddress/info.php`. Substitute the public IP address of your VM. It should look similar to this image.
+
+![PHP info page][2]
+
+[!INCLUDE [virtual-machines-linux-tutorial-wordpress.md](../includes/virtual-machines-linux-tutorial-wordpress.md)]
+
+## Next steps
+
+In this tutorial, you deployed a LAMP server in Azure. You learned how to:
+
+> [!div class="checklist"]
+> * Create an Ubuntu VM
+> * Open port 80 for web traffic
+> * Install Apache, MySQL, and PHP
+> * Verify installation and configuration
+> * Install WordPress on the LAMP server
+
+Advance to the next tutorial to learn how to secure web servers with TLS/SSL certificates.
+ +> [!div class="nextstepaction"] +> [Secure web server with TLS](tutorial-secure-web-server.md) + +[2]: ./media/tutorial-lamp-stack/phpsuccesspage.png +[3]: ./media/tutorial-lamp-stack/apachesuccesspage.png \ No newline at end of file diff --git a/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-manage-vm.md b/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-manage-vm.md new file mode 100644 index 000000000..08dd74bc9 --- /dev/null +++ b/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-manage-vm.md @@ -0,0 +1,332 @@ +--- +title: Tutorial - Create and manage Linux VMs with the Azure CLI +description: In this tutorial, you learn how to use the Azure CLI to create and manage Linux VMs in Azure +author: ju-shim +ms.service: azure-virtual-machines +ms.collection: linux +ms.topic: tutorial +ms.date: 03/23/2023 +ms.author: jushiman +ms.custom: mvc, devx-track-azurecli, linux-related-content, innovation-engine +#Customer intent: As an IT administrator, I want to learn about common maintenance tasks so that I can create and manage Linux VMs in Azure +--- + +# Tutorial: Create and Manage Linux VMs with the Azure CLI + +**Applies to:** :heavy_check_mark: Linux VMs :heavy_check_mark: Flexible scale sets + +Azure virtual machines provide a fully configurable and flexible computing environment. This tutorial covers basic Azure virtual machine deployment items such as selecting a VM size, selecting a VM image, and deploying a VM. You learn how to: + +> [!div class="checklist"] +> * Create and connect to a VM +> * Select and use VM images +> * View and use specific VM sizes +> * Resize a VM +> * View and understand VM state + +This tutorial uses the CLI within the [Azure Cloud Shell](/azure/cloud-shell/overview), which is constantly updated to the latest version. + +If you choose to install and use the CLI locally, this tutorial requires that you are running the Azure CLI version 2.0.30 or later. Run `az --version` to find the version. If you need to install or upgrade, see [Install Azure CLI]( /cli/azure/install-azure-cli). + +## Create resource group + +Below, we declare environment variables. A random suffix is appended to resource names that need to be unique for each deployment. + +```bash +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export REGION="eastus2" +export MY_RESOURCE_GROUP_NAME="myResourceGroupVM$RANDOM_SUFFIX" +az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION +``` + +Results: + + + +```JSON +{ + "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/myResourceGroupVMxxx", + "location": "eastus2", + "name": "myResourceGroupVMxxx", + "properties": { + "provisioningState": "Succeeded" + } +} +``` + +An Azure resource group is a logical container into which Azure resources are deployed and managed. A resource group must be created before a virtual machine. In this example, a resource group named *myResourceGroupVM* is created in the *eastus2* region. + +The resource group is specified when creating or modifying a VM, which can be seen throughout this tutorial. + +## Create virtual machine + +When you create a virtual machine, several options are available such as operating system image, disk sizing, and administrative credentials. The following example creates a VM named *myVM* that runs SUSE Linux Enterprise Server (SLES). A user account named *azureuser* is created on the VM, and SSH keys are generated if they do not exist in the default key location (*~/.ssh*). 
+
+```bash
+export MY_VM_NAME="myVM$RANDOM_SUFFIX"
+az vm create \
+    --resource-group $MY_RESOURCE_GROUP_NAME \
+    --name $MY_VM_NAME \
+    --image SuseSles15SP5 \
+    --public-ip-sku Standard \
+    --admin-username azureuser \
+    --generate-ssh-keys
+```
+
+It may take a few minutes to create the VM. Once the VM has been created, the Azure CLI outputs information about the VM. Take note of the `publicIpAddress`; this address can be used to access the virtual machine.
+
+```JSON
+{
+  "fqdns": "",
+  "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/myResourceGroupVMxxx/providers/Microsoft.Compute/virtualMachines/myVMxxx",
+  "location": "eastus2",
+  "macAddress": "00-0D-3A-23-9A-49",
+  "powerState": "VM running",
+  "privateIpAddress": "10.0.0.4",
+  "publicIpAddress": "52.174.34.95",
+  "resourceGroup": "myResourceGroupVMxxx"
+}
+```
+
+## Connect to VM
+
+You can now connect to the VM with SSH in the Azure Cloud Shell or from your local computer.
+
+To connect to the VM, first retrieve the public IP address using the Azure CLI. Run the following command to store the IP address in a variable:
+
+```bash
+export IP_ADDRESS=$(az vm show --show-details --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --query publicIps --output tsv)
+```
+
+Once you have the IP address, use SSH to connect to the VM. The following command connects to the VM using the `azureuser` account and the retrieved IP address:
+
+```bash
+ssh -o StrictHostKeyChecking=no azureuser@$IP_ADDRESS
+```
+
+## Understand VM images
+
+The Azure Marketplace includes many images that can be used to create VMs. In the previous steps, a virtual machine was created using a SUSE image. In this step, the Azure CLI is used to search the marketplace for an Ubuntu image, which is then used to deploy a second virtual machine.
+
+To see a list of the most commonly used images, use the [az vm image list](/cli/azure/vm/image) command.
+
+```bash
+az vm image list --output table
+```
+
+The command output returns the most popular VM images on Azure.
+ +```output +Architecture Offer Publisher Sku Urn UrnAlias Version +-------------- ---------------------------- ---------------------- ---------------------------------- ------------------------------------------------------------------------------ ----------------------- --------- +x64 debian-10 Debian 10 Debian:debian-10:10:latest Debian latest +x64 flatcar-container-linux-free kinvolk stable kinvolk:flatcar-container-linux-free:stable:latest Flatcar latest +x64 opensuse-leap-15-3 SUSE gen2 SUSE:opensuse-leap-15-3:gen2:latest openSUSE-Leap latest +x64 RHEL RedHat 7-LVM RedHat:RHEL:7-LVM:latest RHEL latest +x64 sles-15-sp3 SUSE gen2 SUSE:sles-15-sp3:gen2:latest SLES latest +x64 0001-com-ubuntu-server-jammy Canonical 18.04-LTS Canonical:UbuntuServer:18.04-LTS:latest UbuntuLTS latest +x64 WindowsServer MicrosoftWindowsServer 2022-Datacenter MicrosoftWindowsServer:WindowsServer:2022-Datacenter:latest Win2022Datacenter latest +x64 WindowsServer MicrosoftWindowsServer 2022-datacenter-azure-edition-core MicrosoftWindowsServer:WindowsServer:2022-datacenter-azure-edition-core:latest Win2022AzureEditionCore latest +x64 WindowsServer MicrosoftWindowsServer 2019-Datacenter MicrosoftWindowsServer:WindowsServer:2019-Datacenter:latest Win2019Datacenter latest +x64 WindowsServer MicrosoftWindowsServer 2016-Datacenter MicrosoftWindowsServer:WindowsServer:2016-Datacenter:latest Win2016Datacenter latest +x64 WindowsServer MicrosoftWindowsServer 2012-R2-Datacenter MicrosoftWindowsServer:WindowsServer:2012-R2-Datacenter:latest Win2012R2Datacenter latest +x64 WindowsServer MicrosoftWindowsServer 2012-Datacenter MicrosoftWindowsServer:WindowsServer:2012-Datacenter:latest Win2012Datacenter latest +x64 WindowsServer MicrosoftWindowsServer 2008-R2-SP1 MicrosoftWindowsServer:WindowsServer:2008-R2-SP1:latest Win2008R2SP1 latest +``` + +A full list can be seen by adding the `--all` parameter. The image list can also be filtered by `--publisher` or `–-offer`. In this example, the list is filtered for all images, published by OpenLogic, with an offer that matches *0001-com-ubuntu-server-jammy*. 
+ +```bash +az vm image list --offer 0001-com-ubuntu-server-jammy --publisher Canonical --all --output table +``` + +Example partial output: + +```output +Architecture Offer Publisher Sku Urn Version +-------------- --------------------------------- ----------- --------------- ------------------------------------------------------------------------ --------------- +x64 0001-com-ubuntu-server-jammy Canonical 22_04-lts Canonical:0001-com-ubuntu-server-jammy:22_04-lts:22.04.202204200 22.04.202204200 +x64 0001-com-ubuntu-server-jammy Canonical 22_04-lts Canonical:0001-com-ubuntu-server-jammy:22_04-lts:22.04.202205060 22.04.202205060 +x64 0001-com-ubuntu-server-jammy Canonical 22_04-lts Canonical:0001-com-ubuntu-server-jammy:22_04-lts:22.04.202205280 22.04.202205280 +x64 0001-com-ubuntu-server-jammy Canonical 22_04-lts Canonical:0001-com-ubuntu-server-jammy:22_04-lts:22.04.202206040 22.04.202206040 +x64 0001-com-ubuntu-server-jammy Canonical 22_04-lts Canonical:0001-com-ubuntu-server-jammy:22_04-lts:22.04.202206090 22.04.202206090 +x64 0001-com-ubuntu-server-jammy Canonical 22_04-lts Canonical:0001-com-ubuntu-server-jammy:22_04-lts:22.04.202206160 22.04.202206160 +x64 0001-com-ubuntu-server-jammy Canonical 22_04-lts Canonical:0001-com-ubuntu-server-jammy:22_04-lts:22.04.202206220 22.04.202206220 +x64 0001-com-ubuntu-server-jammy Canonical 22_04-lts Canonical:0001-com-ubuntu-server-jammy:22_04-lts:22.04.202207060 22.04.202207060 +``` + +> [!NOTE] +> Canonical has changed the **Offer** names they use for the most recent versions. Before Ubuntu 20.04, the **Offer** name is UbuntuServer. For Ubuntu 20.04 the **Offer** name is `0001-com-ubuntu-server-focal` and for Ubuntu 22.04 it's `0001-com-ubuntu-server-jammy`. + +To deploy a VM using a specific image, take note of the value in the *Urn* column, which consists of the publisher, offer, SKU, and optionally a version number to [identify](cli-ps-findimage.md#terminology) the image. When specifying the image, the image version number can be replaced with `latest`, which selects the latest version of the distribution. In this example, the `--image` parameter is used to specify the latest version of a Ubuntu 22.04. + +```bash +export MY_VM2_NAME="myVM2$RANDOM_SUFFIX" +az vm create --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM2_NAME --image Canonical:0001-com-ubuntu-server-jammy:22_04-lts:latest --generate-ssh-keys +``` + +## Understand VM sizes + +A virtual machine size determines the amount of compute resources such as CPU, GPU, and memory that are made available to the virtual machine. Virtual machines need to be sized appropriately for the expected work load. If workload increases, an existing virtual machine can be resized. + +### VM Sizes + +The following table categorizes sizes into use cases. + +| Type | Description | +|--------------------------|------------------------------------------------------------------------------------------------------------------------------------| +| [General purpose](../sizes-general.md) | Balanced CPU-to-memory. Ideal for dev / test and small to medium applications and data solutions. | +| [Compute optimized](../sizes-compute.md) | High CPU-to-memory. Good for medium traffic applications, network appliances, and batch processes. | +| [Memory optimized](../sizes-memory.md) | High memory-to-core. Great for relational databases, medium to large caches, and in-memory analytics. | +| [Storage optimized](../sizes-storage.md) | High disk throughput and IO. Ideal for Big Data, SQL, and NoSQL databases. 
| +| [GPU](../sizes-gpu.md) | Specialized VMs targeted for heavy graphic rendering and video editing. | +| [High performance](../sizes-hpc.md) | Our most powerful CPU VMs with optional high-throughput network interfaces (RDMA). | + +### Find available VM sizes + +To see a list of VM sizes available in a particular region, use the [az vm list-sizes](/cli/azure/vm) command. + +```bash +az vm list-sizes --location $REGION --output table +``` + +Example partial output: + +```output + MaxDataDiskCount MemoryInMb Name NumberOfCores OsDiskSizeInMb ResourceDiskSizeInMb +------------------ ------------ ---------------------- --------------- ---------------- ---------------------- +4 8192 Standard_D2ds_v4 2 1047552 76800 +8 16384 Standard_D4ds_v4 4 1047552 153600 +16 32768 Standard_D8ds_v4 8 1047552 307200 +32 65536 Standard_D16ds_v4 16 1047552 614400 +32 131072 Standard_D32ds_v4 32 1047552 1228800 +32 196608 Standard_D48ds_v4 48 1047552 1843200 +32 262144 Standard_D64ds_v4 64 1047552 2457600 +4 8192 Standard_D2ds_v5 2 1047552 76800 +8 16384 Standard_D4ds_v5 4 1047552 153600 +16 32768 Standard_D8ds_v5 8 1047552 307200 +32 65536 Standard_D16ds_v5 16 1047552 614400 +32 131072 Standard_D32ds_v5 32 1047552 1228800 +32 196608 Standard_D48ds_v5 48 1047552 1843200 +32 262144 Standard_D64ds_v5 64 1047552 2457600 +32 393216 Standard_D96ds_v5 96 1047552 3686400 +``` + +### Create VM with specific size + +In the previous VM creation example, a size was not provided, which results in a default size. A VM size can be selected at creation time using [az vm create](/cli/azure/vm) and the `--size` parameter. + +```bash +export MY_VM3_NAME="myVM3$RANDOM_SUFFIX" +az vm create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_VM3_NAME \ + --image SuseSles15SP5 \ + --size Standard_D2ds_v4 \ + --generate-ssh-keys +``` + +### Resize a VM + +After a VM has been deployed, it can be resized to increase or decrease resource allocation. You can view the current size of a VM with [az vm show](/cli/azure/vm): + +```bash +az vm show --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --query hardwareProfile.vmSize +``` + +Before resizing a VM, check if the desired size is available on the current Azure cluster. The [az vm list-vm-resize-options](/cli/azure/vm) command returns the list of sizes. + +```bash +az vm list-vm-resize-options --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --query [].name +``` + +If the desired size is available, the VM can be resized from a powered-on state, although it will be rebooted during the operation. Use the [az vm resize]( /cli/azure/vm) command to perform the resize. + +```bash +az vm resize --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --size Standard_D4s_v3 +``` + +If the desired size is not available on the current cluster, the VM needs to be deallocated before the resize operation can occur. Use the [az vm deallocate]( /cli/azure/vm) command to stop and deallocate the VM. Note that when the VM is powered back on, any data on the temporary disk may be removed. The public IP address also changes unless a static IP address is being used. Once deallocated, the resize can occur. + +After the resize, the VM can be started. + +```bash +az vm start --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME +``` + +## VM power states + +An Azure VM can have one of many power states. This state represents the current state of the VM from the standpoint of the hypervisor. 
+ +### Power states + +| Power State | Description | +|-------------|-------------| +| Starting | Indicates the virtual machine is being started. | +| Running | Indicates that the virtual machine is running. | +| Stopping | Indicates that the virtual machine is being stopped. | +| Stopped | Indicates that the virtual machine is stopped. Virtual machines in the stopped state still incur compute charges. | +| Deallocating| Indicates that the virtual machine is being deallocated. | +| Deallocated | Indicates that the virtual machine is removed from the hypervisor but still available in the control plane. Virtual machines in the Deallocated state do not incur compute charges. | +| - | Indicates that the power state of the virtual machine is unknown. | + +### Find the power state + +To retrieve the state of a particular VM, use the [az vm get-instance-view](/cli/azure/vm) command. Be sure to specify a valid name for a virtual machine and resource group. + +```bash +az vm get-instance-view \ + --name $MY_VM_NAME \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --query instanceView.statuses[1] --output table +``` + +Output: + +```output +Code Level DisplayStatus +------------------ ------- --------------- +PowerState/running Info VM running +``` + +To retrieve the power state of all the VMs in your subscription, use the [Virtual Machines - List All API](/rest/api/compute/virtualmachines/listall) with parameter **statusOnly** set to *true*. + +## Management tasks + +During the life-cycle of a virtual machine, you may want to run management tasks such as starting, stopping, or deleting a virtual machine. Additionally, you may want to create scripts to automate repetitive or complex tasks. Using the Azure CLI, many common management tasks can be run from the command line or in scripts. + +### Get IP address + +This command returns the private and public IP addresses of a virtual machine. + +```bash +az vm list-ip-addresses --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME --output table +``` + +### Stop virtual machine + +```bash +az vm stop --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME +``` + +### Start virtual machine + +```bash +az vm start --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME +``` + +### Deleting VM resources + +Depending on how you delete a VM, it may only delete the VM resource, not the networking and disk resources. You can change the default behavior to delete other resources when you delete the VM. For more information, see [Delete a VM and attached resources](../delete.md). + +Deleting a resource group also deletes all resources in the resource group, like the VM, virtual network, and disk. The `--no-wait` parameter returns control to the prompt without waiting for the operation to complete. The `--yes` parameter confirms that you wish to delete the resources without an additional prompt to do so. + +## Next steps + +In this tutorial, you learned about basic VM creation and management such as how to: + +> [!div class="checklist"] +> * Create and connect to a VM +> * Select and use VM images +> * View and use specific VM sizes +> * Resize a VM +> * View and understand VM state + +Advance to the next tutorial to learn about VM disks. 
+ +> [!div class="nextstepaction"] +> [Create and Manage VM disks](./tutorial-manage-disks.md) \ No newline at end of file diff --git a/scenarios/azure-databases-docs/articles/mysql/flexible-server/tutorial-deploy-wordpress-on-aks.md b/scenarios/azure-databases-docs/articles/mysql/flexible-server/tutorial-deploy-wordpress-on-aks.md new file mode 100644 index 000000000..7abe980b7 --- /dev/null +++ b/scenarios/azure-databases-docs/articles/mysql/flexible-server/tutorial-deploy-wordpress-on-aks.md @@ -0,0 +1,488 @@ +--- +title: 'Tutorial: Deploy WordPress on AKS cluster by using Azure CLI' +description: Learn how to quickly build and deploy WordPress on AKS with Azure Database for MySQL - Flexible Server. +ms.service: mysql +ms.subservice: flexible-server +author: mksuni +ms.author: sumuth +ms.topic: tutorial +ms.date: 3/20/2024 +ms.custom: vc, devx-track-azurecli, innovation-engine, linux-related-content +--- + +# Tutorial: Deploy WordPress app on AKS with Azure Database for MySQL - Flexible Server + +[!INCLUDE[applies-to-mysql-flexible-server](../includes/applies-to-mysql-flexible-server.md)] + +[![Deploy to Azure](https://aka.ms/deploytoazurebutton)](https://go.microsoft.com/fwlink/?linkid=2286232) + +In this tutorial, you deploy a scalable WordPress application secured via HTTPS on an Azure Kubernetes Service (AKS) cluster with Azure Database for MySQL flexible server using the Azure CLI. +**[AKS](../../aks/intro-kubernetes.md)** is a managed Kubernetes service that lets you quickly deploy and manage clusters. **[Azure Database for MySQL flexible server](overview.md)** is a fully managed database service designed to provide more granular control and flexibility over database management functions and configuration settings. + +> [!NOTE] +> This tutorial assumes a basic understanding of Kubernetes concepts, WordPress, and MySQL. + +[!INCLUDE [flexible-server-free-trial-note](../includes/flexible-server-free-trial-note.md)] + +## Prerequisites + +Before you get started, make sure you're logged into Azure CLI and have selected a subscription to use with the CLI. Ensure you have [Helm installed](https://helm.sh/docs/intro/install/). + +> [!NOTE] +> If you're running the commands in this tutorial locally instead of Azure Cloud Shell, run the commands as administrator. + +## Create a resource group + +An Azure resource group is a logical group in which Azure resources are deployed and managed. All resources must be placed in a resource group. The following command creates a resource group with the previously defined `$MY_RESOURCE_GROUP_NAME` and `$REGION` parameters. + +```bash +export RANDOM_ID="$(openssl rand -hex 3)" +export MY_RESOURCE_GROUP_NAME="myWordPressAKSResourceGroup$RANDOM_ID" +export REGION="westeurope" +az group create \ + --name $MY_RESOURCE_GROUP_NAME \ + --location $REGION +``` + +Results: + +```json +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myWordPressAKSResourceGroupXXX", + "location": "eastus", + "managedBy": null, + "name": "testResourceGroup", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +> [!NOTE] +> The location for the resource group is where resource group metadata is stored. It's also where your resources run in Azure if you don't specify another region during resource creation. + +## Create a virtual network and subnet + +A virtual network is the fundamental building block for private networks in Azure. 
Azure Virtual Network enables Azure resources like VMs to securely communicate with each other and the internet. + +```bash +export NETWORK_PREFIX="$(($RANDOM % 253 + 1))" +export MY_VNET_PREFIX="10.$NETWORK_PREFIX.0.0/16" +export MY_SN_PREFIX="10.$NETWORK_PREFIX.0.0/22" +export MY_VNET_NAME="myVNet$RANDOM_ID" +export MY_SN_NAME="mySN$RANDOM_ID" +az network vnet create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --location $REGION \ + --name $MY_VNET_NAME \ + --address-prefix $MY_VNET_PREFIX \ + --subnet-name $MY_SN_NAME \ + --subnet-prefixes $MY_SN_PREFIX +``` + +Results: + +```json +{ + "newVNet": { + "addressSpace": { + "addressPrefixes": [ + "10.210.0.0/16" + ] + }, + "enableDdosProtection": false, + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/myWordPressAKSResourceGroupXXX/providers/Microsoft.Network/virtualNetworks/myVNetXXX", + "location": "eastus", + "name": "myVNet210", + "provisioningState": "Succeeded", + "resourceGroup": "myWordPressAKSResourceGroupXXX", + "subnets": [ + { + "addressPrefix": "10.210.0.0/22", + "delegations": [], + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/myWordPressAKSResourceGroupXXX/providers/Microsoft.Network/virtualNetworks/myVNetXXX/subnets/mySNXXX", + "name": "mySN210", + "privateEndpointNetworkPolicies": "Disabled", + "privateLinkServiceNetworkPolicies": "Enabled", + "provisioningState": "Succeeded", + "resourceGroup": "myWordPressAKSResourceGroupXXX", + "type": "Microsoft.Network/virtualNetworks/subnets" + } + ], + "type": "Microsoft.Network/virtualNetworks", + "virtualNetworkPeerings": [] + } +} +``` + +## Create an Azure Database for MySQL flexible server instance + +Azure Database for MySQL flexible server is a managed service that you can use to run, manage, and scale highly available MySQL servers in the cloud. Create an Azure Database for MySQL flexible server instance with the [az mysql flexible-server create](/cli/azure/mysql/flexible-server) command. A server can contain multiple databases. 
The following command creates a server using service defaults and variable values from your Azure CLI's local context: + +```bash +export MY_DNS_LABEL="mydnslabel$RANDOM_ID" +export MY_MYSQL_DB_NAME="mydb$RANDOM_ID" +export MY_MYSQL_ADMIN_USERNAME="dbadmin$RANDOM_ID" +export MY_MYSQL_ADMIN_PW="$(openssl rand -base64 32)" +export MY_MYSQL_SN_NAME="myMySQLSN$RANDOM_ID" +az mysql flexible-server create \ + --admin-password $MY_MYSQL_ADMIN_PW \ + --admin-user $MY_MYSQL_ADMIN_USERNAME \ + --auto-scale-iops Disabled \ + --high-availability Disabled \ + --iops 500 \ + --location $REGION \ + --name $MY_MYSQL_DB_NAME \ + --database-name wordpress \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --sku-name Standard_B2s \ + --storage-auto-grow Disabled \ + --storage-size 20 \ + --subnet $MY_MYSQL_SN_NAME \ + --private-dns-zone $MY_DNS_LABEL.private.mysql.database.azure.com \ + --tier Burstable \ + --version 8.0.21 \ + --vnet $MY_VNET_NAME \ + --yes -o JSON +``` + +Results: + +```json +{ + "databaseName": "wordpress", + "host": "mydbxxx.mysql.database.azure.com", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myWordPressAKSResourceGroupXXX/providers/Microsoft.DBforMySQL/flexibleServers/mydbXXX", + "location": "East US", + "resourceGroup": "myWordPressAKSResourceGroupXXX", + "skuname": "Standard_B2s", + "subnetId": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myWordPressAKSResourceGroupXXX/providers/Microsoft.Network/virtualNetworks/myVNetXXX/subnets/myMySQLSNXXX", + "username": "dbadminxxx", + "version": "8.0.21" +} +``` + +The server created has the following attributes: + +- A new empty database is created when the server is first provisioned. +- The server name, admin username, admin password, resource group name, and location are already specified in the local context environment of the cloud shell and are in the same location as your resource group and other Azure components. +- The service defaults for the remaining server configurations are compute tier (Burstable), compute size/SKU (Standard_B2s), backup retention period (seven days), and MySQL version (8.0.21). +- The default connectivity method is Private access (virtual network integration) with a linked virtual network and an auto generated subnet. + +> [!NOTE] +> The connectivity method cannot be changed after creating the server. For example, if you selected `Private access (VNet Integration)` during creation, then you cannot change to `Public access (allowed IP addresses)` after creation. We highly recommend creating a server with Private access to securely access your server using VNet Integration. Learn more about Private access in the [concepts article](./concepts-networking-vnet.md). + +If you'd like to change any defaults, refer to the Azure CLI [reference documentation](/cli/azure//mysql/flexible-server) for the complete list of configurable CLI parameters. + +## Check the Azure Database for MySQL - Flexible Server status + +It takes a few minutes to create the Azure Database for MySQL - Flexible Server and supporting resources. 
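+The following loop polls the server state every 10 seconds, for up to 10 minutes, and continues once the state reports `Ready`: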
+ +```bash +runtime="10 minute"; endtime=$(date -ud "$runtime" +%s); while [[ $(date -u +%s) -le $endtime ]]; do STATUS=$(az mysql flexible-server show -g $MY_RESOURCE_GROUP_NAME -n $MY_MYSQL_DB_NAME --query state -o tsv); echo $STATUS; if [ "$STATUS" = 'Ready' ]; then break; else sleep 10; fi; done +``` + +## Configure server parameters in Azure Database for MySQL - Flexible Server + +You can manage Azure Database for MySQL - Flexible Server configuration using server parameters. The server parameters are configured with the default and recommended value when you create the server. + +To show details about a particular parameter for a server, run the [az mysql flexible-server parameter show](/cli/azure/mysql/flexible-server/parameter) command. + +### Disable Azure Database for MySQL - Flexible Server SSL connection parameter for WordPress integration + +You can also modify the value of certain server parameters to update the underlying configuration values for the MySQL server engine. To update the server parameter, use the [az mysql flexible-server parameter set](/cli/azure/mysql/flexible-server/parameter#az-mysql-flexible-server-parameter-set) command. + +```bash +az mysql flexible-server parameter set \ + -g $MY_RESOURCE_GROUP_NAME \ + -s $MY_MYSQL_DB_NAME \ + -n require_secure_transport -v "OFF" -o JSON +``` + +Results: + +```json +{ + "allowedValues": "ON,OFF", + "currentValue": "OFF", + "dataType": "Enumeration", + "defaultValue": "ON", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myWordPressAKSResourceGroupXXX/providers/Microsoft.DBforMySQL/flexibleServers/mydbXXX/configurations/require_secure_transport", + "isConfigPendingRestart": "False", + "isDynamicConfig": "True", + "isReadOnly": "False", + "name": "require_secure_transport", + "resourceGroup": "myWordPressAKSResourceGroupXXX", + "source": "user-override", + "systemData": null, + "type": "Microsoft.DBforMySQL/flexibleServers/configurations", + "value": "OFF" +} +``` + +## Create AKS cluster + +To create an AKS cluster with Container Insights, use the [az aks create](/cli/azure/aks#az-aks-create) command with the **--enable-addons** monitoring parameter. The following example creates an autoscaling, availability zone-enabled cluster named **myAKSCluster**: + +This action takes a few minutes. + +```bash +export MY_SN_ID=$(az network vnet subnet list --resource-group $MY_RESOURCE_GROUP_NAME --vnet-name $MY_VNET_NAME --query "[0].id" --output tsv) +export MY_AKS_CLUSTER_NAME="myAKSCluster$RANDOM_ID" + +az aks create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_AKS_CLUSTER_NAME \ + --auto-upgrade-channel stable \ + --enable-cluster-autoscaler \ + --enable-addons monitoring \ + --location $REGION \ + --node-count 1 \ + --min-count 1 \ + --max-count 3 \ + --network-plugin azure \ + --network-policy azure \ + --vnet-subnet-id $MY_SN_ID \ + --no-ssh-key \ + --node-vm-size Standard_DS2_v2 \ + --service-cidr 10.255.0.0/24 \ + --dns-service-ip 10.255.0.10 \ + --zones 1 2 3 +``` +> [!NOTE] +> When creating an AKS cluster, a second resource group is automatically created to store the AKS resources. See [Why are two resource groups created with AKS?](../../aks/faq.md#why-are-two-resource-groups-created-with-aks) + +## Connect to the cluster + +To manage a Kubernetes cluster, use [kubectl](https://kubernetes.io/docs/reference/kubectl/overview/), the Kubernetes command-line client. If you use Azure Cloud Shell, `kubectl` is already installed. 
The following example installs `kubectl` locally using the [az aks install-cli](/cli/azure/aks#az-aks-install-cli) command. + + ```bash + if ! [ -x "$(command -v kubectl)" ]; then az aks install-cli; fi +``` + +## Load credentials + +Next, configure `kubectl` to connect to your Kubernetes cluster using the [az aks get-credentials](/cli/azure/aks#az-aks-get-credentials) command. This command downloads credentials and configures the Kubernetes CLI to use them. The command uses `~/.kube/config`, the default location for the [Kubernetes configuration file](https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/). You can specify a different location for your Kubernetes configuration file using the **--file** argument. + +```bash +az aks get-credentials --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_AKS_CLUSTER_NAME --overwrite-existing +``` + +## Verify Connection +To verify the connection to your cluster, use the [kubectl get]( https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#get) command to return a list of the cluster nodes. + +```bash +kubectl get nodes +``` + +## Setup FQDN + +You can configure your ingress controller with a static public IP address. The static public IP address remains if you delete your ingress controller. The IP address doesn't remain if you delete your AKS cluster. +When you upgrade your ingress controller, you must pass a parameter to the Helm release to ensure the ingress controller service is made aware of the load balancer that will be allocated to it. For the HTTPS certificates to work correctly, use a DNS label to configure a fully qualified domain name (FQDN) for the ingress controller IP address. Your FQDN should follow this form: $MY_DNS_LABEL.AZURE_REGION_NAME.cloudapp.azure.com. + +```bash +export MY_PUBLIC_IP_NAME="myPublicIP$RANDOM_ID" +export MY_STATIC_IP=$(az network public-ip create --resource-group MC_${MY_RESOURCE_GROUP_NAME}_${MY_AKS_CLUSTER_NAME}_${REGION} --location ${REGION} --name ${MY_PUBLIC_IP_NAME} --dns-name ${MY_DNS_LABEL} --sku Standard --allocation-method static --version IPv4 --zone 1 2 3 --query publicIp.ipAddress -o tsv) +``` + +## Install NGINX ingress controller + +Next, you add the ingress-nginx Helm repository, update the local Helm Chart repository cache, and install ingress-nginx addon via Helm. You can set the DNS label with the **--set controller.service.annotations."service\.beta\.kubernetes\.io/azure-dns-label-name"=""** parameter either when you first deploy the ingress controller or later. In this example, you specify your own public IP address that you created in the previous step with the **--set controller.service.loadBalancerIP="" parameter**. + +```bash + helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx + helm repo update + helm upgrade --install --cleanup-on-fail --atomic ingress-nginx ingress-nginx/ingress-nginx \ + --namespace ingress-nginx \ + --create-namespace \ + --set controller.service.annotations."service\.beta\.kubernetes\.io/azure-dns-label-name"=$MY_DNS_LABEL \ + --set controller.service.loadBalancerIP=$MY_STATIC_IP \ + --set controller.service.annotations."service\.beta\.kubernetes\.io/azure-load-balancer-health-probe-request-path"=/healthz \ + --wait --timeout 10m0s +``` + +## Add HTTPS termination to custom domain + +At this point in the tutorial, you have an AKS web app with NGINX as the ingress controller and a custom domain you can use to access your application. 
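+
+Before moving on to the certificate, you can optionally confirm that the ingress controller picked up the static IP address and DNS label assigned above. The following check is a minimal sketch; it assumes the ingress-nginx chart's default controller service name (`ingress-nginx-controller`) in the `ingress-nginx` namespace used earlier.
+
+```bash
+# Show the ingress controller service; the EXTERNAL-IP column should match the static IP created earlier
+kubectl get service ingress-nginx-controller --namespace ingress-nginx
+echo "Expected external IP: $MY_STATIC_IP"
+```
+
+If the EXTERNAL-IP column is still pending, wait a minute and run the command again.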
The next step is to add an SSL certificate to the domain so that users can reach your application securely via https.
+
+### Set Up Cert Manager
+
+To add HTTPS, we're going to use Cert Manager. Cert Manager is an open source tool for obtaining and managing SSL certificates for Kubernetes deployments. Cert Manager obtains certificates from popular public issuers and private issuers, ensures the certificates are valid and up to date, and attempts to renew certificates at a configured time before they expire.
+1. In order to install cert-manager, we must first create a namespace to run it in. This tutorial installs cert-manager into the cert-manager namespace. You can run cert-manager in a different namespace, but you must make modifications to the deployment manifests.
+2. We can now install cert-manager. All resources are included in a single YAML manifest file. Install the manifest file with the following command:
+3. Add the `certmanager.k8s.io/disable-validation: "true"` label to the cert-manager namespace by running the following. This allows the system resources that cert-manager requires to bootstrap TLS to be created in its own namespace.
+
+```bash
+kubectl create namespace cert-manager
+kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.7.0/cert-manager.crds.yaml
+kubectl label namespace cert-manager certmanager.k8s.io/disable-validation=true
+```
+
+## Obtain certificate via Helm Charts
+
+Helm is a Kubernetes deployment tool for automating the creation, packaging, configuration, and deployment of applications and services to Kubernetes clusters.
+Cert-manager provides Helm charts as a first-class method of installation on Kubernetes.
+1. Add the Jetstack Helm repository. This repository is the only supported source of cert-manager charts. There are other mirrors and copies across the internet, but those are unofficial and could present a security risk.
+2. Update the local Helm Chart repository cache.
+3. Install the cert-manager addon via Helm.
+4. Apply the certificate issuer YAML file. ClusterIssuers are Kubernetes resources that represent certificate authorities (CAs) that can generate signed certificates by honoring certificate signing requests. All cert-manager certificates require a referenced issuer that is in a ready condition to attempt to honor the request. You can find the issuer we're using in the `cluster-issuer-prod.yml` file.
+
+```bash
+helm repo add jetstack https://charts.jetstack.io
+helm repo update
+helm upgrade --install --cleanup-on-fail --atomic \
+  --namespace cert-manager \
+  --version v1.7.0 \
+  --wait --timeout 10m0s \
+  cert-manager jetstack/cert-manager
+export SSL_EMAIL_ADDRESS="$(az account show --query user.name --output tsv)"
+cluster_issuer_variables=$(
+```text
+Release "wordpress" does not exist. Installing it now.
+NAME: wordpress
+LAST DEPLOYED: Tue Oct 24 16:19:35 2023
+NAMESPACE: wordpress
+STATUS: deployed
+REVISION: 1
+TEST SUITE: None
+NOTES:
+CHART NAME: wordpress
+CHART VERSION: 18.0.8
+APP VERSION: 6.3.2
+
+** Please be patient while the chart is being deployed **
+
+Your WordPress site can be accessed through the following DNS name from within your cluster:
+
+    wordpress.wordpress.svc.cluster.local (port 80)
+
+To access your WordPress site from outside the cluster follow the steps below:
+
+1. Get the WordPress URL and associate WordPress hostname to your cluster external IP:
+
+   export CLUSTER_IP=$(minikube ip) # On Minikube.
Use: `kubectl cluster-info` on others K8s clusters + echo "WordPress URL: https://mydnslabelxxx.eastus.cloudapp.azure.com/" + echo "$CLUSTER_IP mydnslabelxxx.eastus.cloudapp.azure.com" | sudo tee -a /etc/hosts + export CLUSTER_IP=$(minikube ip) # On Minikube. Use: `kubectl cluster-info` on others K8s clusters + echo "WordPress URL: https://mydnslabelxxx.eastus.cloudapp.azure.com/" + echo "$CLUSTER_IP mydnslabelxxx.eastus.cloudapp.azure.com" | sudo tee -a /etc/hosts + +2. Open a browser and access WordPress using the obtained URL. + +3. Login with the following credentials below to see your blog: + + echo Username: wpcliadmin + echo Password: $(kubectl get secret --namespace wordpress wordpress -o jsonpath="{.data.wordpress-password}" | base64 -d) +``` + +## Browse your AKS deployment secured via HTTPS + +Wait for the cluster to setup. It often takes 2-3 minutes for the SSL certificate to propagate and about 5 minutes to have all WordPress POD replicas ready and the site to be fully reachable via https. + +```bash +runtime="5 minute" +endtime=$(date -ud "$runtime" +%s) +while [[ $(date -u +%s) -le $endtime ]]; do + export DEPLOYMENT_REPLICAS=$(kubectl -n wordpress get deployment wordpress -o=jsonpath='{.status.availableReplicas}'); + echo Current number of replicas "$DEPLOYMENT_REPLICAS/3"; + if [ "$DEPLOYMENT_REPLICAS" = "3" ]; then + break; + else + sleep 10; + fi; +done +``` + +## Verify Site works +Check that WordPress content is delivered correctly using the following command: + +```bash +if curl -I -s -f https://$FQDN > /dev/null ; then + curl -L -s -f https://$FQDN 2> /dev/null | head -n 9 +else + exit 1 +fi; +``` + +Results: + +```HTML +{ + + + + + + +WordPress on AKS + + +} +``` + +## Visit Application +Visit the website through the following URL: + +```bash +echo "You can now visit your web server at https://$FQDN" +``` + +## Clean up the resources (optional) + +To avoid Azure charges, you should clean up unneeded resources. When you no longer need the cluster, use the [az group delete](/cli/azure/group#az-group-delete) command to remove the resource group, container service, and all related resources. + +> [!NOTE] +> When you delete the cluster, the Microsoft Entra service principal used by the AKS cluster is not removed. For steps on how to remove the service principal, see [AKS service principal considerations and deletion](../../aks/kubernetes-service-principal.md#other-considerations). If you used a managed identity, the identity is managed by the platform and does not require removal. 
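+
+If you do decide to clean up, a single resource group deletion removes the AKS cluster, the MySQL flexible server, and the networking resources created in this tutorial. The following is a minimal sketch using the resource group variable defined earlier; run it manually once you no longer need any of the resources.
+
+```bash
+# Deletes the resource group and everything in it without waiting for completion
+az group delete --name $MY_RESOURCE_GROUP_NAME --yes --no-wait
+```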
+
+## Next steps
+
+- Learn how to [access the Kubernetes web dashboard](../../aks/kubernetes-dashboard.md) for your AKS cluster
+- Learn how to [scale your cluster](../../aks/tutorial-kubernetes-scale.md)
+- Learn how to manage your [Azure Database for MySQL flexible server instance](./quickstart-create-server-cli.md)
+- Learn how to [configure server parameters](./how-to-configure-server-parameters-cli.md) for your database server
diff --git a/scenarios/azure-dev-docs/articles/ansible/vm-configure.md b/scenarios/azure-dev-docs/articles/ansible/vm-configure.md
new file mode 100644
index 000000000..8c6eca78a
--- /dev/null
+++ b/scenarios/azure-dev-docs/articles/ansible/vm-configure.md
@@ -0,0 +1,157 @@
+---
+title: Create a Linux virtual machine in Azure using Ansible
+description: Learn how to create a Linux virtual machine in Azure using Ansible
+keywords: ansible, azure, devops, virtual machine
+ms.topic: tutorial
+ms.date: 08/14/2024
+ms.custom: devx-track-ansible, linux-related-content, innovation-engine
+author:
+ms.author:
+---
+
+# Create a Linux virtual machine in Azure using Ansible
+
+This article presents a sample Ansible playbook for configuring a Linux virtual machine.
+
+In this article, you learn how to:
+
+> [!div class="checklist"]
+> * Create a resource group
+> * Create a virtual network
+> * Create a public IP address
+> * Create a network security group
+> * Create a virtual network interface card
+> * Create a virtual machine
+
+## Configure your environment
+
+- **Azure subscription**: If you don't have an Azure subscription, create a [free account](https://azure.microsoft.com/free/?ref=microsoft.com&utm_source=microsoft.com&utm_medium=docs&utm_campaign=visualstudio) before you begin.
+- **Install Ansible**: Do one of the following options:
+
+    - [Install](/azure/ansible/ansible-install-configure#install-ansible-on-an-azure-linux-virtual-machine) and [configure](/azure/ansible/ansible-install-configure#create-azure-credentials) Ansible on a Linux virtual machine
+    - [Configure Azure Cloud Shell](/azure/cloud-shell/quickstart)
+
+## Implement the Ansible playbook
+
+1. Create a directory in which to test and run the sample Ansible code and make it the current directory.
+
+2. Create a file named main.yml and insert the following code. In the playbook below, the resource group name and other relevant properties use environment variables so that they are unique for each run.
+ +```bash +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export REGION="eastus2" +export MY_RESOURCE_GROUP="myResourceGroup$RANDOM_SUFFIX" +export MY_VM_NAME="myVM$RANDOM_SUFFIX" +export MY_VNET_NAME="myVnet$RANDOM_SUFFIX" +export MY_SUBNET_NAME="mySubnet$RANDOM_SUFFIX" +export MY_NIC_NAME="myNIC$RANDOM_SUFFIX" +export MY_PUBLIC_IP_NAME="myPublicIP$RANDOM_SUFFIX" +export MY_NSG_NAME="myNetworkSecurityGroup$RANDOM_SUFFIX" + +cat > main.yml <<'EOF' +- name: Create Azure VM + hosts: localhost + connection: local + tasks: + - name: Create resource group + azure_rm_resourcegroup: + name: "{{ lookup('env', 'MY_RESOURCE_GROUP') }}" + location: "{{ lookup('env', 'REGION') }}" + - name: Create virtual network + azure_rm_virtualnetwork: + resource_group: "{{ lookup('env', 'MY_RESOURCE_GROUP') }}" + name: "{{ lookup('env', 'MY_VNET_NAME') }}" + address_prefixes: "10.0.0.0/16" + - name: Add subnet + azure_rm_subnet: + resource_group: "{{ lookup('env', 'MY_RESOURCE_GROUP') }}" + name: "{{ lookup('env', 'MY_SUBNET_NAME') }}" + address_prefix: "10.0.1.0/24" + virtual_network: "{{ lookup('env', 'MY_VNET_NAME') }}" + - name: Create public IP address + azure_rm_publicipaddress: + resource_group: "{{ lookup('env', 'MY_RESOURCE_GROUP') }}" + allocation_method: Static + name: "{{ lookup('env', 'MY_PUBLIC_IP_NAME') }}" + register: output_ip_address + - name: Public IP of VM + debug: + msg: "The public IP is {{ output_ip_address.state.ip_address }}." + - name: Create Network Security Group that allows SSH + azure_rm_securitygroup: + resource_group: "{{ lookup('env', 'MY_RESOURCE_GROUP') }}" + name: "{{ lookup('env', 'MY_NSG_NAME') }}" + rules: + - name: SSH + protocol: Tcp + destination_port_range: 22 + access: Allow + priority: 1001 + direction: Inbound + - name: Create virtual network interface card + azure_rm_networkinterface: + resource_group: "{{ lookup('env', 'MY_RESOURCE_GROUP') }}" + name: "{{ lookup('env', 'MY_NIC_NAME') }}" + virtual_network: "{{ lookup('env', 'MY_VNET_NAME') }}" + subnet_name: "{{ lookup('env', 'MY_SUBNET_NAME') }}" + security_group: "{{ lookup('env', 'MY_NSG_NAME') }}" + ip_configurations: + - name: ipconfig1 + public_ip_address_name: "{{ lookup('env', 'MY_PUBLIC_IP_NAME') }}" + primary: yes + - name: Create VM + azure_rm_virtualmachine: + resource_group: "{{ lookup('env', 'MY_RESOURCE_GROUP') }}" + name: "{{ lookup('env', 'MY_VM_NAME') }}" + vm_size: Standard_DS1_v2 + admin_username: azureuser + ssh_password_enabled: false + generate_ssh_keys: yes # This will automatically generate keys if they don't exist + network_interfaces: "{{ lookup('env', 'MY_NIC_NAME') }}" + image: + offer: 0001-com-ubuntu-server-jammy + publisher: Canonical + sku: 22_04-lts + version: latest +EOF +``` + +## Run the playbook + +Run the Ansible playbook using the ansible-playbook command. + +```bash +ansible-playbook main.yml +``` + +## Verify the results + +Run the following command to verify the VM was created. This command filters the VMs by name. + +```azurecli +az vm list -d -o table --query "[?name=='${MY_VM_NAME}']" +``` + + +```JSON +[ + { + "name": "myVM", + "powerState": "running", + "publicIps": "xxx.xxx.xxx.xxx" + } +] +``` + +## Connect to the VM + +Run the SSH command to connect to your new Linux VM. Replace the placeholder with the IP address obtained from the previous step. 
+
+```bash
+# Look up the public IP address of the VM (the value shown in the previous step's output)
+export MY_VM_IP=$(az vm show -d -g $MY_RESOURCE_GROUP -n $MY_VM_NAME --query publicIps -o tsv)
+ssh -o StrictHostKeyChecking=no azureuser@$MY_VM_IP
+```
+
+## Next steps
+
+> [!div class="nextstepaction"]
+> [Manage a Linux virtual machine in Azure using Ansible](./vm-manage.md)
\ No newline at end of file
diff --git a/scenarios/azure-docs/articles/aks/learn/quick-kubernetes-deploy-cli.md b/scenarios/azure-docs/articles/aks/learn/quick-kubernetes-deploy-cli.md
index c0957f07c..3bb386a62 100644
--- a/scenarios/azure-docs/articles/aks/learn/quick-kubernetes-deploy-cli.md
+++ b/scenarios/azure-docs/articles/aks/learn/quick-kubernetes-deploy-cli.md
@@ -33,18 +33,6 @@ This quickstart assumes a basic understanding of Kubernetes concepts. For more i
 - Make sure that the identity you're using to create your cluster has the appropriate minimum permissions. For more details on access and identity for AKS, see [Access and identity options for Azure Kubernetes Service (AKS)](../concepts-identity.md).
 - If you have multiple Azure subscriptions, select the appropriate subscription ID in which the resources should be billed using the [az account set](/cli/azure/account#az-account-set) command. For more information, see [How to manage Azure subscriptions – Azure CLI](/cli/azure/manage-azure-subscriptions-azure-cli?tabs=bash#change-the-active-subscription).
-## Define environment variables
-
-Define the following environment variables for use throughout this quickstart:
-
-```azurecli-interactive
-export RANDOM_ID="$(openssl rand -hex 3)"
-export MY_RESOURCE_GROUP_NAME="myAKSResourceGroup$RANDOM_ID"
-export REGION="westeurope"
-export MY_AKS_CLUSTER_NAME="myAKSCluster$RANDOM_ID"
-export MY_DNS_LABEL="mydnslabel$RANDOM_ID"
-```
-
 ## Create a resource group
 
 An [Azure resource group][azure-resource-group] is a logical group in which Azure resources are deployed and managed. When you create a resource group, you're prompted to specify a location. This location is the storage location of your resource group metadata and where your resources run in Azure if you don't specify another region during resource creation.
@@ -52,6 +40,9 @@ An [Azure resource group][azure-resource-group] is a logical group in which Azur
 Create a resource group using the [`az group create`][az-group-create] command.
 
 ```azurecli-interactive
+export RANDOM_ID="$(openssl rand -hex 3)"
+export MY_RESOURCE_GROUP_NAME="myAKSResourceGroup$RANDOM_ID"
+export REGION="westeurope"
 az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION
 ```
 
@@ -76,6 +67,7 @@ Results:
 Create an AKS cluster using the [`az aks create`][az-aks-create] command. The following example creates a cluster with one node and enables a system-assigned managed identity.
 
 ```azurecli-interactive
+export MY_AKS_CLUSTER_NAME="myAKSCluster$RANDOM_ID"
 az aks create \
     --resource-group $MY_RESOURCE_GROUP_NAME \
     --name $MY_AKS_CLUSTER_NAME \
@@ -84,25 +76,18 @@ az aks create \
 ```
 
 > [!NOTE]
-> When you create a new cluster, AKS automatically creates a second resource group to store the AKS resources. For more information, see [Why are two resource groups created with AKS?](../faq.md#why-are-two-resource-groups-created-with-aks)
-
-## Download credentials
-
-Configure `kubectl` to connect to your Kubernetes cluster using the [az aks get-credentials][az-aks-get-credentials] command. This command downloads credentials and configures the Kubernetes CLI to use them.
- -```azurecli-interactive -az aks get-credentials --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_AKS_CLUSTER_NAME -``` +> When you create a new cluster, AKS automatically creates a second resource group to store the AKS resources. For more information, see [Why are two resource groups created with AKS?](../faq.yml) ## Connect to the cluster To manage a Kubernetes cluster, use the Kubernetes command-line client, [kubectl][kubectl]. `kubectl` is already installed if you use Azure Cloud Shell. To install `kubectl` locally, use the [`az aks install-cli`][az-aks-install-cli] command. -Verify the connection to your cluster using the [kubectl get][kubectl-get] command. This command returns a list of the cluster nodes. +1. Configure `kubectl` to connect to your Kubernetes cluster using the [az aks get-credentials][az-aks-get-credentials] command. This command downloads credentials and configures the Kubernetes CLI to use them. Then verify the connection to your cluster using the [kubectl get][kubectl-get] command. This command returns a list of the cluster nodes. -```azurecli-interactive -kubectl get nodes -``` + ```azurecli-interactive + az aks get-credentials --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_AKS_CLUSTER_NAME + kubectl get nodes + ``` ## Deploy the application @@ -359,30 +344,6 @@ To deploy the application, you use a manifest file to create all the objects req kubectl apply -f aks-store-quickstart.yaml ``` -## Test the application - -You can validate that the application is running by visiting the public IP address or the application URL. - -Get the application URL using the following commands: - -```azurecli-interactive -runtime="5 minutes" -endtime=$(date -ud "$runtime" +%s) -while [[ $(date -u +%s) -le $endtime ]] -do - STATUS=$(kubectl get pods -l app=store-front -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}') - echo $STATUS - if [ "$STATUS" == 'True' ] - then - export IP_ADDRESS=$(kubectl get service store-front --output 'jsonpath={..status.loadBalancer.ingress[0].ip}') - echo "Service IP Address: $IP_ADDRESS" - break - else - sleep 10 - fi -done -``` - Results: ```HTML @@ -434,7 +395,7 @@ To learn more about AKS and walk through a complete code-to-deployment example, [kubernetes-concepts]: ../concepts-clusters-workloads.md [aks-tutorial]: ../tutorial-kubernetes-prepare-app.md -[azure-resource-group]: ../../azure-resource-manager/management/overview.md +[azure-resource-group]: /azure/azure-resource-manager/management/overview [az-aks-create]: /cli/azure/aks#az-aks-create [az-aks-get-credentials]: /cli/azure/aks#az-aks-get-credentials [az-aks-install-cli]: /cli/azure/aks#az-aks-install-cli @@ -442,4 +403,4 @@ To learn more about AKS and walk through a complete code-to-deployment example, [az-group-delete]: /cli/azure/group#az-group-delete [kubernetes-deployment]: ../concepts-clusters-workloads.md#deployments-and-yaml-manifests [aks-solution-guidance]: /azure/architecture/reference-architectures/containers/aks-start-here?toc=/azure/aks/toc.json&bc=/azure/aks/breadcrumb/toc.json -[baseline-reference-architecture]: /azure/architecture/reference-architectures/containers/aks/baseline-aks?toc=/azure/aks/toc.json&bc=/azure/aks/breadcrumb/toc.json +[baseline-reference-architecture]: /azure/architecture/reference-architectures/containers/aks/baseline-aks?toc=/azure/aks/toc.json&bc=/azure/aks/breadcrumb/toc.json \ No newline at end of file diff --git a/scenarios/azure-docs/articles/batch/quick-create-cli.md 
b/scenarios/azure-docs/articles/batch/quick-create-cli.md new file mode 100644 index 000000000..b0b86f1f4 --- /dev/null +++ b/scenarios/azure-docs/articles/batch/quick-create-cli.md @@ -0,0 +1,247 @@ +--- +title: 'Quickstart: Use the Azure CLI to create a Batch account and run a job' +description: Follow this quickstart to use the Azure CLI to create a Batch account, a pool of compute nodes, and a job that runs basic tasks on the pool. +ms.topic: quickstart +ms.date: 03/19/2025 +ms.custom: mvc, devx-track-azurecli, mode-api, linux-related-content, innovation-engine +author: padmalathas +ms.author: padmalathas +--- + +# Quickstart: Use the Azure CLI to create a Batch account and run a job + +This quickstart shows you how to get started with Azure Batch by using Azure CLI commands and scripts to create and manage Batch resources. You create a Batch account that has a pool of virtual machines, or compute nodes. You then create and run a job with tasks that run on the pool nodes. + +After you complete this quickstart, you understand the [key concepts of the Batch service](batch-service-workflow-features.md) and are ready to use Batch with more realistic, larger scale workloads. + +## Prerequisites + +- [!INCLUDE [quickstarts-free-trial-note](~/reusable-content/ce-skilling/azure/includes/quickstarts-free-trial-note.md)] + +- Azure Cloud Shell or Azure CLI. + + You can run the Azure CLI commands in this quickstart interactively in Azure Cloud Shell. To run the commands in the Cloud Shell, select **Open Cloudshell** at the upper-right corner of a code block. Select **Copy** to copy the code, and paste it into Cloud Shell to run it. You can also [run Cloud Shell from within the Azure portal](https://shell.azure.com). Cloud Shell always uses the latest version of the Azure CLI. + + Alternatively, you can [install Azure CLI locally](/cli/azure/install-azure-cli) to run the commands. The steps in this article require Azure CLI version 2.0.20 or later. Run [az version](/cli/azure/reference-index?#az-version) to see your installed version and dependent libraries, and run [az upgrade](/cli/azure/reference-index?#az-upgrade) to upgrade. If you use a local installation, sign in to Azure by using the appropriate command. + +>[!NOTE] +>For some regions and subscription types, quota restrictions might cause Batch account or node creation to fail or not complete. In this situation, you can request a quota increase at no charge. For more information, see [Batch service quotas and limits](batch-quota-limit.md). + +## Create a resource group + +Run the following [az group create](/cli/azure/group#az-group-create) command to create an Azure resource group. The resource group is a logical container that holds the Azure resources for this quickstart. + +```azurecli-interactive +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export REGION="canadacentral" +export RESOURCE_GROUP="qsBatch$RANDOM_SUFFIX" + +az group create \ + --name $RESOURCE_GROUP \ + --location $REGION +``` + +Results: + + + +```JSON +{ + "id": "/subscriptions/xxxxx/resourceGroups/qsBatchxxx", + "location": "eastus2", + "managedBy": null, + "name": "qsBatchxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Create a storage account + +Use the [az storage account create](/cli/azure/storage/account#az-storage-account-create) command to create an Azure Storage account to link to your Batch account. 
Although this quickstart doesn't use the storage account, most real-world Batch workloads use a linked storage account to deploy applications and store input and output data. + +Run the following command to create a Standard_LRS SKU storage account in your resource group: + +```azurecli-interactive +export STORAGE_ACCOUNT="mybatchstorage$RANDOM_SUFFIX" + +az storage account create \ + --resource-group $RESOURCE_GROUP \ + --name $STORAGE_ACCOUNT \ + --location $REGION \ + --sku Standard_LRS +``` + +## Create a Batch account + +Run the following [az batch account create](/cli/azure/batch/account#az-batch-account-create) command to create a Batch account in your resource group and link it with the storage account. + +```azurecli-interactive +export BATCH_ACCOUNT="mybatchaccount$RANDOM_SUFFIX" + +az batch account create \ + --name $BATCH_ACCOUNT \ + --storage-account $STORAGE_ACCOUNT \ + --resource-group $RESOURCE_GROUP \ + --location $REGION +``` + +Sign in to the new Batch account by running the [az batch account login](/cli/azure/batch/account#az-batch-account-login) command. Once you authenticate your account with Batch, subsequent `az batch` commands in this session use this account context. + +```azurecli-interactive +az batch account login \ + --name $BATCH_ACCOUNT \ + --resource-group $RESOURCE_GROUP \ + --shared-key-auth +``` + +## Create a pool of compute nodes + +Run the [az batch pool create](/cli/azure/batch/pool#az-batch-pool-create) command to create a pool of Linux compute nodes in your Batch account. The following example creates a pool that consists of two Standard_A1_v2 size VMs running Ubuntu 20.04 LTS OS. This node size offers a good balance of performance versus cost for this quickstart example. + +```azurecli-interactive +export POOL_ID="myPool$RANDOM_SUFFIX" + +az batch pool create \ + --id $POOL_ID \ + --image canonical:0001-com-ubuntu-server-focal:20_04-lts \ + --node-agent-sku-id "batch.node.ubuntu 20.04" \ + --target-dedicated-nodes 2 \ + --vm-size Standard_A1_v2 +``` + +Batch creates the pool immediately, but takes a few minutes to allocate and start the compute nodes. To see the pool status, use the [az batch pool show](/cli/azure/batch/pool#az-batch-pool-show) command. This command shows all the properties of the pool, and you can query for specific properties. The following command queries for the pool allocation state: + +```azurecli-interactive +az batch pool show --pool-id $POOL_ID \ + --query "{allocationState: allocationState}" +``` + +Results: + + + +```JSON +{ + "allocationState": "resizing" +} +``` + +While Batch allocates and starts the nodes, the pool is in the `resizing` state. You can create a job and tasks while the pool state is still `resizing`. The pool is ready to run tasks when the allocation state is `steady` and all the nodes are running. + +## Create a job + +Use the [az batch job create](/cli/azure/batch/job#az-batch-job-create) command to create a Batch job to run on your pool. A Batch job is a logical group of one or more tasks. The job includes settings common to the tasks, such as the pool to run on. The following example creates a job that initially has no tasks. + +```azurecli-interactive +export JOB_ID="myJob$RANDOM_SUFFIX" + +az batch job create \ + --id $JOB_ID \ + --pool-id $POOL_ID +``` + +## Create job tasks + +Batch provides several ways to deploy apps and scripts to compute nodes. Use the [az batch task create](/cli/azure/batch/task#az-batch-task-create) command to create tasks to run in the job. 
Each task has a command line that specifies an app or script. + +The following Bash script creates four identical, parallel tasks called `myTask1` through `myTask4`. The task command line displays the Batch environment variables on the compute node, and then waits 90 seconds. + +```azurecli-interactive +for i in {1..4} +do + az batch task create \ + --task-id myTask$i \ + --job-id $JOB_ID \ + --command-line "/bin/bash -c 'printenv | grep AZ_BATCH; sleep 90s'" +done +``` + +Batch distributes the tasks to the compute nodes. + +## View task status + +After you create the tasks, Batch queues them to run on the pool. Once a node is available, a task runs on the node. + +Use the [az batch task show](/cli/azure/batch/task#az-batch-task-show) command to view the status of Batch tasks. The following example shows details about the status of `myTask1`: + +```azurecli-interactive +az batch task show \ + --job-id $JOB_ID \ + --task-id myTask1 +``` + +The command output includes many details. For example, an `exitCode` of `0` indicates that the task command completed successfully. The `nodeId` shows the name of the pool node that ran the task. + +## View task output + +Use the [az batch task file list](/cli/azure/batch/task#az-batch-task-file-show) command to list the files a task created on a node. The following command lists the files that `myTask1` created: + +```azurecli-interactive +# Wait for task to complete before downloading output +echo "Waiting for task to complete..." +while true; do + STATUS=$(az batch task show --job-id $JOB_ID --task-id myTask1 --query "state" -o tsv) + if [ "$STATUS" == "running" ]; then + break + fi + sleep 10 +done + +az batch task file list --job-id $JOB_ID --task-id myTask1 --output table +``` + +Results are similar to the following output: + +Results: + + + +```output +Name URL Is Directory Content Length +---------- ---------------------------------------------------------------------------------------- -------------- ---------------- +stdout.txt https://mybatchaccount.eastus2.batch.azure.com/jobs/myJob/tasks/myTask1/files/stdout.txt False 695 +certs https://mybatchaccount.eastus2.batch.azure.com/jobs/myJob/tasks/myTask1/files/certs True +wd https://mybatchaccount.eastus2.batch.azure.com/jobs/myJob/tasks/myTask1/files/wd True +stderr.txt https://mybatchaccount.eastus2.batch.azure.com/jobs/myJob/tasks/myTask1/files/stderr.txt False 0 +``` + +The [az batch task file download](/cli/azure/batch/task#az-batch-task-file-download) command downloads output files to a local directory. Run the following example to download the *stdout.txt* file: + +```azurecli-interactive +az batch task file download \ + --job-id $JOB_ID \ + --task-id myTask1 \ + --file-path stdout.txt \ + --destination ./stdout.txt +``` + +You can view the contents of the standard output file in a text editor. The following example shows a typical *stdout.txt* file. The standard output from this task shows the Azure Batch environment variables that are set on the node. You can refer to these environment variables in your Batch job task command lines, and in the apps and scripts the command lines run. 
+ +```text +AZ_BATCH_TASK_DIR=/mnt/batch/tasks/workitems/myJob/job-1/myTask1 +AZ_BATCH_NODE_STARTUP_DIR=/mnt/batch/tasks/startup +AZ_BATCH_CERTIFICATES_DIR=/mnt/batch/tasks/workitems/myJob/job-1/myTask1/certs +AZ_BATCH_ACCOUNT_URL=https://mybatchaccount.eastus2.batch.azure.com/ +AZ_BATCH_TASK_WORKING_DIR=/mnt/batch/tasks/workitems/myJob/job-1/myTask1/wd +AZ_BATCH_NODE_SHARED_DIR=/mnt/batch/tasks/shared +AZ_BATCH_TASK_USER=_azbatch +AZ_BATCH_NODE_ROOT_DIR=/mnt/batch/tasks +AZ_BATCH_JOB_ID=myJob +AZ_BATCH_NODE_IS_DEDICATED=true +AZ_BATCH_NODE_ID=tvm-257509324_2-20180703t215033z +AZ_BATCH_POOL_ID=myPool +AZ_BATCH_TASK_ID=myTask1 +AZ_BATCH_ACCOUNT_NAME=mybatchaccount +AZ_BATCH_TASK_USER_IDENTITY=PoolNonAdmin +``` + +## Next steps + +In this quickstart, you created a Batch account and pool, created and ran a Batch job and tasks, and viewed task output from the nodes. Now that you understand the key concepts of the Batch service, you're ready to use Batch with more realistic, larger scale workloads. To learn more about Azure Batch, continue to the Azure Batch tutorials. + +> [!div class="nextstepaction"] +> [Tutorial: Run a parallel workload with Azure Batch](./tutorial-parallel-python.md) \ No newline at end of file diff --git a/scenarios/azure-docs/articles/confidential-computing/confidential-enclave-nodes-aks-get-started.md b/scenarios/azure-docs/articles/confidential-computing/confidential-enclave-nodes-aks-get-started.md new file mode 100644 index 000000000..fefa977d9 --- /dev/null +++ b/scenarios/azure-docs/articles/confidential-computing/confidential-enclave-nodes-aks-get-started.md @@ -0,0 +1,249 @@ +--- +title: 'Quickstart: Deploy an AKS cluster with confidential computing Intel SGX agent nodes by using the Azure CLI' +description: Learn how to create an Azure Kubernetes Service (AKS) cluster with enclave confidential containers a Hello World app by using the Azure CLI. +author: angarg05 +ms.service: azure-virtual-machines +ms.subservice: azure-confidential-computing +ms.topic: quickstart +ms.date: 11/06/2023 +ms.author: ananyagarg +ms.custom: devx-track-azurecli, mode-api, innovation-engine +--- + +# Quickstart: Deploy an AKS cluster with confidential computing Intel SGX agent nodes by using the Azure CLI + +In this quickstart, you'll use the Azure CLI to deploy an Azure Kubernetes Service (AKS) cluster with enclave-aware (DCsv2/DCSv3) VM nodes. You'll then run a simple Hello World application in an enclave. + +AKS is a managed Kubernetes service that enables developers or cluster operators to quickly deploy and manage clusters. To learn more, read the AKS introduction and the overview of AKS confidential nodes. + +Features of confidential computing nodes include: + +- Linux worker nodes supporting Linux containers. +- Generation 2 virtual machine (VM) with Ubuntu 18.04 VM nodes. +- Intel SGX capable CPU to help run your containers in confidentiality protected enclave leveraging Encrypted Page Cache (EPC) memory. For more information, see Frequently asked questions for Azure confidential computing. +- Intel SGX DCAP Driver preinstalled on the confidential computing nodes. For more information, see Frequently asked questions for Azure confidential computing. + +> [!NOTE] +> DCsv2/DCsv3 VMs use specialized hardware that's subject to region availability. For more information, see the available SKUs and supported regions. + +## Prerequisites + +This quickstart requires: + +- A minimum of eight DCsv2/DCSv3/DCdsv3 cores available in your subscription. 
+ + By default, there is no pre-assigned quota for Intel SGX VM sizes for your Azure subscriptions. You should follow these instructions to request for VM core quota for your subscriptions. + +## Create an AKS cluster with enclave-aware confidential computing nodes and Intel SGX add-on + +Use the following instructions to create an AKS cluster with the Intel SGX add-on enabled, add a node pool to the cluster, and verify what you created with a Hello World enclave application. + +### Create an AKS cluster with a system node pool and AKS Intel SGX Addon + +> [!NOTE] +> If you already have an AKS cluster that meets the prerequisite criteria listed earlier, skip to the next section to add a confidential computing node pool. + +Intel SGX AKS Addon "confcom" exposes the Intel SGX device drivers to your containers to avoid added changes to your pod YAML. + +## Create Resource Group + +First, create a resource group for the cluster by using the `az group create` command. + +```bash +export RANDOM_SUFFIX="$(openssl rand -hex 3)" +export RESOURCE_GROUP="myResourceGroup$RANDOM_SUFFIX" +export REGION="eastus2" +az group create --name $RESOURCE_GROUP --location $REGION +``` + +Results: + + + +```json +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/myResourceGroupxxxxxx", + "location": "eastus2", + "managedBy": null, + "name": "myResourceGroupxxxxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Create Cluster with Confidential Computing Add-on +Now create an AKS cluster with the confidential computing add-on enabled. This command deploys a new AKS cluster with a system node pool of non-confidential computing nodes. Confidential computing Intel SGX nodes are not recommended for system node pools. + +```bash +export AKS_CLUSTER="myAKSCluster$RANDOM_SUFFIX" +az aks create -g $RESOURCE_GROUP --name $AKS_CLUSTER --generate-ssh-keys --enable-addons confcom +``` + +## Add a user node pool with confidential computing capabilities to the AKS cluster + +Run the following command to add a user node pool of `Standard_DC4s_v3` size with two nodes to the AKS cluster. After you run the command, a new node pool with DCsv3 should be visible with confidential computing add-on DaemonSets. + +```bash +az aks nodepool add --cluster-name $AKS_CLUSTER --name confcompool1 --resource-group $RESOURCE_GROUP --node-vm-size Standard_DC4s_v3 --node-count 2 +``` + +## Get Credentials + +Get the credentials for your AKS cluster. + +```bash +az aks get-credentials --resource-group $RESOURCE_GROUP --name $AKS_CLUSTER +``` + +## Verify the node pool and add-on + +Use the `kubectl get pods` command to verify that the nodes are created properly and the SGX-related DaemonSets are running on DCsv3 node pools: + +```bash +kubectl get pods --all-namespaces +``` + +Results: + + + +```text +NAMESPACE NAME READY STATUS RESTARTS AGE +kube-system sgx-device-plugin-xxxxx 1/1 Running 0 5m +``` + +## Enable the confidential computing AKS add-on on the existing cluster + +To enable the confidential computing add-on, use the `az aks enable-addons` command with the `confcom` add-on, specifying your existing AKS cluster name and resource group. 
+
+```bash
+az aks enable-addons --addons confcom --name $AKS_CLUSTER --resource-group $RESOURCE_GROUP
+```
+
+### Verify that DaemonSets are running on confidential node pools
+
+```bash
+kubectl get nodes
+```
+
+Results:
+
+```text
+NAME                                STATUS   ROLES   AGE   VERSION
+aks-confcompool1-xxxxx-vmss000000   Ready    agent   5m    v1.xx.x
+```
+
+## Deploy Hello World from an isolated enclave application
+
+Deploy a file named `hello-world-enclave.yaml`. This deployment assumes that you've deployed the *confcom* add-on.
+
+```bash
+cat << EOF > hello-world-enclave.yaml
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: oe-helloworld
+  namespace: default
+spec:
+  template:
+    metadata:
+      labels:
+        app: oe-helloworld
+    spec:
+      containers:
+      - name: oe-helloworld
+        image: mcr.microsoft.com/acc/samples/oe-helloworld:latest
+        resources:
+          limits:
+            sgx.intel.com/epc: "10Mi"
+          requests:
+            sgx.intel.com/epc: "10Mi"
+        volumeMounts:
+        - name: var-run-aesmd
+          mountPath: /var/run/aesmd
+      restartPolicy: "Never"
+      volumes:
+      - name: var-run-aesmd
+        hostPath:
+          path: /var/run/aesmd
+  backoffLimit: 0
+EOF
+kubectl apply -f hello-world-enclave.yaml
+```
+
+Results:
+
+```text
+job.batch/oe-helloworld created
+```
+
+## Check Jobs
+
+You can confirm that the workload successfully created a Trusted Execution Environment (enclave) by running the following commands:
+
+```bash
+kubectl get jobs -l app=oe-helloworld
+```
+
+Results:
+
+```text
+NAME            COMPLETIONS   DURATION   AGE
+oe-helloworld   1/1           1s         23s
+```
+
+## Check Pods
+
+```bash
+kubectl get pods -l app=oe-helloworld
+```
+
+Results:
+
+```text
+NAME                  READY   STATUS      RESTARTS   AGE
+oe-helloworld-xxxxx   0/1     Completed   0          25s
+```
+
+## Wait for the Pod to finish deploying
+
+```bash
+while [[ $(kubectl get pods -l app=oe-helloworld -o 'jsonpath={..status.phase}') != "Succeeded" ]]; do
+  sleep 2
+done
+
+kubectl logs -l app=oe-helloworld
+```
+
+Results:
+
+```text
+Hello world from the enclave
+Enclave called into host to print: Hello World!
+```
+
+## Next steps
+
+- Run Python, Node, or other applications through confidential containers using ISV/OSS SGX wrapper software. Review [confidential container samples in GitHub](https://github.com/Azure-Samples/confidential-container-samples).
+
+- Run enclave-aware applications by using the [enclave-aware Azure container samples in GitHub](https://github.com/Azure-Samples/confidential-computing/blob/main/containersamples/).
+ + +[az-group-create]: /cli/azure/group#az_group_create + +[az-aks-create]: /cli/azure/aks#az_aks_create + +[az-aks-get-credentials]: /cli/azure/aks#az_aks_get_credentials \ No newline at end of file diff --git a/scenarios/azure-docs/articles/confidential-computing/hellow-world-enclave.yaml b/scenarios/azure-docs/articles/confidential-computing/hellow-world-enclave.yaml new file mode 100644 index 000000000..c877c63c6 --- /dev/null +++ b/scenarios/azure-docs/articles/confidential-computing/hellow-world-enclave.yaml @@ -0,0 +1,28 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: oe-helloworld + namespace: default +spec: + template: + metadata: + labels: + app: oe-helloworld + spec: + containers: + - name: oe-helloworld + image: mcr.microsoft.com/acc/samples/oe-helloworld:latest + resources: + limits: + sgx.intel.com/epc: "10Mi" + requests: + sgx.intel.com/epc: "10Mi" + volumeMounts: + - name: var-run-aesmd + mountPath: /var/run/aesmd + restartPolicy: "Never" + volumes: + - name: var-run-aesmd + hostPath: + path: /var/run/aesmd + backoffLimit: 0 \ No newline at end of file diff --git a/scenarios/azure-docs/articles/iot-edge/quickstart-linux.md b/scenarios/azure-docs/articles/iot-edge/quickstart-linux.md new file mode 100644 index 000000000..ab473661a --- /dev/null +++ b/scenarios/azure-docs/articles/iot-edge/quickstart-linux.md @@ -0,0 +1,429 @@ +--- +title: "Quickstart: Create an Azure IoT Edge Device on Linux" +description: Learn to configure an Azure IoT Edge device on Linux. This guide walks you through creating an IoT Hub, registering a device, and deploying a simulated sensor module. +#customer intent: As a developer, I want to create an IoT Edge device on Linux so that I can deploy and test containerized modules. +author: PatAltimore +ms.author: patricka +ms.date: 03/27/2025 +ms.topic: quickstart +ms.service: azure-iot-edge +services: iot-edge +ms.custom: mvc, devx-track-azurecli, mode-other, linux-related-content +--- + +## Environment Variables + +In this section we declare environment variables that will be used throughout the Exec Doc. A random suffix is appended to resource names that must be unique for each deployment. + +```bash +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export REGION="eastus2" +export RESOURCE_GROUP="IoTEdgeResources$RANDOM_SUFFIX" +export IOTHUB_NAME="UniqueIoTHub$RANDOM_SUFFIX" +export VM_NAME="myvm$RANDOM_SUFFIX" +``` + +# Quickstart: Deploy your first IoT Edge module to a virtual Linux device + +[!INCLUDE [iot-edge-version-all-supported](includes/iot-edge-version-all-supported.md)] + +Try Azure IoT Edge in this quickstart by deploying containerized code to a virtual Linux IoT Edge device. IoT Edge lets you remotely manage code on your devices so you can send more of your workloads to the edge. For this quickstart, use an Azure virtual machine for your IoT Edge device. It lets you quickly create a test machine and delete it when you're done. + +In this quickstart, you learn how to: + +* Create an IoT Hub. +* Register an IoT Edge device to your IoT hub. +* Install and start the IoT Edge runtime on a virtual device. +* Deploy a module remotely to an IoT Edge device. + +:::image type="content" source="./media/quickstart-linux/install-edge-full.png" alt-text="Diagram of Quickstart architecture for device and cloud."::: + +This quickstart walks you through creating a Linux virtual machine that's configured to be an IoT Edge device. Then, you deploy a module from the Azure portal to your device. 
This quickstart uses a simulated sensor module that generates temperature, humidity, and pressure data. The other Azure IoT Edge tutorials build upon the work you do here by deploying additional modules that analyze the simulated data for business insights. + +If you don't have an active Azure subscription, create a [free account](https://azure.microsoft.com/free) before you begin. + +## Prerequisites + +Set up your environment for the Azure CLI. + +[!INCLUDE [azure-cli-prepare-your-environment-no-header.md](~/reusable-content/azure-cli/azure-cli-prepare-your-environment-no-header.md)] + +## Create a resource group + +A resource group to manage all the resources you use in this quickstart. This quickstart and the following tutorials use the example resource group name **IoTEdgeResources** with a randomized suffix. + + ```azurecli-interactive + az group create --name $RESOURCE_GROUP --location $REGION + ``` +Results: + + +```JSON +{ + "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/IoTEdgeResourcesabcd12", + "location": "westus2", + "managedBy": null, + "name": "IoTEdgeResourcesabcd12", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Create an IoT Hub + +Start the quickstart by creating an IoT Hub with the Azure CLI. + +:::image type="content" source="./media/quickstart-linux/create-iot-hub.png" alt-text="Diagram that shows how to create an IoT Hub in the cloud."::: + +The free tier of IoT Hub works for this quickstart. If you've used IoT Hub in the past and already have a hub created, you can use that IoT hub. + +The following code creates a **S1** hub in the resource group. Replace the placeholder with your preferred IoT Hub name if desired – here we use the environment variable $IOTHUB_NAME. Creating an IoT Hub might take a few minutes. + + ```azurecli-interactive + az iot hub create --resource-group $RESOURCE_GROUP --name $IOTHUB_NAME --sku S1 --partition-count 2 + ``` +Results: + + +```JSON +{ + "name": "UniqueIoTHubabcd12", + "sku": "S1", + "resourceGroup": "IoTEdgeResourcesabcd12", + "location": "westus2", + "state": "Active", + "skuCapacity": 1 +} +``` + +If you use F1 (the free tier), you can only create one IoT Hub per subscription. If you try to create a second hub, you'll receive an error message. In such a case, change the SKU to **S1**. Each subscription can only have one free IoT hub. If you get an error that the IoT Hub name isn't available, it means that someone else already has a hub with that name. Try a new name. + +## Register an IoT Edge device + +Register an IoT Edge device with the IoT hub you just created. + +:::image type="content" source="./media/quickstart-linux/register-device.png" alt-text="Diagram of how to register a device with an IoT Hub identity."::: + +Create a device identity for your IoT Edge device so that it can communicate with your IoT hub. The device identity lives in the cloud, and you use a unique device connection string to associate a physical device to a device identity. + +Because IoT Edge devices behave and are managed differently from typical IoT devices, declare this identity as an IoT Edge device using the --edge-enabled flag. + +1. Enter the following command in Azure Cloud Shell to create a device named **myEdgeDevice** in your hub. 
+ + ```azurecli-interactive + az config set extension.use_dynamic_install=yes_without_prompt + az iot hub device-identity create --device-id myEdgeDevice --edge-enabled --hub-name $IOTHUB_NAME + ``` +Results: + + +```JSON +{ + "deviceId": "myEdgeDevice", + "generationId": "xxxxxxxx", + "status": "enabled", + "connectionState": "Disconnected", + "statusReason": null, + "connectionStateUpdatedTime": null, + "statusUpdatedTime": "2025-03-27T00:00:00.000Z", + "lastActivityTime": null, + "cloudToDeviceMessageCount": 0, + "authentication": { + "symmetricKey": { + "primaryKey": "xxxxxxxxxxxxxxxx==", + "secondaryKey": "xxxxxxxxxxxxxxxx==" + }, + "type": "sas" + }, + "capabilities": { + "iotEdge": true + }, + "etag": "xxxxxxxxxxxxxx" +} +``` + +2. Check the connection string for your device, which links the physical device to its identity in IoT Hub. It includes the name of your IoT Hub, the name of your device, and a shared key that authenticates connections between them. You use this connection string again in the next section to set up your IoT Edge device. + + ```azurecli-interactive + az iot hub device-identity connection-string show --device-id myEdgeDevice --hub-name $IOTHUB_NAME + ``` +Results: + + +```JSON +{ + "connectionString": "HostName=UniqueIoTHubabcd12.azure-devices.net;DeviceId=myEdgeDevice;SharedAccessKey=xxxxxxxxxxxxxxxxxxxxxxx" +} +``` + +For example, the connection string should look similar to +HostName=contoso-hub.azure-devices.net;DeviceId=myEdgeDevice;SharedAccessKey=. + +## Configure your IoT Edge device + +Create a virtual machine with the Azure IoT Edge runtime. + +:::image type="content" source="./media/quickstart-linux/start-runtime.png" alt-text="Diagram of how to start the runtime on a device."::: + +The IoT Edge runtime is deployed on all IoT Edge devices and has three components. The IoT Edge security daemon starts each time an IoT Edge device boots and bootstraps the device by starting the IoT Edge agent. The IoT Edge agent facilitates deployment and monitoring of modules on the IoT Edge device, including the IoT Edge hub. The IoT Edge hub manages communications between modules on the IoT Edge device, and between the device and IoT Hub. + +During runtime configuration, provide a device connection string. This string is retrieved from the Azure CLI. This string associates your physical device with the IoT Edge device identity in Azure. + +### Deploy the IoT Edge device + +This section uses an Azure Resource Manager template to create a new virtual machine and install the IoT Edge runtime on it. If you want to use your own Linux device instead, you can follow the installation steps in [Manually provision a single Linux IoT Edge device](how-to-provision-single-device-linux-symmetric.md), then return to this quickstart. + +Use the Deploy to Azure button or CLI commands to create an IoT Edge device based on the prebuilt [iotedge-vm-deploy](https://github.com/Azure/iotedge-vm-deploy) template. + +* Deploy using the IoT Edge Azure Resource Manager template. + + [![Deploy to Azure](https://aka.ms/deploytoazurebutton)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fazure%2Fiotedge-vm-deploy%2Fmain%2FedgeDeploy.json) + +* For bash or Cloud Shell users, first create a file named **cloud-init.txt** in your current working directory. 
This file contains the configuration for the IoT Edge runtime: + + ```text + #cloud-config + package_update: true + package_upgrade: true + runcmd: + - curl https://packages.microsoft.com/config/ubuntu/22.04/packages-microsoft-prod.deb > packages-microsoft-prod.deb + - dpkg -i packages-microsoft-prod.deb + - apt-get update + - apt-get install aziot-edge -y + - | + CONNECTION_STRING="$(az iot hub device-identity connection-string show \ + --device-id myEdgeDevice \ + --hub-name $IOTHUB_NAME \ + -o tsv)" + iotedge config mp --connection-string "$CONNECTION_STRING" + iotedge config apply + ``` + +* Then, copy the following command into a text editor, replace the placeholder text with your information, then copy into your bash or Cloud Shell window: + + ```azurecli-interactive + az vm create \ + --resource-group $RESOURCE_GROUP \ + --name $VM_NAME \ + --image Ubuntu2204 \ + --admin-username azureuser \ + --generate-ssh-keys \ + --custom-data cloud-init.txt + ``` + +* For PowerShell users, copy the following command into your PowerShell window, then replace the placeholder text with your own information: + + ```powershell + az deployment group create ` + --resource-group $RESOURCE_GROUP ` + --template-uri "https://raw.githubusercontent.com/Azure/iotedge-vm-deploy/main/edgeDeploy.json" ` + --parameters dnsLabelPrefix="$VM_NAME" ` + --parameters adminUsername='azureUser' ` + --parameters deviceConnectionString=$(az iot hub device-identity connection-string show --device-id myEdgeDevice --hub-name $IOTHUB_NAME -o tsv) ` + --parameters authenticationType='password' ` + --parameters adminPasswordOrKey="" + ``` + +This template takes the following parameters: + +| Parameter | Description | +| ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **resource-group** | The resource group in which the resources are created. Use the default **IoTEdgeResources** that we've been using throughout this article or provide the name of an existing resource group in your subscription. | +| **template-uri** | A pointer to the Resource Manager template that we're using. | +| **dnsLabelPrefix** | A string that is used to create the virtual machine's hostname. Replace the placeholder text with a name for your virtual machine. | +| **adminUsername** | A username for the admin account of the virtual machine. Use the example **azureUser** or provide a new username. | +| **deviceConnectionString** | The connection string from the device identity in IoT Hub, which is used to configure the IoT Edge runtime on the virtual machine. The CLI command within this parameter grabs the connection string for you. Replace the placeholder text with your IoT hub name. | +| **authenticationType** | The authentication method for the admin account. This quickstart uses **password** authentication, but you can also set this parameter to **sshPublicKey**. | +| **adminPasswordOrKey** | The password or value of the SSH key for the admin account. Replace the placeholder text with a secure password. Your password must be at least 12 characters long and have three of four of the following: lowercase characters, uppercase characters, digits, and special characters. | + +After deployment completes, JSON-formatted output in the CLI contains the SSH information to connect to the virtual machine. The output includes the public IP address of the virtual machine, which you can use to connect to it. 
+
+   ```bash
+   export IP_ADDRESS=$(az vm show -d -g $RESOURCE_GROUP -n $VM_NAME --query publicIps -o tsv)
+
+   ssh azureuser@$IP_ADDRESS -o StrictHostKeyChecking=no
+   ```
+
+### View the IoT Edge runtime status
+
+The rest of the commands in this quickstart take place on your IoT Edge device itself, so that you can see what's happening on the device. If you're using a virtual machine, connect to that machine now using the admin username that you set up and the DNS name that was output by the deployment command. You can also find the DNS name on your virtual machine's overview page in the Azure portal. Use the following command to connect to your virtual machine. Replace `<admin username>` and `<DNS name>` with your own values.
+
+   ```text
+   ssh <admin username>@<DNS name>
+   ```
+
+Once connected to your virtual machine, verify that the runtime was successfully installed and configured on your IoT Edge device.
+
+1. Check if IoT Edge is running. The following command returns a status of **Ok** if IoT Edge is running or provides any service errors.
+
+    ```bash
+    sudo iotedge system status
+    ```
+Results:
+
+```text
+Status: Ok
+```
+
+    >[!TIP]
+    >You need elevated privileges to run iotedge commands. Once you sign out of your machine and sign back in the first time after installing the IoT Edge runtime, your permissions are automatically updated. Until then, use sudo in front of the commands.
+
+2. If you need to troubleshoot the service, retrieve the service logs.
+
+    ```bash
+    sudo iotedge system logs
+    ```
+Results:
+
+```text
+... (service log output redacted for brevity) ...
+```
+
+3. View all the modules running on your IoT Edge device. Since the service just started for the first time, you should only see the **edgeAgent** module running. The edgeAgent module runs by default and helps to install and start any additional modules that you deploy to your device.
+
+    ```bash
+    sudo iotedge list
+    ```
+Results:
+
+```JSON
+[
+  {
+    "Name": "$edgeAgent",
+    "Status": "running"
+  }
+]
+```
+
+Your IoT Edge device is now configured. It's ready to run cloud-deployed modules.
+
+## Deploy a module
+
+Manage your Azure IoT Edge device from the cloud to deploy a module that sends device telemetry data to IoT Hub.
+
+:::image type="content" source="./media/quickstart-linux/deploy-module.png" alt-text="Diagram of how to deploy a module from cloud to device.":::
+
+A key capability of Azure IoT Edge is deploying code to your IoT Edge devices from the cloud. IoT Edge modules are executable packages implemented as containers. In this section, you deploy a pre-built module from the [IoT Edge Modules section of Microsoft Artifact Registry](https://mcr.microsoft.com/catalog?cat=IoT%20Edge%20Modules&alphaSort=asc&alphaSortKey=Name).
+
+The module that you deploy in this section simulates a sensor and sends generated data. This module is a useful piece of code when you're getting started with IoT Edge because you can use the simulated data for development and testing. If you want to see exactly what this module does, you can view the [simulated temperature sensor source code](https://github.com/Azure/iotedge/blob/main/edge-modules/SimulatedTemperatureSensor/src/Program.cs).
+
+Use these steps to deploy your first module.
+
+1. Sign in to the [Azure portal](https://portal.azure.com) and go to your IoT Hub.
+
+2. From the menu on the left, under **Device Management**, select **Devices**.
+
+3. Select the device ID of the target IoT Edge device from the list.
+ + When you create a new IoT Edge device, it displays the status code 417 -- The device's deployment configuration is not set in the Azure portal. This status is normal, and means that the device is ready to receive a module deployment. + +4. On the upper bar, select **Set Modules**. + + Select the modules you want to run on your device. You can choose from modules that you've built yourself or images in a container registry. In this quickstart, you deploy a module from the Microsoft container registry. + +5. In the **IoT Edge modules** section, select **Add** then choose **IoT Edge Module**. + +6. Update the following module settings: + + | Setting | Value | + |--------------------|----------------------------------------------------------------------| + | IoT Module name | SimulatedTemperatureSensor | + | Image URI | mcr.microsoft.com/azureiotedge-simulated-temperature-sensor:latest | + | Restart policy | always | + | Desired status | running | + +7. Select **Next: Routes** to continue to configure routes. + +8. Add a route that sends all messages from the simulated temperature module to IoT Hub. + + | Setting | Value | + |------------|--------------------------------------------| + | Name | SimulatedTemperatureSensorToIoTHub | + | Value | FROM /messages/modules/SimulatedTemperatureSensor/* INTO $upstream | + +9. Select **Next: Review + create**. + +10. Review the JSON file, and then select **Create**. The JSON file defines all the modules that you deploy to your IoT Edge device. + + > [!NOTE] + > When you submit a new deployment to an IoT Edge device, nothing is pushed to your device. Instead, the device queries IoT Hub regularly for any new instructions. If the device finds an updated deployment manifest, it uses the information about the new deployment to pull the module images from the cloud then starts running the modules locally. This process can take a few minutes. + +After you create the module deployment details, the wizard returns you to the device details page. View the deployment status on the **Modules** tab. + +You should see three modules: **$edgeAgent**, **$edgeHub**, and **SimulatedTemperatureSensor**. If one or more of the modules has **Yes** under **Specified in Deployment** but not under **Reported by Device**, your IoT Edge device is still starting them. Wait a few minutes and refresh the page. + +:::image type="content" source="./media/quickstart-linux/view-deployed-modules.png" alt-text="Screenshot that shows the SimulatedTemperatureSensor in the list of deployed modules." lightbox="./media/quickstart-linux/view-deployed-modules.png"::: + +If you have issues deploying modules, learn more in [Troubleshoot IoT Edge devices from the Azure portal](troubleshoot-in-portal.md). + +## View generated data + +In this quickstart, you create a new IoT Edge device and install the IoT Edge runtime on it. Then, you use the Azure portal to deploy an IoT Edge module to run on the device without making changes to the device itself. + +In this case, the module that you pushed generates sample environment data that you can use for testing later. The simulated sensor is monitoring both a machine and the environment around the machine. For example, this sensor can be in a server room, on a factory floor, or on a wind turbine. The message includes ambient temperature and humidity, machine temperature and pressure, and a timestamp. The IoT Edge tutorials use the data created by this module as test data for analytics. 
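+
+Each message the module sends has roughly the following shape (a representative example only; the values change with every message):
+
+```JSON
+{
+  "machine": {
+    "temperature": 23.2,
+    "pressure": 1.01
+  },
+  "ambient": {
+    "temperature": 20.9,
+    "humidity": 26
+  },
+  "timeCreated": "2025-03-27T00:00:00.0000000Z"
+}
+```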
+ +Open the command prompt on your IoT Edge device, or use the SSH connection from Azure CLI. Confirm that the module you deployed from the cloud is running on your IoT Edge device: + +```bash +sudo iotedge list +``` +Results: + + +```JSON +[ + { + "Name": "$edgeAgent", + "Status": "running" + }, + { + "Name": "$edgeHub", + "Status": "running" + }, + { + "Name": "SimulatedTemperatureSensor", + "Status": "running" + } +] +``` + +View the messages sent from the temperature sensor module: + +```bash +sudo iotedge logs SimulatedTemperatureSensor -f +``` +Results: + + +```text +... (sample sensor data output redacted for brevity) ... +``` + +>[!TIP] +>IoT Edge commands are case sensitive when referring to module names. + +## Clean up resources + +To continue with the IoT Edge tutorials, use the device you registered and set up in this quickstart. Otherwise, delete the Azure resources you created to avoid charges. + +If you created your virtual machine and IoT hub in a new resource group, you can delete that group and all the associated resources. Double-check the contents of the resource group to ensure there's nothing you want to keep. If you don't want to delete the whole group, you can delete individual resources instead. + +> [!IMPORTANT] +> Deleting a resource group is irreversible. + +(The deletion commands have been removed from this Exec Doc to avoid accidental deletion during automated execution.) + +## Next steps + +In this quickstart, you created an IoT Edge device and used the Azure IoT Edge cloud interface to deploy code onto the device. Now, you use a test device that generates raw data about its environment. + +In the next tutorial, you'll learn how to monitor the activity and health of your device from the Azure portal. + +> [!div class="nextstepaction"] +> [Monitor IoT Edge devices](tutorial-monitor-with-workbooks.md) \ No newline at end of file diff --git a/scenarios/azure-docs/articles/static-web-apps/get-started-cli.md b/scenarios/azure-docs/articles/static-web-apps/get-started-cli.md index 76bf1532a..b04a12b86 100644 --- a/scenarios/azure-docs/articles/static-web-apps/get-started-cli.md +++ b/scenarios/azure-docs/articles/static-web-apps/get-started-cli.md @@ -27,17 +27,6 @@ In this quickstart, you deploy a web application to Azure Static Web apps using - [Azure CLI](/cli/azure/install-azure-cli) installed (version 2.29.0 or higher). - [A Git setup](https://www.git-scm.com/downloads). -## Define environment variables - -The first step in this quickstart is to define environment variables. - -```bash -export RANDOM_ID="$(openssl rand -hex 3)" -export MY_RESOURCE_GROUP_NAME="myStaticWebAppResourceGroup$RANDOM_ID" -export REGION=EastUS2 -export MY_STATIC_WEB_APP_NAME="myStaticWebApp$RANDOM_ID" -``` - ## Create a repository (optional) (Optional) This article uses a GitHub template repository as another way to make it easy for you to get started. The template features a starter app to deploy to Azure Static Web Apps. @@ -55,6 +44,9 @@ export MY_STATIC_WEB_APP_NAME="myStaticWebApp$RANDOM_ID" Create a resource group. ```bash +export RANDOM_ID="$(openssl rand -hex 3)" +export MY_RESOURCE_GROUP_NAME="myStaticWebAppResourceGroup$RANDOM_ID" +export REGION=EastUS2 az group create \ --name $MY_RESOURCE_GROUP_NAME \ --location $REGION @@ -81,6 +73,7 @@ Results: Deploy the app as a static web app from the Azure CLI. 
```bash +export MY_STATIC_WEB_APP_NAME="myStaticWebApp$RANDOM_ID" az staticwebapp create \ --name $MY_STATIC_WEB_APP_NAME \ --resource-group $MY_RESOURCE_GROUP_NAME \ diff --git a/scenarios/azure-docs/articles/virtual-machine-scale-sets/flexible-virtual-machine-scale-sets-cli.md b/scenarios/azure-docs/articles/virtual-machine-scale-sets/flexible-virtual-machine-scale-sets-cli.md index ad3c2fdb4..3db8ac0d1 100644 --- a/scenarios/azure-docs/articles/virtual-machine-scale-sets/flexible-virtual-machine-scale-sets-cli.md +++ b/scenarios/azure-docs/articles/virtual-machine-scale-sets/flexible-virtual-machine-scale-sets-cli.md @@ -25,33 +25,14 @@ The Azure Cloud Shell is a free interactive shell that you can use to run the st To open the Cloud Shell, select **Open Cloud Shell** from the upper right corner of a code block. You can also launch Cloud Shell in a separate browser tab by going to [https://shell.azure.com/cli](https://shell.azure.com/cli). Select **Copy** to copy the blocks of code, paste it into the Cloud Shell, and press enter to run it. -## Define environment variables +## Create a resource group -Define environment variables as follows. +A resource group is a logical container into which Azure resources are deployed and managed. All resources must be placed in a resource group. The following command creates a resource group with the previously defined $MY_RESOURCE_GROUP_NAME and $REGION parameters. ```bash export RANDOM_ID="$(openssl rand -hex 3)" export MY_RESOURCE_GROUP_NAME="myVMSSResourceGroup$RANDOM_ID" export REGION=EastUS -export MY_VMSS_NAME="myVMSS$RANDOM_ID" -export MY_USERNAME=azureuser -export MY_VM_IMAGE="Ubuntu2204" -export MY_VNET_NAME="myVNet$RANDOM_ID" -export NETWORK_PREFIX="$(($RANDOM % 254 + 1))" -export MY_VNET_PREFIX="10.$NETWORK_PREFIX.0.0/16" -export MY_VM_SN_NAME="myVMSN$RANDOM_ID" -export MY_VM_SN_PREFIX="10.$NETWORK_PREFIX.0.0/24" -export MY_APPGW_SN_NAME="myAPPGWSN$RANDOM_ID" -export MY_APPGW_SN_PREFIX="10.$NETWORK_PREFIX.1.0/24" -export MY_APPGW_NAME="myAPPGW$RANDOM_ID" -export MY_APPGW_PUBLIC_IP_NAME="myAPPGWPublicIP$RANDOM_ID" -``` - -## Create a resource group - -A resource group is a logical container into which Azure resources are deployed and managed. All resources must be placed in a resource group. The following command creates a resource group with the previously defined $MY_RESOURCE_GROUP_NAME and $REGION parameters. - -```bash az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION -o JSON ``` @@ -78,6 +59,11 @@ Now you'll create network resources. In this step you're going to create a virtu #### Create virtual network and subnet ```bash +export MY_VNET_NAME="myVNet$RANDOM_ID" +export NETWORK_PREFIX="$(($RANDOM % 254 + 1))" +export MY_VNET_PREFIX="10.$NETWORK_PREFIX.0.0/16" +export MY_VM_SN_NAME="myVMSN$RANDOM_ID" +export MY_VM_SN_PREFIX="10.$NETWORK_PREFIX.0.0/24" az network vnet create --name $MY_VNET_NAME --resource-group $MY_RESOURCE_GROUP_NAME --location $REGION --address-prefix $MY_VNET_PREFIX --subnet-name $MY_VM_SN_NAME --subnet-prefix $MY_VM_SN_PREFIX -o JSON ``` @@ -124,6 +110,10 @@ Results: Azure Application Gateway requires a dedicated subnet within your virtual network. The following command creates a subnet named $MY_APPGW_SN_NAME with a specified address prefix named $MY_APPGW_SN_PREFIX in your virtual network $MY_VNET_NAME. 
```bash +export MY_APPGW_SN_NAME="myAPPGWSN$RANDOM_ID" +export MY_APPGW_SN_PREFIX="10.$NETWORK_PREFIX.1.0/24" +export MY_APPGW_NAME="myAPPGW$RANDOM_ID" +export MY_APPGW_PUBLIC_IP_NAME="myAPPGWPublicIP$RANDOM_ID" az network vnet subnet create --name $MY_APPGW_SN_NAME --resource-group $MY_RESOURCE_GROUP_NAME --vnet-name $MY_VNET_NAME --address-prefix $MY_APPGW_SN_PREFIX -o JSON ``` @@ -393,6 +383,9 @@ https://techcommunity.microsoft.com/t5/azure-compute-blog/breaking-change-for-vm Now create a Virtual Machine Scale Set with [az vmss create](/cli/azure/vmss). The following example creates a zone redundant scale set with an instance count of *2* with public IP in subnet $MY_VM_SN_NAME within your resource group $MY_RESOURCE_GROUP_NAME, integrates the Application Gateway, and generates SSH keys. Make sure to save the SSH keys if you need to log into your VMs via ssh. ```bash +export MY_VMSS_NAME="myVMSS$RANDOM_ID" +export MY_USERNAME=azureuser +export MY_VM_IMAGE="Ubuntu2204" az vmss create --name $MY_VMSS_NAME --resource-group $MY_RESOURCE_GROUP_NAME --image $MY_VM_IMAGE --admin-username $MY_USERNAME --generate-ssh-keys --public-ip-per-vm --orchestration-mode Uniform --instance-count 2 --zones 1 2 3 --vnet-name $MY_VNET_NAME --subnet $MY_VM_SN_NAME --vm-sku Standard_DS2_v2 --upgrade-policy-mode Automatic --app-gateway $MY_APPGW_NAME --backend-pool-name appGatewayBackendPool -o JSON ``` diff --git a/scenarios/azure-docs/articles/virtual-machine-scale-sets/tutorial-use-custom-image-cli.md b/scenarios/azure-docs/articles/virtual-machine-scale-sets/tutorial-use-custom-image-cli.md new file mode 100644 index 000000000..a7129cff1 --- /dev/null +++ b/scenarios/azure-docs/articles/virtual-machine-scale-sets/tutorial-use-custom-image-cli.md @@ -0,0 +1,210 @@ +--- +title: Tutorial - Use a custom VM image in a scale set with Azure CLI +description: Learn how to use the Azure CLI to create a custom VM image that you can use to deploy a Virtual Machine Scale Set +author: ju-shim +ms.service: azure-virtual-machine-scale-sets +ms.subservice: shared-image-gallery +ms.topic: tutorial +ms.date: 10/28/2024 +ms.reviewer: mimckitt +ms.author: jushiman +ms.custom: mvc, devx-track-azurecli, innovation-engine +--- + +# Tutorial: Create and use a custom image for Virtual Machine Scale Sets with the Azure CLI +When you create a scale set, you specify an image to be used when the VM instances are deployed. To reduce the number of tasks after VM instances are deployed, you can use a custom VM image. This custom VM image includes any required application installs or configurations. Any VM instances created in the scale set use the custom VM image and are ready to serve your application traffic. In this tutorial you learn how to: + +> [!div class="checklist"] +> * Create an Azure Compute Gallery +> * Create a specialized image definition +> * Create an image version +> * Create a scale set from a specialized image +> * Share an image gallery + +[!INCLUDE [quickstarts-free-trial-note](~/reusable-content/ce-skilling/azure/includes/quickstarts-free-trial-note.md)] + +[!INCLUDE [azure-cli-prepare-your-environment.md](~/reusable-content/azure-cli/azure-cli-prepare-your-environment.md)] + +- This article requires version 2.4.0 or later of the Azure CLI. If using Azure Cloud Shell, the latest version is already installed. + +## Overview +An [Azure Compute Gallery](../virtual-machines/shared-image-galleries.md) simplifies custom image sharing across your organization. 
Custom images are like marketplace images, but you create them yourself. Custom images can be used to bootstrap configurations such as preloading applications, application configurations, and other OS configurations. + +The Azure Compute Gallery lets you share your custom VM images with others. Choose which images you want to share, which regions you want to make them available in, and who you want to share them with. + +## Create and configure a source VM +First, create a resource group with [az group create](/cli/azure/group), then create a VM with [az vm create](/cli/azure/vm#az-vm-create). This VM is then used as the source for the image. + +The following example creates a Linux-based VM named *myVM* in the resource group named *myResourceGroup*. + +```azurecli-interactive +export RANDOM_ID=$(openssl rand -hex 3) +export MY_RESOURCE_GROUP_NAME="myResourceGroup$RANDOM_ID" +export REGION="eastus" +export MY_VM_NAME="myVM$RANDOM_ID" + +az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION + +az vm create \ + --resource-group $MY_RESOURCE_GROUP_NAME \ + --name $MY_VM_NAME \ + --image debian11 \ + --admin-username azureuser \ + --generate-ssh-keys +``` + +> [!TIP] +> The **ID** of your VM is shown in the output of the [az vm create](/cli/azure/vm#az-vm-create) command. Copy and store this in a safe location so you can use it later in this tutorial. + +## Create an image gallery +An image gallery is the primary resource used for enabling image sharing. + +Allowed characters for gallery names are uppercase or lowercase letters, digits, dots, and periods. The gallery name can't contain dashes. Gallery names must be unique within your subscription. + +Create an image gallery using [az sig create](/cli/azure/sig#az-sig-create). + +In the following example: + +* You create a resource group for the gallery named *myGalleryRG* located in *East US*. +* The gallery is named *myGallery*. + +```azurecli-interactive +export MY_GALLERY_RG_NAME="myGalleryRG$RANDOM_ID" +export MY_GALLERY_NAME="myGallery$RANDOM_ID" + +az group create --name $MY_GALLERY_RG_NAME --location $REGION +az sig create --resource-group $MY_GALLERY_RG_NAME --gallery-name $MY_GALLERY_NAME +``` + +## Create an image definition +Image definitions create a logical grouping for images. They're used to manage information about the image versions that are created within them. + +Image definition names can be made up of uppercase or lowercase letters, digits, dots, dashes, and periods. + +Make sure your image definition is the right type: + +* **State** - If you have generalized the VM (using Sysprep for Windows, or waagent -deprovision for Linux), then you should create a generalized image definition using `--os-state generalized`. If you want to use the VM without removing existing user accounts, create a specialized image definition using `--os-state specialized`. +* **Security type** - New Azure VMs are created with Trusted Launch configured by default. This tutorial includes subsequent code samples that reflect the Trusted Launch configuration when creating the image definition and scale set. If you're creating an image with a VM that doesn't have Trusted Launch enabled, make sure to reflect the correct security type when you create both of those resources. For more information about Trusted Launch, see [Trusted Launch for Azure virtual machines](/azure/virtual-machines/trusted-launch). 
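+
+If you do want the generalized path described in the **State** bullet above, the usual Linux flow looks like the sketch below. It's shown as plain text rather than an executable block because this tutorial keeps the VM specialized and these commands are destructive to the source VM:
+
+```text
+# On the source VM: remove machine-specific data and the provisioning user account.
+sudo waagent -deprovision+user -force
+
+# From the Azure CLI: stop the VM and mark it as generalized before capturing an image.
+az vm deallocate --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME
+az vm generalize --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_VM_NAME
+```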
+
+For more information about the values you can specify for an image definition, see [Image definitions](../virtual-machines/shared-image-galleries.md#image-definitions).
+
+Create an image definition in the gallery using [az sig image-definition create](/cli/azure/sig/image-definition#az-sig-image-definition-create).
+
+In the following example, the image definition is:
+* Named *myImageDefinition*.
+* Configured for a [specialized](../virtual-machines/shared-image-galleries.md#generalized-and-specialized-images) Linux OS image. To create a definition for images using a Windows OS, use `--os-type Windows`.
+* Configured for Trusted Launch.
+
+```azurecli-interactive
+export MY_IMAGE_DEF_NAME="myImageDefinition$RANDOM_ID"
+export MY_PUBLISHER_NAME="myPublisher$RANDOM_ID"
+
+az sig image-definition create \
+  --resource-group $MY_GALLERY_RG_NAME \
+  --gallery-name $MY_GALLERY_NAME \
+  --gallery-image-definition $MY_IMAGE_DEF_NAME \
+  --publisher $MY_PUBLISHER_NAME \
+  --offer myOffer \
+  --sku mySKU \
+  --os-type Linux \
+  --os-state specialized \
+  --features SecurityType=TrustedLaunch
+```
+
+> [!TIP]
+> The **ID** of your image definition is shown in the output of the command. Copy and store this in a safe location so you can use it later in this tutorial.
+
+## Create the image version
+Create an image version from the VM using [az sig image-version create](/cli/azure/sig/image-version#az-sig-image-version-create).
+
+Allowed characters for the image version are numbers and periods. Numbers must be within the range of a 32-bit integer. Format: *MajorVersion*.*MinorVersion*.*Patch*.
+
+In the following example:
+
+* The version of the image is *1.0.0*.
+* The image version is created in the source VM's region (*East US* in this tutorial). To replicate it to more regions, add `--target-regions`; the replication regions must include the region where the source VM is located.
+* `--virtual-machine` is the ID of the VM we created previously.
+
+```azurecli-interactive
+export MY_VM_ID=$(az vm show --name $MY_VM_NAME --resource-group $MY_RESOURCE_GROUP_NAME --query "id" --output tsv)
+
+az sig image-version create \
+  --resource-group $MY_GALLERY_RG_NAME \
+  --gallery-name $MY_GALLERY_NAME \
+  --gallery-image-definition $MY_IMAGE_DEF_NAME \
+  --gallery-image-version 1.0.0 \
+  --virtual-machine $MY_VM_ID
+```
+
+> [!NOTE]
+> You need to wait for the image version to completely finish being built and replicated before you can use the same image to create another image version.
+>
+> You can also store your image in Premium storage by adding `--storage-account-type premium_lrs`, or [Zone Redundant Storage](/azure/storage/common/storage-redundancy) by adding `--storage-account-type standard_zrs` when you create the image version.
+
+
+## Create a scale set from the image
+
+You create a scale set using [`az vmss create`](/cli/azure/vmss#az-vmss-create). If you're using a specialized source VM, add the `--specialized` parameter to indicate it's a specialized image.
+
+When you use the image definition ID for `--image` to create the scale set instances, you create a scale set that uses the latest version of the image that is available. If you want a specific version of the image, make sure you include the image _version_ ID when you define the `--image`.
+
+* **Latest image example**: `/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myRG/providers/Microsoft.Compute/galleries/myGallery/images/myImage`
+
+* **Specific image example**: `/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myRG/providers/Microsoft.Compute/galleries/myGallery/images/myImage/versions/1.0.0`
+
+In the following example, the scale set is:
+* Named *myScaleSet*
+* Using the latest version of the *myImageDefinition* image.
+* Configured for Trusted Launch.
+
+```azurecli
+export MY_IMAGE_DEF_ID=$(az sig image-definition show --resource-group $MY_GALLERY_RG_NAME --gallery-name $MY_GALLERY_NAME --gallery-image-definition $MY_IMAGE_DEF_NAME --query "id" --output tsv)
+export MY_SCALE_SET_RG_NAME="myResourceGroup$RANDOM_ID"
+export MY_SCALE_SET_NAME="myScaleSet$RANDOM_ID"
+
+az group create --name $MY_SCALE_SET_RG_NAME --location $REGION
+
+az vmss create \
+  --resource-group $MY_SCALE_SET_RG_NAME \
+  --name $MY_SCALE_SET_NAME \
+  --orchestration-mode flexible \
+  --image $MY_IMAGE_DEF_ID \
+  --specialized \
+  --security-type TrustedLaunch
+```
+
+It takes a few minutes to create and configure all the scale set resources and VMs.
+
+## Share the gallery
+
+You can share images across subscriptions using Azure role-based access control (Azure RBAC), and you can share them at the gallery, image definition, or image version levels. Any user with read permission to an image version, even across subscriptions, is able to deploy a VM using the image version.
+
+We recommend that you share with other users at the gallery level.
+
+To share the gallery, you typically:
+* Get the object ID of the gallery using [az sig show](/cli/azure/sig#az-sig-show).
+* Provide access to the gallery using [az role assignment create](/cli/azure/role/assignment#az-role-assignment-create).
+  * Use the object ID as the scope of the assignment.
+  * Use the signed-in user's ID as the assignee for demonstration purposes. When you use this approach in your test or production code, make sure you update the assignee to reflect who you want to be able to access this image. For more information about how to share resources using Azure RBAC, see [Add or remove Azure role assignments using Azure CLI](/azure/role-based-access-control/role-assignments-cli).
+
+For example, you can get the gallery ID with [az sig show](/cli/azure/sig#az-sig-show) and assign the **Reader** role to the signed-in user with [az role assignment create](/cli/azure/role/assignment#az-role-assignment-create). This allows the user to access the shared image gallery.
+
+Note: Ensure you have the necessary permissions to perform these operations and that the target user or service principal has the appropriate access to the shared resources.
+
+## Clean up resources
+To remove your scale set and additional resources, delete the resource group and all its resources with [az group delete](/cli/azure/group). The `--no-wait` parameter returns control to the prompt without waiting for the operation to complete. The `--yes` parameter confirms that you wish to delete the resources without an additional prompt to do so.
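+
+A sketch of that cleanup is below. It's deliberately a plain-text block rather than an executable one so that automated runs of this Exec Doc don't delete anything; run the commands yourself only when you're sure you no longer need the resources:
+
+```text
+# In this tutorial, $MY_SCALE_SET_RG_NAME resolves to the same name as $MY_RESOURCE_GROUP_NAME.
+az group delete --name $MY_RESOURCE_GROUP_NAME --no-wait --yes
+az group delete --name $MY_GALLERY_RG_NAME --no-wait --yes
+```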
+ +## Next steps +In this tutorial, you learned how to create and use a custom VM image for your scale sets with the Azure CLI: + +> [!div class="checklist"] +> * Create an Azure Compute Gallery +> * Create a specialized image definition +> * Create an image version +> * Create a scale set from a specialized image +> * Share an image gallery + +Advance to the next tutorial to learn how to deploy applications to your scale set. + +> [!div class="nextstepaction"] +> [Deploy applications to your scale sets](tutorial-install-apps-cli.md) \ No newline at end of file diff --git a/scenarios/azure-docs/articles/virtual-machines/linux/attach-disk-portal.yml b/scenarios/azure-docs/articles/virtual-machines/linux/attach-disk-portal.yml deleted file mode 100644 index babdb3954..000000000 --- a/scenarios/azure-docs/articles/virtual-machines/linux/attach-disk-portal.yml +++ /dev/null @@ -1,259 +0,0 @@ -### YamlMime:HowTo - -metadata: - title: Attach a data disk to a Linux VM - description: Use the portal to attach new or existing data disk to a Linux VM. - author: roygara - ms.author: rogarana - ms.date: 03/19/2024 - ms.service: azure-disk-storage - ms.topic: how-to - ms.collection: linux - ms.custom: - - linux-related-content - - ge-structured-content-pilot - -title: | - Use the portal to attach a data disk to a Linux VM -introduction: | - **Applies to:** :heavy_check_mark: Linux VMs :heavy_check_mark: Flexible scale sets - - This article shows you how to attach both new and existing disks to a Linux virtual machine through the Azure portal. You can also [attach a data disk to a Windows VM in the Azure portal](../windows/attach-managed-disk-portal.yml). - -prerequisites: - summary: | - Before you attach disks to your VM, review these tips: - dependencies: - - The size of the virtual machine controls how many data disks you can attach. For details, see [Sizes for virtual machines](../sizes.md). - -procedureSection: - - title: | - Find the virtual machine - summary: | - Follow these steps: - steps: - - | - Go to the [Azure portal](https://portal.azure.com/) to find the VM. Search for and select **Virtual machines**. - - | - Select the VM you'd like to attach the disk to from the list. - - | - In the **Virtual machines** page, under **Settings**, select **Disks**. - - - title: | - Attach a new disk - summary: | - Follow these steps: - steps: - - | - On the **Disks** pane, under **Data disks**, select **Create and attach a new disk**. - - | - Enter a name for your managed disk. Review the default settings, and update the **Storage type**, **Size (GiB)**, **Encryption** and **Host caching** as necessary. - - :::image type="content" source="./media/attach-disk-portal/create-new-md.png" alt-text="Screenshot of review disk settings." lightbox="./media/attach-disk-portal/create-new-md.png"::: - - - | - When you're done, select **Save** at the top of the page to create the managed disk and update the VM configuration. - - - title: | - Attach an existing disk - summary: | - Follow these steps: - steps: - - | - On the **Disks** pane, under **Data disks**, select **Attach existing disks**. - - | - Select the drop-down menu for **Disk name** and select a disk from the list of available managed disks. - - | - Select **Save** to attach the existing managed disk and update the VM configuration: - - - title: | - Connect to the Linux VM to mount the new disk - summary: | - To partition, format, and mount your new disk so your Linux VM can use it, SSH into your VM. 
For more information, see [How to use SSH with Linux on Azure](mac-create-ssh-keys.md). The following example connects to a VM with the public IP address of *10.123.123.25* with the username *azureuser*: - code: | - ```bash - ssh azureuser@10.123.123.25 - ``` - - - title: | - Find the disk - summary: | - Once connected to your VM, you need to find the disk. In this example, we're using `lsblk` to list the disks. - code: | - ```bash - lsblk -o NAME,HCTL,SIZE,MOUNTPOINT | grep -i "sd" - ``` - - The output is similar to the following example: - - ```output - sda 0:0:0:0 30G - ├─sda1 29.9G / - ├─sda14 4M - └─sda15 106M /boot/efi - sdb 1:0:1:0 14G - └─sdb1 14G /mnt - sdc 3:0:0:0 4G - ``` - - In this example, the disk that was added was `sdc`. It's a LUN 0 and is 4GB. - - For a more complex example, here's what multiple data disks look like in the portal: - - :::image type="content" source="./media/attach-disk-portal/find-disk.png" alt-text="Screenshot of multiple disks shown in the portal."::: - - In the image, you can see that there are 3 data disks: 4 GB on LUN 0, 16GB at LUN 1, and 32G at LUN 2. - - Here's what that might look like using `lsblk`: - - ```output - sda 0:0:0:0 30G - ├─sda1 29.9G / - ├─sda14 4M - └─sda15 106M /boot/efi - sdb 1:0:1:0 14G - └─sdb1 14G /mnt - sdc 3:0:0:0 4G - sdd 3:0:0:1 16G - sde 3:0:0:2 32G - ``` - - From the output of `lsblk` you can see that the 4GB disk at LUN 0 is `sdc`, the 16GB disk at LUN 1 is `sdd`, and the 32G disk at LUN 2 is `sde`. - - ### Prepare a new empty disk - - > [!IMPORTANT] - > If you are using an existing disk that contains data, skip to [mounting the disk](#mount-the-disk). - > The following instructions will delete data on the disk. - - If you're attaching a new disk, you need to partition the disk. - - The `parted` utility can be used to partition and to format a data disk. - - Use the latest version `parted` that is available for your distro. - - If the disk size is 2 tebibytes (TiB) or larger, you must use GPT partitioning. If disk size is under 2 TiB, then you can use either MBR or GPT partitioning. - - - The following example uses `parted` on `/dev/sdc`, which is where the first data disk will typically be on most VMs. Replace `sdc` with the correct option for your disk. We're also formatting it using the [XFS](https://xfs.wiki.kernel.org/) filesystem. - - ```bash - sudo parted /dev/sdc --script mklabel gpt mkpart xfspart xfs 0% 100% - sudo mkfs.xfs /dev/sdc1 - sudo partprobe /dev/sdc1 - ``` - - Use the [`partprobe`](https://linux.die.net/man/8/partprobe) utility to make sure the kernel is aware of the new partition and filesystem. Failure to use `partprobe` can cause the blkid or lslbk commands to not return the UUID for the new filesystem immediately. - - ### Mount the disk - - Create a directory to mount the file system using `mkdir`. The following example creates a directory at `/datadrive`: - - ```bash - sudo mkdir /datadrive - ``` - - Use `mount` to then mount the filesystem. The following example mounts the */dev/sdc1* partition to the `/datadrive` mount point: - - ```bash - sudo mount /dev/sdc1 /datadrive - ``` - To ensure that the drive is remounted automatically after a reboot, it must be added to the */etc/fstab* file. It's also highly recommended that the UUID (Universally Unique Identifier) is used in */etc/fstab* to refer to the drive rather than just the device name (such as, */dev/sdc1*). If the OS detects a disk error during boot, using the UUID avoids the incorrect disk being mounted to a given location. 
Remaining data disks would then be assigned those same device IDs. To find the UUID of the new drive, use the `blkid` utility: - - ```bash - sudo blkid - ``` - - The output looks similar to the following example: - - ```output - /dev/sda1: LABEL="cloudimg-rootfs" UUID="11111111-1b1b-1c1c-1d1d-1e1e1e1e1e1e" TYPE="ext4" PARTUUID="1a1b1c1d-11aa-1234-1a1a1a1a1a1a" - /dev/sda15: LABEL="UEFI" UUID="BCD7-96A6" TYPE="vfat" PARTUUID="1e1g1cg1h-11aa-1234-1u1u1a1a1u1u" - /dev/sdb1: UUID="22222222-2b2b-2c2c-2d2d-2e2e2e2e2e2e" TYPE="ext4" TYPE="ext4" PARTUUID="1a2b3c4d-01" - /dev/sda14: PARTUUID="2e2g2cg2h-11aa-1234-1u1u1a1a1u1u" - /dev/sdc1: UUID="33333333-3b3b-3c3c-3d3d-3e3e3e3e3e3e" TYPE="xfs" PARTLABEL="xfspart" PARTUUID="c1c2c3c4-1234-cdef-asdf3456ghjk" - ``` - - > [!NOTE] - > Improperly editing the **/etc/fstab** file could result in an unbootable system. If unsure, refer to the distribution's documentation for information on how to properly edit this file. You should create a backup of the **/etc/fstab** file is created before editing. - - Next, open the **/etc/fstab** file in a text editor. Add a line to the end of the file, using the UUID value for the `/dev/sdc1` device that was created in the previous steps, and the mountpoint of `/datadrive`. Using the example from this article, the new line would look like the following: - - ```config - UUID=33333333-3b3b-3c3c-3d3d-3e3e3e3e3e3e /datadrive xfs defaults,nofail 1 2 - ``` - - When you're done editing the file, save and close the editor. - - > [!NOTE] - > Later removing a data disk without editing fstab could cause the VM to fail to boot. Most distributions provide either the *nofail* and/or *nobootwait* fstab options. These options allow a system to boot even if the disk fails to mount at boot time. Consult your distribution's documentation for more information on these parameters. - > - > The *nofail* option ensures that the VM starts even if the filesystem is corrupt or the disk does not exist at boot time. Without this option, you may encounter behavior as described in [Cannot SSH to Linux VM due to FSTAB errors](/archive/blogs/linuxonazure/cannot-ssh-to-linux-vm-after-adding-data-disk-to-etcfstab-and-rebooting) - - - - title: | - Verify the disk - summary: | - You can now use `lsblk` again to see the disk and the mountpoint. - - ```bash - lsblk -o NAME,HCTL,SIZE,MOUNTPOINT | grep -i "sd" - ``` - - The output will look something like this: - - ```output - sda 0:0:0:0 30G - ├─sda1 29.9G / - ├─sda14 4M - └─sda15 106M /boot/efi - sdb 1:0:1:0 14G - └─sdb1 14G /mnt - sdc 3:0:0:0 4G - └─sdc1 4G /datadrive - ``` - - You can see that `sdc` is now mounted at `/datadrive`. - - ### TRIM/UNMAP support for Linux in Azure - - Some Linux kernels support TRIM/UNMAP operations to discard unused blocks on the disk. This feature is primarily useful to inform Azure that deleted pages are no longer valid and can be discarded. This feature can save money on disks that are billed based on the amount of consumed storage, such as unmanaged standard disks and disk snapshots. - - There are two ways to enable TRIM support in your Linux VM. As usual, consult your distribution for the recommended approach: - steps: - - | - Use the `discard` mount option in */etc/fstab*, for example: - - ```config - UUID=33333333-3b3b-3c3c-3d3d-3e3e3e3e3e3e /datadrive xfs defaults,discard 1 2 - ``` - - | - In some cases, the `discard` option may have performance implications. 
Alternatively, you can run the `fstrim` command manually from the command line, or add it to your crontab to run regularly: - - **Ubuntu** - - ```bash - sudo apt-get install util-linux - sudo fstrim /datadrive - ``` - - **RHEL** - - ```bash - sudo yum install util-linux - sudo fstrim /datadrive - ``` - - **SUSE** - - ```bash - sudo zypper install util-linux - sudo fstrim /datadrive - ``` - -relatedContent: - - text: Troubleshoot Linux VM device name changes - url: /troubleshoot/azure/virtual-machines/troubleshoot-device-names-problems - - text: Attach a data disk using the Azure CLI - url: add-disk.md -#For more information, and to help troubleshoot disk issues, see [Troubleshoot Linux VM device name changes](/troubleshoot/azure/virtual-machines/troubleshoot-device-names-problems). - -#You can also [attach a data disk](add-disk.md) using the Azure CLI. diff --git a/scenarios/azure-docs/articles/virtual-machines/linux/disk-encryption-faq.yml b/scenarios/azure-docs/articles/virtual-machines/linux/disk-encryption-faq.yml deleted file mode 100644 index f77fa18bd..000000000 --- a/scenarios/azure-docs/articles/virtual-machines/linux/disk-encryption-faq.yml +++ /dev/null @@ -1,200 +0,0 @@ -### YamlMime:FAQ -metadata: - title: FAQ - Azure Disk Encryption for Linux VMs - description: This article provides answers to frequently asked questions about Microsoft Azure Disk Encryption for Linux IaaS VMs. - author: msmbaldwin - ms.service: azure-virtual-machines - ms.collection: linux - ms.subservice: security - ms.topic: faq - ms.author: mbaldwin - ms.date: 08/06/2024 -title: Azure Disk Encryption for Linux virtual machines FAQ -summary: | - This article provides answers to frequently asked questions (FAQ) about Azure Disk Encryption for Linux virtual machines (VMs). For more information about this service, see [Azure Disk Encryption overview](disk-encryption-overview.md). - - -sections: - - name: Ignored - questions: - - question: | - What is Azure Disk Encryption for Linux virtual machines? - answer: | - Azure Disk Encryption for Linux virtual machines uses the dm-crypt feature of Linux to provide full disk encryption of the OS disk* and data disks. Additionally, it provides encryption of the temporary disk when using the [EncryptFormatAll feature](disk-encryption-linux.md#use-encryptformatall-feature-for-data-disks-on-linux-vms). The content flows encrypted from the VM to the Storage backend with a customer-managed key. - - See [Supported virtual machines and operating systems](disk-encryption-overview.md#supported-vms-and-operating-systems). - - - question: | - Where is Azure Disk Encryption in general availability (GA)? - answer: | - Azure Disk Encryption for Linux virtual machines is in general availability in all Azure public regions. - - - question: | - What user experiences are available with Azure Disk Encryption? - answer: | - Azure Disk Encryption GA supports Azure Resource Manager templates, Azure PowerShell, and Azure CLI. The different user experiences give you flexibility. You have three different options for enabling disk encryption for your virtual machines. For more information on the user experience and step-by-step guidance available in Azure Disk Encryption, see [Azure Disk Encryption scenarios for Linux](disk-encryption-linux.md). - - - question: | - How much does Azure Disk Encryption cost? - answer: | - There's no charge for encrypting VM disks with Azure Disk Encryption, but there are charges associated with the use of Azure Key Vault. 
For more information on Azure Key Vault costs, see the [Key Vault pricing](https://azure.microsoft.com/pricing/details/key-vault/) page. - - - question: | - How can I start using Azure Disk Encryption? - answer: | - To get started, read the [Azure Disk Encryption overview](disk-encryption-overview.md). - - - question: | - What VM sizes and operating systems support Azure Disk Encryption? - answer: | - The [Azure Disk Encryption overview](disk-encryption-overview.md) article lists the [VM sizes](disk-encryption-overview.md#supported-vms) and [VM operating systems](disk-encryption-overview.md#supported-operating-systems) that support Azure Disk Encryption. - - - question: | - Can I encrypt both boot and data volumes with Azure Disk Encryption? - answer: | - Yes, you can encrypt both boot and data volumes, or you can encrypt the data volume without having to encrypt the OS volume first. - - After you've encrypted the OS volume, disabling encryption on the OS volume isn't supported. For Linux virtual machines in a scale set, only the data volume can be encrypted. - - - question: | - Can I encrypt an unmounted volume with Azure Disk Encryption? - answer: | - No, Azure Disk Encryption only encrypts mounted volumes. - - - question: | - What is Storage server-side encryption? - answer: | - Storage server-side encryption encrypts Azure managed disks in Azure Storage. Managed disks are encrypted by default with Server-side encryption with a platform-managed key (as of June 10, 2017). You can manage encryption of managed disks with your own keys by specifying a customer-managed key. For more information see: [Server-side encryption of Azure managed disks](../disk-encryption.md). - - - question: | - How is Azure Disk Encryption different from other disk encryption solutions and when should I use each solution? - answer: | - See [Overview of managed disk encryption options](../disk-encryption-overview.md). - - - question: | - How do I rotate secrets or encryption keys? - answer: | - To rotate secrets, just call the same command you used originally to enable disk encryption, specifying a different Key Vault. To rotate the key encryption key, call the same command you used originally to enable disk encryption, specifying the new key encryption. - - >[!WARNING] - > - If you previously used [Azure Disk Encryption with Microsoft Entra app](disk-encryption-linux-aad.md) by specifying Microsoft Entra credentials to encrypt this VM, you must continue to use this option to encrypt your VM. You can't use Azure Disk Encryption on this encrypted VM as this isn't a supported scenario, meaning switching away from Microsoft Entra application for this encrypted VM isn't supported yet. - - - question: | - How do I add or remove a key encryption key if I didn't originally use one? - answer: | - To add a key encryption key, call the enable command again passing the key encryption key parameter. To remove a key encryption key, call the enable command again without the key encryption key parameter. - - - question: | - Does Azure Disk Encryption allow you to bring your own key (BYOK)? - answer: | - Yes, you can supply your own key encryption keys. These keys are safeguarded in Azure Key Vault, which is the key store for Azure Disk Encryption. For more information on the key encryption keys support scenarios, see [Creating and configuring a key vault for Azure Disk Encryption](disk-encryption-key-vault.md). - - - question: | - Can I use an Azure-created key encryption key? 
- answer: | - Yes, you can use Azure Key Vault to generate a key encryption key for Azure disk encryption use. These keys are safeguarded in Azure Key Vault, which is the key store for Azure Disk Encryption. For more information on the key encryption key, see [Creating and configuring a key vault for Azure Disk Encryption](disk-encryption-key-vault.md). - - - question: | - Can I use an on-premises key management service or HSM to safeguard the encryption keys? - answer: | - You can't use the on-premises key management service or HSM to safeguard the encryption keys with Azure Disk Encryption. You can only use the Azure Key Vault service to safeguard the encryption keys. For more information on the key encryption key support scenarios, see [Creating and configuring a key vault for Azure Disk Encryption](disk-encryption-key-vault.md). - - - question: | - What are the prerequisites to configure Azure Disk Encryption? - answer: | - There are prerequisites for Azure Disk Encryption. See the [Creating and configuring a key vault for Azure Disk Encryption](disk-encryption-key-vault.md) article to create a new key vault, or set up an existing key vault for disk encryption access to enable encryption, and safeguard secrets and keys. For more information on the key encryption key support scenarios, see [Creating and configuring a key vault for Azure Disk Encryption](disk-encryption-key-vault.md). - - - question: | - What are the prerequisites to configure Azure Disk Encryption with a Microsoft Entra app (previous release)? - answer: | - There are prerequisites for Azure Disk Encryption. See the [Azure Disk Encryption with Microsoft Entra ID](disk-encryption-linux-aad.md) content to create an Microsoft Entra application, create a new key vault, or set up an existing key vault for disk encryption access to enable encryption, and safeguard secrets and keys. For more information on the key encryption key support scenarios, see [Creating and configuring a key vault for Azure Disk Encryption with Microsoft Entra ID](disk-encryption-key-vault-aad.md). - - - question: | - Is Azure Disk Encryption using a Microsoft Entra app (previous release) still supported? - answer: | - Yes. Disk encryption using a Microsoft Entra app is still supported. However, when encrypting new virtual machines it's recommended that you use the new method rather than encrypting with a Microsoft Entra app. - - - question: | - Can I migrate virtual machines that were encrypted with a Microsoft Entra app to encryption without a Microsoft Entra app? - answer: Currently, there isn't a direct migration path for machines that were encrypted with a Microsoft Entra app to encryption without a Microsoft Entra app. Additionally, there isn't a direct path from encryption without a Microsoft Entra app to encryption with an AD app. - - - question: | - What version of Azure PowerShell does Azure Disk Encryption support? - answer: | - Use the latest version of the Azure PowerShell SDK to configure Azure Disk Encryption. Download the latest version of [Azure PowerShell](https://github.com/Azure/azure-powershell/releases). Azure Disk Encryption is *not* supported by Azure SDK version 1.1.0. - - > [!NOTE] - > The Linux Azure disk encryption preview extension "Microsoft.OSTCExtension.AzureDiskEncryptionForLinux" is deprecated. This extension was published for Azure disk encryption preview release. You should not use the preview version of the extension in your testing or production deployment. 
- - > For deployment scenarios like Azure Resource Manager (ARM), where you have a need to deploy Azure disk encryption extension for Linux VM to enable encryption on your Linux IaaS VM, you must use the Azure disk encryption production supported extension "Microsoft.Azure.Security.AzureDiskEncryptionForLinux". - - - question: | - Can I apply Azure Disk Encryption on my custom Linux image? - answer: | - You can't apply Azure Disk Encryption on your custom Linux image. Only the gallery Linux images for the supported distributions called out previously are supported. Custom Linux images aren't currently supported. - - - question: | - Can I apply updates to a Linux Red Hat VM that uses the yum update? - answer: | - Yes, you can perform a yum update on a Red Hat Linux VM. For more information, see [Azure Disk Encryption on an isolated network](disk-encryption-isolated-network.md). - - - question: | - What is the recommended Azure disk encryption workflow for Linux? - answer: | - The following workflow is recommended to have the best results on Linux: - * Start from the unmodified stock gallery image corresponding to the needed OS distro and version - * Back up any mounted drives you want encrypted. This back up allows for recovery if there's a failure, for example if the VM is rebooted before encryption has completed. - * Encrypt (can take several hours or even days depending on VM characteristics and size of any attached data disks) - * Customize, and add software to the image as needed. - - If this workflow isn't possible, relying on [Storage Service Encryption (SSE)](../../storage/common/storage-service-encryption.md) at the platform storage account layer may be an alternative to full disk encryption using dm-crypt. - - - question: | - What is the disk "Bek Volume" or "/mnt/azure_bek_disk"? - answer: | - The "Bek volume" is a local data volume that securely stores the encryption keys for Encrypted Azure virtual machines. - > [!NOTE] - > Do not delete or edit any contents in this disk. Do not unmount the disk since the encryption key presence is needed for any encryption operations on the IaaS VM. - - - - question: | - What encryption method does Azure Disk Encryption use? - answer: | - Azure Disk Encryption uses the decrypt default of aes-xts-plain64 with a 256-bit volume master key. - - - question: | - If I use EncryptFormatAll and specify all volume types, will it erase the data on the data drives that we already encrypted? - answer: | - No, data won't be erased from data drives that are already encrypted using Azure Disk Encryption. Similar to how EncryptFormatAll didn't re-encrypt the OS drive, it won't re-encrypt the already encrypted data drive. For more information, see the [EncryptFormatAll criteria](disk-encryption-linux.md#use-encryptformatall-feature-for-data-disks-on-linux-vms). - - - question: | - Is XFS filesystem supported? - answer: | - Encryption of XFS OS disks is supported. - - Encryption of XFS data disks is supported only when the EncryptFormatAll parameter is used. This option reformats the volume, erasing any data previously there. For more information, see the [EncryptFormatAll criteria](disk-encryption-linux.md#use-encryptformatall-feature-for-data-disks-on-linux-vms). - - - question: | - Is resizing the OS partition supported? - answer: | - Resize of an Azure Disk Encryption encrypted OS disk isn't supported. - - - question: | - Can I backup and restore an encrypted VM? 
- answer: | - Azure Backup provides a mechanism to backup and restore encrypted VM's within the same subscription and region. For instructions, please see [Back up and restore encrypted virtual machines with Azure Backup](../../backup/backup-azure-vms-encryption.md). Restoring an encrypted VM to a different region is not currently supported. - - - question: | - Where can I go to ask questions or provide feedback? - answer: | - You can ask questions or provide feedback on the [Microsoft Q&A question page for Azure Disk Encryption](/answers/topics/azure-disk-encryption.html). - -additionalContent: | - - ## Next steps - - In this document, you learned more about the most frequent questions related to Azure Disk Encryption. For more information about this service, see the following articles: - - - [Azure Disk Encryption Overview](disk-encryption-overview.md) - - [Apply disk encryption in Azure Security Center](../../security-center/asset-inventory.md) - - [Azure data encryption at rest](../../security/fundamentals/encryption-atrest.md) diff --git a/scenarios/azure-docs/articles/virtual-machines/linux/faq.yml b/scenarios/azure-docs/articles/virtual-machines/linux/faq.yml deleted file mode 100644 index 5700bcc9c..000000000 --- a/scenarios/azure-docs/articles/virtual-machines/linux/faq.yml +++ /dev/null @@ -1,141 +0,0 @@ -### YamlMime:FAQ -metadata: - title: Frequently asked questions for Linux VMs in Azure - description: Provides answers to some of the common questions about Linux virtual machines created with the Resource Manager model. - author: ju-shim - ms.service: azure-virtual-machines - ms.collection: linux - ms.topic: faq - ms.date: 03/06/2024 - ms.author: jushiman -title: Frequently asked question about Linux Virtual Machines -summary: | - This article addresses some common questions about Linux virtual machines created in Azure using the Resource Manager deployment model. For the Windows version of this topic, see [Frequently asked question about Windows Virtual Machines](../windows/faq.yml) - - -sections: - - name: Ignored - questions: - - question: | - What can I run on an Azure VM? - answer: | - All subscribers can run server software on an Azure virtual machine. For more information, see [Linux on Azure-Endorsed Distributions](endorsed-distros.md) - - - question: | - How much storage can I use with a virtual machine? - answer: | - Each data disk can be up to 32,767 GiB. The number of data disks you can use depends on the size of the virtual machine. For details, see [Sizes for Virtual Machines](../sizes.md). - - Azure Managed Disks are the recommended disk storage offerings for use with Azure Virtual Machines for persistent storage of data. You can use multiple Managed Disks with each Virtual Machine. Managed Disks offer two types of durable storage options: Premium and Standard Managed Disks. For pricing information, see [Managed Disks Pricing](https://azure.microsoft.com/pricing/details/managed-disks). - - Azure storage accounts can also provide storage for the operating system disk and any data disks. Each disk is a .vhd file stored as a page blob. For pricing details, see [Storage Pricing Details](https://azure.microsoft.com/pricing/details/storage/). - - - question: | - How can I access my virtual machine? - answer: | - Establish a remote connection to sign on to the virtual machine, using Secure Shell (SSH). See the instructions on how to connect [from Windows](ssh-from-windows.md) or - [from Linux and Mac](mac-create-ssh-keys.md). 
By default, SSH allows a maximum of 10 concurrent connections. You can increase this number by editing the configuration file. - - If you’re having problems, check out [Troubleshoot Secure Shell (SSH) connections](/troubleshoot/azure/virtual-machines/troubleshoot-ssh-connection?toc=%2fazure%2fvirtual-machines%2flinux%2ftoc.json). - - - question: | - Can I use the temporary disk (/dev/sdb1) to store data? - answer: | - Don't use the temporary disk (/dev/sdb1) to store data. It is only there for temporary storage. You risk losing data that can’t be recovered. - - - question: | - Can I copy or clone an existing Azure VM? - answer: | - Yes. For instructions, see [How to create a copy of a Linux virtual machine in the Resource Manager deployment model](/previous-versions/azure/virtual-machines/linux/copy-vm). - - - question: | - Why am I not seeing Canada Central and Canada East regions through Azure Resource Manager? - answer: | - The two new regions of Canada Central and Canada East are not automatically registered for virtual machine creation for existing Azure subscriptions. This registration is done automatically when a virtual machine is deployed through the Azure portal to any other region using Azure Resource Manager. After a virtual machine is deployed to any other Azure region, the new regions should be available for subsequent virtual machines. - - - question: | - Can I add a NIC to my VM after it's created? - answer: | - Yes, this is now possible. The VM first needs to be stopped deallocated. Then you can add or remove a NIC (unless it's the last NIC on the VM). - - - question: | - Are there any computer name requirements? - answer: | - Yes. The computer name can be a maximum of 64 characters in length. See [Naming conventions rules and restrictions](/azure/architecture/best-practices/resource-naming) for more information around naming your resources. - - - question: | - Are there any resource group name requirements? - answer: | - Yes. The resource group name can be a maximum of 90 characters in length. See [Naming conventions rules and restrictions](/azure/architecture/best-practices/resource-naming) for more information about resource groups. - - - question: | - What are the username requirements when creating a VM? - answer: | - Usernames should be 1 - 32 characters in length. - - The following usernames are not allowed: - - - `1` - - `123` - - `a` - - `actuser` - - `adm` - - `admin` - - `admin1` - - `admin2` - - `administrator` - - `aspnet` - - `backup` - - `console` - - `david` - - `guest` - - `john` - - `owner` - - `root` - - `server` - - `sql` - - `support_388945a0` - - `support` - - `sys` - - `test` - - `test1` - - `test2` - - `test3` - - `user` - - `user1` - - `user2` - - `user3` - - `user4` - - `user5` - - `video` - - - - question: | - What are the password requirements when creating a VM? - answer: | - There are varying password length requirements, depending on the tool you are using: - - Azure portal - between 12 - 72 characters - - Azure PowerShell - between 8 - 123 characters - - Azure CLI - between 12 - 123 characters - - Azure Resource Manager (ARM) templates - 12 - 72 characters and control characters are not allowed - - - Passwords must also meet 3 out of the following 4 complexity requirements: - - * Have lower characters - * Have upper characters - * Have a digit - * Have a special character (Regex match [\W_]) - - The following passwords are not allowed: - - * abc@123 - * P@$$w0rd - * P@ssw0rd - * P@ssword123 - * Pa$$word - * pass@word1 - * Password! 
- * Password1 - * Password22 - * iloveyou! diff --git a/scenarios/azure-docs/articles/virtual-machines/linux/tutorial-lemp-stack.md b/scenarios/azure-docs/articles/virtual-machines/linux/tutorial-lemp-stack.md index b4e50bcc4..101666de0 100644 --- a/scenarios/azure-docs/articles/virtual-machines/linux/tutorial-lemp-stack.md +++ b/scenarios/azure-docs/articles/virtual-machines/linux/tutorial-lemp-stack.md @@ -30,42 +30,10 @@ This article walks you through how to deploy an NGINX web server, Azure MySQL Fl > * Install WordPress This setup is for quick tests or proof of concept. For more on the LEMP stack, including recommendations for a production environment, see the [Ubuntu documentation](https://help.ubuntu.com/community/ApacheMySQLPHP). -This tutorial uses the CLI within the [Azure Cloud Shell](../../cloud-shell/overview.md), which is constantly updated to the latest version. To open the Cloud Shell, select **Try it** from the top of any code block. +This tutorial uses the CLI within the [Azure Cloud Shell](/azure/cloud-shell/overview), which is constantly updated to the latest version. To open the Cloud Shell, select **Try it** from the top of any code block. If you choose to install and use the CLI locally, this tutorial requires that you're running the Azure CLI version 2.0.30 or later. Find the version by running the `az --version` command. If you need to install or upgrade, see [Install Azure CLI]( /cli/azure/install-azure-cli). -## Variable declaration - -First we need to define a few variables that help with the configuration of the LEMP workload. - -```bash -export NETWORK_PREFIX="$(($RANDOM % 254 + 1))" -export RANDOM_ID="$(openssl rand -hex 3)" -export MY_RESOURCE_GROUP_NAME="myLEMPResourceGroup$RANDOM_ID" -export REGION="westeurope" -export MY_VM_NAME="myVM$RANDOM_ID" -export MY_VM_USERNAME="azureadmin" -export MY_VM_SIZE='Standard_DS2_v2' -export MY_VM_IMAGE='Canonical:0001-com-ubuntu-minimal-jammy:minimal-22_04-lts-gen2:latest' -export MY_PUBLIC_IP_NAME="myPublicIP$RANDOM_ID" -export MY_DNS_LABEL="mydnslabel$RANDOM_ID" -export MY_NSG_NAME="myNSG$RANDOM_ID" -export MY_NSG_SSH_RULE="Allow-Access$RANDOM_ID" -export MY_VM_NIC_NAME="myVMNic$RANDOM_ID" -export MY_VNET_NAME="myVNet$RANDOM_ID" -export MY_VNET_PREFIX="10.$NETWORK_PREFIX.0.0/22" -export MY_SN_NAME="mySN$RANDOM_ID" -export MY_SN_PREFIX="10.$NETWORK_PREFIX.0.0/24" -export MY_MYSQL_DB_NAME="mydb$RANDOM_ID" -export MY_MYSQL_ADMIN_USERNAME="dbadmin$RANDOM_ID" -export MY_MYSQL_ADMIN_PW="$(openssl rand -base64 32)" -export MY_MYSQL_SN_NAME="myMySQLSN$RANDOM_ID" -export MY_WP_ADMIN_PW="$(openssl rand -base64 32)" -export MY_WP_ADMIN_USER="wpcliadmin" -export MY_AZURE_USER=$(az account show --query user.name --output tsv) -export FQDN="${MY_DNS_LABEL}.${REGION}.cloudapp.azure.com" -``` - + +```json +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/test-rg69e367", + "location": "eastus2", + "managedBy": null, + "name": "test-rg69e367", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Create VNET + +Use [az network vnet create](/cli/azure/network/vnet#az-network-vnet-create) to create a virtual network with one subnet in the resource group: + +```bash +export RESOURCE_GROUP_NAME="test-rg$RANDOM_SUFFIX" +export VNET_NAME="vnet-1$RANDOM_SUFFIX" +export SUBNET_NAME="subnet-1$RANDOM_SUFFIX" +export VNET_ADDRESS_PREFIX="10.0.0.0/16" +export SUBNET_ADDRESS_PREFIX="10.0.0.0/24" + +az network vnet create \ + --resource-group 
$RESOURCE_GROUP_NAME \ + --name $VNET_NAME \ + --address-prefix $VNET_ADDRESS_PREFIX \ + --subnet-name $SUBNET_NAME \ + --subnet-prefix $SUBNET_ADDRESS_PREFIX +``` + +Results: + + + +```json +{ + "newVNet": { + "addressSpace": { + "addressPrefixes": [ + "10.0.0.0/16" + ] + }, + "enableDdosProtection": false, + "etag": "W/\"300c6da1-ee4a-47ee-af6e-662d3a0230a1\"", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/test-rg69e367/providers/Microsoft.Network/virtualNetworks/vnet-169e367", + "location": "eastus2", + "name": "vnet-169e367", + "provisioningState": "Succeeded", + "resourceGroup": "test-rg69e367", + "resourceGuid": "3d64254d-70d4-47e3-a129-473d70ea2ab8", + "subnets": [ + { + "addressPrefix": "10.0.0.0/24", + "delegations": [], + "etag": "W/\"300c6da1-ee4a-47ee-af6e-662d3a0230a1\"", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/test-rg69e367/providers/Microsoft.Network/virtualNetworks/vnet-169e367/subnets/subnet-169e367", + "name": "subnet-169e367", + "privateEndpointNetworkPolicies": "Disabled", + "privateLinkServiceNetworkPolicies": "Enabled", + "provisioningState": "Succeeded", + "resourceGroup": "test-rg69e367", + "type": "Microsoft.Network/virtualNetworks/subnets" + } + ], + "type": "Microsoft.Network/virtualNetworks", + "virtualNetworkPeerings": [] + } +} +``` + +## Create Bastion Subnet + +Create the Bastion subnet with [az network vnet subnet create](/cli/azure/network/vnet/subnet). + +```bash +export RESOURCE_GROUP_NAME="test-rg$RANDOM_SUFFIX" +export VNET_NAME="vnet-1$RANDOM_SUFFIX" +export SUBNET_NAME="AzureBastionSubnet" +export SUBNET_ADDRESS_PREFIX="10.0.1.0/24" + +az network vnet subnet create \ + --vnet-name $VNET_NAME \ + --resource-group $RESOURCE_GROUP_NAME \ + --name AzureBastionSubnet \ + --address-prefix $SUBNET_ADDRESS_PREFIX +``` + +Results: + + + +```json +{ + "addressPrefix": "10.0.1.0/24", + "delegations": [], + "etag": "W/\"a2863964-0276-453f-a104-b37391e8088b\"", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/test-rg69e367/providers/Microsoft.Network/virtualNetworks/vnet-169e367/subnets/AzureBastionSubnet", + "name": "AzureBastionSubnet", + "privateEndpointNetworkPolicies": "Disabled", + "privateLinkServiceNetworkPolicies": "Enabled", + "provisioningState": "Succeeded", + "resourceGroup": "test-rg69e367", + "type": "Microsoft.Network/virtualNetworks/subnets" +} +``` + +### Create Azure Bastion + +1. Create a public IP address for the Azure Bastion host with [az network public-ip create](/cli/azure/network/public-ip). 
+ +```bash +export RESOURCE_GROUP_NAME="test-rg$RANDOM_SUFFIX" +export PUBLIC_IP_NAME="public-ip-bastion$RANDOM_SUFFIX" +export REGION="eastus2" +export ALLOCATION_METHOD="Static" +export SKU="Standard" + +az network public-ip create \ + --resource-group $RESOURCE_GROUP_NAME \ + --name $PUBLIC_IP_NAME \ + --location $REGION \ + --allocation-method $ALLOCATION_METHOD \ + --sku $SKU +``` + +Results: + + + +```json +{ + "publicIp": { + "ddosSettings": { + "protectionMode": "VirtualNetworkInherited" + }, + "etag": "W/\"efa750bf-63f9-4c02-9ace-a747fc405d0f\"", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/test-rg69e367/providers/Microsoft.Network/publicIPAddresses/public-ip-bastion69e367", + "idleTimeoutInMinutes": 4, + "ipAddress": "203.0.113.173", + "ipTags": [], + "location": "eastus2", + "name": "public-ip-bastion69e367", + "provisioningState": "Succeeded", + "publicIPAddressVersion": "IPv4", + "publicIPAllocationMethod": "Static", + "resourceGroup": "test-rg69e367", + "resourceGuid": "fc809493-80c8-482c-9f5a-9d6442472a99", + "sku": { + "name": "Standard", + "tier": "Regional" + }, + "type": "Microsoft.Network/publicIPAddresses" + } +} +``` + +## Create Azure Bastion Host + +Create an Azure Bastion host with [az network bastion create](/cli/azure/network/bastion). Azure Bastion is used to securely connect Azure virtual machines without exposing them to the public internet. + +```bash +export RESOURCE_GROUP_NAME="test-rg$RANDOM_SUFFIX" +export BASTION_NAME="bastion$RANDOM_SUFFIX" +export VNET_NAME="vnet-1$RANDOM_SUFFIX" +export PUBLIC_IP_NAME="public-ip-bastion$RANDOM_SUFFIX" +export REGION="eastus2" + +az network bastion create \ + --resource-group $RESOURCE_GROUP_NAME \ + --name $BASTION_NAME \ + --vnet-name $VNET_NAME \ + --public-ip-address $PUBLIC_IP_NAME \ + --location $REGION +``` + +Results: + + + +```json +{ + "disableCopyPaste": false, + "dnsName": "bst-cc1d5c1d-9496-44fa-a8b3-3b2130efa306.bastion.azure.com", + "enableFileCopy": false, + "enableIpConnect": false, + "enableKerberos": false, + "enableSessionRecording": false, + "enableShareableLink": false, + "enableTunneling": false, + "etag": "W/\"229bd068-160b-4935-b23d-eddce4bb31ed\"", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/test-rg69e367/providers/Microsoft.Network/bastionHosts/bastion69e367", + "ipConfigurations": [ + { + "etag": "W/\"229bd068-160b-4935-b23d-eddce4bb31ed\"", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/test-rg69e367/providers/Microsoft.Network/bastionHosts/bastion69e367/bastionHostIpConfigurations/bastion_ip_config", + "name": "bastion_ip_config", + "privateIPAllocationMethod": "Dynamic", + "provisioningState": "Succeeded", + "publicIPAddress": { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/test-rg69e367/providers/Microsoft.Network/publicIPAddresses/public-ip-bastion69e367", + "resourceGroup": "test-rg69e367" + }, + "resourceGroup": "test-rg69e367", + "subnet": { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/test-rg69e367/providers/Microsoft.Network/virtualNetworks/vnet-169e367/subnets/AzureBastionSubnet", + "resourceGroup": "test-rg69e367" + }, + "type": "Microsoft.Network/bastionHosts/bastionHostIpConfigurations" + } + ], + "location": "eastus2", + "name": "bastion69e367", + "provisioningState": "Succeeded", + "resourceGroup": "test-rg69e367", + "scaleUnits": 2, + "sku": { + "name": "Standard" + }, + "type": "Microsoft.Network/bastionHosts" +} +``` + +## 
Create a network interface with Accelerated Networking + +1. Use [az network nic create](/cli/azure/network/nic#az-network-nic-create) to create a network interface (NIC) with Accelerated Networking enabled. The following example creates a NIC in the subnet of the virtual network. + + ```bash + export RESOURCE_GROUP_NAME="test-rg$RANDOM_SUFFIX" + export NIC_NAME="nic-1$RANDOM_SUFFIX" + export VNET_NAME="vnet-1$RANDOM_SUFFIX" + export SUBNET_NAME="subnet-1$RANDOM_SUFFIX" + + az network nic create \ + --resource-group $RESOURCE_GROUP_NAME \ + --name $NIC_NAME \ + --vnet-name $VNET_NAME \ + --subnet $SUBNET_NAME \ + --accelerated-networking true + ``` + + Results: + + + + ```json + { + "NewNIC": { + "auxiliaryMode": "None", + "auxiliarySku": "None", + "disableTcpStateTracking": false, + "dnsSettings": { + "appliedDnsServers": [], + "dnsServers": [], + "internalDomainNameSuffix": "juswipouodrupijji24xb0rkxa.cx.internal.cloudapp.net" + }, + "enableAcceleratedNetworking": true, + "enableIPForwarding": false, + "etag": "W/\"0e24b553-769b-4350-b1aa-ab4cd04100bf\"", + "hostedWorkloads": [], + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/test-rg69e367/providers/Microsoft.Network/networkInterfaces/nic-169e367", + "ipConfigurations": [ + { + "etag": "W/\"0e24b553-769b-4350-b1aa-ab4cd04100bf\"", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/test-rg69e367/providers/Microsoft.Network/networkInterfaces/nic-169e367/ipConfigurations/ipconfig1", + "name": "ipconfig1", + "primary": true, + "privateIPAddress": "10.0.0.4", + "privateIPAddressVersion": "IPv4", + "privateIPAllocationMethod": "Dynamic", + "provisioningState": "Succeeded", + "resourceGroup": "test-rg69e367", + "subnet": { + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/test-rg69e367/providers/Microsoft.Network/virtualNetworks/vnet-169e367/subnets/subnet-169e367", + "resourceGroup": "test-rg69e367" + }, + "type": "Microsoft.Network/networkInterfaces/ipConfigurations" + } + ], + "location": "eastus2", + "name": "nic-169e367", + "nicType": "Standard", + "provisioningState": "Succeeded", + "resourceGroup": "test-rg69e367", + "resourceGuid": "6798a335-bd66-42cc-a92a-bb678d4d146e", + "tapConfigurations": [], + "type": "Microsoft.Network/networkInterfaces", + "vnetEncryptionSupported": false + } + } + ``` + +--- + +## Create a VM and attach the NIC + +Use [az vm create](/cli/azure/vm#az-vm-create) to create the VM, and use the `--nics` option to attach the NIC you created. Ensure you select a VM size and distribution listed in [Windows and Linux Accelerated Networking](https://azure.microsoft.com/updates/accelerated-networking-in-expanded-preview). For a list of all VM sizes and characteristics, see [Sizes for virtual machines in Azure](/azure/virtual-machines/sizes). The following example creates a VM with a size that supports Accelerated Networking, Standard_DS4_v2. The command will generate SSH keys for the virtual machine for login. Make note of the location of the private key. The private key is needed in later steps for connecting to the virtual machine with Azure Bastion. 
+ +```bash +export RESOURCE_GROUP_NAME="test-rg$RANDOM_SUFFIX" +export VM_NAME="vm-1$RANDOM_SUFFIX" +export IMAGE="Ubuntu2204" +export SIZE="Standard_DS4_v2" +export ADMIN_USER="azureuser" +export NIC_NAME="nic-1$RANDOM_SUFFIX" + +az vm create \ + --resource-group $RESOURCE_GROUP_NAME \ + --name $VM_NAME \ + --image $IMAGE \ + --size $SIZE \ + --admin-username $ADMIN_USER \ + --generate-ssh-keys \ + --nics $NIC_NAME +``` + +Results: + + + +```json +{ + "fqdns": "", + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/test-rg69e367/providers/Microsoft.Compute/virtualMachines/vm-169e367", + "location": "eastus2", + "macAddress": "60-45-BD-84-F0-D5", + "powerState": "VM running", + "privateIpAddress": "10.0.0.4", + "publicIpAddress": "", + "resourceGroup": "test-rg69e367", + "zones": "" +} +``` + +## Next steps + +- [How Accelerated Networking works in Linux and FreeBSD VMs](./accelerated-networking-how-it-works.md) + +- [Proximity placement groups](/azure/virtual-machines/co-location) \ No newline at end of file diff --git a/azure-vote-start.yml b/scenarios/azure-management-docs/articles/azure-linux/aks-store-quickstart.yaml similarity index 74% rename from azure-vote-start.yml rename to scenarios/azure-management-docs/articles/azure-linux/aks-store-quickstart.yaml index fabe2db67..179a961c1 100644 --- a/azure-vote-start.yml +++ b/scenarios/azure-management-docs/articles/azure-linux/aks-store-quickstart.yaml @@ -1,8 +1,9 @@ apiVersion: apps/v1 -kind: Deployment +kind: StatefulSet metadata: name: rabbitmq spec: + serviceName: rabbitmq replicas: 1 selector: matchLabels: @@ -47,12 +48,12 @@ spec: path: enabled_plugins --- apiVersion: v1 -data: - rabbitmq_enabled_plugins: | - [rabbitmq_management,rabbitmq_prometheus,rabbitmq_amqp1_0]. kind: ConfigMap metadata: name: rabbitmq-enabled-plugins +data: + rabbitmq_enabled_plugins: | + [rabbitmq_management,rabbitmq_prometheus,rabbitmq_amqp1_0]. 
--- apiVersion: v1 kind: Service @@ -111,6 +112,27 @@ spec: limits: cpu: 75m memory: 128Mi + startupProbe: + httpGet: + path: /health + port: 3000 + failureThreshold: 5 + initialDelaySeconds: 20 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: 3000 + failureThreshold: 3 + initialDelaySeconds: 3 + periodSeconds: 5 + livenessProbe: + httpGet: + path: /health + port: 3000 + failureThreshold: 5 + initialDelaySeconds: 3 + periodSeconds: 3 initContainers: - name: wait-for-rabbitmq image: busybox @@ -157,13 +179,30 @@ spec: image: ghcr.io/azure-samples/aks-store-demo/product-service:latest ports: - containerPort: 3002 + env: + - name: AI_SERVICE_URL + value: "http://ai-service:5001/" resources: requests: cpu: 1m memory: 1Mi limits: - cpu: 1m - memory: 7Mi + cpu: 2m + memory: 20Mi + readinessProbe: + httpGet: + path: /health + port: 3002 + failureThreshold: 3 + initialDelaySeconds: 3 + periodSeconds: 5 + livenessProbe: + httpGet: + path: /health + port: 3002 + failureThreshold: 5 + initialDelaySeconds: 3 + periodSeconds: 3 --- apiVersion: v1 kind: Service @@ -212,6 +251,27 @@ spec: limits: cpu: 1000m memory: 512Mi + startupProbe: + httpGet: + path: /health + port: 8080 + failureThreshold: 3 + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: /health + port: 8080 + failureThreshold: 3 + initialDelaySeconds: 3 + periodSeconds: 3 + livenessProbe: + httpGet: + path: /health + port: 8080 + failureThreshold: 5 + initialDelaySeconds: 3 + periodSeconds: 3 --- apiVersion: v1 kind: Service @@ -223,4 +283,4 @@ spec: targetPort: 8080 selector: app: store-front - type: LoadBalancer + type: LoadBalancer \ No newline at end of file diff --git a/scenarios/azure-management-docs/articles/azure-linux/quickstart-azure-cli.md b/scenarios/azure-management-docs/articles/azure-linux/quickstart-azure-cli.md new file mode 100644 index 000000000..39db87e1a --- /dev/null +++ b/scenarios/azure-management-docs/articles/azure-linux/quickstart-azure-cli.md @@ -0,0 +1,458 @@ +--- +title: 'Quickstart: Deploy an Azure Linux Container Host for AKS cluster by using the Azure CLI' +description: Learn how to quickly create an Azure Linux Container Host for AKS cluster using the Azure CLI. +author: suhuruli +ms.author: suhuruli +ms.service: microsoft-linux +ms.custom: references_regions, devx-track-azurecli, linux-related-content, innovation-engine +ms.topic: quickstart +ms.date: 04/18/2023 +--- + +# Quickstart: Deploy an Azure Linux Container Host for AKS cluster by using the Azure CLI + +Get started with the Azure Linux Container Host by using the Azure CLI to deploy an Azure Linux Container Host for AKS cluster. After installing the prerequisites, you will create a resource group, create an AKS cluster, connect to the cluster, and run a sample multi-container application in the cluster. + +## Prerequisites + +- [!INCLUDE [quickstarts-free-trial-note](~/reusable-content/ce-skilling/azure/includes/quickstarts-free-trial-note.md)] +- Use the Bash environment in [Azure Cloud Shell](/azure/cloud-shell/overview). For more information, see [Azure Cloud Shell Quickstart - Bash](/azure/cloud-shell/quickstart). + + :::image type="icon" source="~/reusable-content/ce-skilling/azure/media/cloud-shell/launch-cloud-shell-button.png" border="false" link="https://portal.azure.com/#cloudshell/"::: + +- If you prefer to run CLI reference commands locally, [install](/cli/azure/install-azure-cli) the Azure CLI. If you're running on Windows or macOS, consider running Azure CLI in a Docker container. 
For more information, see [How to run the Azure CLI in a Docker container](/cli/azure/run-azure-cli-docker). + + - If you're using a local installation, sign in to the Azure CLI by using the [az login](/cli/azure/reference-index#az-login) command. To finish the authentication process, follow the steps displayed in your terminal. For other sign-in options, see [Sign in with the Azure CLI](/cli/azure/authenticate-azure-cli). + - When you're prompted, install the Azure CLI extension on first use. For more information about extensions, see [Use extensions with the Azure CLI](/cli/azure/azure-cli-extensions-overview). + - Run [`az version`](/cli/azure/reference-index?#az-version) to find the version and dependent libraries that are installed. To upgrade to the latest version, run [az upgrade](/cli/azure/reference-index?#az-upgrade). + +## Create a resource group + +An Azure resource group is a logical group in which Azure resources are deployed and managed. When creating a resource group, it is required to specify a location. This location is: + +- The storage location of your resource group metadata. +- Where your resources will run in Azure if you don't specify another region when creating a resource. + +Create a resource group using the `az group create` command. + +```azurecli-interactive +export RANDOM_ID="$(openssl rand -hex 3)" +export MY_RESOURCE_GROUP_NAME="myAzureLinuxResourceGroup$RANDOM_ID" +export REGION="westeurope" + +az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION +``` + +Results: + +```JSON +{ + "id": "/subscriptions/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx/resourceGroups/$MY_RESOURCE_GROUP_NAMExxxxxx", + "location": "$REGION", + "managedBy": null, + "name": "$MY_RESOURCE_GROUP_NAME", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +## Create an Azure Linux Container Host cluster + +Create an AKS cluster using the `az aks create` command with the `--os-sku` parameter to provision the AKS cluster with an Azure Linux image. + +```azurecli-interactive +export MY_AZ_CLUSTER_NAME="myAzureLinuxCluster$RANDOM_ID" +az aks create --name $MY_AZ_CLUSTER_NAME --resource-group $MY_RESOURCE_GROUP_NAME --os-sku AzureLinux +``` + +After a few minutes, the command completes and returns JSON-formatted information about the cluster. + +## Connect to the cluster + +To manage a Kubernetes cluster, use the Kubernetes command-line client, `kubectl`. `kubectl` is already installed if you use Azure Cloud Shell. To install `kubectl` locally, use the `az aks install-cli` command. + +1. Configure `kubectl` to connect to your Kubernetes cluster using the `az aks get-credentials` command. This command downloads credentials and configures the Kubernetes CLI to use them. + + ```azurecli-interactive + az aks get-credentials --resource-group $MY_RESOURCE_GROUP_NAME --name $MY_AZ_CLUSTER_NAME + kubectl get nodes + ``` + +## Deploy the application + +To deploy the application, you use a manifest file to create all the objects required to run the [AKS Store application](https://github.com/Azure-Samples/aks-store-demo). A Kubernetes manifest file defines a cluster's desired state, such as which container images to run. The manifest includes the following Kubernetes deployments and services: +- **Store front**: Web application for customers to view products and place orders. +- **Product service**: Shows product information. +- **Order service**: Places orders. +- **Rabbit MQ**: Message queue for an order queue. 
+NOTE: We don't recommend running stateful containers, such as Rabbit MQ, without persistent storage for production. These are used here for simplicity, but we recommend using managed services, such as Azure CosmosDB or Azure Service Bus. + +```bash +cat < aks-store-quickstart.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: rabbitmq +spec: + serviceName: rabbitmq + replicas: 1 + selector: + matchLabels: + app: rabbitmq + template: + metadata: + labels: + app: rabbitmq + spec: + nodeSelector: + "kubernetes.io/os": linux + containers: + - name: rabbitmq + image: mcr.microsoft.com/mirror/docker/library/rabbitmq:3.10-management-alpine + ports: + - containerPort: 5672 + name: rabbitmq-amqp + - containerPort: 15672 + name: rabbitmq-http + env: + - name: RABBITMQ_DEFAULT_USER + value: "username" + - name: RABBITMQ_DEFAULT_PASS + value: "password" + resources: + requests: + cpu: 10m + memory: 128Mi + limits: + cpu: 250m + memory: 256Mi + volumeMounts: + - name: rabbitmq-enabled-plugins + mountPath: /etc/rabbitmq/enabled_plugins + subPath: enabled_plugins + volumes: + - name: rabbitmq-enabled-plugins + configMap: + name: rabbitmq-enabled-plugins + items: + - key: rabbitmq_enabled_plugins + path: enabled_plugins +--- +apiVersion: v1 +data: + rabbitmq_enabled_plugins: | + [rabbitmq_management,rabbitmq_prometheus,rabbitmq_amqp1_0]. +kind: ConfigMap +metadata: + name: rabbitmq-enabled-plugins +--- +apiVersion: v1 +kind: Service +metadata: + name: rabbitmq +spec: + selector: + app: rabbitmq + ports: + - name: rabbitmq-amqp + port: 5672 + targetPort: 5672 + - name: rabbitmq-http + port: 15672 + targetPort: 15672 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: order-service +spec: + replicas: 1 + selector: + matchLabels: + app: order-service + template: + metadata: + labels: + app: order-service + spec: + nodeSelector: + "kubernetes.io/os": linux + containers: + - name: order-service + image: ghcr.io/azure-samples/aks-store-demo/order-service:latest + ports: + - containerPort: 3000 + env: + - name: ORDER_QUEUE_HOSTNAME + value: "rabbitmq" + - name: ORDER_QUEUE_PORT + value: "5672" + - name: ORDER_QUEUE_USERNAME + value: "username" + - name: ORDER_QUEUE_PASSWORD + value: "password" + - name: ORDER_QUEUE_NAME + value: "orders" + - name: FASTIFY_ADDRESS + value: "0.0.0.0" + resources: + requests: + cpu: 1m + memory: 50Mi + limits: + cpu: 75m + memory: 128Mi + startupProbe: + httpGet: + path: /health + port: 3000 + failureThreshold: 5 + initialDelaySeconds: 20 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: 3000 + failureThreshold: 3 + initialDelaySeconds: 3 + periodSeconds: 5 + livenessProbe: + httpGet: + path: /health + port: 3000 + failureThreshold: 5 + initialDelaySeconds: 3 + periodSeconds: 3 + initContainers: + - name: wait-for-rabbitmq + image: busybox + command: ['sh', '-c', 'until nc -zv rabbitmq 5672; do echo waiting for rabbitmq; sleep 2; done;'] + resources: + requests: + cpu: 1m + memory: 50Mi + limits: + cpu: 75m + memory: 128Mi +--- +apiVersion: v1 +kind: Service +metadata: + name: order-service +spec: + type: ClusterIP + ports: + - name: http + port: 3000 + targetPort: 3000 + selector: + app: order-service +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: product-service +spec: + replicas: 1 + selector: + matchLabels: + app: product-service + template: + metadata: + labels: + app: product-service + spec: + nodeSelector: + "kubernetes.io/os": linux + containers: + - name: product-service + image: 
ghcr.io/azure-samples/aks-store-demo/product-service:latest + ports: + - containerPort: 3002 + env: + - name: AI_SERVICE_URL + value: "http://ai-service:5001/" + resources: + requests: + cpu: 1m + memory: 1Mi + limits: + cpu: 2m + memory: 20Mi + readinessProbe: + httpGet: + path: /health + port: 3002 + failureThreshold: 3 + initialDelaySeconds: 3 + periodSeconds: 5 + livenessProbe: + httpGet: + path: /health + port: 3002 + failureThreshold: 5 + initialDelaySeconds: 3 + periodSeconds: 3 +--- +apiVersion: v1 +kind: Service +metadata: + name: product-service +spec: + type: ClusterIP + ports: + - name: http + port: 3002 + targetPort: 3002 + selector: + app: product-service +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: store-front +spec: + replicas: 1 + selector: + matchLabels: + app: store-front + template: + metadata: + labels: + app: store-front + spec: + nodeSelector: + "kubernetes.io/os": linux + containers: + - name: store-front + image: ghcr.io/azure-samples/aks-store-demo/store-front:latest + ports: + - containerPort: 8080 + name: store-front + env: + - name: VUE_APP_ORDER_SERVICE_URL + value: "http://order-service:3000/" + - name: VUE_APP_PRODUCT_SERVICE_URL + value: "http://product-service:3002/" + resources: + requests: + cpu: 1m + memory: 200Mi + limits: + cpu: 1000m + memory: 512Mi + startupProbe: + httpGet: + path: /health + port: 8080 + failureThreshold: 3 + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: /health + port: 8080 + failureThreshold: 3 + initialDelaySeconds: 3 + periodSeconds: 3 + livenessProbe: + httpGet: + path: /health + port: 8080 + failureThreshold: 5 + initialDelaySeconds: 3 + periodSeconds: 3 +--- +apiVersion: v1 +kind: Service +metadata: + name: store-front +spec: + ports: + - port: 80 + targetPort: 8080 + selector: + app: store-front + type: LoadBalancer +EOF +kubectl apply -f aks-store-quickstart.yaml +``` + +## Wait for cluster to startup + +Wait for cluster to finish spinning up + +```azurecli-interactive +runtime="5 minutes" +endtime=$(date -ud "$runtime" +%s) +while [[ $(date -u +%s) -le $endtime ]] +do + STATUS=$(kubectl get pods -l app=store-front -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}') + echo $STATUS + if [ "$STATUS" == 'True' ] + then + export IP_ADDRESS=$(kubectl get service store-front --output 'jsonpath={..status.loadBalancer.ingress[0].ip}') + echo "Service IP Address: $IP_ADDRESS" + if [ -n "$IP_ADDRESS" ]; then + break + else + echo "Waiting for IP address..." + fi + else + sleep 10 + fi +done +``` + +## Test the application + +You can validate that the application is running by visiting the public IP address or the application URL. + +Get the application URL using the following commands: + +```azurecli-interactive +curl "http://$IP_ADDRESS" +``` + +Results: + +```HTML + + + + + + + + store-front + + + + + +
+ + +``` + +```OUTPUT +echo "You can now visit your web server at $IP_ADDRESS" +``` + +## Delete the cluster + +If you no longer need them, you can clean up unnecessary resources to avoid Azure charges. You can remove the resource group, container service, and all related resources using the `az group delete` command. + +## Next steps + +In this quickstart, you deployed an Azure Linux Container Host cluster. To learn more about the Azure Linux Container Host, and walk through a complete cluster deployment and management example, continue to the Azure Linux Container Host tutorial. + +> [!div class="nextstepaction"] +> [Azure Linux Container Host tutorial](./tutorial-azure-linux-create-cluster.md) + + +[kubectl-apply]: https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#apply \ No newline at end of file diff --git a/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-add-nodepool.md b/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-add-nodepool.md new file mode 100644 index 000000000..f88c2f19b --- /dev/null +++ b/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-add-nodepool.md @@ -0,0 +1,140 @@ +--- +title: Azure Linux Container Host for AKS tutorial - Add an Azure Linux node pool to your existing AKS cluster +description: In this Azure Linux Container Host for AKS tutorial, you learn how to add an Azure Linux node pool to your existing cluster. +author: suhuruli +ms.author: suhuruli +ms.service: microsoft-linux +ms.custom: linux-related-content, innovation-engine +ms.topic: tutorial +ms.date: 06/06/2023 +--- + +# Tutorial: Add an Azure Linux node pool to your existing AKS cluster + +In AKS, nodes with the same configurations are grouped together into node pools. Each pool contains the VMs that run your applications. In the previous tutorial, you created an Azure Linux Container Host cluster with a single node pool. To meet the varying compute or storage requirements of your applications, you can create additional user node pools. + +In this tutorial, part two of five, you learn how to: + +> [!div class="checklist"] +> +> * Add an Azure Linux node pool. +> * Check the status of your node pools. + +In later tutorials, you learn how to migrate nodes to Azure Linux and enable telemetry to monitor your clusters. + +## Prerequisites + +* In the previous tutorial, you created and deployed an Azure Linux Container Host cluster. If you haven't done these steps and would like to follow along, start with [Tutorial 1: Create a cluster with the Azure Linux Container Host for AKS](./tutorial-azure-linux-create-cluster.md). +* You need the latest version of Azure CLI. Run `az --version` to find the version. If you need to install or upgrade, see [Install Azure CLI](/cli/azure/install-azure-cli). + +## Add an Azure Linux node pool + +To add an Azure Linux node pool into your existing cluster, use the `az aks nodepool add` command and specify `--os-sku AzureLinux`. The following example creates a node pool named *ALnodepool* that runs three nodes in the *testAzureLinuxCluster* cluster in the *testAzureLinuxResourceGroup* resource group. Environment variables are declared below and a random suffix is appended to the resource group and cluster names to ensure uniqueness. 
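+> [!NOTE]
+> The following command references `$RESOURCE_GROUP` and `$CLUSTER_NAME`, which aren't exported in this article. A minimal sketch of the missing declarations is shown below; the values are illustrative and should match the resource group and cluster you created in the previous tutorial.
+
+```azurecli-interactive
+# Illustrative values -- replace with the names used in Tutorial 1
+export RESOURCE_GROUP="testAzureLinuxResourceGroup"
+export CLUSTER_NAME="testAzureLinuxCluster"
+```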
+ +```azurecli-interactive +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export NODEPOOL_NAME="np$RANDOM_SUFFIX" + +az aks nodepool add \ + --resource-group $RESOURCE_GROUP \ + --cluster-name $CLUSTER_NAME \ + --name $NODEPOOL_NAME \ + --node-count 3 \ + --os-sku AzureLinux +``` + + +```JSON +{ + "agentPoolType": "VirtualMachineScaleSets", + "count": 3, + "name": "alnodepool", + "osType": "Linux", + "provisioningState": "Succeeded", + "resourceGroup": "testAzureLinuxResourceGroupxxxxx", + "type": "Microsoft.ContainerService/managedClusters/agentPools" +} +``` + +> [!NOTE] +> The name of a node pool must start with a lowercase letter and can only contain alphanumeric characters. For Linux node pools the length must be between one and 12 characters. + +## Check the node pool status + +To see the status of your node pools, use the `az aks nodepool list` command and specify your resource group and cluster name. The same environment variable values declared earlier are used here. + +```azurecli-interactive +az aks nodepool list --resource-group $RESOURCE_GROUP --cluster-name $CLUSTER_NAME +``` + + +```output +[ + { + "agentPoolType": "VirtualMachineScaleSets", + "availabilityZones": null, + "count": 1, + "enableAutoScaling": false, + "enableEncryptionAtHost": false, + "enableFips": false, + "enableNodePublicIp": false, + "id": "/subscriptions/REDACTED/resourcegroups/myAKSResourceGroupxxxxx/providers/Microsoft.ContainerService/managedClusters/myAKSClusterxxxxx/agentPools/nodepoolx", + "maxPods": 110, + "mode": "System", + "name": "nodepoolx", + "nodeImageVersion": "AKSUbuntu-1804gen2containerd-2023.06.06", + "orchestratorVersion": "1.25.6", + "osDiskSizeGb": 128, + "osDiskType": "Managed", + "osSku": "Ubuntu", + "osType": "Linux", + "powerState": { + "code": "Running" + }, + "provisioningState": "Succeeded", + "resourceGroup": "myAKSResourceGroupxxxxx", + "type": "Microsoft.ContainerService/managedClusters/agentPools", + "vmSize": "Standard_DS2_v2" + }, + { + "agentPoolType": "VirtualMachineScaleSets", + "availabilityZones": null, + "count": 3, + "enableAutoScaling": false, + "enableEncryptionAtHost": false, + "enableFips": false, + "enableNodePublicIp": false, + "id": "/subscriptions/REDACTED/resourcegroups/myAKSResourceGroupxxxxx/providers/Microsoft.ContainerService/managedClusters/myAKSClusterxxxxx/agentPools/npxxxxxx", + "maxPods": 110, + "mode": "User", + "name": "npxxxxxx", + "nodeImageVersion": "AzureLinuxContainerHost-2023.06.06", + "orchestratorVersion": "1.25.6", + "osDiskSizeGb": 128, + "osDiskType": "Managed", + "osSku": "AzureLinux", + "osType": "Linux", + "powerState": { + "code": "Running" + }, + "provisioningState": "Succeeded", + "resourceGroup": "myAKSResourceGroupxxxxx", + "type": "Microsoft.ContainerService/managedClusters/agentPools", + "vmSize": "Standard_DS2_v2" + } +] +``` + +## Next steps + +In this tutorial, you added an Azure Linux node pool to your existing cluster. You learned how to: + +> [!div class="checklist"] +> +> * Add an Azure Linux node pool. +> * Check the status of your node pools. + +In the next tutorial, you learn how to migrate existing nodes to Azure Linux. 
+
+> [!div class="nextstepaction"]
+> [Migrating to Azure Linux](./tutorial-azure-linux-migration.md)
\ No newline at end of file
diff --git a/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-create-cluster.md b/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-create-cluster.md
new file mode 100644
index 000000000..c9254eacf
--- /dev/null
+++ b/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-create-cluster.md
@@ -0,0 +1,120 @@
+---
+title: Azure Linux Container Host for AKS tutorial - Create a cluster
+description: In this Azure Linux Container Host for AKS tutorial, you will learn how to create an AKS cluster with Azure Linux.
+author: suhuruli
+ms.author: suhuruli
+ms.service: microsoft-linux
+ms.custom: linux-related-content, innovation-engine
+ms.topic: tutorial
+ms.date: 04/18/2023
+---
+
+# Tutorial: Create a cluster with the Azure Linux Container Host for AKS
+
+To create a cluster with the Azure Linux Container Host, you will use:
+1. Azure resource groups, a logical container into which Azure resources are deployed and managed.
+1. [Azure Kubernetes Service (AKS)](/azure/aks/intro-kubernetes), a hosted Kubernetes service that allows you to quickly create a production-ready Kubernetes cluster.
+
+In this tutorial, part one of five, you will learn how to:
+
+> [!div class="checklist"]
+> * Install the Kubernetes CLI, `kubectl`.
+> * Create an Azure resource group.
+> * Create and deploy an Azure Linux Container Host cluster.
+> * Configure `kubectl` to connect to your Azure Linux Container Host cluster.
+
+In later tutorials, you'll learn how to add an Azure Linux node pool to an existing cluster and migrate existing nodes to Azure Linux.
+
+## Prerequisites
+
+- You need the latest version of Azure CLI. Run `az --version` to find the version. If you need to install or upgrade, see [Install Azure CLI](/cli/azure/install-azure-cli).
+
+## Create a resource group
+
+When creating a resource group, it is required to specify a location. This location is:
+- The storage location of your resource group metadata.
+- Where your resources will run in Azure if you don't specify another region when creating a resource.
+
+Before running the command, environment variables are declared to ensure unique resource names for each deployment.
+
+```bash
+export RANDOM_SUFFIX=$(openssl rand -hex 3)
+export RESOURCE_GROUP_NAME="testAzureLinuxResourceGroup$RANDOM_SUFFIX"
+export REGION="EastUS2"
+az group create --name $RESOURCE_GROUP_NAME --location $REGION
+```
+
+
+```JSON
+{
+  "id": "/subscriptions/xxxxx/resourceGroups/testAzureLinuxResourceGroupxxxxx",
+  "location": "EastUS2",
+  "managedBy": null,
+  "name": "testAzureLinuxResourceGroupxxxxx",
+  "properties": {
+    "provisioningState": "Succeeded"
+  },
+  "tags": null,
+  "type": "Microsoft.Resources/resourceGroups"
+}
+```
+
+> [!NOTE]
+> The above example uses *EastUS2*, but Azure Linux Container Host clusters are available in all regions.
+
+## Create an Azure Linux Container Host cluster
+
+Create an AKS cluster using the `az aks create` command with the `--os-sku` parameter to provision the Azure Linux Container Host with an Azure Linux image. The following example creates an Azure Linux Container Host cluster.
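+> [!NOTE]
+> The following command references `$CLUSTER_NAME`, which isn't exported anywhere in this article. A minimal, illustrative declaration that reuses the `$RANDOM_SUFFIX` generated earlier could look like this:
+
+```bash
+# Illustrative cluster name -- adjust as needed
+export CLUSTER_NAME="testAzureLinuxCluster$RANDOM_SUFFIX"
+```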
+ +```bash +az aks create --name $CLUSTER_NAME --resource-group $RESOURCE_GROUP_NAME --os-sku AzureLinux +``` + + +```JSON +{ + "id": "/subscriptions/xxxxx/resourceGroups/testAzureLinuxResourceGroupxxxxx/providers/Microsoft.ContainerService/managedClusters/testAzureLinuxClusterxxxxx", + "location": "WestUS2", + "name": "testAzureLinuxClusterxxxxx", + "properties": { + "provisioningState": "Succeeded" + }, + "type": "Microsoft.ContainerService/managedClusters" +} +``` + +After a few minutes, the command completes and returns JSON-formatted information about the cluster. + +## Connect to the cluster using kubectl + +To configure `kubectl` to connect to your Kubernetes cluster, use the `az aks get-credentials` command. The following example gets credentials for the Azure Linux Container Host cluster using the resource group and cluster name created earlier: + +```azurecli +az aks get-credentials --resource-group $RESOURCE_GROUP_NAME --name $CLUSTER_NAME +``` + +To verify the connection to your cluster, run the [kubectl get nodes](https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#get) command to return a list of the cluster nodes: + +```azurecli-interactive +kubectl get nodes +``` + + +```text +NAME STATUS ROLES AGE VERSION +aks-nodepool1-00000000-0 Ready agent 10m v1.20.7 +aks-nodepool1-00000000-1 Ready agent 10m v1.20.7 +``` + +## Next steps + +In this tutorial, you created and deployed an Azure Linux Container Host cluster. You learned how to: + +> [!div class="checklist"] +> * Install the Kubernetes CLI, `kubectl`. +> * Create an Azure resource group. +> * Create and deploy an Azure Linux Container Host cluster. +> * Configure `kubectl` to connect to your Azure Linux Container Host cluster. + +In the next tutorial, you'll learn how to add an Azure Linux node pool to an existing cluster. + +> [!div class="nextstepaction"] +> [Add an Azure Linux node pool](./tutorial-azure-linux-add-nodepool.md) \ No newline at end of file diff --git a/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-migration.md b/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-migration.md new file mode 100644 index 000000000..adc85d4a0 --- /dev/null +++ b/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-migration.md @@ -0,0 +1,144 @@ +--- +title: Azure Linux Container Host for AKS tutorial - Migrating to Azure Linux +description: In this Azure Linux Container Host for AKS tutorial, you learn how to migrate your nodes to Azure Linux nodes. +author: suhuruli +ms.author: suhuruli +ms.reviewer: schaffererin +ms.service: microsoft-linux +ms.custom: devx-track-azurecli, linux-related-content, innovation-engine +ms.topic: tutorial +ms.date: 01/19/2024 +--- + +# Tutorial: Migrate nodes to Azure Linux + +In this tutorial, part three of five, you migrate your existing nodes to Azure Linux. You can migrate your existing nodes to Azure Linux using one of the following methods: + +* Remove existing node pools and add new Azure Linux node pools. +* In-place OS SKU migration. + +If you don't have any existing nodes to migrate to Azure Linux, skip to the [next tutorial](./tutorial-azure-linux-telemetry-monitor.md). In later tutorials, you learn how to enable telemetry and monitoring in your clusters and upgrade Azure Linux nodes. + +## Prerequisites + +* In previous tutorials, you created and deployed an Azure Linux Container Host for AKS cluster. To complete this tutorial, you need to add an Azure Linux node pool to your existing cluster. 
If you haven't done this step and would like to follow along, start with [Tutorial 2: Add an Azure Linux node pool to your existing AKS cluster](./tutorial-azure-linux-add-nodepool.md). + + > [!NOTE] + > When adding a new Azure Linux node pool, you need to add at least one as `--mode System`. Otherwise, AKS won't allow you to delete your existing node pool. + +* You need the latest version of Azure CLI. Run `az --version` to find the version. If you need to install or upgrade, see [Install Azure CLI](/cli/azure/install-azure-cli). + +## Add Azure Linux node pools and remove existing node pools + +1. Add a new Azure Linux node pool using the `az aks nodepool add` command. This command adds a new node pool to your cluster with the `--mode System` flag, which makes it a system node pool. System node pools are required for Azure Linux clusters. + +```azurecli-interactive +# Declare environment variables with a random suffix for uniqueness +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export NODE_POOL_NAME="np$RANDOM_SUFFIX" +az aks nodepool add --resource-group $RESOURCE_GROUP --cluster-name $CLUSTER_NAME --name $NODE_POOL_NAME --mode System --os-sku AzureLinux +``` + +Results: + + + +```JSON +{ + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.ContainerService/managedClusters/myAKSCluster/nodePools/systempool", + "name": "systempool", + "provisioningState": "Succeeded" +} +``` + +2. Remove your existing nodes using the `az aks nodepool delete` command. + +## In-place OS SKU migration + +You can now migrate your existing Ubuntu node pools to Azure Linux by changing the OS SKU of the node pool, which rolls the cluster through the standard node image upgrade process. This new feature doesn't require the creation of new node pools. + +### Limitations + +There are several settings that can block the OS SKU migration request. To ensure a successful migration, review the following guidelines and limitations: + +* The OS SKU migration feature isn't available through PowerShell or the Azure portal. +* The OS SKU migration feature isn't able to rename existing node pools. +* Ubuntu and Azure Linux are the only supported Linux OS SKU migration targets. +* An Ubuntu OS SKU with `UseGPUDedicatedVHD` enabled can't perform an OS SKU migration. +* An Ubuntu OS SKU with CVM 20.04 enabled can't perform an OS SKU migration. +* Node pools with Kata enabled can't perform an OS SKU migration. +* Windows OS SKU migration isn't supported. +* OS SKU migration from Mariner to Azure Linux is supported, but rolling back to Mariner is not supported. + +### Prerequisites + +* An existing AKS cluster with at least one Ubuntu node pool. +* We recommend that you ensure your workloads configure and run successfully on the Azure Linux container host before attempting to use the OS SKU migration feature by [deploying an Azure Linux cluster](./quickstart-azure-cli.md) in dev/prod and verifying your service remains healthy. +* Ensure the migration feature is working for you in test/dev before using the process on a production cluster. +* Ensure that your pods have enough [Pod Disruption Budget](/azure/aks/operator-best-practices-scheduler#plan-for-availability-using-pod-disruption-budgets) to allow AKS to move pods between VMs during the upgrade. +* You need Azure CLI version [2.61.0](/cli/azure/release-notes-azure-cli#may-21-2024) or higher. Run `az --version` to find the version. If you need to install or upgrade, see [Install Azure CLI](/cli/azure/install-azure-cli). 
+* If you are using Terraform, you must have [v3.111.0](https://github.com/hashicorp/terraform-provider-azurerm/releases/tag/v3.111.0) or greater of the AzureRM Terraform module. + +### [Azure CLI](#tab/azure-cli) + +#### Migrate the OS SKU of your Ubuntu node pool + +* Migrate the OS SKU of your node pool to Azure Linux using the `az aks nodepool update` command. This command updates the OS SKU for your node pool from Ubuntu to Azure Linux. The OS SKU change triggers an immediate upgrade operation, which takes several minutes to complete. + +```azurecli-interactive +az aks nodepool update --resource-group $RESOURCE_GROUP --cluster-name $CLUSTER_NAME --name $NODE_POOL_NAME --os-sku AzureLinux +``` + +Results: + + + +```JSON +{ + "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.ContainerService/managedClusters/myAKSCluster/nodePools/nodepool1", + "name": "nodepool1", + "osSku": "AzureLinux", + "provisioningState": "Succeeded" +} +``` + +> [!NOTE] +> If you experience issues during the OS SKU migration, you can [roll back to your previous OS SKU](#rollback). + +### Verify the OS SKU migration + +Once the migration is complete on your test clusters, you should verify the following to ensure a successful migration: + +* If your migration target is Azure Linux, run the `kubectl get nodes -o wide` command. The output should show `CBL-Mariner/Linux` as your OS image and `.cm2` at the end of your kernel version. +* Run the `kubectl get pods -o wide -A` command to verify that all of your pods and daemonsets are running on the new node pool. +* Run the `kubectl get nodes --show-labels` command to verify that all of the node labels in your upgraded node pool are what you expect. + +> [!TIP] +> We recommend monitoring the health of your service for a couple weeks before migrating your production clusters. + +### Run the OS SKU migration on your production clusters + +1. Update your existing templates to set `OSSKU=AzureLinux`. In ARM templates, you use `"OSSKU": "AzureLinux"` in the `agentPoolProfile` section. In Bicep, you use `osSku: "AzureLinux"` in the `agentPoolProfile` section. Lastly, for Terraform, you use `os_sku = "AzureLinux"` in the `default_node_pool` section. Make sure that your `apiVersion` is set to `2023-07-01` or later. +2. Redeploy your ARM, Bicep, or Terraform template for the cluster to apply the new `OSSKU` setting. During this deploy, your cluster behaves as if it's taking a node image upgrade. Your cluster surges capacity, and then reboots your existing nodes one by one into the latest AKS image from your new OS SKU. + +### Rollback + +If you experience issues during the OS SKU migration, you can roll back to your previous OS SKU. To do this, you need to change the OS SKU field in your template and resubmit the deployment, which triggers another upgrade operation and restores the node pool to its previous OS SKU. + + > [!NOTE] + > + > OS SKU migration does not support rolling back to OS SKU Mariner. + +* Roll back to your previous OS SKU using the `az aks nodepool update` command. This command updates the OS SKU for your node pool from Azure Linux back to Ubuntu. + +## Next steps + +In this tutorial, you migrated existing nodes to Azure Linux using one of the following methods: + +* Remove existing node pools and add new Azure Linux node pools. +* In-place OS SKU migration. + +In the next tutorial, you learn how to enable telemetry to monitor your clusters. 
+ +> [!div class="nextstepaction"] +> [Enable telemetry and monitoring](./tutorial-azure-linux-telemetry-monitor.md) \ No newline at end of file diff --git a/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-telemetry-monitor.md b/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-telemetry-monitor.md new file mode 100644 index 000000000..926da4616 --- /dev/null +++ b/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-telemetry-monitor.md @@ -0,0 +1,120 @@ +--- +title: Azure Linux Container Host for AKS tutorial - Enable telemetry and monitoring for the Azure Linux Container Host +description: In this Azure Linux Container Host for AKS tutorial, you'll learn how to enable telemetry and monitoring for the Azure Linux Container Host. +author: suhuruli +ms.author: suhuruli +ms.service: microsoft-linux +ms.custom: linux-related-content, innovation-engine +ms.topic: tutorial +ms.date: 03/26/2025 +--- + +# Tutorial: Enable telemetry and monitoring for your Azure Linux Container Host cluster + +In this tutorial, part four of five, you'll set up Container Insights to monitor an Azure Linux Container Host cluster. You'll learn how to: + +> [!div class="checklist"] +> * Enable monitoring for an existing cluster. +> * Verify that the agent is deployed successfully. +> * Verify that the solution is enabled. + +In the next and last tutorial, you'll learn how to upgrade your Azure Linux nodes. + +## Prerequisites + +- In previous tutorials, you created and deployed an Azure Linux Container Host cluster. To complete this tutorial, you need an existing cluster. If you haven't done this step and would like to follow along, start with [Tutorial 1: Create a cluster with the Azure Linux Container Host for AKS](./tutorial-azure-linux-create-cluster.md). +- If you're connecting an existing AKS cluster to a Log Analytics workspace in another subscription, the Microsoft.ContainerService resource provider must be registered in the subscription with the Log Analytics workspace. For more information, see [Register resource provider](/azure/azure-resource-manager/management/resource-providers-and-types#register-resource-provider). +- You need the latest version of Azure CLI. Run `az --version` to find the version. If you need to install or upgrade, see [Install Azure CLI](/cli/azure/install-azure-cli). + +## Enable monitoring + +## Connect to your cluster + +Before enabling monitoring, it's important to ensure you're connected to the correct cluster. The following command retrieves the credentials for your Azure Linux Container Host cluster and configures kubectl to use them: + +```azurecli +az aks get-credentials --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME +``` + +### Use a default Log Analytics workspace + +The following step enables monitoring for your Azure Linux Container Host cluster using Azure CLI. In this example, you aren't required to precreate or specify an existing workspace. This command simplifies the process for you by creating a default workspace in the default resource group of the AKS cluster subscription. If one doesn't already exist in the region, the default workspace created will resemble the format *DefaultWorkspace-< GUID >-< Region >*. 
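+> [!NOTE]
+> As in the earlier tutorials, the commands in this article reference `$RESOURCE_GROUP` and `$CLUSTER_NAME` without exporting them. A minimal sketch of the declarations is shown below; the values are illustrative and should match the cluster you created previously.
+
+```azurecli
+# Illustrative values -- replace with your own resource group and cluster names
+export RESOURCE_GROUP="testAzureLinuxResourceGroup"
+export CLUSTER_NAME="testAzureLinuxCluster"
+```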
+ +```azurecli +# Check if monitoring addon is already enabled +MONITORING_ENABLED=$(az aks show -g $RESOURCE_GROUP -n $CLUSTER_NAME --query "addonProfiles.omsagent.enabled" -o tsv) + +if [ "$MONITORING_ENABLED" != "true" ]; then + az aks enable-addons -a monitoring -n $CLUSTER_NAME -g $RESOURCE_GROUP +fi +``` + +### Option 2: Specify a Log Analytics workspace + +In this example, you can specify a Log Analytics workspace to enable monitoring of your Azure Linux Container Host cluster. The resource ID of the workspace will be in the form `"/subscriptions//resourceGroups//providers/Microsoft.OperationalInsights/workspaces/"`. The command to enable monitoring with a specified workspace is as follows: ```az aks enable-addons -a monitoring -n $CLUSTER_NAME -g $RESOURCE_GROUP --workspace-resource-id ``` + +## Verify agent and solution deployment + +Run the following command to verify that the agent is deployed successfully. + +```bash +kubectl get ds ama-logs --namespace=kube-system +``` + +The output should resemble the following example, which indicates that it was deployed properly: + + +```text +User@aksuser:~$ kubectl get ds ama-logs --namespace=kube-system +NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR AGE +ama-logs 3 3 3 3 3 3m22s +``` + +To verify deployment of the solution, run the following command: + +```bash +kubectl get deployment ama-logs-rs -n=kube-system +``` + +The output should resemble the following example, which indicates that it was deployed properly: + + +```text +User@aksuser:~$ kubectl get deployment ama-logs-rs -n=kube-system +NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE +ama-logs-rs 1 1 1 1 3h +``` + +## Verify solution configuration + +Use the `aks show` command to find out whether the solution is enabled or not, what the Log Analytics workspace resource ID is, and summary information about the cluster. + +```azurecli +az aks show -g $RESOURCE_GROUP -n $CLUSTER_NAME --query "addonProfiles.omsagent" +``` + +After a few minutes, the command completes and returns JSON-formatted information about the solution. The results of the command should show the monitoring add-on profile and resemble the following example output: + + +```JSON +{ + "config": { + "logAnalyticsWorkspaceResourceID": "/subscriptions/xxxxx/resourceGroups/xxxxx/providers/Microsoft.OperationalInsights/workspaces/xxxxx" + }, + "enabled": true +} +``` + +## Next steps + +In this tutorial, you enabled telemetry and monitoring for your Azure Linux Container Host cluster. You learned how to: + +> [!div class="checklist"] +> * Enable monitoring for an existing cluster. +> * Verify that the agent is deployed successfully. +> * Verify that the solution is enabled. + +In the next tutorial, you'll learn how to upgrade your Azure Linux nodes. + +> [!div class="nextstepaction"] +> [Upgrade Azure Linux nodes](./tutorial-azure-linux-upgrade.md) \ No newline at end of file diff --git a/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-upgrade.md b/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-upgrade.md new file mode 100644 index 000000000..a0373ff2c --- /dev/null +++ b/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-upgrade.md @@ -0,0 +1,108 @@ +--- +title: Azure Linux Container Host for AKS tutorial - Upgrade Azure Linux Container Host nodes +description: In this Azure Linux Container Host for AKS tutorial, you learn how to upgrade Azure Linux Container Host nodes. 
+author: suhuruli
+ms.author: suhuruli
+ms.service: microsoft-linux
+ms.custom: linux-related-content, innovation-engine
+ms.topic: tutorial
+ms.date: 08/18/2024
+---
+
+# Tutorial: Upgrade Azure Linux Container Host nodes
+
+The Azure Linux Container Host ships updates through two mechanisms: updated Azure Linux node images and automatic package updates.
+
+As part of the application and cluster lifecycle, we recommend keeping your clusters up to date and secured by enabling upgrades for your cluster. You can enable automatic node-image upgrades to ensure your clusters use the latest Azure Linux Container Host image when it scales up. You can also manually upgrade the node-image on a cluster.
+
+In this tutorial, part five of five, you learn how to:
+
+> [!div class="checklist"]
+>
+> * Manually upgrade the node-image on a cluster.
+> * Automatically upgrade an Azure Linux Container Host cluster.
+> * Deploy Kured in an Azure Linux Container Host cluster.
+
+> [!NOTE]
+> Any upgrade operation, whether performed manually or automatically, upgrades the node image version if not already on the latest. The latest version is contingent on a full AKS release, and can be determined by visiting the [AKS release tracker](/azure/aks/release-tracker).
+
+## Prerequisites
+
+* In previous tutorials, you created and deployed an Azure Linux Container Host cluster. To complete this tutorial, you need an existing cluster. If you haven't done this step and would like to follow along, start with [Tutorial 1: Create a cluster with the Azure Linux Container Host for AKS](./tutorial-azure-linux-create-cluster.md).
+* You need the latest version of Azure CLI. Find the version using the `az --version` command. If you need to install or upgrade, see [Install Azure CLI](/cli/azure/install-azure-cli).
+
+## Manually upgrade your cluster
+
+To manually upgrade the node image on a cluster, run the `az aks nodepool upgrade` command with the `--node-image-only` flag.
+
+## Automatically upgrade your cluster
+
+Auto-upgrade provides a set-once-and-forget mechanism that yields tangible time and operational cost benefits. By enabling auto-upgrade, you can ensure your clusters are up to date and don't miss the latest Azure Linux Container Host features or patches from AKS and upstream Kubernetes.
+
+Automatically completed upgrades are functionally the same as manual upgrades. The selected channel determines the timing of upgrades. When making changes to auto-upgrade, allow 24 hours for the changes to take effect.
+
+To set the auto-upgrade channel on an existing cluster, update the --auto-upgrade-channel parameter:
+
+```bash
+az aks update --resource-group $AZ_LINUX_RG --name $AZ_LINUX_CLUSTER --auto-upgrade-channel stable
+```
+
+
+```json
+{
+  "id": "/subscriptions/xxxxx/resourceGroups/testAzureLinuxResourceGroup",
+  "location": "WestUS2",
+  "name": "testAzureLinuxCluster",
+  "properties": {
+    "autoUpgradeChannel": "stable",
+    "provisioningState": "Succeeded"
+  }
+}
+```
+
+For more information on upgrade channels, see [Using cluster auto-upgrade](/azure/aks/auto-upgrade-cluster).
+
+## Enable automatic package upgrades
+
+Similar to setting your clusters to auto-upgrade, you can use the same set-once-and-forget mechanism for package upgrades by enabling the node-os upgrade channel. If automatic package upgrades are enabled, the dnf-automatic systemd service runs daily and installs any updated packages that have been published.
+ +To set the node-os upgrade channel on an existing cluster, update the --node-os-upgrade-channel parameter: + +```bash +az aks update --resource-group $AZ_LINUX_RG --name $AZ_LINUX_CLUSTER --node-os-upgrade-channel Unmanaged +``` + + +```json +{ + "id": "/subscriptions/xxxxx/resourceGroups/testAzureLinuxResourceGroup", + "location": "WestUS2", + "name": "testAzureLinuxCluster", + "properties": { + "nodeOsUpgradeChannel": "Unmanaged", + "provisioningState": "Succeeded" + } +} +``` + +## Enable an automatic reboot daemon + +To protect your clusters, security updates are automatically applied to Azure Linux nodes. These updates include OS security fixes, kernel updates, and package upgrades. Some of these updates require a node reboot to complete the process. AKS doesn't automatically reboot these nodes to complete the update process. + +We recommend enabling an automatic reboot daemon, such as [Kured](https://kured.dev/docs/), so that your cluster can reboot nodes that have taken kernel updates. To deploy the Kured DaemonSet in an Azure Linux Container Host cluster, see [Deploy Kured in an AKS cluster](/azure/aks/node-updates-kured#deploy-kured-in-an-aks-cluster). + +## Clean up resources + +As this tutorial is the last part of the series, you may want to delete your Azure Linux Container Host cluster. The Kubernetes nodes run on Azure virtual machines and continue incurring charges even if you don't use the cluster. + +## Next steps + +In this tutorial, you upgraded your Azure Linux Container Host cluster. You learned how to: + +> [!div class="checklist"] +> +> * Manually upgrade the node-image on a cluster. +> * Automatically upgrade an Azure Linux Container Host cluster. +> * Deploy kured in an Azure Linux Container Host cluster. + +For more information on the Azure Linux Container Host, see the [Azure Linux Container Host overview](./intro-azure-linux.md). \ No newline at end of file diff --git a/scenarios/azure-stack-docs/azure-stack/user/azure-stack-quick-create-vm-linux-cli.md b/scenarios/azure-stack-docs/azure-stack/user/azure-stack-quick-create-vm-linux-cli.md new file mode 100644 index 000000000..e60b44bd3 --- /dev/null +++ b/scenarios/azure-stack-docs/azure-stack/user/azure-stack-quick-create-vm-linux-cli.md @@ -0,0 +1,188 @@ +--- +title: Create Linux VM with Azure CLI in Azure Stack Hub +description: Create a Linux virtual machine by using the Azure CLI in Azure Stack Hub. +author: sethmanheim +ms.topic: quickstart +ms.date: 03/06/2025 +ms.author: sethm +ms.custom: mode-api, devx-track-azurecli, linux-related-content +--- + +# Quickstart: Create a Linux server VM by using the Azure CLI in Azure Stack Hub + +You can create an Ubuntu Server 20.04 LTS virtual machine (VM) by using the Azure CLI. In this article, you create and use a virtual machine. This article also shows you how to: + +* Connect to the virtual machine with a remote client. +* Install an NGINX web server and view the default home page. +* Clean up unused resources. + +## Prerequisites + +Before you begin, make sure you have the following prerequisites: + +* A Linux image in the Azure Stack Hub Marketplace + + The Azure Stack Hub Marketplace doesn't contain a Linux image by default. Have the Azure Stack Hub operator provide the Ubuntu Server 20.04 LTS image you need. The operator can use the instructions in [Download Marketplace items from Azure to Azure Stack Hub](../operator/azure-stack-download-azure-marketplace-item.md). 
+
+* Azure Stack Hub requires a specific version of the Azure CLI to create and manage its resources. If you don't have the Azure CLI configured for Azure Stack Hub, sign in to a Windows-based external client if you're connected through VPN, and follow the instructions for [installing and configuring the Azure CLI](azure-stack-version-profiles-azurecli2.md).
+
+* A public Secure Shell (SSH) key with the name `id_rsa.pub` saved in the **.ssh** directory of your Windows user profile. For more information about creating SSH keys, see [Use an SSH key pair with Azure Stack Hub](azure-stack-dev-start-howto-ssh-public-key.md).
+
+## Create a resource group
+
+A resource group is a logical container where you can deploy and manage Azure Stack Hub resources. From your Azure Stack Hub integrated system, run the [az group create](/cli/azure/group#az-group-create) command to create a resource group.
+
+> [!NOTE]
+> We assigned values for all variables in the following code examples. However, you can assign your own values.
+
+The following example creates a resource group named myResourceGroup (with a random suffix) in the location set in the `LOCATION` variable:
+
+```azurecli
+export RANDOM_SUFFIX=$(openssl rand -hex 3)
+export RESOURCE_GROUP="myResourceGroup$RANDOM_SUFFIX"
+export LOCATION="eastus2"
+az group create --name $RESOURCE_GROUP --location $LOCATION
+```
+
+Results:
+
+
+```JSON
+{
+  "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx",
+  "location": "local",
+  "managedBy": null,
+  "name": "myResourceGroupxxx",
+  "properties": {
+    "provisioningState": "Succeeded"
+  },
+  "tags": null,
+  "type": "Microsoft.Resources/resourceGroups"
+}
+```
+
+## Create a virtual machine
+
+Create a virtual machine by using the [az vm create](/cli/azure/vm#az-vm-create) command. The following example creates a VM named myVM. The example uses azureuser as the admin username. Change these values to something that's appropriate for your environment.
+
+```azurecli
+export VM_NAME="myVM$RANDOM_SUFFIX"
+az vm create \
+  --resource-group $RESOURCE_GROUP \
+  --name $VM_NAME \
+  --image "Ubuntu2204" \
+  --admin-username "azureuser" \
+  --assign-identity \
+  --generate-ssh-keys \
+  --public-ip-sku Standard \
+  --location $LOCATION
+```
+
+Results:
+
+
+```JSON
+{
+  "fqdns": "",
+  "id": "/subscriptions/xxxxx/resourceGroups/myResourceGroupxxx/providers/Microsoft.Compute/virtualMachines/myVMxxx",
+  "location": "local",
+  "name": "myVMxxx",
+  "osProfile": {
+    "computerName": "myVMxxx",
+    "adminUsername": "azureuser"
+  },
+  "publicIpAddress": "x.x.x.x",
+  "powerState": "VM running",
+  "provisioningState": "Succeeded"
+}
+```
+
+The public IP address is returned in the `publicIpAddress` field. Note the address for later use with the virtual machine.
+
+## Open port 80 for web traffic
+
+Because this virtual machine will run the NGINX web server, you must open port 80 to internet traffic. To open the port, use the [az vm open-port](/cli/azure/vm) command:
+
+```azurecli
+az vm open-port --port 80 --resource-group $RESOURCE_GROUP --name $VM_NAME
+```
+
+Results:
+
+
+```JSON
+{
+  "endPort": 80,
+  "name": "openPort80",
+  "port": 80,
+  "protocol": "Tcp",
+  "provisioningState": "Succeeded",
+  "resourceGroup": "myResourceGroupxxx",
+  "startPort": 80
+}
+```
+
+## Use SSH to connect to the virtual machine
+
+From a client computer with SSH installed, connect to the virtual machine. If you work on a Windows client, use [PuTTY](https://www.putty.org/) to create the connection. To connect to the virtual machine, you can use the `ssh` command.
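+
+For readers following along interactively, a minimal connection sketch looks like the following. It assumes the azureuser admin account created above and reuses the same public IP lookup that appears later in this article.
+
+```bash
+# Look up the VM's public IP address and open an SSH session as the admin user.
+export PUBLIC_IP=$(az vm show -d -g $RESOURCE_GROUP -n $VM_NAME --query publicIps -o tsv)
+ssh azureuser@$PUBLIC_IP
+```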
+
+## Install the NGINX web server
+
+To install the latest NGINX package, run the following script:
+
+```bash
+# Install NGINX on the VM through az vm run-command, then print only the stdout portion of the response.
+output=$(az vm run-command invoke --resource-group $RESOURCE_GROUP --name $VM_NAME --command-id RunShellScript --scripts 'apt-get -y install nginx')
+value=$(echo "$output" | jq -r '.value[0].message')
+extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d')
+echo "$extracted"
+```
+
+## View the NGINX welcome page
+
+With the NGINX web server installed, and port 80 open on your virtual machine, you can access the web server by using the virtual machine's public IP address. To do so, open a browser and go to `http://<public-ip-address>`. Alternatively, you can use the `curl` command to view the NGINX welcome page:
+
+```bash
+export PUBLIC_IP=$(az vm show -d -g $RESOURCE_GROUP -n $VM_NAME --query publicIps -o tsv)
+
+# Fetch the default page from the VM itself through az vm run-command and print only the stdout portion.
+output=$(az vm run-command invoke --resource-group $RESOURCE_GROUP --name $VM_NAME --command-id RunShellScript --scripts 'curl -v http://localhost')
+value=$(echo "$output" | jq -r '.value[0].message')
+extracted=$(echo "$value" | awk '/\[stdout\]/,/\[stderr\]/' | sed '/\[stdout\]/d' | sed '/\[stderr\]/d')
+echo "$extracted"
+```
+
+Results:
+
+
+```HTML
+<html>
+<head>
+<title>Welcome to nginx!</title>
+</head>
+<body>

+<h1>Welcome to nginx!</h1>
+<p>If you see this page, the nginx web server is successfully installed and
+working. Further configuration is required.</p>
+
+<p>For online documentation and support please refer to
+<a href="http://nginx.org/">nginx.org</a>.<br/>
+Commercial support is available at
+<a href="http://nginx.com/">nginx.com</a>.</p>
+
+<p><em>Thank you for using nginx.</em></p>
+</body>
+</html>
+ + +``` + +![The NGINX web server Welcome page](./media/azure-stack-quick-create-vm-linux-cli/nginx.png) + +## Next steps + +In this quickstart, you deployed a basic Linux server virtual machine with a web server. To learn more about Azure Stack Hub virtual machines, see [Considerations for virtual machines in Azure Stack Hub](azure-stack-vm-considerations.md). \ No newline at end of file diff --git a/scenarios/metadata.json b/scenarios/metadata.json index 5dfa2d3df..fd27d11bc 100644 --- a/scenarios/metadata.json +++ b/scenarios/metadata.json @@ -2,7 +2,7 @@ { "status": "active", "key": "azure-docs/articles/aks/learn/quick-kubernetes-deploy-cli.md", - "title": "Quickstart: Deploy an Azure Kubernetes Service (AKS) cluster using Azure CLI", + "title": "Deploy an Azure Kubernetes Service (AKS) cluster", "description": "Learn how to quickly deploy a Kubernetes cluster and deploy an application in Azure Kubernetes Service (AKS) using Azure CLI", "stackDetails": "", "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-docs/articles/aks/learn/quick-kubernetes-deploy-cli.md", @@ -65,10 +65,85 @@ ] } }, + { + "status": "active", + "key": "azure-databases-docs/articles/mysql/flexible-server/tutorial-deploy-wordpress-on-aks.md", + "title": "Tutorial: Deploy WordPress on AKS cluster by using Azure CLI", + "description": "Learn how to quickly build and deploy WordPress on AKS with Azure Database for MySQL - Flexible Server.", + "stackDetails": [ + "An Ubuntu 22.04 Linux VM (Standard DS2_v2)", + "Azure Database for MySQL - Flexible Server: 8.0.21", + "NGINX web server", + "PHP version 8.1-fpm", + "Latest WordPress", + "Network interface with public IP and network security group", + "Azure Private DNS Zone for Azure MySQL Flexible Server", + "Use port 22 for SSH and ports 80, 443 for web traffic" + ], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-databases-docs/articles/mysql/flexible-server/tutorial-deploy-wordpress-on-aks.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/mysql/flexible-server/tutorial-deploy-wordpress-on-aks", + "nextSteps": [ + { + "title": "Access the Kubernetes web dashboard", + "url": "https://learn.microsoft.com/en-us/azure/aks/kubernetes-dashboard" + }, + { + "title": "Scale your AKS cluster", + "url": "https://learn.microsoft.com/en-us/azure/aks/tutorial-kubernetes-scale" + }, + { + "title": "Manage your Azure Database for MySQL flexible server instance", + "url": "https://learn.microsoft.com/en-us/azure/mysql/flexible-server/quickstart-create-server-cli" + }, + { + "title": "Configure server parameters for your database server", + "url": "https://learn.microsoft.com/en-us/azure/mysql/flexible-server/how-to-configure-server-parameters-cli" + } + ], + "configurations": { + "permissions": [ + "Microsoft.Resources/resourceGroups/write", + "Microsoft.Network/virtualNetworks/write", + "Microsoft.Network/publicIPAddresses/write", + "Microsoft.Network/networkSecurityGroups/write", + "Microsoft.Network/networkSecurityGroups/securityRules/write", + "Microsoft.Network/networkInterfaces/write", + "Microsoft.Network/networkInterfaces/ipConfigurations/write", + "Microsoft.Storage/storageAccounts/write", + "Microsoft.Network/privateDnsZones/write", + "Microsoft.Network/privateDnsZones/virtualNetworkLinks/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/A/write", + 
"Microsoft.Network/privateDnsZones/privateDnsRecordSets/TXT/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/SRV/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/CNAME/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/MX/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/AAAA/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/PTR/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/CERT/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/NS/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/SOA/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/CAA/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/ANY/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/SSHFP/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/SPF/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/DNSKEY/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/DS/write", + "Microsoft.Network/privateDnsZones/privateDnsRecordSets/NAPTR/write", + "Microsoft.Compute/virtualMachines/write", + "Microsoft.Compute/virtualMachines/extensions/write", + "Microsoft.Compute/virtualMachines/read", + "Microsoft.Authorization/roleAssignments/write", + "Microsoft.Authorization/roleAssignments/read", + "Microsoft.Authorization/roleDefinitions/read", + "Microsoft.Authorization/roleDefinitions/write" + ] + } + }, { "status": "active", "key": "azure-docs/articles/static-web-apps/get-started-cli.md", - "title": "Quickstart: Building your first static site with the Azure Static Web Apps using the CLI", + "title": "Deploy a Static site with the Azure Static Web Apps", "description": "Learn to deploy a static site to Azure Static Web Apps with the Azure CLI.", "stackDetails": "", "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-docs/articles/static-web-apps/get-started-cli.md", @@ -108,7 +183,7 @@ { "status": "active", "key": "azure-docs/articles/virtual-machines/linux/quick-create-cli.md", - "title": "Quickstart: Use the Azure CLI to create a Linux Virtual Machine", + "title": "Deploy a Linux virtual machine", "description": "In this quickstart, you learn how to use the Azure CLI to create a Linux virtual machine", "stackDetails": [ "An Ubuntu 22.04 Linux VM (Standard DS1_v2)", @@ -159,6 +234,20 @@ "stackDetails": "", "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-docs/articles/virtual-machines/linux/tutorial-lemp-stack.md", "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-lemp-stack", + "nextSteps": [ + { + "title": "Learn about virtual machines", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/" + }, + { + "title": "Create and manage Linux VMs with the Azure CLI", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-manage-vm" + }, + { + "title": "Secure your Linux VM", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-secure-web-server" + } + ], "configurations": {} }, { @@ -168,7 +257,7 @@ "description": "This tutorial shows how to deploy Inspektor Gadget in an AKS cluster", "stackDetails": "", "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/DeployIGonAKS/README.md", - "documentationUrl": "", + "documentationUrl": 
"https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/logs/capture-system-insights-from-aks", "nextSteps": [ { "title": "Real-world scenarios where Inspektor Gadget can help you", @@ -221,6 +310,24 @@ "stackDetails": "", "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/CreateRHELVMAndSSH/create-rhel-vm-ssh.md", "documentationUrl": "", + "nextSteps": [ + { + "title": "Learn about virtual machines", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/" + }, + { + "title": "Create an Ubuntu Virtual Machine", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/quick-create-cli" + }, + { + "title": "Create custom VM images", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-custom-images" + }, + { + "title": "Load Balance VMs", + "url": "https://learn.microsoft.com/en-us/azure/load-balancer/quickstart-load-balancer-standard-public-cli" + } + ], "configurations": {} }, { @@ -253,24 +360,2379 @@ "permissions": [] } }, + { + "status": "inactive", + "key": "azure-aks-docs/articles/aks/workload-identity-deploy-cluster.md", + "title": "Deploy and configure an AKS cluster with workload identity", + "description": "In this Azure Kubernetes Service (AKS) article, you deploy an Azure Kubernetes Service cluster and configure it with a Microsoft Entra Workload ID.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/workload-identity-deploy-cluster.md", + "documentationUrl": "", + "nextSteps": [ + { + "title": "Kubectl Describe Command Reference", + "url": "https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#describe" + } + ], + "configurations": { + "permissions": [] + } + }, { "status": "active", - "key": "PostgresRagLlmDemo/README.md", - "title": "Quickstart: Deploy a Postgres vector database", - "description": "Setup a Postgres vector database and openai resources to run a RAG-LLM model", + "key": "ObtainPerformanceMetricsLinuxSustem/obtain-performance-metrics-linux-system.md", + "title": "Obtaining Performance metrics from a Linux system", + "description": "Learn how to obtainer Performance metrics from a Linux system.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/ObtainPerformanceMetricsLinuxSustem/obtain-performance-metrics-linux-system.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/virtual-machines/linux/collect-performance-metrics-from-a-linux-system", + "nextSteps": [ + { + "title": "Create a Support Request for your VM", + "url": "https://portal.azure.com/#view/Microsoft_Azure_Support/HelpAndSupportBlade/~/overview" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "MY_RESOURCE_GROUP_NAME", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "MY_VM_NAME", + "title": "VM Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "inactive", + "key": "azure-aks-docs/articles/aks/create-postgresql-ha.md", + "title": "Create infrastructure for deploying a highly available PostgreSQL database on AKS", + "description": "Create the infrastructure needed to deploy a highly available PostgreSQL database on AKS using the CloudNativePG operator.", + "stackDetails": "", + "sourceUrl": 
"https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/create-postgresql-ha.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/create-postgresql-ha?tabs=helm", + "nextSteps": [ + { + "title": "Deploy a highly available PostgreSQL database on AKS with Azure CLI", + "url": "https://learn.microsoft.com/en-us/azure/aks/deploy-postgresql-ha?tabs=helm" + } + ], + "configurations": {} + }, + { + "status": "inactive", + "key": "azure-aks-docs/articles/aks/deploy-postgresql-ha.md", + "title": "Deploy a highly available PostgreSQL database on AKS with Azure CLI", + "description": "In this article, you deploy a highly available PostgreSQL database on AKS using the CloudNativePG operator.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/deploy-postgresql-ha.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/deploy-postgresql-ha", + "nextSteps": [ + { + "title": "Deploy a highly available PostgreSQL database on AKS with Azure CLI", + "url": "https://learn.microsoft.com/en-us/azure/aks/deploy-postgresql-ha?tabs=helm" + } + ], + "configurations": {} + }, + { + "status": "inactive", + "key": "CreateContainerAppDeploymentFromSource/create-container-app-deployment-from-source.md", + "title": "Create a Container App leveraging Blob Store, SQL, and Computer Vision", + "description": "This tutorial shows how to create a Container App leveraging Blob Store, SQL, and Computer Vision", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/CreateContainerAppDeploymentFromSource/create-container-app-deployment-from-source.md", + "documentationUrl": "https://github.com/Azure/computer-vision-nextjs-webapp", + "nextSteps": [ + { + "title": "Azure Container Apps documentation", + "url": "https://learn.microsoft.com/azure/container-apps/" + }, + { + "title": "Azure Database for PostgreSQL documentation", + "url": "https://learn.microsoft.com/azure/postgresql/" + }, + { + "title": "Azure Blob Storage documentation", + "url": "https://learn.microsoft.com/azure/storage/blobs/" + }, + { + "title": "Azure Computer (AI) Vision Documentation", + "url": "https://learn.microsoft.com/azure/ai-services/computer-vision/" + } + ], + "configurations": {} + }, + { + "status": "inactive", + "key": "DeployHAPGOnARO/deploy-ha-pg-on-aro.md", + "title": "Create a Highly Available PostgreSQL Cluster on Azure Red Hat OpenShift", + "description": "This tutorial shows how to create a Highly Available PostgreSQL cluster on Azure Red Hat OpenShift (ARO) using the CloudNativePG operator", "stackDetails": "", - "sourceUrl": "https://raw.githubusercontent.com/aamini7/postgres-rag-llm-demo/main/README.md", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/DeployHAPGOnARO/deploy-ha-pg-on-aro.md", "documentationUrl": "", + "nextSteps": [ + { + "title": "Deploy a highly available PostgreSQL database on AKS with Azure CLI", + "url": "https://learn.microsoft.com/en-us/azure/aks/deploy-postgresql-ha?tabs=helm" + } + ], "configurations": {} }, + { + "status": "inactive", + "key": "AIChatApp/ai-chat-app.md", + "title": "Create an Azure OpenAI, LangChain, ChromaDB, and Chainlit Chat App in Container Apps", + "description": "", + "stackDetails": "", + "sourceUrl": 
"https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/AIChatApp/ai-chat-app.md", + "documentationUrl": "", + "nextSteps": [], + "configurations": { + "permissions": [] + } + }, + { + "status": "active", + "key": "ConfigurePythonContainer/configure-python-container.md", + "title": "Configure Linux Python apps", + "description": "Learn how to configure the Python container in which web apps are run, using both the Azure portal and the Azure CLI.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/ConfigurePythonContainer/configure-python-container.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/app-service/configure-language-python", + "nextSteps": [], + "configurations": { + "permissions": [] + } + }, + { + "status": "active", + "key": "CreateSpeechService/create-speech-service.md", + "title": "Deploy an Azure AI Speech service", + "description": "In this Azure AI Speech CLI quickstart, you interact with speech to text, text to speech, and speech translation without having to write code.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/CreateSpeechService/create-speech-service.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/spx-basics?tabs=windowsinstall%2Cterminal", + "nextSteps": [], + "configurations": { + "permissions": [] + } + }, + { + "status": "inactive", + "key": "azure-aks-docs/articles/aks/airflow-create-infrastructure.md", + "title": "Create the infrastructure for deploying Apache Airflow on Azure Kubernetes Service (AKS)", + "description": "In this article, you create the infrastructure needed to deploy Apache Airflow on Azure Kubernetes Service (AKS) using Helm.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/airflow-create-infrastructure.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/airflow-create-infrastructure", + "nextSteps": [ + { + "title": "Deploy Apache Airflow on AKS", + "url": "https://learn.microsoft.com/en-us/azure/aks/airflow-deploy" + } + ], + "configurations": { + "permissions": [] + } + }, + { + "status": "inactive", + "key": "azure-aks-docs/articles/aks/airflow-deploy.md", + "title": "Configure and deploy Apache Airflow on Azure Kubernetes Service (AKS)", + "description": "In this article, you create the infrastructure needed to deploy Apache Airflow on Azure Kubernetes Service (AKS) using Helm.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/airflow-deploy.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/airflow-deploy", + "nextSteps": [ + { + "title": "Deploy a MongoDB cluster on Azure Kubernetes Service (AKS)", + "url": "https://learn.microsoft.com/en-us/azure/aks/mongodb-overview" + }, + { + "title": "Deploy a highly available PostgreSQL database on Azure Kubernetes Service (AKS)", + "url": "https://learn.microsoft.com/en-us/azure/aks/postgresql-ha-overview" + }, + { + "title": "Deploy a Valkey cluster on Azure Kubernetes Service (AKS)", + "url": "https://learn.microsoft.com/en-us/azure/aks/valkey-overview" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "MY_RESOURCE_GROUP_NAME", + "title": "Resource Group 
Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "MY_CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "DeployPremiumSSDV2/deploy-premium-ssd-v2.md", + "title": "Deploy a Premium SSD v2 managed disk", + "description": "Learn how to deploy a Premium SSD v2 and about its regional availability.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/DeployPremiumSSDV2/deploy-premium-ssd-v2.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machines/disks-deploy-premium-v2?tabs=azure-cli", + "nextSteps": [], + "configurations": { + "permissions": [] + } + }, + { + "status": "inactive", + "key": "GPUNodePoolAKS/gpu-node-pool-aks.md", + "title": "Create a multi-instance GPU node pool in Azure Kubernetes Service (AKS)", + "description": "Learn how to create a multi-instance GPU node pool in Azure Kubernetes Service (AKS).", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/GPUNodePoolAKS/gpu-node-pool-aks.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/gpu-multi-instance?tabs=azure-cli", + "nextSteps": [], + "configurations": { + "permissions": [] + } + }, + { + "status": "active", + "key": "PostgresRagLlmDemo/README.md", + "title": "Deploy a Postgres vector database", + "description": "Set up a Postgres vector database and openai resources to run a RAG-LLM model.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/PostgresRagLlmDemo/README.md", + "documentationUrl": "", + "nextSteps": [], + "configurations": { + "permissions": [] + } + }, + { + "status": "active", + "key": "CreateAOAIDeployment/create-aoai-deployment.md", + "title": "Deploy an Azure OpenAI Service", + "description": "Learn how to use the Azure CLI to create an Azure OpenAI resource and manage deployments with the Azure OpenAI Service.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/CreateAOAIDeployment/create-aoai-deployment.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=cli", + "nextSteps": [], + "configurations": { + "permissions": [] + } + }, { "status": "active", "key": "AksKaito/README.md", - "title": "Deploy an AI model on Azure Kubernetes Service (AKS) with the AI toolchain operator (preview)", - "description": "Learn how to enable the AI toolchain operator add-on on Azure Kubernetes Service (AKS) to simplify OSS AI model management and deployment.", + "title": "Deploy an AI model on AKS with the AI toolchain operator", + "description": "Learn how to enable the AI toolchain operator add-on on Azure Kubernetes Service (AKS) to simplify OSS AI model management and deployment", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/AksKaito/README.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/ai-toolchain-operator", + "nextSteps": [ + { + "title": "Check out the KAITO GitHub repository", + "url": "https://github.com/Azure/kaito" + } + ] + }, + { + "status": "inactive", + "key": "azure-aks-docs/articles/aks/trusted-access-feature.md", + "title": "Get secure resource access to Azure Kubernetes Service (AKS) using Trusted Access", + "description": "Learn how to 
use the Trusted Access feature to give Azure resources access to Azure Kubernetes Service (AKS) clusters.", "stackDetails": "", - "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/azure-aks-docs/refs/heads/main/articles/aks/ai-toolchain-operator.md", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/trusted-access-feature.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/trusted-access-feature", + "nextSteps": [ + { + "title": "Deploy and manage cluster extensions for AKS", + "url": "https://learn.microsoft.com/en-us/azure/aks/cluster-extensions" + }, + { + "title": "Deploy the Azure Machine Learning extension on an AKS or Azure Arc enabled Kubernetes cluster", + "url": "https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-kubernetes-extension" + }, + { + "title": "Deploy Azure Backup on an AKS cluster", + "url": "https://learn.microsoft.com/en-us/azure/backup/azure-kubernetes-service-backup-overview" + }, + { + "title": "Set agentless container posture in Microsoft Defender for Cloud for an AKS cluster", + "url": "https://learn.microsoft.com/en-us/azure/defender-for-cloud/concept-agentless-containers" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP_NAME", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "inactive", + "key": "CreateLinuxVMSecureWebServer/create-linux-vm-secure-web-server.md", + "title": "Create a NGINX Webserver Secured via HTTPS", + "description": "This tutorial shows how to create a NGINX Webserver Secured via HTTPS.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/CreateLinuxVMSecureWebServer/create-linux-vm-secure-web-server.md", "documentationUrl": "", + "nextSteps": [ + { + "title": "Learn about virtual machines", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/" + }, + { + "title": "Create and manage Linux VMs with the Azure CLI", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-manage-vm" + }, + { + "title": "Secure your Linux VM", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-secure-web-server" + } + ], "configurations": {} + }, + { + "status": "active", + "key": "azure-docs/articles/confidential-computing/confidential-enclave-nodes-aks-get-started.md", + "title": "Quickstart: Deploy an AKS cluster with confidential computing Intel SGX agent nodes by using the Azure CLI", + "description": "Learn how to create an Azure Kubernetes Service (AKS) cluster with enclave confidential containers a Hello World app by using the Azure CLI.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-docs/articles/confidential-computing/confidential-enclave-nodes-aks-get-started.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/confidential-computing/confidential-enclave-nodes-aks-get-started", + "nextSteps": [ + { + "title": "Samples to run Python, Node, and other applications through confidential containers", + "url": "https://github.com/Azure-Samples/confidential-container-samples" + }, + { + "title": "Enclave-aware Azure container samples in GitHub", + "url": 
"https://github.com/Azure-Samples/confidential-computing/blob/main/containersamples/" + } + ], + "configurations": { + "permissions": [] + } + }, + { + "status": "active", + "key": "azure-management-docs/articles/azure-linux/quickstart-azure-cli.md", + "title": "Quickstart: Deploy an Azure Linux Container Host for AKS cluster by using the Azure CLI", + "description": "Learn how to quickly create an Azure Linux Container Host for AKS cluster using the Azure CLI.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-management-docs/articles/azure-linux/quickstart-azure-cli.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/azure-linux/quickstart-azure-cli", + "nextSteps": [ + { + "title": "Azure Linux Container Host tutorial", + "url": "https://learn.microsoft.com/en-us/azure/azure-linux/tutorial-azure-linux-create-cluster" + } + ], + "configurations": { + "permissions": [] + } + }, + { + "status": "active", + "key": "azure-docs/articles/virtual-machine-scale-sets/tutorial-use-custom-image-cli.md", + "title": "Tutorial - Use a custom VM image in a scale set with Azure CLI", + "description": "Learn how to use the Azure CLI to create a custom VM image that you can use to deploy a Virtual Machine Scale Set", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-docs/articles/virtual-machine-scale-sets/tutorial-use-custom-image-cli.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/tutorial-use-custom-image-cli", + "nextSteps": [ + { + "title": "Deploy applications to your scale sets", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/tutorial-install-apps-cli" + } + ], + "configurations": { + "permissions": [] + } + }, + { + "status": "active", + "key": "azure-docs/articles/virtual-network/create-virtual-machine-accelerated-networking.md", + "title": "Create an Azure Virtual Machine with Accelerated Networking", + "description": "Use Azure portal, Azure CLI, or PowerShell to create Linux or Windows virtual machines with Accelerated Networking enabled for improved network performance.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-docs/articles/virtual-network/create-virtual-machine-accelerated-networking.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-network/create-virtual-machine-accelerated-networking?tabs=cli", + "nextSteps": [ + { + "title": "How Accelerated Networking works in Linux and FreeBSD VMs", + "url": "https://learn.microsoft.com/en-us/azure/virtual-network/accelerated-networking-how-it-works" + } + ], + "configurations": { + "permissions": [] + } + }, + { + "status": "inactive", + "key": "DeployHAPGOnAKSTerraform/deploy-ha-pg-on-aks-terraform.md", + "title": "Create a Highly Available PostgreSQL Cluster on Azure Kubernetes Service (AKS) using Terraform", + "description": "This tutorial shows how to create a Highly Available PostgreSQL cluster on AKS using the CloudNativePG operator", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/DeployHAPGOnAKSTerraform/deploy-ha-pg-on-aks-terraform.md", + "documentationUrl": "", + "nextSteps": [ + { + "title": "Create infrastructure for deploying a highly available PostgreSQL database on AKS", + "url": 
"https://learn.microsoft.com/en-us/azure/aks/create-postgresql-ha?tabs=helm" + }, + { + "title": "Deploy a highly available PostgreSQL database on AKS with Azure CLI", + "url": "https://learn.microsoft.com/en-us/azure/aks/deploy-postgresql-ha?tabs=helm" + } + ], + "configurations": { + "permissions": [] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/workload-identity-migrate-from-pod-identity.md", + "title": "Migrate your Azure Kubernetes Service (AKS) pod to use workload identity", + "description": "In this Azure Kubernetes Service (AKS) article, you learn how to configure your Azure Kubernetes Service pod to authenticate with workload identity.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/workload-identity-migrate-from-pod-identity.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/workload-identity-migrate-from-pod-identity", + "nextSteps": [ + { + "title": "Use Microsoft Entra Workload ID with Azure Kubernetes Service (AKS)", + "url": "https://learn.microsoft.com/en-us/azure/aks/workload-identity-overview" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "MY_AKS_RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "MY_AKS_CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "inactive", + "key": "DeployCassandraOnAKS/deploy-cassandra-on-aks.md", + "title": "Deploy a Cassandra Cluster on AKS", + "description": "Learn how to deploy a Cassandra cluster on an Azure Kubernetes Service (AKS) cluster using Azure CLI and Kubernetes manifests.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/DeployCassandraOnAKS/deploy-cassandra-on-aks.md", + "documentationUrl": "", + "nextSteps": [], + "configurations": { + "permissions": [] + } + }, + { + "status": "inactive", + "key": "DeployClickhouseOnAKS/deploy-clickhouse-on-aks.md", + "title": "Deploy ClickHouse Cluster on AKS", + "description": "Learn how to deploy a ClickHouse Cluster on Azure Kubernetes Service (AKS) using Azure CLI and Kubernetes manifests.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/DeployClickhouseOnAKS/deploy-clickhouse-on-aks.md", + "documentationUrl": "", + "nextSteps": [], + "configurations": { + "permissions": [] + } + }, + { + "status": "inactive", + "key": "DeployLLMWithTorchserveOnAKS/deploy-llm-with-torchserve-on-aks.md", + "title": "Quickstart: Deploy a Large Language Model with TorchServe on Azure Kubernetes Service (AKS)", + "description": "Learn how to deploy a large language model using TorchServe on AKS.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/DeployLLMWithTorchserveOnAKS/deploy-llm-with-torchserve-on-aks.md", + "documentationUrl": "", + "nextSteps": [], + "configurations": { + "permissions": [] + } + }, + { + "status": "active", + "key": "DeployTensorflowOnAKS/deploy-tensorflow-on-aks.md", + "title": "Setup: Deploy a Tensorflow Cluster on Azure Kubernetes Service (AKS)", + "description": "Learn how to deploy a Tensorflow cluster on Azure Kubernetes Service (AKS) using Azure CLI.", + "stackDetails": [], + "sourceUrl": 
"https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/DeployTensorflowOnAKS/deploy-tensorflow-on-aks.md", + "documentationUrl": "", + "nextSteps": [], + "configurations": { + "permissions": [] + } + }, + { + "status": "inactive", + "key": "DeployTrinoOnAKS/deploy-trino-on-aks.md", + "title": "Deploy a Trino Cluster on Azure Kubernetes Service (AKS)", + "description": "Learn how to deploy a Trino Cluster on AKS using Azure CLI for scalable and distributed SQL query processing.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/DeployTrinoOnAKS/deploy-trino-on-aks.md", + "documentationUrl": "", + "nextSteps": [], + "configurations": { + "permissions": [] + } + }, + { + "status": "active", + "key": "FixFstabIssuesRepairVM/fix-fstab-issues-repair-vm.md", + "title": "Troubleshoot Linux VM boot issues due to fstab errors", + "description": "Explains why Linux VM cannot start and how to solve the problem.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/FixFstabIssuesRepairVM/fix-fstab-issues-repair-vm.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/virtual-machines/linux/linux-virtual-machine-cannot-start-fstab-errors#use-azure-linux-auto-repair-alar", + "nextSteps": [ + { + "title": "Create a Support Request for your VM", + "url": "https://portal.azure.com/#view/Microsoft_Azure_Support/HelpAndSupportBlade/~/overview" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "MY_RESOURCE_GROUP_NAME", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "MY_VM_NAME", + "title": "VM Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "KernelBootIssuesRepairVM/kernel-related-boot-issues-repairvm.md", + "title": "Recover Azure Linux VM from kernel panic due to missing initramfs", + "description": "Provides solutions to an issue in which a Linux virtual machine (VM) can't boot after applying kernel changes", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/KernelBootIssuesRepairVM/kernel-related-boot-issues-repairvm.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/virtual-machines/linux/kernel-related-boot-issues#missing-initramfs-alar", + "nextSteps": [ + { + "title": "Create a Support Request for your VM", + "url": "https://portal.azure.com/#view/Microsoft_Azure_Support/HelpAndSupportBlade/~/overview" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "MY_RESOURCE_GROUP_NAME", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "MY_VM_NAME", + "title": "VM Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "TroubleshootVMGrubError/troubleshoot-vm-grub-error-repairvm.md", + "title": "Linux VM boots to GRUB rescue", + "description": "Provides troubleshooting guidance for GRUB rescue issues with Linux virtual machines.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/TroubleshootVMGrubError/troubleshoot-vm-grub-error-repairvm.md", + "documentationUrl": 
"https://learn.microsoft.com/en-us/troubleshoot/azure/virtual-machines/linux/troubleshoot-vm-boot-error", + "nextSteps": [ + { + "title": "Create a Support Request for your VM", + "url": "https://portal.azure.com/#view/Microsoft_Azure_Support/HelpAndSupportBlade/~/overview" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "MY_RESOURCE_GROUP_NAME", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "MY_VM_NAME", + "title": "VM Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-docs/articles/batch/quick-create-cli.md", + "title": "Quickstart: Use the Azure CLI to create a Batch account and run a job", + "description": "Follow this quickstart to use the Azure CLI to create a Batch account, a pool of compute nodes, and a job that runs basic tasks on the pool.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-docs/articles/batch/quick-create-cli.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/batch/quick-create-cli", + "nextSteps": [ + { + "title": "Tutorial: Run a parallel workload with Azure Batch", + "url": "https://learn.microsoft.com/en-us/azure/batch/tutorial-parallel-python" + } + ], + "configurations": {} + }, + { + "status": "active", + "key": "azure-compute-docs/articles/virtual-machines/linux/tutorial-manage-vm.md", + "title": "Tutorial - Create and manage Linux VMs with the Azure CLI", + "description": "In this tutorial, you learn how to use the Azure CLI to create and manage Linux VMs in Azure", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-manage-vm.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-manage-vm", + "nextSteps": [ + { + "title": "Create and Manage VM Disks", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-manage-disks" + } + ], + "configurations": {} + }, + { + "status": "active", + "key": "azure-compute-docs/articles/virtual-machine-scale-sets/tutorial-autoscale-cli.md", + "title": "Tutorial - Autoscale a scale set with the Azure CLI", + "description": "Learn how to use the Azure CLI to automatically scale a Virtual Machine Scale Set as CPU demands increases and decreases", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/tutorial-autoscale-cli.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/tutorial-autoscale-cli?tabs=Ubuntu", + "nextSteps": [], + "configurations": {} + }, + { + "status": "active", + "key": "azure-compute-docs/articles/virtual-machine-scale-sets/tutorial-modify-scale-sets-cli.md", + "title": "Modify an Azure Virtual Machine Scale Set using Azure CLI", + "description": "Learn how to modify and update an Azure Virtual Machine Scale Set using Azure CLI", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/tutorial-modify-scale-sets-cli.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/tutorial-modify-scale-sets-cli", + "nextSteps": [ + { + "title": "Use 
data disks with scale sets", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/tutorial-use-disks-powershell" + } + ], + "configurations": {} + }, + { + "status": "active", + "key": "azure-compute-docs/articles/virtual-machines/disks-enable-performance.md", + "title": "Preview - Increase performance of Premium SSDs and Standard SSD/HDDs", + "description": "Increase the performance of Azure Premium SSDs and Standard SSD/HDDs using performance plus.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-compute-docs/articles/virtual-machines/disks-enable-performance.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machines/disks-enable-performance?tabs=azure-cli", + "nextSteps": [ + { + "title": "Create an incremental snapshot for managed disks", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/disks-incremental-snapshots" + }, + { + "title": "Expand virtual hard disks on a Linux VM", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/expand-disks" + } + ], + "configurations": {} + }, + { + "status": "active", + "key": "azure-compute-docs/articles/container-instances/container-instances-vnet.md", + "title": "Deploy container group to Azure virtual network", + "description": "Learn how to deploy a container group to a new or existing Azure virtual network via the Azure CLI.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-compute-docs/articles/container-instances/container-instances-vnet.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/container-instances/container-instances-vnet", + "nextSteps": [ + { + "title": "Create an Azure container group with virtual network", + "url": "https://github.com/Azure/azure-quickstart-templates/tree/master/quickstarts/microsoft.containerinstance/aci-vnet" + }, + { + "title": " Deploy to Azure Container Instances from Azure Container Registry using a managed identity", + "url": "https://learn.microsoft.com/en-us/azure/container-instances/using-azure-container-registry-mi" + } + ], + "configurations": {} + }, + { + "status": "active", + "key": "azure-compute-docs/articles/virtual-machines/linux/multiple-nics.md", + "title": "Create a Linux VM in Azure with multiple NICs", + "description": "Learn how to create a Linux VM with multiple NICs attached to it using the Azure CLI or Resource Manager templates.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-compute-docs/articles/virtual-machines/linux/multiple-nics.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/multiple-nics", + "nextSteps": [ + { + "title": "Review Linux VM Sizes", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/sizes" + }, + { + "title": "Manage virtual machine access using just in time", + "url": "https://learn.microsoft.com/en-us/azure/security-center/security-center-just-in-time" + } + ], + "configurations": {} + }, + { + "status": "inactive", + "key": "azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/quick-create-terraform.md", + "title": "Quickstart: Use Terraform to create a Linux VM", + "description": "In this quickstart, you learn how to use Terraform to create a Linux virtual machine.", + "stackDetails": [], + "sourceUrl": 
"https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-compute-docs/articles/virtual-machines/linux/quick-create-terraform/quick-create-terraform.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/quick-create-terraform?tabs=azure-cli", + "nextSteps": [ + { + "title": "Troubleshoot common problems when using Terraform on Azure", + "url": "https://learn.microsoft.com/en-us/azure/developer/terraform/troubleshoot" + }, + { + "title": "Azure Linux Virtual Machine Tutorials", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-manage-vm" + } + ], + "configurations": {} + }, + { + "status": "inactive", + "key": "AksOpenAiTerraform/README.md", + "title": "How to deploy and run an Azure OpenAI ChatGPT application on AKS via Terraform", + "description": "This article shows how to deploy an AKS cluster and Azure OpenAI Service via Terraform and how to deploy a ChatGPT-like application in Python.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/test_terraform/scenarios/AksOpenAiTerraform/README.md", + "documentationUrl": "", + "nextSteps": [], + "configurations": { + "permissions": [] + } + }, + { + "status": "active", + "key": "upstream/FlatcarOnAzure/flatcar-on-azure.md", + "title": "Running Flatcar Container Linux on Microsoft Azure", + "description": "Deploy Flatcar Container Linux in Microsoft Azure by creating resource groups and using official marketplace images.", + "stackDetails": [], + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/upstream/FlatcarOnAzure/flatcar-on-azure.md", + "documentationUrl": "https://www.flatcar.org/docs/latest/installing/cloud/azure/", + "configurations": {} + }, + { + "status": "active", + "key": "azure-management-docs/articles/azure-linux/tutorial-azure-linux-migration.md", + "title": "Azure Linux Container Host for AKS tutorial - Migrating to Azure Linux", + "description": "In this Azure Linux Container Host for AKS tutorial, you learn how to migrate your nodes to Azure Linux nodes.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-migration.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/azure-linux/tutorial-azure-linux-migration?tabs=azure-cli", + "nextSteps": [ + { + "title": "Enable telemetry and monitoring", + "url": "https://learn.microsoft.com/en-us/azure/azure-linux/tutorial-azure-linux-telemetry-monitor" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-management-docs/articles/azure-linux/tutorial-azure-linux-create-cluster.md", + "title": "Azure Linux Container Host for AKS tutorial - Create a cluster", + "description": "In this Azure Linux Container Host for AKS tutorial, you will learn how to create an AKS cluster with Azure Linux.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-create-cluster.md", + "documentationUrl": 
"https://learn.microsoft.com/en-us/azure/azure-linux/tutorial-azure-linux-create-cluster", + "nextSteps": [ + { + "title": "Add an Azure Linux node pool", + "url": "https://learn.microsoft.com/en-us/azure/azure-linux/tutorial-azure-linux-add-nodepool" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP_NAME", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-management-docs/articles/azure-linux/tutorial-azure-linux-add-nodepool.md", + "title": "Azure Linux Container Host for AKS tutorial - Add an Azure Linux node pool to your existing AKS cluster", + "description": "In this Azure Linux Container Host for AKS tutorial, you learn how to add an Azure Linux node pool to your existing cluster.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-add-nodepool.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/azure-linux/tutorial-azure-linux-add-nodepool", + "nextSteps": [ + { + "title": "Migrating to Azure Linux", + "url": "https://learn.microsoft.com/en-us/azure/azure-linux/tutorial-azure-linux-migration" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-management-docs/articles/azure-linux/tutorial-azure-linux-upgrade.md", + "title": "Azure Linux Container Host for AKS tutorial - Upgrade Azure Linux Container Host nodes", + "description": "In this Azure Linux Container Host for AKS tutorial, you learn how to upgrade Azure Linux Container Host nodes.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-upgrade.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/azure-linux/tutorial-azure-linux-upgrade", + "nextSteps": [ + { + "title": "Azure Linux Container Host Overview", + "url": "https://learn.microsoft.com/en-us/azure/azure-linux/intro-azure-linux" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "AZ_LINUX_RG", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AZ_LINUX_CLUSTER", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-management-docs/articles/azure-linux/tutorial-azure-linux-telemetry-monitor.md", + "title": "Azure Linux Container Host for AKS tutorial - Enable telemetry and monitoring for the Azure Linux Container Host", + "description": "In this Azure Linux Container Host for AKS tutorial, you'll learn how to enable telemetry and monitoring for the Azure Linux Container Host.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-management-docs/articles/azure-linux/tutorial-azure-linux-telemetry-monitor.md", + "documentationUrl": 
"https://learn.microsoft.com/en-us/azure/azure-linux/tutorial-azure-linux-telemetry-monitor", + "nextSteps": [ + { + "title": "Upgrade Azure Linux Nodes", + "url": "https://learn.microsoft.com/en-us/azure/azure-linux/tutorial-azure-linux-upgrade" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-stack-docs/azure-stack/user/azure-stack-quick-create-vm-linux-cli.md", + "title": "Create Linux VM with Azure CLI in Azure Stack Hub", + "description": "Create a Linux virtual machine by using the Azure CLI in Azure Stack Hub.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-stack-docs/azure-stack/user/azure-stack-quick-create-vm-linux-cli.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure-stack/user/azure-stack-quick-create-vm-linux-cli?view=azs-2501", + "nextSteps": [ + { + "title": "Considerations for virtual machines in Azure Stack Hub", + "url": "https://learn.microsoft.com/en-us/azure-stack/user/azure-stack-vm-considerations" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/azure-cni-powered-by-cilium.md", + "title": "Configure Azure CNI Powered by Cilium in Azure Kubernetes Service (AKS)", + "description": "Learn how to create an Azure Kubernetes Service (AKS) cluster with Azure CNI Powered by Cilium.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/azure-cni-powered-by-cilium.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/azure-cni-powered-by-cilium", + "nextSteps": [ + { + "title": "Upgrade Azure CNI IPAM modes and Dataplane Technology.", + "url": "https://learn.microsoft.com/en-us/azure/aks/upgrade-azure-cni" + }, + { + "title": "Use a static IP address with the Azure Kubernetes Service (AKS) load balancer", + "url": "https://learn.microsoft.com/en-us/azure/aks/static-ip" + }, + { + "title": "Use an internal load balancer with Azure Container Service (AKS)", + "url": "https://learn.microsoft.com/en-us/azure/aks/internal-lb" + }, + { + "title": "Create a basic ingress controller with external network connectivity", + "url": "https://learn.microsoft.com/en-us/azure/aks/ingress-basic" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-compute-docs/articles/virtual-machines/linux/tutorial-automate-vm-deployment.md", + "title": "Tutorial - Customize a Linux VM with cloud-init in Azure", + "description": "In this tutorial, you learn how to use cloud-init and Key Vault to customize Linux VMs the first time they boot in Azure", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-automate-vm-deployment.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-automate-vm-deployment", + "nextSteps": [ + { + "title": "Create custom VM images", + "url": 
"https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-custom-images" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-compute-docs/articles/virtual-machines/linux/multiple-nics.md", + "title": "Create a Linux VM in Azure with multiple NICs", + "description": "Learn how to create a Linux VM with multiple NICs attached to it using the Azure CLI or Resource Manager templates.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-compute-docs/articles/virtual-machines/linux/multiple-nics.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/multiple-nics", + "nextSteps": [ + { + "title": "Review Linux VM Sizes", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/sizes" + }, + { + "title": " Manage virtual machine access using just in time", + "url": "https://learn.microsoft.com/en-us/azure/defender-for-cloud/just-in-time-access-usage" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-compute-docs/articles/virtual-machines/disks-enable-performance.md", + "title": "Preview - Increase performance of Premium SSDs and Standard SSD/HDDs", + "description": "Increase the performance of Azure Premium SSDs and Standard SSD/HDDs using performance plus.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-compute-docs/articles/virtual-machines/disks-enable-performance.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machines/disks-enable-performance?tabs=azure-cli", + "nextSteps": [ + { + "title": "Create an incremental snapshot for managed disks", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/disks-incremental-snapshots" + }, + { + "title": "Expand virtual hard disks on a Linux VM", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/expand-disks" + }, + { + "title": "How to expand virtual hard disks attached to a Windows virtual machine", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/windows/expand-os-disk" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-compute-docs/articles/virtual-machine-scale-sets/tutorial-modify-scale-sets-cli.md", + "title": "Modify an Azure Virtual Machine Scale Set using Azure CLI", + "description": "Learn how to modify and update an Azure Virtual Machine Scale Set using Azure CLI.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/tutorial-modify-scale-sets-cli.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/tutorial-modify-scale-sets-cli", + "nextSteps": [ + { + "title": "Use data disks with scale sets", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/tutorial-use-disks-powershell" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-compute-docs/articles/virtual-machine-scale-sets/tutorial-autoscale-cli.md", + "title": "Tutorial - Autoscale a scale set with the Azure CLI", + "description": "Learn how to use the Azure CLI to automatically scale a Virtual Machine Scale 
Set as CPU demands increases and decreases", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-compute-docs/articles/virtual-machine-scale-sets/tutorial-autoscale-cli.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/tutorial-autoscale-cli", + "nextSteps": [ + { + "title": "Learn about scale set instance protection", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-instance-protection" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-compute-docs/articles/virtual-machines/linux/tutorial-manage-vm.md", + "title": "Tutorial - Create and manage Linux VMs with the Azure CLI", + "description": "In this tutorial, you learn how to use the Azure CLI to create and manage Linux VMs in Azure", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-manage-vm.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-manage-vm", + "nextSteps": [ + { + "title": "Create and Manage VM Disks", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-manage-disks" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-compute-docs/articles/virtual-machines/linux/tutorial-lamp-stack.md", + "title": "Tutorial - Deploy LAMP and WordPress on a VM", + "description": "In this tutorial, you learn how to install the LAMP stack, and WordPress, on a Linux virtual machine in Azure.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-lamp-stack.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-lamp-stack", + "nextSteps": [ + { + "title": "Secure web server with TLS", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-secure-web-server" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-docs/articles/batch/quick-create-cli.md", + "title": "Quickstart: Use the Azure CLI to create a Batch account and run a job", + "description": "Follow this quickstart to use the Azure CLI to create a Batch account, a pool of compute nodes, and a job that runs basic tasks on the pool.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-docs/articles/batch/quick-create-cli.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/batch/quick-create-cli", + "nextSteps": [ + { + "title": "Tutorial: Run a parallel workload with Azure Batch", + "url": "https://learn.microsoft.com/en-us/azure/batch/tutorial-parallel-python" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/node-image-upgrade.md", + "title": "Upgrade Azure Kubernetes Service (AKS) node images", + "description": "Learn how to upgrade the images on AKS cluster nodes and node pools.", + "stackDetails": "", + "sourceUrl": 
"https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/node-image-upgrade.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/node-image-upgrade", + "nextSteps": [ + { + "title": "For information about the latest node images, see the AKS release notes", + "url": "https://github.com/Azure/AKS/releases" + }, + { + "title": "Learn how to upgrade the Kubernetes version with Upgrade an AKS cluster", + "url": "https://learn.microsoft.com/en-us/azure/aks/upgrade-aks-cluster" + }, + { + "title": "Automatically apply cluster and node pool upgrades with GitHub Actions", + "url": "https://learn.microsoft.com/en-us/azure/aks/node-upgrade-github-actions" + }, + { + "title": "Learn more about multiple node pools with Create multiple node pools", + "url": "https://learn.microsoft.com/en-us/azure/aks/create-node-pools" + }, + { + "title": "Learn about upgrading best practices with AKS patch and upgrade guidance", + "url": "https://learn.microsoft.com/en-us/azure/architecture/operator-guides/aks/aks-upgrade-practices" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "AKS_RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER", + "title": "AKS Cluster Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_NODEPOOL", + "title": "AKS Node Pool Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-compute-docs/articles/virtual-machines/linux/tutorial-elasticsearch.md", + "title": "Deploy ElasticSearch on a development virtual machine in Azure", + "description": "Install the Elastic Stack (ELK) onto a development Linux VM in Azure", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-compute-docs/articles/virtual-machines/linux/tutorial-elasticsearch.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/tutorial-elasticsearch", + "nextSteps": [ + { + "title": "Create a Linux VM with the Azure CLI", + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/quick-create-cli" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/learn/quick-windows-container-deploy-cli.md", + "title": "Deploy a Windows Server container on an Azure Kubernetes Service (AKS) cluster using Azure CLI", + "description": "Learn how to quickly deploy a Kubernetes cluster and deploy an application in a Windows Server container in Azure Kubernetes Service (AKS) using Azure CLI.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/learn/quick-windows-container-deploy-cli.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/learn/quick-windows-container-deploy-cli?tabs=add-windows-node-pool", + "nextSteps": [ + { + "title": "AKS solution guidance", + "url": "https://learn.microsoft.com/en-us/azure/architecture/reference-architectures/containers/aks-start-here?toc=/azure/aks/toc.json&bc=/azure/aks/breadcrumb/toc.json" + }, + { + "title": "AKS tutorial", + "url": "https://learn.microsoft.com/en-us/azure/aks/tutorial-kubernetes-prepare-app" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, 
+ { + "status": "active", + "key": "azure-aks-docs/articles/aks/spot-node-pool.md", + "title": "Add an Azure Spot node pool to an Azure Kubernetes Service (AKS) cluster", + "description": "Learn how to add an Azure Spot node pool to an Azure Kubernetes Service (AKS) cluster.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/spot-node-pool.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/spot-node-pool", + "nextSteps": [ + { + "title": "Best practices for advanced scheduler features in AKS", + "url": "https://learn.microsoft.com/en-us/azure/aks/operator-best-practices-advanced-scheduler" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/auto-upgrade-cluster.md", + "title": "Automatically upgrade an Azure Kubernetes Service (AKS) cluster", + "description": "Learn how to automatically upgrade an Azure Kubernetes Service (AKS) cluster to get the latest features and security updates.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/auto-upgrade-cluster.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/auto-upgrade-cluster?tabs=azure-cli", + "nextSteps": [ + { + "title": "AKS Patch and Upgrade Guidance", + "url": "https://learn.microsoft.com/en-us/azure/architecture/operator-guides/aks/aks-upgrade-practices" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/auto-upgrade-node-os-image.md", + "title": "autoupgrade Node OS Images", + "description": "Learn how to automatically upgrade an Azure Kubernetes Service (AKS) cluster to get the latest features and security updates.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/auto-upgrade-node-os-image.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/auto-upgrade-node-os-image?tabs=azure-cli", + "nextSteps": [ + { + "title": "AKS Patch and Upgrade Guidance", + "url": "https://learn.microsoft.com/en-us/azure/architecture/operator-guides/aks/aks-upgrade-practices" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/cost-analysis.md", + "title": "Azure Kubernetes Service (AKS) cost analysis", + "description": "Learn how to use cost analysis to surface granular cost allocation data for your Azure Kubernetes Service (AKS) cluster.", + "stackDetails": "", + "sourceUrl": 
"https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/cost-analysis.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/cost-analysis", + "nextSteps": [ + { + "title": "Understand Azure Kubernetes Service (AKS) usage and costs", + "url": "https://learn.microsoft.com/en-us/azure/aks/understand-aks-costs" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/istio-deploy-addon.md", + "title": "Deploy Istio-based service mesh add-on for Azure Kubernetes Service", + "description": "Deploy Istio-based service mesh add-on for Azure Kubernetes Service", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/istio-deploy-addon.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/istio-deploy-addon", + "nextSteps": [ + { + "title": "Deploy external or internal ingresses for Istio service mesh add-on", + "url": "https://learn.microsoft.com/en-us/azure/aks/istio-deploy-ingress" + }, + { + "title": "Scale istiod and ingress gateway HPA", + "url": "https://learn.microsoft.com/en-us/azure/aks/istio-scale#scaling" + }, + { + "title": "Collect metrics for Istio service mesh add-on workloads in Azure Managed Prometheus", + "url": "https://learn.microsoft.com/en-us/azure/aks/istio-metrics-managed-prometheus" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "CLUSTER", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "inactive", + "key": "UseIGOnAKS/use-ig-on-aks.md", + "title": "Comprehensive Guide to Using Inspektor Gadget in Kubernetes", + "description": "This Exec Doc provides a detailed walkthrough of a shell script that demonstrates various operations with the Inspektor Gadget in a Kubernetes environment. 
It explains each functional block, how the gadget plugin is installed, deployed, and used to run examples, export metrics, and verify configurations.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/UseIGOnAKS/use-ig-on-aks.md", + "documentationUrl": "", + "nextSteps": [ + { + "title": "Real-world scenarios where Inspektor Gadget can help you", + "url": "https://go.microsoft.com/fwlink/p/?linkid=2260402#use-cases" + }, + { + "title": "Explore the available gadgets", + "url": "https://go.microsoft.com/fwlink/p/?linkid=2260070" + }, + { + "title": "Run your own eBPF program", + "url": "https://go.microsoft.com/fwlink/p/?linkid=2259865" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "inactive", + "key": "azure-docs/articles/iot-edge/quickstart-linux.md", + "title": "Quickstart: Create an Azure IoT Edge Device on Linux", + "description": "Learn to configure an Azure IoT Edge device on Linux. This guide walks you through creating an IoT Hub, registering a device, and deploying a simulated sensor module.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-docs/articles/iot-edge/quickstart-linux.md", + "documentationUrl": "", + "nextSteps": [ + { + "title": "Monitor IoT Edge Devices", + "url": "https://learn.microsoft.com/en-us/azure/iot-edge/tutorial-monitor-with-workbooks" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "inactive", + "key": "azure-dev-docs/articles/ansible/vm-configure.md", + "title": "Create a Linux virtual machines in Azure using Ansible", + "description": "Learn how to create a Linux virtual machine in Azure using Ansible", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-dev-docs/articles/ansible/vm-configure.md", + "documentationUrl": "", + "nextSteps": [ + { + "title": "Manage a Linux virtual machine in Azure using Ansible", + "url": "https://learn.microsoft.com/en-us/azure/developer/ansible/vm-manage" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/istio-scale.md", + "title": "Istio service mesh Azure Kubernetes Service add-on performance and scaling", + "description": "Istio service mesh Azure Kubernetes Service add-on performance and scaling", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/istio-scale.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/istio-scale", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "SupportArticles-docs/support/azure/azure-kubernetes/availability-performance/node-not-ready-custom-script-extension-errors.md", + "title": "Node 
Not Ready because of custom script extension (CSE) errors", + "description": "Troubleshoot scenarios in which custom script extension (CSE) errors cause Node Not Ready states in an Azure Kubernetes Service (AKS) cluster node pool.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/availability-performance/node-not-ready-custom-script-extension-errors.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/availability-performance/node-not-ready-custom-script-extension-errors", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RG_NAME", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "CLUSTER_NAME", + "title": "Cluster Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AVAILABILITY_SET_VM", + "title": "Availability Set VM Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "SupportArticles-docs/support/azure/azure-kubernetes/availability-performance/node-not-ready-after-being-healthy.md", + "title": "Node Not Ready status after node is in a healthy state", + "description": "Troubleshoot scenarios in which an Azure Kubernetes Service (AKS) cluster node goes to a Not Ready status after is in a healthy state.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/availability-performance/node-not-ready-after-being-healthy.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/availability-performance/node-not-ready-after-being-healthy", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "SupportArticles-docs/support/azure/azure-kubernetes/connectivity/tcp-timeouts-dial-tcp-nodeip-10250-io-timeout.md", + "title": "TCP 10250 I/O timeout errors when connecting to a node's Kubelet for log retrieval", + "description": "Learn how to troubleshoot TCP 10250 I/O timeout errors that occur when retrieving kubectl logs from a pod in an Azure Kubernetes Service (AKS) cluster.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/tcp-timeouts-dial-tcp-nodeip-10250-io-timeout.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/connectivity/tcp-timeouts-dial-tcp-nodeip-10250-io-timeout", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "POD_NAME", + "title": "Pod Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "CLUSTER_NAME", + "title": "Aks Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": 
"azure-aks-docs/articles/aks/kubelet-logs.md", + "title": "View kubelet logs in Azure Kubernetes Service (AKS)", + "description": "Learn how to view troubleshooting information in the kubelet logs from Azure Kubernetes Service (AKS) nodes", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/kubelet-logs.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/kubelet-logs", + "nextSteps": [ + { + "title": "SSH into AKS cluster nodes", + "url": "https://learn.microsoft.com/azure/aks/ssh" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP_NAME", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER_NAME", + "title": "Aks Cluster Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "NODE_NAME", + "title": "Node Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/delete-cluster.md", + "title": "Delete an Azure Kubernetes Service (AKS) cluster", + "description": "Learn about deleting a cluster in Azure Kubernetes Service (AKS).", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/delete-cluster.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/delete-cluster", + "nextSteps": [ + { + "title": "Stop an AKS cluster", + "url": "https://learn.microsoft.com/en-us/azure/aks/start-stop-cluster?tabs=azure-cli" + }, + { + "title": "Upgrade an AKS cluster", + "url": "https://learn.microsoft.com/azure/aks/upgrade-cluster" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP_NAME", + "title": "Resource Group Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/access-control-managed-azure-ad.md", + "title": "Control cluster access using Conditional Access with AKS-managed Microsoft Entra integration", + "description": "Learn how to access clusters using Conditional Access when integrating Microsoft Entra ID in your Azure Kubernetes Service (AKS) clusters.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/access-control-managed-azure-ad.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/access-control-managed-azure-ad", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/concepts-network-azure-cni-pod-subnet.md", + "title": "Concepts - Azure CNI Pod Subnet networking in AKS", + "description": "Learn about Azure CNI Pod Subnet, dynamic IP allocation mode, and static block allocation mode in Azure Kubernetes Service (AKS).", + "stackDetails": "", + "sourceUrl": 
"https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/concepts-network-azure-cni-pod-subnet.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/concepts-network-azure-cni-pod-subnet", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "CLUSTER_NAME", + "title": "Cluster Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP_NAME", + "title": "Resource Group Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/aks-migration.md", + "title": "Migrate to Azure Kubernetes Service (AKS)", + "description": "This article shows you how to migrate to Azure Kubernetes Service (AKS).", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/aks-migration.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/aks-migration", + "nextSteps": [ + { + "title": "Containerizing ASP.NET applications and migrating to AKS", + "url": "/azure/migrate/tutorial-app-containerization-aspnet-kubernetes" + }, + { + "title": "Containerizing Java web applications and migrating to AKS", + "url": "/azure/migrate/tutorial-app-containerization-java-kubernetes" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/use-etags.md", + "title": "Enhancing Concurrency Control with Entity Tags (eTags) in Azure Kubernetes Service", + "description": "Learn how to use eTags (Entity Tags) to enable concurrency control and avoid racing conditions or overwriting scenarios.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/use-etags.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/use-etags", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/istio-meshconfig.md", + "title": "Configure Istio-based service mesh add-on for Azure Kubernetes Service", + "description": "Configure Istio-based service mesh add-on for Azure Kubernetes Service", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/istio-meshconfig.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/istio-meshconfig", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "CLUSTER", + "title": "Cluster Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/access-private-cluster.md", + "title": "Access a private Azure Kubernetes Service (AKS) cluster using the command invoke or Run command feature", + "description": "Learn how to access a private Azure Kubernetes Service (AKS) cluster using the Azure CLI command invoke feature or the Azure portal Run command feature.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/access-private-cluster.md", 
+ "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/access-private-cluster", + "nextSteps": [ + { + "title": "Create a private AKS cluster", + "url": "./private-clusters.md" + }, + { + "title": "Install Azure CLI", + "url": "/cli/azure/install-azure-cli" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "AKS_RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "AKSDNSLookupFailError/aksdns-lookup-fail-error.md", + "title": "Troubleshoot the K8SAPIServerDNSLookupFailVMExtensionError error code (52)", + "description": "Learn how to troubleshoot the K8SAPIServerDNSLookupFailVMExtensionError error (52) when you try to start or create and deploy an Azure Kubernetes Service (AKS) cluster.", + "stackDetails": "https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/create-upgrade-delete/error-code-k8sapiserverdnslookupfailvmextensionerror", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/AKSDNSLookupFailError/aksdns-lookup-fail-error.md", + "documentationUrl": "", + "nextSteps": [ + { + "title": "Create a private AKS cluster", + "url": "/azure/aks/private-clusters" + }, + { + "title": "Private Azure Kubernetes service with custom DNS server", + "url": "https://github.com/Azure/terraform/tree/master/quickstart/301-aks-private-cluster" + }, + { + "title": "What is IP address 168.63.129.16?", + "url": "/azure/virtual-network/what" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/resize-cluster.md", + "title": "Resize Azure Kubernetes Service (AKS) clusters", + "description": "In this article, you learn about the importance of right-sizing your AKS clusters and how you can right-size them to optimize costs and performance.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/resize-cluster.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/resize-cluster", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "NUM_NODES", + "title": "Number of Nodes", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "NODE_POOL_NAME", + "title": "Node Pool Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/concepts-preview-api-life-cycle.md", + "title": "AKS Preview API life cycle", + "description": "Learn about the AKS preview API life cycle.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/concepts-preview-api-life-cycle.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/concepts-preview-api-life-cycle", + 
"nextSteps": [ + { + "title": "AKS Preview CLI Extension", + "url": "https://github.com/Azure/azure-cli-extensions/tree/main/src/aks-preview" + }, + { + "title": "Newer version of the SDK", + "url": "https://azure.github.io/azure-sdk/releases/latest/index.html?search=containerservice" + }, + { + "title": "Terraform release notes", + "url": "/azure/developer/terraform/provider-version-history-azurerm" + }, + { + "title": "client.go in Terraform provider", + "url": "https://github.com/hashicorp/terraform-provider-azurerm/blob/main/internal/services/containers/client/client.go" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/use-labels.md", + "title": "Use labels in an Azure Kubernetes Service (AKS) cluster", + "description": "Learn how to use labels in an Azure Kubernetes Service (AKS) cluster.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/use-labels.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/use-labels", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "SupportArticles-docs/support/azure/azure-kubernetes/extensions/aks-cost-analysis-add-on-issues.md", + "title": "Azure Kubernetes Service Cost Analysis add-on issues", + "description": "Learn how to resolve issues that occur when you try to enable the Azure Kubernetes Service (AKS) Cost Analysis add-on.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/extensions/aks-cost-analysis-add-on-issues.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/extensions/aks-cost-analysis-add-on-issues", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "SupportArticles-docs/support/azure/azure-kubernetes/availability-performance/cluster-service-health-probe-mode-issues.md", + "title": "Troubleshoot the health probe mode for AKS cluster service load balancer", + "description": "Diagnoses and fixes common issues with the health probe mode feature.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/availability-performance/cluster-service-health-probe-mode-issues.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/availability-performance/cluster-service-health-probe-mode-issues", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/error-code-cnidownloadtimeoutvmextensionerror.md", + "title": 
"Troubleshoot Container Network Interface download failures", + "description": "Learn how to resolve Container Network Interface download failures when you try to create and deploy an Azure Kubernetes Service (AKS) cluster.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/error-code-cnidownloadtimeoutvmextensionerror.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/create-upgrade-delete/error-code-cnidownloadtimeoutvmextensionerror", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "SupportArticles-docs/support/azure/azure-kubernetes/connectivity/tcp-timeouts-kubetctl-third-party-tools-connect-api-server.md", + "title": "TCP time-outs when kubectl or other 3rd-party tools connect to API", + "description": "Troubleshoot TCP time-outs that occur when kubectl or other third-party tools connect to the API server in Azure Kubernetes Service (AKS).", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/tcp-timeouts-kubetctl-third-party-tools-connect-api-server.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/connectivity/tcp-timeouts-kubetctl-third-party-tools-connect-api-server", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "ResourceGroupName", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKSClusterName", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/enable-host-encryption.md", + "title": "Enable host-based encryption on Azure Kubernetes Service (AKS)", + "description": "Learn how to configure a host-based encryption in an Azure Kubernetes Service (AKS) cluster.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/enable-host-encryption.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/enable-host-encryption", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "MY_AKS_CLUSTER", + "title": "AKS Cluster Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "MY_RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/nat-gateway.md", + "title": "Create a managed or user-assigned NAT gateway for your Azure Kubernetes Service (AKS) cluster", + "description": "Learn how to create an AKS cluster with managed NAT integration and user-assigned NAT gateway.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/nat-gateway.md", + "documentationUrl": 
"https://learn.microsoft.com/en-us/azure/aks/nat-gateway", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/free-standard-pricing-tiers.md", + "title": "Azure Kubernetes Service (AKS) Free, Standard, and Premium pricing tiers for cluster management", + "description": "Learn about the Azure Kubernetes Service (AKS) Free, Standard, and Premium pricing plans and what features, deployment patterns, and recommendations to consider between each plan.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/free-standard-pricing-tiers.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/free-standard-pricing-tiers", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "azure-aks-docs/articles/aks/events.md", + "title": "Use Kubernetes events for troubleshooting", + "description": "Learn about Kubernetes events, which provide details on pods, nodes, and other Kubernetes objects.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/azure-aks-docs/articles/aks/events.md", + "documentationUrl": "https://learn.microsoft.com/en-us/azure/aks/events", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/upgrading-or-scaling-does-not-succeed.md", + "title": "Troubleshoot cluster upgrading and scaling errors", + "description": "Troubleshoot errors that occur when you try to upgrade or scale an Azure Kubernetes Service (AKS) cluster.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/upgrading-or-scaling-does-not-succeed.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/create-upgrade-delete/upgrading-or-scaling-does-not-succeed", + "nextSteps": [ + { + "title": "Request an increase in your resource quota", + "url": "/azure/azure-resource-manager/troubleshooting/error-resource-quota#solution" + }, + { + "title": "Troubleshoot the SubnetIsFull error code", + "url": "error-code-subnetisfull.md" + }, + { + "title": "Troubleshoot UpgradeFailed errors due to eviction failures caused by PDBs", + "url": "error-code-poddrainfailure.md" + }, + { + "title": "How to mitigate stopped upgrade operations due to deprecated APIs", + "url": "/azure/aks/stop-cluster-upgrade-api-breaking-changes#mitigate-stopped-upgrade-operations" + } + ], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + 
"inputType": "textInput", + "commandKey": "CLUSTER_NAME", + "title": "Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "SupportArticles-docs/support/azure/azure-kubernetes/connectivity/user-cannot-get-cluster-resources.md", + "title": "Troubleshoot 'Forbidden' error when trying to access AKS cluster resources", + "description": "Troubleshoot 'Error from server (Forbidden)' RBAC-related errors that occur when you try to view Kubernetes resources in an AKS cluster.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/user-cannot-get-cluster-resources.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/connectivity/user-cannot-get-cluster-resources", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "CLUSTER_NAME", + "title": "Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "SupportArticles-docs/support/azure/azure-kubernetes/connectivity/troubleshoot-cluster-connection-issues-api-server.md", + "title": "Troubleshoot cluster connection issues with the API server", + "description": "Troubleshoot issues that occur when you attempt to connect to the API server of an Azure Kubernetes Service (AKS) cluster.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/troubleshoot-cluster-connection-issues-api-server.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/connectivity/troubleshoot-cluster-connection-issues-api-server", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "SupportArticles-docs/support/azure/azure-kubernetes/connectivity/client-ip-address-cannot-access-api-server.md", + "title": "Client IP address can't access the API server", + "description": "Troubleshoot issues caused when the client IP address can't access the API server on an Azure Kubernetes Service (AKS) cluster.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/connectivity/client-ip-address-cannot-access-api-server.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/connectivity/client-ip-address-cannot-access-api-server", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RG_NAME", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "CLUSTER_NAME", + "title": "Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": 
"SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/error-code-badrequest-or-invalidclientsecret.md", + "title": "AADSTS7000222 - BadRequest or InvalidClientSecret error", + "description": "Learn how to troubleshoot the BadRequest or InvalidClientSecret error when you try to create or upgrade an Azure Kubernetes Service (AKS) cluster.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/error-code-badrequest-or-invalidclientsecret.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/create-upgrade-delete/error-code-badrequest-or-invalidclientsecret", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "RESOURCE_GROUP_NAME", + "title": "Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } + }, + { + "status": "active", + "key": "SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/cannot-scale-cluster-autoscaler-enabled-node-pool.md", + "title": "Cluster autoscaler fails to scale with cannot scale cluster autoscaler enabled node pool error", + "description": "Learn how to troubleshoot the cannot scale cluster autoscaler enabled node pool error when your autoscaler isn't scaling up or down.", + "stackDetails": "", + "sourceUrl": "https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/SupportArticles-docs/support/azure/azure-kubernetes/create-upgrade-delete/cannot-scale-cluster-autoscaler-enabled-node-pool.md", + "documentationUrl": "https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/create-upgrade-delete/cannot-scale-cluster-autoscaler-enabled-node-pool", + "nextSteps": [], + "configurations": { + "permissions": [], + "configurableParams": [ + { + "inputType": "textInput", + "commandKey": "AKS_RG_NAME", + "title": "AKS Resource Group Name", + "defaultValue": "" + }, + { + "inputType": "textInput", + "commandKey": "AKS_CLUSTER_NAME", + "title": "AKS Cluster Name", + "defaultValue": "" + } + ] + } } -] +] \ No newline at end of file diff --git a/scenarios/sql-docs/docs/linux/quickstart-install-connect-docker.md b/scenarios/sql-docs/docs/linux/quickstart-install-connect-docker.md new file mode 100644 index 000000000..66b00cb3d --- /dev/null +++ b/scenarios/sql-docs/docs/linux/quickstart-install-connect-docker.md @@ -0,0 +1,1245 @@ +--- +title: "Docker: Install Containers for SQL Server on Linux" +description: This quickstart shows how to use Docker to run the SQL Server Linux container images. You connect to a database and run a query. 
+author: amitkh-msft +ms.author: amitkh +ms.reviewer: vanto, randolphwest +ms.date: 11/18/2024 +ms.service: sql +ms.subservice: linux +ms.topic: quickstart +ms.custom: + - intro-quickstart + - kr2b-contr-experiment + - linux-related-content +zone_pivot_groups: cs1-command-shell +monikerRange: ">=sql-server-linux-2017 || >=sql-server-2017" +--- +# Quickstart: Run SQL Server Linux container images with Docker + +[!INCLUDE [SQL Server - Linux](../includes/applies-to-version/sql-linux.md)] + + +::: moniker range="=sql-server-linux-2017 || =sql-server-2017" + +In this quickstart, you use Docker to pull and run the [!INCLUDE [sssql17-md](../includes/sssql17-md.md)] Linux container image, [mssql-server-linux](https://mcr.microsoft.com/product/mssql/server/about). Then you can connect with **sqlcmd** to create your first database and run queries. + +For more information on supported platforms, see [Release notes for SQL Server 2017 on Linux](sql-server-linux-release-notes-2017.md). + +> [!WARNING] +> When you stop and remove a container, your [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] data in the container is permanently deleted. For more information on preserving your data, [create and copy a backup file out of the container](tutorial-restore-backup-in-sql-server-container.md) or use a [container data persistence technique](sql-server-linux-docker-container-configure.md#persist). + +This quickstart creates [!INCLUDE [sssql17-md](../includes/sssql17-md.md)] containers. If you prefer to create Linux containers for different versions of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)], see the versions of this article for [[!INCLUDE [sssql19-md](../includes/sssql19-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-ver15&preserve-view=true#pullandrun2019) or [[!INCLUDE [sssql22-md](../includes/sssql22-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-ver16&preserve-view=true#pullandrun2022) versions of this article. + +::: moniker-end + + +::: moniker range="=sql-server-linux-ver15 || =sql-server-ver15" + +In this quickstart, you use Docker to pull and run the [!INCLUDE [sssql19-md](../includes/sssql19-md.md)] Linux container image, [mssql-server-linux](https://mcr.microsoft.com/product/mssql/server/about). Then you can connect with **sqlcmd** to create your first database and run queries. + +For more information on supported platforms, see [Release notes for SQL Server 2019 on Linux](sql-server-linux-release-notes-2019.md). + +> [!WARNING] +> When you stop and remove a container, your [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] data in the container is permanently deleted. For more information on preserving your data, [create and copy a backup file out of the container](tutorial-restore-backup-in-sql-server-container.md) or use a [container data persistence technique](sql-server-linux-docker-container-configure.md#persist). + +This quickstart creates [!INCLUDE [sssql19-md](../includes/sssql19-md.md)] containers. If you prefer to create Linux containers for different versions of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)], see the [[!INCLUDE [sssql17-md](../includes/sssql17-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-2017&preserve-view=true#pullandrun2017) or [[!INCLUDE [sssql22-md](../includes/sssql22-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-ver16&preserve-view=true#pullandrun2022) versions of this article. 
+ +::: moniker-end + + +::: moniker range=">= sql-server-linux-ver16 || >= sql-server-ver16" + +In this quickstart, you use Docker to pull and run the [!INCLUDE [sssql22-md](../includes/sssql22-md.md)] Linux container image, [mssql-server-linux](https://mcr.microsoft.com/product/mssql/server/about). Then you can connect with **sqlcmd** to create your first database and run queries. + +For more information on supported platforms, see [Release notes for SQL Server 2022 on Linux](sql-server-linux-release-notes-2022.md). + +> [!WARNING] +> When you stop and remove a container, your [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] data in the container is permanently deleted. For more information on preserving your data, [create and copy a backup file out of the container](tutorial-restore-backup-in-sql-server-container.md) or use a [container data persistence technique](sql-server-linux-docker-container-configure.md#persist). + +This quickstart creates [!INCLUDE [sssql22-md](../includes/sssql22-md.md)] containers. If you prefer to create Linux containers for different versions of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)], see the [[!INCLUDE [sssql17-md](../includes/sssql17-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-2017&preserve-view=true#pullandrun2017) or [[!INCLUDE [sssql19-md](../includes/sssql19-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-ver15&preserve-view=true#pullandrun2019) versions of this article. + +::: moniker-end + +This image consists of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] running on Linux based on Ubuntu. It can be used with the Docker Engine 1.8+ on Linux. + +Starting with [!INCLUDE [sssql22-md](../includes/sssql22-md.md)] CU 14 and [!INCLUDE [sssql19-md](../includes/sssql19-md.md)] CU 28, the container images include the [new mssql-tools18](sql-server-linux-setup-tools.md#install-tools-on-linux) package. The previous directory `/opt/mssql-tools/bin` is being phased out. The new directory for Microsoft ODBC 18 tools is `/opt/mssql-tools18/bin`, aligning with the latest tools offering. For more information about changes and security enhancements, see [ODBC Driver 18.0 for SQL Server Released](https://techcommunity.microsoft.com/blog/sqlserver/odbc-driver-18-0-for-sql-server-released/3169228). + +The examples in this article use the `docker` command. However, most of these commands also work with Podman. Podman provides a command-line interface similar to the Docker Engine. You can [find out more about Podman](https://docs.podman.io/en/latest). + +> [!IMPORTANT] +> **sqlcmd** doesn't currently support the `MSSQL_PID` parameter when creating containers. If you use the **sqlcmd** instructions in this quickstart, you create a container with the Developer edition of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)]. Use the command line interface (CLI) instructions to create a container using the license of your choice. For more information, see [Deploy and connect to SQL Server Linux containers](sql-server-linux-docker-container-deployment.md). + + + +## Prerequisites + +- Docker Engine 1.8+ on any supported Linux distribution. For more information, see [Install Docker](https://docs.docker.com/engine/installation/). 
+ + +::: moniker range="=sql-server-linux-2017 || =sql-server-2017" + +- For more information on hardware requirements and processor support, see [SQL Server 2016 and 2017: Hardware and software requirements](../sql-server/install/hardware-and-software-requirements-for-installing-sql-server.md) + +::: moniker-end + + +::: moniker range="=sql-server-linux-ver15 || =sql-server-ver15" + +- For more information on hardware requirements and processor support, see [SQL Server 2019: Hardware and software requirements](../sql-server/install/hardware-and-software-requirements-for-installing-sql-server-2019.md) + +::: moniker-end + + +::: moniker range=">= sql-server-linux-ver16 || >= sql-server-ver16" + +- For more information on hardware requirements and processor support, see [SQL Server 2022: Hardware and software requirements](../sql-server/install/hardware-and-software-requirements-for-installing-sql-server-2022.md) + +::: moniker-end + +- Docker `overlay2` storage driver. This driver is the default for most users. If you aren't using this storage provider and need to change, see the instructions and warnings in the [Docker documentation for configuring overlay2](https://docs.docker.com/engine/storage/drivers/overlayfs-driver/#configure-docker-with-the-overlay-or-overlay2-storage-driver). + +- Install the latest **[sqlcmd](../tools/sqlcmd/sqlcmd-utility.md?&tabs=go)** on your Docker host. + +- At least 2 GB of disk space. + +- At least 2 GB of RAM. + +- [System requirements for SQL Server on Linux](sql-server-linux-setup.md#system). + + +::: moniker range="=sql-server-linux-2017 || =sql-server-2017" + + + +## Pull and run the SQL Server Linux container image + +Before starting the following steps, make sure that you select your preferred shell (**bash**, **PowerShell**, or **cmd**) at the top of this article. + +::: zone pivot="cs1-bash" +For the bash commands in this article, `sudo` is used. If you don't want to use `sudo` to run Docker, you can configure a `docker` group and add users to that group. For more information, see [Post-installation steps for Linux](https://docs.docker.com/engine/install/linux-postinstall). +::: zone-end + +## [CLI](#tab/cli) + +### Pull the container image from the registry + +Pull the [!INCLUDE [sssql17-md](../includes/sssql17-md.md)] Linux container image from the Microsoft Container Registry. + +::: zone pivot="cs1-bash" + +```bash +sudo docker pull mcr.microsoft.com/mssql/server:2017-latest +``` + +::: zone-end + +::: zone pivot="cs1-powershell" + +```powershell +docker pull mcr.microsoft.com/mssql/server:2017-latest +``` + +::: zone-end + +::: zone pivot="cs1-cmd" + +```cmd +docker pull mcr.microsoft.com/mssql/server:2017-latest +``` + +::: zone-end + +This quickstart creates [!INCLUDE [sssql17-md](../includes/sssql17-md.md)] containers. If you prefer to create Linux containers for different versions of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)], see the [[!INCLUDE [sssql19-md](../includes/sssql19-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-ver15&preserve-view=true#pullandrun2019) or [[!INCLUDE [sssql22-md](../includes/sssql22-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-ver16&preserve-view=true#pullandrun2022) versions of this article. + +The previous command pulls the latest [!INCLUDE [sssql17-md](../includes/sssql17-md.md)] Linux container image. If you want to pull a specific image, you add a colon and the tag name, such as `mcr.microsoft.com/mssql/server:2017-GA-ubuntu`. 
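+
+For instance, the GA build named above can be pulled explicitly instead of the rolling `2017-latest` tag. The tag below is shown for illustration only; confirm in the registry listing which tags are currently available.
+
+```bash
+# Pin a specific release rather than the floating "latest" tag
+# (prefix with sudo on Linux if your user isn't in the docker group)
+docker pull mcr.microsoft.com/mssql/server:2017-GA-ubuntu
+```
+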
To see all available images, see the [Microsoft Artifact Registry](https://mcr.microsoft.com/product/mssql/server/about). + +### Run the container + +To run the Linux container image with Docker, you can use the following command from a bash shell or elevated PowerShell command prompt. + +> [!IMPORTANT] +> The `SA_PASSWORD` environment variable is deprecated. Use `MSSQL_SA_PASSWORD` instead. + +::: zone pivot="cs1-bash" + +```bash +sudo docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=" \ + -p 1433:1433 --name sql1 --hostname sql1 \ + -d \ + mcr.microsoft.com/mssql/server:2017-latest +``` + +::: zone-end + +::: zone pivot="cs1-powershell" + +If you're using PowerShell Core, replace the double quotes with single quotes. + +```powershell +docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=" ` + -p 1433:1433 --name sql1 --hostname sql1 ` + -d ` + mcr.microsoft.com/mssql/server:2017-latest +``` + +::: zone-end + +::: zone pivot="cs1-cmd" + +```cmd +docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=" ` + -p 1433:1433 --name sql1 --hostname sql1 ` + -d ` + mcr.microsoft.com/mssql/server:2017-latest +``` + +::: zone-end + +> [!CAUTION] +> [!INCLUDE [password-complexity](includes/password-complexity.md)] If you don't follow these password requirements, the container can't set up [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)], and stops working. You can examine the error log by using the [`docker logs`](https://docs.docker.com/reference/cli/docker/container/logs) command. + +By default, this quickstart creates a container with the Developer edition of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)]. The process for running production editions in containers is slightly different. For more information, see [Run production container images](./sql-server-linux-docker-container-deployment.md#production). + +The following table provides a description of the parameters in the previous `docker run` example: + +| Parameter | Description | +| --- | --- | +| `-e "ACCEPT_EULA=Y"` | Set the `ACCEPT_EULA` variable to any value to confirm your acceptance of the End-User Licensing Agreement. Required setting for the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] image. | +| `-e "MSSQL_SA_PASSWORD="` | Specify your own strong password that is at least eight characters and meets the [Password Policy](../relational-databases/security/password-policy.md). Required setting for the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] image. | +| `-e "MSSQL_COLLATION="` | Specify a custom [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] collation, instead of the default `SQL_Latin1_General_CP1_CI_AS`. | +| `-p 1433:1433` | Map a TCP port on the host environment (first value) with a TCP port in the container (second value). In this example, [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] is listening on TCP 1433 in the container and this container port is then exposed to TCP port 1433 on the host. | +| `--name sql1` | Specify a custom name for the container rather than a randomly generated one. If you run more than one container, you can't reuse this same name. | +| `--hostname sql1` | Used to explicitly set the container hostname. If you don't specify the hostname, it defaults to the container ID, which is a randomly generated system GUID. | +| `-d` | Run the container in the background (daemon). | +| `mcr.microsoft.com/mssql/server:2017-latest` | The [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] Linux container image. 
| + +## [sqlcmd](#tab/sqlcmd) + +### Pull and run the container + +Pull and run the [!INCLUDE [sssql17-md](../includes/sssql17-md.md)] Linux container image from the Microsoft Container Registry. + +::: zone pivot="cs1-bash" + +```bash +sudo sqlcmd create mssql --tag 2017-latest --hostname sql1 --name sql1 --port 1433 --accept-eula +``` + +::: zone-end + +::: zone pivot="cs1-powershell" + +```powershell +sqlcmd create mssql --tag 2017-latest --hostname sql1 --name sql1 --port 1433 --accept-eula +``` + +::: zone-end + +::: zone pivot="cs1-cmd" + +```cmd +sqlcmd create mssql --tag 2017-latest --hostname sql1 --name sql1 --port 1433 --accept-eula +``` + +::: zone-end + +This quickstart creates [!INCLUDE [sssql17-md](../includes/sssql17-md.md)] containers. If you prefer to create Linux containers for different versions of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)], see the [[!INCLUDE [sssql19-md](../includes/sssql19-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-ver15&preserve-view=true#pullandrun2019) or [[!INCLUDE [sssql22-md](../includes/sssql22-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-ver16&preserve-view=true#pullandrun2022) versions of this article. + +The previous command uses the latest [!INCLUDE [sssql17-md](../includes/sssql17-md.md)] Linux container image. If you want to pull a specific image, change the tag name, such as `2017-GA-ubuntu`. To see all available images, run the following command: + +::: zone pivot="cs1-bash" + +```bash +sudo sqlcmd create mssql get-tags +``` + +::: zone-end + +::: zone pivot="cs1-powershell" + +```powershell +sqlcmd create mssql get-tags +``` + +::: zone-end + +::: zone pivot="cs1-cmd" + +```cmd +sqlcmd create mssql get-tags +``` + +::: zone-end + +The following table provides a description of the parameters in the previous `sqlcmd create mssql` example: + +| Parameter | Description | +| --- | --- | +| `--ACCEPT-EULA` | Include the `ACCEPT-EULA` flag to confirm your acceptance of the End-User Licensing Agreement. Required setting for the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] image. | +| `--port 1433` | Map a TCP port on the host environment and a TCP port in the container. In this example, [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] is listening on TCP 1433 in the container and this container port is then exposed to TCP port 1433 on the host. | +| `--name sql1` | Specify a custom name for the container rather than a randomly generated one. If you run more than one container, you can't reuse this same name. | +| `--hostname sql1` | Used to explicitly set the container hostname. If you don't specify the hostname, it defaults to the container ID, which is a randomly generated system GUID. | +| `--tag 2017-latest` | The [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] Linux container image. | + +--- + +### View list of containers + +1. To view your Docker containers, use the `docker ps` command. + + ::: zone pivot="cs1-bash" + + ```bash + sudo docker ps -a + ``` + + ::: zone-end + + ::: zone pivot="cs1-powershell" + + ```powershell + docker ps -a + ``` + + ::: zone-end + + ::: zone pivot="cs1-cmd" + + ```cmd + docker ps -a + ``` + + ::: zone-end + + You should see output similar to the following example: + + ```output + CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES + d4a1999ef83e mcr.microsoft.com/mssql/server:2017-latest "/opt/mssql/bin/perm..." 2 minutes ago Up 2 minutes 0.0.0.0:1433->1433/tcp, :::1433->1433/tcp sql1 + ``` + +1. 
If the `STATUS` column shows a status of `Up`, then [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] is running in the container and listening on the port specified in the `PORTS` column. If the `STATUS` column for your [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] container shows `Exited`, see [Troubleshoot SQL Server Docker containers](sql-server-linux-docker-container-troubleshooting.md). The server is ready for connections once the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] error logs display the message: `SQL Server is now ready for client connections. This is an informational message; no user action is required`. You can review the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] error log inside the container using the command: + + ```bash + sudo docker exec -t sql1 cat /var/opt/mssql/log/errorlog | grep connection + ``` + + The `--hostname` parameter, as discussed previously, changes the internal name of the container to a custom value. This value is the name you see returned in the following Transact-SQL query: + + ```sql + SELECT @@SERVERNAME, + SERVERPROPERTY('ComputerNamePhysicalNetBIOS'), + SERVERPROPERTY('MachineName'), + SERVERPROPERTY('ServerName'); + ``` + + Setting `--hostname` and `--name` to the same value is a good way to easily identify the target container. + +1. As a final step, [change your SA password](#sapassword) in a production environment, because the `MSSQL_SA_PASSWORD` is visible in `ps -eax` output and stored in the environment variable of the same name. + +::: moniker-end + + +::: moniker range="=sql-server-linux-ver15 || =sql-server-ver15" + + + +## Pull and run the SQL Server Linux container image + +Before starting the following steps, make sure that you select your preferred shell (**bash**, **PowerShell**, or **cmd**) at the top of this article. + +::: zone pivot="cs1-bash" +For the bash commands in this article, `sudo` is used. If you don't want to use `sudo` to run Docker, you can configure a `docker` group and add users to that group. For more information, see [Post-installation steps for Linux](https://docs.docker.com/engine/install/linux-postinstall). +::: zone-end + +## [CLI](#tab/cli) + +### Pull the container from the registry + +Pull the [!INCLUDE [sssql19-md](../includes/sssql19-md.md)] Linux container image from the Microsoft Container Registry. + +::: zone pivot="cs1-bash" + +```bash +docker pull mcr.microsoft.com/mssql/server:2019-latest +``` + +::: zone-end + +::: zone pivot="cs1-powershell" + +```powershell +docker pull mcr.microsoft.com/mssql/server:2019-latest +``` + +::: zone-end + +::: zone pivot="cs1-cmd" + +```cmd +docker pull mcr.microsoft.com/mssql/server:2019-latest +``` + +::: zone-end + +This quickstart creates [!INCLUDE [sssql19-md](../includes/sssql19-md.md)] containers. If you prefer to create Linux containers for different versions of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)], see the [[!INCLUDE [sssql17-md](../includes/sssql17-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-2017&preserve-view=true#pullandrun2017) or [[!INCLUDE [sssql22-md](../includes/sssql22-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-ver16&preserve-view=true#pullandrun2022) versions of this article. + +The previous command pulls the latest [!INCLUDE [sssql19-md](../includes/sssql19-md.md)] Linux container image. If you want to pull a specific image, you add a colon and the tag name, such as `mcr.microsoft.com/mssql/server:2019-GA-ubuntu`. 
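+
+As an illustration, the GA tag mentioned above can be pulled explicitly instead of `2019-latest`; verify in the registry listing that the tag you choose still exists before relying on it.
+
+```bash
+# Pin a specific release rather than the floating "latest" tag
+docker pull mcr.microsoft.com/mssql/server:2019-GA-ubuntu
+```
+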
To see all available images, see the [Microsoft Artifact Registry](https://mcr.microsoft.com/product/mssql/server/about). + +### Run the container + +To run the Linux container image with Docker, you can use the following command from a bash shell or elevated PowerShell command prompt. + +> [!IMPORTANT] +> The `SA_PASSWORD` environment variable is deprecated. Use `MSSQL_SA_PASSWORD` instead. + +::: zone pivot="cs1-bash" + +```bash +docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=" \ + -p 1433:1433 --name sql1 --hostname sql1 \ + -d \ + mcr.microsoft.com/mssql/server:2019-latest +``` + +::: zone-end + +::: zone pivot="cs1-powershell" + +If you're using PowerShell Core, replace the double quotes with single quotes. + +```powershell +docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=" ` + -p 1433:1433 --name sql1 --hostname sql1 ` + -d ` + mcr.microsoft.com/mssql/server:2019-latest +``` + +> [!CAUTION] +> [!INCLUDE [password-complexity](includes/password-complexity.md)] + +::: zone-end + +::: zone pivot="cs1-cmd" + +```cmd +docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=" ` + -p 1433:1433 --name sql1 --hostname sql1 ` + -d ` + mcr.microsoft.com/mssql/server:2019-latest +``` + +::: zone-end + +> [!CAUTION] +> [!INCLUDE [password-complexity](includes/password-complexity.md)] If you don't follow these password requirements, the container can't set up [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)], and stops working. You can examine the error log by using the [`docker logs`](https://docs.docker.com/reference/cli/docker/container/logs) command. + +By default, this quickstart creates a container with the Developer edition of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)]. The process for running production editions in containers is slightly different. For more information, see [Run production container images](./sql-server-linux-docker-container-deployment.md#production). + +The following table provides a description of the parameters in the previous `docker run` example: + +| Parameter | Description | +| --- | --- | +| `-e "ACCEPT_EULA=Y"` | Set the `ACCEPT_EULA` variable to any value to confirm your acceptance of the End-User Licensing Agreement. Required setting for the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] image. | +| `-e "MSSQL_SA_PASSWORD="` | Specify your own strong password that is at least eight characters and meets the [Password Policy](../relational-databases/security/password-policy.md). Required setting for the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] image. | +| `-e "MSSQL_COLLATION="` | Specify a custom [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] collation, instead of the default `SQL_Latin1_General_CP1_CI_AS`. | +| `-p 1433:1433` | Map a TCP port on the host environment (first value) with a TCP port in the container (second value). In this example, [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] is listening on TCP 1433 in the container and this container port is then exposed to TCP port 1433 on the host. | +| `--name sql1` | Specify a custom name for the container rather than a randomly generated one. If you run more than one container, you can't reuse this same name. | +| `--hostname sql1` | Used to explicitly set the container hostname. If you don't specify the hostname, it defaults to the container ID, which is a randomly generated system GUID. | +| `-d` | Run the container in the background (daemon). 
| +| `mcr.microsoft.com/mssql/server:2019-latest` | The [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] Linux container image. | + +## [sqlcmd](#tab/sqlcmd) + +### Pull and run the container + +Pull and run the [!INCLUDE [sssql19-md](../includes/sssql19-md.md)] Linux container image from the Microsoft Container Registry. + +::: zone pivot="cs1-bash" + +```bash +sudo sqlcmd create mssql --tag 2019-latest --hostname sql1 --name sql1 --port 1433 --accept-eula +``` + +::: zone-end + +::: zone pivot="cs1-powershell" + +```powershell +sqlcmd create mssql --tag 2019-latest --hostname sql1 --name sql1 --port 1433 --accept-eula +``` + +::: zone-end + +::: zone pivot="cs1-cmd" + +```cmd +sqlcmd create mssql --tag 2019-latest --hostname sql1 --name sql1 --port 1433 --accept-eula +``` + +::: zone-end + +This quickstart creates [!INCLUDE [sssql19-md](../includes/sssql19-md.md)] containers. If you prefer to create Linux containers for different versions of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)], see the [[!INCLUDE [sssql17-md](../includes/sssql17-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-2017&preserve-view=true#pullandrun2017) or [[!INCLUDE [sssql22-md](../includes/sssql22-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-ver16&preserve-view=true#pullandrun2022) versions of this article. + +The previous command pulls the latest [!INCLUDE [sssql19-md](../includes/sssql19-md.md)] Linux container image. If you want to pull a specific image, change the tag name, such as `2019-GA-ubuntu-16.04`. To see all available images, run the following command: + +::: zone pivot="cs1-bash" + +```bash +sudo sqlcmd create mssql get-tags +``` + +::: zone-end + +::: zone pivot="cs1-powershell" + +```powershell +sqlcmd create mssql get-tags +``` + +::: zone-end + +::: zone pivot="cs1-cmd" + +```cmd +sqlcmd create mssql get-tags +``` + +::: zone-end + +By default, this quickstart creates a container with the Developer edition of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)]. The process for running production editions in containers is slightly different. For more information, see [Run production container images](./sql-server-linux-docker-container-deployment.md#production). + +The following table provides a description of the parameters in the previous `docker run` example: + +| Parameter | Description | +| --- | --- | +| `--ACCEPT_EULA` | Include the `ACCEPT_EULA` flag to confirm your acceptance of the End-User Licensing Agreement. Required setting for the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] image. | +| `--port 1433` | Map a TCP port on the host environment and a TCP port in the container. In this example, [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] is listening on TCP 1433 in the container and this container port is then exposed to TCP port 1433 on the host. | +| `--name sql1` | Specify a custom name for the container rather than a randomly generated one. If you run more than one container, you can't reuse this same name. | +| `--hostname sql1` | Used to explicitly set the container hostname. If you don't specify the hostname, it defaults to the container ID, which is a randomly generated system GUID. | +| `--tag 2019-latest` | The [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] Linux container image. | + +--- + +### View list of containers + +1. To view your Docker containers, use the `docker ps` command. 
+ + ::: zone pivot="cs1-bash" + + ```bash + docker ps -a + ``` + + ::: zone-end + + ::: zone pivot="cs1-powershell" + + ```powershell + docker ps -a + ``` + + ::: zone-end + + ::: zone pivot="cs1-cmd" + + ```cmd + docker ps -a + ``` + + ::: zone-end + + You should see output similar to the following example: + + ```output + CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES + d4a1999ef83e mcr.microsoft.com/mssql/server:2019-latest "/opt/mssql/bin/perm..." 2 minutes ago Up 2 minutes 0.0.0.0:1433->1433/tcp, :::1433->1433/tcp sql1 + ``` + +1. If the `STATUS` column shows a status of `Up`, then [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] is running in the container and listening on the port specified in the `PORTS` column. If the `STATUS` column for your [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] container shows `Exited`, see [Troubleshoot SQL Server Docker containers](sql-server-linux-docker-container-troubleshooting.md). The server is ready for connections once the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] error logs display the message: `SQL Server is now ready for client connections. This is an informational message; no user action is required`. You can review the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] error log inside the container using the command: + + ```bash + docker exec -t sql1 cat /var/opt/mssql/log/errorlog | grep connection + ``` + + The `--hostname` parameter, as discussed previously, changes the internal name of the container to a custom value. This value is the name you see returned in the following Transact-SQL query: + + ```sql + SELECT @@SERVERNAME, + SERVERPROPERTY('ComputerNamePhysicalNetBIOS'), + SERVERPROPERTY('MachineName'), + SERVERPROPERTY('ServerName'); + ``` + + Setting `--hostname` and `--name` to the same value is a good way to easily identify the target container. + +1. As a final step, [change your SA password](#sapassword) in a production environment, because the `MSSQL_SA_PASSWORD` is visible in `ps -eax` output and stored in the environment variable of the same name. + +::: moniker-end + + +::: moniker range=">= sql-server-linux-ver16 || >= sql-server-ver16" + + + +## Pull and run the SQL Server Linux container image + +Before starting the following steps, make sure that you select your preferred shell (**bash**, **PowerShell**, or **cmd**) at the top of this article. + +::: zone pivot="cs1-bash" +For the bash commands in this article, `sudo` is used. If you don't want to use `sudo` to run Docker, you can configure a `docker` group and add users to that group. For more information, see [Post-installation steps for Linux](https://docs.docker.com/engine/install/linux-postinstall). +::: zone-end + +## [CLI](#tab/cli) + +### Pull the container image from the registry + +Pull the [!INCLUDE [sssql22-md](../includes/sssql22-md.md)] Linux container image from the Microsoft Container Registry. + +::: zone pivot="cs1-bash" + +```bash +docker pull mcr.microsoft.com/mssql/server:2022-latest +``` + +::: zone-end + +::: zone pivot="cs1-powershell" + +```powershell +docker pull mcr.microsoft.com/mssql/server:2022-latest +``` + +::: zone-end + +::: zone pivot="cs1-cmd" + +```cmd +docker pull mcr.microsoft.com/mssql/server:2022-latest +``` + +::: zone-end + +This quickstart creates [!INCLUDE [sssql22-md](../includes/sssql22-md.md)] containers. 
If you prefer to create Linux containers for different versions of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)], see the [[!INCLUDE [sssql17-md](../includes/sssql17-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-2017&preserve-view=true#pullandrun2017) or [[!INCLUDE [sssql19-md](../includes/sssql19-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-ver15&preserve-view=true#pullandrun2019) versions of this article. + +The previous command pulls the latest [!INCLUDE [sssql22-md](../includes/sssql22-md.md)] Linux container image. If you want to pull a specific image, you add a colon and the tag name, such as `mcr.microsoft.com/mssql/server:2022-GA-ubuntu`. To see all available images, see the [Microsoft Artifact Registry](https://mcr.microsoft.com/product/mssql/server/about). + +### Run the container + +To run the Linux container image with Docker, you can use the following command from a bash shell or elevated PowerShell command prompt. + +> [!IMPORTANT] +> The `SA_PASSWORD` environment variable is deprecated. Use `MSSQL_SA_PASSWORD` instead. + +::: zone pivot="cs1-bash" + +```bash +docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=" \ + -p 1433:1433 --name sql1 --hostname sql1 \ + -d \ + mcr.microsoft.com/mssql/server:2022-latest +``` + +::: zone-end + +::: zone pivot="cs1-powershell" + +If you're using PowerShell Core, replace the double quotes with single quotes. + +```powershell +docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=" ` + -p 1433:1433 --name sql1 --hostname sql1 ` + -d ` + mcr.microsoft.com/mssql/server:2022-latest +``` + +::: zone-end + +::: zone pivot="cs1-cmd" + +```cmd +docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=" ` + -p 1433:1433 --name sql1 --hostname sql1 ` + -d ` + mcr.microsoft.com/mssql/server:2022-latest +``` + +::: zone-end + +> [!CAUTION] +> [!INCLUDE [password-complexity](includes/password-complexity.md)] If you don't follow these password requirements, the container can't set up [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)], and stops working. You can examine the error log by using the [`docker logs`](https://docs.docker.com/reference/cli/docker/container/logs) command. + +By default, this quickstart creates a container with the Developer edition of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)]. The process for running production editions in containers is slightly different. For more information, see [Run production container images](./sql-server-linux-docker-container-deployment.md#production). + +The following table provides a description of the parameters in the previous `docker run` example: + +| Parameter | Description | +| --- | --- | +| `-e "ACCEPT_EULA=Y"` | Set the `ACCEPT_EULA` variable to any value to confirm your acceptance of the End-User Licensing Agreement. Required setting for the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] image. | +| `-e "MSSQL_SA_PASSWORD="` | Specify your own strong password that is at least eight characters and meets the [Password Policy](../relational-databases/security/password-policy.md). Required setting for the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] image. | +| `-e "MSSQL_COLLATION="` | Specify a custom [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] collation, instead of the default `SQL_Latin1_General_CP1_CI_AS`. | +| `-p 1433:1433` | Map a TCP port on the host environment (first value) with a TCP port in the container (second value). 
In this example, [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] is listening on TCP 1433 in the container and this container port is then exposed to TCP port 1433 on the host. | +| `--name sql1` | Specify a custom name for the container rather than a randomly generated one. If you run more than one container, you can't reuse this same name. | +| `--hostname sql1` | Used to explicitly set the container hostname. If you don't specify the hostname, it defaults to the container ID, which is a randomly generated system GUID. | +| `-d` | Run the container in the background (daemon). | +| `mcr.microsoft.com/mssql/server:2022-latest` | The [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] Linux container image. | + + + +## Change the system administrator password + +The system administrator (`sa`) account is a system administrator on the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] instance that gets created during setup. After you create your [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] container, the `MSSQL_SA_PASSWORD` environment variable you specified is discoverable by running `echo $MSSQL_SA_PASSWORD` in the container. For security purposes, you should change your `sa` password in a production environment. + +1. Choose a strong password to use for the `sa` account. [!INCLUDE [password-complexity](includes/password-complexity.md)] + +1. Use `docker exec` to run **sqlcmd** to change the password using Transact-SQL. In the following example, the old and new passwords are read from user input. + + ::: zone pivot="cs1-bash" + + ```bash + docker exec -it sql1 /opt/mssql-tools18/bin/sqlcmd \ + -S localhost -U sa \ + -P "$(read -sp "Enter current SA password: "; echo "${REPLY}")" \ + -Q "ALTER LOGIN sa WITH PASSWORD=\"$(read -sp "Enter new SA password: "; echo "${REPLY}")\"" + ``` + + ::: zone-end + + ::: zone pivot="cs1-powershell" + + ```powershell + docker exec -it sql1 /opt/mssql-tools18/bin/sqlcmd ` + -S localhost -U sa -P "" ` + -Q "ALTER LOGIN sa WITH PASSWORD=''" + ``` + + ::: zone-end + + ::: zone pivot="cs1-cmd" + + ```cmd + docker exec -it sql1 /opt/mssql-tools18/bin/sqlcmd ` + -S localhost -U sa -P "" ` + -Q "ALTER LOGIN sa WITH PASSWORD=''" + ``` + + ::: zone-end + + > [!CAUTION] + > [!INCLUDE [password-complexity](includes/password-complexity.md)] + + Recent versions of **sqlcmd** are secure by default. For more information about connection encryption, see [sqlcmd utility](../tools/sqlcmd/sqlcmd-utility.md) for Windows, and [Connecting with sqlcmd](../connect/odbc/linux-mac/connecting-with-sqlcmd.md) for Linux and macOS. If the connection doesn't succeed, you can add the `-No` option to **sqlcmd** to specify that encryption is optional, not mandatory. + +## Disable the SA account as a best practice + +> [!IMPORTANT] +> You'll need these credentials for later steps. Be sure to write down the user ID and password that you enter here. + +[!INCLUDE [connect-with-sa](includes/connect-with-sa.md)] + +## [sqlcmd](#tab/sqlcmd) + +### Pull and run the container + +Pull and run the [!INCLUDE [sssql22-md](../includes/sssql22-md.md)] Linux container image from the Microsoft Container Registry. 
+ +::: zone pivot="cs1-bash" + +```bash +sudo sqlcmd create mssql --tag 2022-latest --hostname sql1 --name sql1 --port 1433 --accept-eula +``` + +::: zone-end + +::: zone pivot="cs1-powershell" + +```powershell +sqlcmd create mssql --tag 2022-latest --hostname sql1 --name sql1 --port 1433 --accept-eula +``` + +::: zone-end + +::: zone pivot="cs1-cmd" + +```cmd +sqlcmd create mssql --tag 2022-latest --hostname sql1 --name sql1 --port 1433 --accept-eula +``` + +::: zone-end + +This quickstart creates [!INCLUDE [sssql22-md](../includes/sssql22-md.md)] containers. If you prefer to create Linux containers for different versions of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)], see the [[!INCLUDE [sssql17-md](../includes/sssql17-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-2017&preserve-view=true#pullandrun2017) or [[!INCLUDE [sssql19-md](../includes/sssql19-md.md)]](quickstart-install-connect-docker.md?view=sql-server-linux-ver15&preserve-view=true#pullandrun2019) versions of this article. + +The previous command pulls the latest [!INCLUDE [sssql22-md](../includes/sssql22-md.md)] Linux container image. If you want to pull a specific image, change the tag name, such as `2022-CU11-ubuntu-22.04`. To see all available images, run the following command: + +::: zone pivot="cs1-bash" + +```bash +sudo sqlcmd create mssql get-tags +``` + +::: zone-end + +::: zone pivot="cs1-powershell" + +```powershell +sqlcmd create mssql get-tags +``` + +::: zone-end + +::: zone pivot="cs1-cmd" + +```cmd +sqlcmd create mssql get-tags +``` + +::: zone-end + +By default, this quickstart creates a container with the Developer edition of [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)]. The process for running production editions in containers is slightly different. For more information, see [Run production container images](./sql-server-linux-docker-container-deployment.md#production). + +The following table provides a description of the parameters in the previous `docker run` example: + +| Parameter | Description | +| --- | --- | +| `--ACCEPT-EULA` | Include the `--ACCEPT-EULA` flag to confirm your acceptance of the End-User Licensing Agreement. Required setting for the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] image. | +| `--port 1433` | Map a TCP port on the host environment and a TCP port in the container. In this example, [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] is listening on TCP 1433 in the container and this container port is then exposed to TCP port 1433 on the host. | +| `--name sql1` | Specify a custom name for the container rather than a randomly generated one. If you run more than one container, you can't reuse this same name. | +| `--hostname sql1` | Used to explicitly set the container hostname. If you don't specify the hostname, it defaults to the container ID, which is a randomly generated system GUID. | +| `--tag 2022-latest` | The [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] Linux container image. | + +**sqlcmd** disables the `sa` password and creates a new login based on the current user when it creates a container. Use the following command to view your login information. You need it in later steps. 
+ +::: zone pivot="cs1-bash" + +```bash +sudo sqlcmd config view --raw +``` + +::: zone-end + +::: zone pivot="cs1-powershell" + +```powershell +sqlcmd config view --raw +``` + +::: zone-end + +::: zone pivot="cs1-cmd" + +```cmd +sqlcmd config view --raw +``` + +::: zone-end + +--- + +### View list of containers + +1. To view your Docker containers, use the `docker ps` command. + + ::: zone pivot="cs1-bash" + + ```bash + docker ps -a + ``` + + ::: zone-end + + ::: zone pivot="cs1-powershell" + + ```powershell + docker ps -a + ``` + + ::: zone-end + + ::: zone pivot="cs1-cmd" + + ```cmd + docker ps -a + ``` + + ::: zone-end + + You should see output similar to the following example: + + ```output + CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES + d4a1999ef83e mcr.microsoft.com/mssql/server:2022-latest "/opt/mssql/bin/perm..." 2 minutes ago Up 2 minutes 0.0.0.0:1433->1433/tcp, :::1433->1433/tcp sql1 + ``` + +1. If the `STATUS` column shows a status of `Up`, then [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] is running in the container and listening on the port specified in the `PORTS` column. If the `STATUS` column for your [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] container shows `Exited`, see [Troubleshoot SQL Server Docker containers](sql-server-linux-docker-container-troubleshooting.md). The server is ready for connections once the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] error logs display the message: `SQL Server is now ready for client connections. This is an informational message; no user action is required`. You can review the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] error log inside the container using the command: + + ```bash + docker exec -t sql1 cat /var/opt/mssql/log/errorlog | grep connection + ``` + + The `--hostname` parameter, as discussed previously, changes the internal name of the container to a custom value. This value is the name you see returned in the following Transact-SQL query: + + ```sql + SELECT @@SERVERNAME, + SERVERPROPERTY('ComputerNamePhysicalNetBIOS'), + SERVERPROPERTY('MachineName'), + SERVERPROPERTY('ServerName'); + ``` + + Setting `--hostname` and `--name` to the same value is a good way to easily identify the target container. + +::: moniker-end + +## Connect to SQL Server + +The following steps use the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] command-line tool, [sqlcmd utility](../tools/sqlcmd/sqlcmd-utility.md), inside the container to connect to [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)]. + +1. Use the `docker exec -it` command to start an interactive bash shell inside your running container. In the following example, `sql1` is name specified by the `--name` parameter when you created the container. + + ::: zone pivot="cs1-bash" + + ```bash + docker exec -it sql1 "bash" + ``` + + ::: zone-end + + ::: zone pivot="cs1-powershell" + + ```powershell + docker exec -it sql1 "bash" + ``` + + ::: zone-end + + ::: zone pivot="cs1-cmd" + + ```cmd + docker exec -it sql1 "bash" + ``` + + ::: zone-end + + +::: moniker range="=sql-server-linux-2017 || =sql-server-2017" + +1. Once inside the container, connect locally with **sqlcmd**, using its full path. + + ```bash + /opt/mssql-tools/bin/sqlcmd -S localhost -U -P "" + ``` + + Recent versions of **sqlcmd** are secure by default. 
For more information about connection encryption, see [sqlcmd utility](../tools/sqlcmd/sqlcmd-utility.md) for Windows, and [Connecting with sqlcmd](../connect/odbc/linux-mac/connecting-with-sqlcmd.md) for Linux and macOS. If the connection doesn't succeed, you can add the `-No` option to **sqlcmd** to specify that encryption is optional, not mandatory. + + You can omit the password on the command-line to be prompted to enter it. For example: + + ```bash + /opt/mssql-tools/bin/sqlcmd -S localhost -U + ``` + +::: moniker-end + + +::: moniker range="=sql-server-linux-ver15 || =sql-server-ver15" + +1. Once inside the container, connect locally with **sqlcmd**, using its full path. + + ```bash + /opt/mssql-tools18/bin/sqlcmd -S localhost -U -P "" + ``` + + Recent versions of **sqlcmd** are secure by default. For more information about connection encryption, see [sqlcmd utility](../tools/sqlcmd/sqlcmd-utility.md) for Windows, and [Connecting with sqlcmd](../connect/odbc/linux-mac/connecting-with-sqlcmd.md) for Linux and macOS. If the connection doesn't succeed, you can add the `-No` option to **sqlcmd** to specify that encryption is optional, not mandatory. + + You can omit the password on the command-line to be prompted to enter it. For example: + + ```bash + /opt/mssql-tools18/bin/sqlcmd -S localhost -U + ``` + +::: moniker-end + + +::: moniker range="= sql-server-linux-ver16 || = sql-server-ver16" + +1. Once inside the container, connect locally with **sqlcmd**, using its full path. + + ```bash + /opt/mssql-tools18/bin/sqlcmd -S localhost -U -P "" + ``` + + Recent versions of **sqlcmd** are secure by default. For more information about connection encryption, see [sqlcmd utility](../tools/sqlcmd/sqlcmd-utility.md) for Windows, and [Connecting with sqlcmd](../connect/odbc/linux-mac/connecting-with-sqlcmd.md) for Linux and macOS. If the connection doesn't succeed, you can add the `-No` option to **sqlcmd** to specify that encryption is optional, not mandatory. + + You can omit the password on the command-line to be prompted to enter it. For example: + + ```bash + /opt/mssql-tools18/bin/sqlcmd -S localhost -U + ``` + +::: moniker-end + +1. If successful, you should get to a **sqlcmd** command prompt: `1>`. + +## Create and query data + +The following sections walk you through using **sqlcmd** and Transact-SQL to create a new database, add data, and run a query. + +### Create a new database + +The following steps create a new database named `TestDB`. + +1. From the **sqlcmd** command prompt, paste the following Transact-SQL command to create a test database: + + ```sql + CREATE DATABASE TestDB; + ``` + +1. On the next line, write a query to return the name of all of the databases on your server: + + ```sql + SELECT name + FROM sys.databases; + ``` + +1. The previous two commands weren't run immediately. Type `GO` on a new line to run the previous commands: + + ```sql + GO + ``` + +### Insert data + +Next create a new table, `Inventory`, and insert two new rows. + +1. From the *sqlcmd* command prompt, switch context to the new `TestDB` database: + + ```sql + USE TestDB; + ``` + +1. Create new table named `Inventory`: + + ```sql + CREATE TABLE Inventory + ( + id INT, + name NVARCHAR (50), + quantity INT + ); + ``` + +1. Insert data into the new table: + + ```sql + INSERT INTO Inventory + VALUES (1, 'banana', 150); + + INSERT INTO Inventory + VALUES (2, 'orange', 154); + ``` + +1. 
Type `GO` to run the previous commands: + + ```sql + GO + ``` + +### Select data + +Now, run a query to return data from the `Inventory` table. + +1. From the **sqlcmd** command prompt, enter a query that returns rows from the `Inventory` table where the quantity is greater than 152: + + ```sql + SELECT * + FROM Inventory + WHERE quantity > 152; + ``` + +1. Run the command: + + ```sql + GO + ``` + +### Exit the sqlcmd command prompt + +1. To end your **sqlcmd** session, type `QUIT`: + + ```sql + QUIT + ``` + +1. To exit the interactive command-prompt in your container, type `exit`. Your container continues to run after you exit the interactive bash shell. + + + +## Connect from outside the container + +## [CLI](#tab/cli) + +You can also connect to the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] instance on your Docker machine from any external Linux, Windows, or macOS tool that supports SQL connections. The external tool uses the IP address for the host machine. + +The following steps use **sqlcmd** outside of your container to connect to [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] running in the container. These steps assume that you already have the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] command-line tools installed outside of your container. The same principles apply when using other tools, but the process of connecting is unique to each tool. + +1. Find the IP address for your container's host machine, using `ifconfig` or `ip addr`. + +1. For this example, install the **sqlcmd** tool on your client machine. For more information, see [sqlcmd utility](../tools/sqlcmd/sqlcmd-utility.md) or [Install the SQL Server command-line tools sqlcmd and bcp on Linux](sql-server-linux-setup-tools.md). + +1. Run **sqlcmd** specifying the IP address and the port mapped to port 1433 in your container. In this example, the port is the same as port 1433 on the host machine. If you specified a different mapped port on the host machine, you would use it here. You also need to open the appropriate inbound port on your firewall to allow the connection. + + Recent versions of **sqlcmd** are secure by default. If the connection doesn't succeed, and you're using version 18 or higher, you can add the `-No` option to **sqlcmd** to specify that encryption is optional, not mandatory. + + ::: zone pivot="cs1-bash" + + ```text + sudo sqlcmd -S ,1433 -U -P "" + ``` + + ::: zone-end + + ::: zone pivot="cs1-powershell" + + ```powershell + sqlcmd -S ,1433 -U -P "" + ``` + + ::: zone-end + + ::: zone pivot="cs1-cmd" + + ```cmd + sqlcmd -S ,1433 -U -P "" + ``` + + ::: zone-end + + > [!CAUTION] + > [!INCLUDE [password-complexity](includes/password-complexity.md)] + +1. Run Transact-SQL commands. When finished, type `QUIT`. + +## [sqlcmd](#tab/sqlcmd) + +You can also connect to the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] instance on your Docker machine from any external Linux, Windows, or macOS tool that supports SQL connections. The external tool uses the IP address for the host machine. + +The following steps use **sqlcmd** outside of your container to connect to [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] running in the container. The same principles apply when using other tools, but the process of connecting is unique to each tool. + +1. Run **sqlcmd** in the same session you used to create your container. It keeps track of the connection information via contexts so you can easily connect at any time. 
`sqlcmd config view` can be used to view your available contexts. + + ::: zone pivot="cs1-bash" + + ```text + sudo sqlcmd + ``` + + ::: zone-end + + ::: zone pivot="cs1-powershell" + + ```powershell + sqlcmd query + ``` + + ::: zone-end + + ::: zone pivot="cs1-cmd" + + ```cmd + sqlcmd query + ``` + + ::: zone-end + +1. Run Transact-SQL commands. When finished, type `QUIT`. + +--- + +Other common tools to connect to [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] include: + +- [SQL Server extension for Visual Studio Code](../tools/visual-studio-code/sql-server-develop-use-vscode.md) +- [Use SQL Server Management Studio on Windows to manage SQL Server on Linux](sql-server-linux-manage-ssms.md) +- [What is Azure Data Studio?](/azure-data-studio/what-is-azure-data-studio) +- [mssql-cli (Preview)](https://github.com/dbcli/mssql-cli/blob/master/doc/usage_guide.md) +- [Manage SQL Server on Linux with PowerShell Core](sql-server-linux-manage-powershell-core.md) + +## Remove your container + +## [CLI](#tab/cli) + +If you want to remove the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] container used in this tutorial, run the following commands: + +::: zone pivot="cs1-bash" + +```text +docker stop sql1 +docker rm sql1 +``` + +::: zone-end + +::: zone pivot="cs1-powershell" + +```powershell +docker stop sql1 +docker rm sql1 +``` + +::: zone-end + +::: zone pivot="cs1-cmd" + +```cmd +docker stop sql1 +docker rm sql1 +``` + +::: zone-end + +## [sqlcmd](#tab/sqlcmd) + +If you want to remove the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] container used in this tutorial, run the following command: + +::: zone pivot="cs1-bash" + +```text +sudo sqlcmd delete --force +``` + +::: zone-end + +::: zone pivot="cs1-powershell" + +```powershell +sqlcmd delete --force +``` + +::: zone-end + +::: zone pivot="cs1-cmd" + +```cmd +sqlcmd delete --force +``` + +::: zone-end + +--- + +## Docker demo + +After you finish using the [!INCLUDE [ssnoversion-md](../includes/ssnoversion-md.md)] Linux container image for Docker, you might want to know how Docker is used to improve development and testing. The following video shows how Docker can be used in a continuous integration and deployment scenario. + +> [!VIDEO https://channel9.msdn.com/Events/Connect/2017/T152/player] + +## Related tasks + +- [Run multiple SQL Server containers](sql-server-linux-docker-container-deployment.md#multiple) +- [Persist your data](sql-server-linux-docker-container-configure.md#persist) + +## Related content + +- [Restore a SQL Server database in a Linux container](tutorial-restore-backup-in-sql-server-container.md) +- [Troubleshoot SQL Server Docker containers](sql-server-linux-docker-container-troubleshooting.md) +- [mssql-docker GitHub repository](https://github.com/microsoft/mssql-docker) + +[!INCLUDE [contribute-to-content](../includes/paragraph-content/contribute-to-content.md)] \ No newline at end of file diff --git a/scenarios/upstream/FlatcarOnAzure/flatcar-on-azure.md b/scenarios/upstream/FlatcarOnAzure/flatcar-on-azure.md new file mode 100644 index 000000000..aaaf474a2 --- /dev/null +++ b/scenarios/upstream/FlatcarOnAzure/flatcar-on-azure.md @@ -0,0 +1,187 @@ +--- +title: 'Running Flatcar Container Linux on Microsoft Azure' +description: 'Deploy Flatcar Container Linux in Microsoft Azure by creating resource groups and using official marketplace images.' 
+ms.topic: article +ms.date: 03/17/2025 +author: naman-msft +ms.author: namanparikh +ms.custom: innovation-engine, azure, flatcar +--- + +## Creating resource group via Microsoft Azure CLI + +Follow the [installation and configuration guides][azure-cli] for the Microsoft Azure CLI to set up your local installation. + +Instances on Microsoft Azure must be created within a resource group. Create a new resource group with the following command: + +```bash +export RANDOM_SUFFIX=$(openssl rand -hex 3) +export RESOURCE_GROUP_NAME="group-1$RANDOM_SUFFIX" +export REGION="WestUS2" +az group create --name $RESOURCE_GROUP_NAME --location $REGION +``` + +Results: + + +```json +{ + "id": "/subscriptions/xxxxx/resourceGroups/group-1xxx", + "location": "WestUS2", + "managedBy": null, + "name": "group-1xxx", + "properties": { + "provisioningState": "Succeeded" + }, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" +} +``` + +Now that you have a resource group, you can choose a channel of Flatcar Container Linux you would like to install. + +## Using the official image from the Marketplace + +Official Flatcar Container Linux images for all channels are available in the Marketplace. +Flatcar is published by the `kinvolk` publisher on Marketplace. +Flatcar Container Linux is designed to be [updated automatically][update-docs] with different schedules per channel. Updating +can be [disabled][reboot-docs], although it is not recommended to do so. The [release notes][release-notes] contain +information about specific features and bug fixes. + +The following command will query for the latest image URN specifier through the Azure CLI: + +```bash +az vm image list --all -p kinvolk -f flatcar -s stable-gen2 --query '[-1]' +``` + +Results: + + + +```json +{ + "architecture": "x64", + "offer": "flatcar-container-linux-free", + "publisher": "kinvolk", + "sku": "stable-gen2", + "urn": "kinvolk:flatcar-container-linux-free:stable-gen2:3815.2.0", + "version": "3815.2.0" +} +``` + +Use the offer named `flatcar-container-linux-free`; there is also a legacy offer called `flatcar-container-linux` with the same contents. +The SKU, which is the third element of the image URN, relates to one of the release channels and also depends on whether to use Hyper-V Generation 1 or 2 VMs. +Generation 2 instance types use UEFI boot and should be preferred, the SKU matches the pattern `-gen`: `alpha-gen2`, `beta-gen2` or `stable-gen2`. +For Generation 1 instance types drop the `-gen2` from the SKU: `alpha`, `beta` or `stable`. 
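+
+As a small illustration of the difference, the following query pins the Generation 1 `stable` SKU explicitly; the JMESPath filter on the `sku` field is an addition for this example and is not required by the image offer itself:
+
+```bash
+# List only the Generation 1 stable image (exact SKU match) and take the newest entry
+az vm image list --all -p kinvolk -f flatcar-container-linux-free -s stable --query "[?sku=='stable'] | [-1]"
+```
+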
+Note: _The `az vm image list -s` flag matches parts of the SKU, which means that `-s stable` will return both the `stable` and `stable-gen2` SKUs._
+
+Before you can use the offers, you may need to accept the legal terms once, as demonstrated here for `flatcar-container-linux-free` and `stable-gen2`:
+
+```bash
+az vm image terms show --publisher kinvolk --offer flatcar-container-linux-free --plan stable-gen2
+az vm image terms accept --publisher kinvolk --offer flatcar-container-linux-free --plan stable-gen2
+```
+
+For quick tests, the official Azure CLI also supports an alias for the latest Flatcar stable image:
+
+```bash
+az vm create --name node-1 --resource-group $RESOURCE_GROUP_NAME --admin-username core --image FlatcarLinuxFreeGen2 --generate-ssh-keys
+```
+
+Results:
+
+```json
+{
+  "fqdns": null,
+  "id": "/subscriptions/xxxxx/resourceGroups/group-1xxx/providers/Microsoft.Compute/virtualMachines/node-1",
+  "location": "WestUS2",
+  "name": "node-1",
+  "powerState": "VM running",
+  "provisioningState": "Succeeded",
+  "resourceGroup": "group-1xxx",
+  "zones": null
+}
+```
+
+### CoreVM
+
+Flatcar images are also published under an offer called `flatcar-container-linux-corevm-amd64`. This offer does not require accepting image terms and does not require specifying plan information when creating instances or building derived images. The content of the images matches the other offers.
+
+```bash
+az vm image list --all -p kinvolk -f flatcar-container-linux-corevm-amd64 -s stable-gen2 --query '[-1]'
+```
+
+Results:
+
+```json
+{
+  "architecture": "x64",
+  "offer": "flatcar-container-linux-corevm-amd64",
+  "publisher": "kinvolk",
+  "sku": "stable-gen2",
+  "urn": "kinvolk:flatcar-container-linux-corevm-amd64:stable-gen2:3815.2.0",
+  "version": "3815.2.0"
+}
+```
+
+### ARM64
+
+Arm64 images are published under the offer called `flatcar-container-linux-corevm`. These are Generation 2 images, the only supported option on Azure for Arm64 instances, so the SKU contains only the release channel name without the `-gen2` suffix: `alpha`, `beta`, or `stable`. This offer has the same properties as the `CoreVM` offer described above.
+
+```bash
+az vm image list --all --architecture arm64 -p kinvolk -f flatcar -s stable --query '[-1]'
+```
+
+Results:
+
+```json
+{
+  "architecture": "Arm64",
+  "offer": "flatcar-container-linux-corevm",
+  "publisher": "kinvolk",
+  "sku": "stable",
+  "urn": "kinvolk:flatcar-container-linux-corevm:stable:3815.2.0",
+  "version": "3815.2.0"
+}
+```
+
+### Flatcar Pro Images
+
+Flatcar Pro images were paid marketplace images that came with commercial support and extra features. All the previous features of Flatcar Pro images, such as support for NVIDIA GPUs, are now available to all users in standard Flatcar marketplace images.
+
+### Plan information for building your image from the Marketplace Image
+
+When building an image based on the Marketplace image, you sometimes need to specify the original plan. The plan name is the image SKU (for example, `stable`), the plan product is the image offer (for example, `flatcar-container-linux-free`), and the plan publisher is the same (`kinvolk`).
+
+## Community Shared Image Gallery
+
+While the Marketplace images are recommended, it sometimes might be easier or required to use Shared Image Galleries, for example when using Packer for Kubernetes CAPI images.
+
+A public Shared Image Gallery hosts recent Flatcar Stable images for amd64.
Here is how to list the image definitions (for now you will only find `flatcar-stable-amd64`) and the image versions they provide: + +```bash +az sig image-definition list-community --public-gallery-name flatcar-23485951-527a-48d6-9d11-6931ff0afc2e --location westeurope +az sig image-version list-community --public-gallery-name flatcar-23485951-527a-48d6-9d11-6931ff0afc2e --gallery-image-definition flatcar-stable-amd64 --location westeurope +``` + +A second gallery, `flatcar4capi-742ef0cb-dcaa-4ecb-9cb0-bfd2e43dccc0`, exists for prebuilt Kubernetes CAPI images. It has image definitions for each CAPI version—for example, `flatcar-stable-amd64-capi-v1.26.3` provides recent Flatcar Stable versions. + +[flatcar-user]: https://groups.google.com/forum/#!forum/flatcar-linux-user +[etcd-docs]: https://etcd.io/docs +[quickstart]: ../ +[reboot-docs]: ../../setup/releases/update-strategies +[azure-cli]: https://docs.microsoft.com/en-us/cli/azure/overview +[butane-configs]: ../../provisioning/config-transpiler +[irc]: irc://irc.freenode.org:6667/#flatcar +[docs]: ../../ +[resource-group]: https://docs.microsoft.com/en-us/azure/architecture/best-practices/naming-conventions#naming-rules-and-restrictions +[storage-account]: https://docs.microsoft.com/en-us/azure/storage/common/storage-account-overview#naming-storage-accounts +[azure-flatcar-image-upload]: https://github.com/flatcar/flatcar-cloud-image-uploader +[release-notes]: https://flatcar.org/releases +[update-docs]: ../../setup/releases/update-strategies \ No newline at end of file diff --git a/tools/Dockerfile b/tools/Dockerfile new file mode 100644 index 000000000..98ba93968 --- /dev/null +++ b/tools/Dockerfile @@ -0,0 +1,47 @@ +FROM python:3.13-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + curl \ + openssl \ + bash \ + ca-certificates \ + git \ + jq \ + wget \ + tar \ + unzip \ + apt-transport-https \ + gnupg \ + lsb-release \ + krb5-user \ + libkrb5-dev \ + gcc \ + python3-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install Azure CLI directly using Microsoft script (more reliable) +RUN curl -sL https://aka.ms/InstallAzureCLIDeb | bash + +# Verify Azure CLI was installed and is on PATH +RUN which az && az --version + +# Install Innovation Engine during build +RUN curl -Lks https://raw.githubusercontent.com/Azure/InnovationEngine/v0.2.3/scripts/install_from_release.sh | bash -s -- v0.2.3 + +# Copy requirements and install dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the application code +COPY . . + +# Set environment variables (these will be overridden at runtime) +ENV AZURE_OPENAI_API_KEY="your_api_key_here" +ENV AZURE_OPENAI_ENDPOINT="your_endpoint_here" +ENV PATH="/root/.local/bin:/usr/local/bin:/usr/bin:/bin:${PATH}" + +# Set the entrypoint +ENTRYPOINT ["python", "ada.py"] diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 000000000..122e1e33c --- /dev/null +++ b/tools/README.md @@ -0,0 +1,195 @@ +# ADA - AI Documentation Assistant + +ADA (AI Documentation Assistant) helps you create, convert, and manage Executable Documents efficiently using Azure OpenAI and Innovation Engine. 
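+
+For example, once your Azure OpenAI credentials are exported (see [Installation](#installation) below), a minimal local session from the `tools/` directory looks like this:
+
+```bash
+pip install -r requirements.txt
+python ada.py
+```
+
+ADA then presents an interactive menu of the operations listed under [Usage](#usage).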
+
+## Features
+
+- **Convert to Exec Docs**: Transform existing markdown files to executable documents
+- **Generate New Exec Docs**: Create new executable documents from a workload description
+- **Reference Integration**: Include content from URLs and local files when generating documents
+- **Script Documentation**: Create comprehensive explanations for shell scripts
+- **PII Redaction**: Automatically redact sensitive information from result blocks
+- **Security Analysis**: Perform comprehensive security vulnerability assessments
+- **SEO Optimization**: Enhance document visibility and searchability
+- **Centralized Logging**: Track operations across sessions in a global log
+- **Docker Support**: Run ADA in an isolated container environment
+
+## Prerequisites
+
+- Python 3.6 or higher
+- Azure OpenAI API key and endpoint
+- Docker (optional, for containerized usage)
+
+## Installation
+
+### Option 1: Local Installation
+
+1. Clone the repository:
+   ```bash
+   git clone <repository-url>
+   cd <repository-name>/tools
+   ```
+
+2. Install the required Python packages:
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+3. Set Azure OpenAI API credentials as environment variables:
+   ```bash
+   export AZURE_OPENAI_API_KEY=<your_api_key>
+   export AZURE_OPENAI_ENDPOINT=<your_endpoint>
+   ```
+
+   To obtain an Azure OpenAI API key and endpoint, follow these steps:
+
+   1. **Sign in to the Azure Portal**:
+      - Navigate to [https://portal.azure.com](https://portal.azure.com) and log in with your Azure credentials.
+
+   2. **Create an Azure OpenAI Resource**:
+      - In the Azure Portal, select "Create a resource".
+      - Search for "Azure OpenAI" and select it from the results.
+      - Click "Create" to begin the setup process.
+      - Fill in the required details:
+        - **Subscription**: Choose your Azure subscription.
+        - **Resource Group**: Select an existing resource group or create a new one.
+        - **Region**: Choose the region closest to your location.
+        - **Name**: Provide a unique name for your Azure OpenAI resource.
+        - **Pricing Tier**: Select the appropriate pricing tier (e.g., Standard S0).
+      - Click "Review + create" and then "Create" to deploy the resource.
+
+   3. **Deploy a Model in Azure AI Studio**:
+      - After creating your Azure OpenAI resource, navigate to the **Overview** page of your resource.
+      - Click on "Go to Azure AI Studio" to open the Azure AI Studio interface.
+      - In Azure AI Studio, select "Deployments" from the left-hand menu.
+      - Click "Deploy model" and choose `gpt-4.1` from the Azure OpenAI collection.
+      - Provide a deployment name and configure any additional settings as needed.
+      - Click "Deploy" to deploy the model.
+
+   4. **Access Keys and Endpoint**:
+      - Once the deployment is complete, return to your Azure OpenAI resource in the Azure Portal.
+      - In the left-hand menu under "Resource Management", select "Keys and Endpoint".
+      - Here, you'll find your **Endpoint** URL and two **API keys** (`KEY1` and `KEY2`).
+      - Copy the endpoint URL and one of the API keys; you'll need them to authenticate your API calls.
+
+   5. **Set Environment Variables in Linux**:
+      - Open your terminal.
+      - Edit the `.bashrc` file using a text editor, such as `nano`:
+        ```bash
+        nano ~/.bashrc
+        ```
+      - Add the following lines at the end of the file, replacing `<your_api_key>` and `<your_endpoint>` with the values you obtained earlier:
+        ```bash
+        export AZURE_OPENAI_API_KEY="<your_api_key>"
+        export AZURE_OPENAI_ENDPOINT="<your_endpoint>"
+        ```
+      - Save and exit the editor (`Ctrl + X`, then `Y`, and `Enter` for nano).
+      - Apply the changes by sourcing the `.bashrc` file:
+        ```bash
+        source ~/.bashrc
+        ```
+      - To verify that the environment variables are set correctly, you can use the `printenv` command:
+        ```bash
+        printenv | grep AZURE_OPENAI
+        ```
+        This should display the variables you just set.
+
+   By following these steps, you'll have your Azure OpenAI API key and endpoint configured, a model deployed, and your environment variables set up in a Linux environment, ready for integration into your applications.
+
+   For a visual walkthrough of creating an Azure OpenAI resource and deploying a model, you might find the following video helpful:
+
+4. Run ADA:
+   ```bash
+   python ada.py
+   ```
+
+### Option 2: Docker Installation
+
+1. Build the Docker image:
+   ```bash
+   docker build -t ada-tool .
+   ```
+
+2. Run ADA in a Docker container:
+   ```bash
+   docker run -it --rm \
+     -e AZURE_OPENAI_API_KEY="your_api_key_here" \
+     -e AZURE_OPENAI_ENDPOINT="your_endpoint_here" \
+     -v "$(pwd):/app/workspace" \
+     -v "$HOME/.azure:/root/.azure" \
+     -w /app/workspace \
+     ada-tool
+   ```
+
+3. Run ADA:
+   ```bash
+   ./run-ada.sh
+   ```
+
+## Usage
+
+1. Select from the available options:
+   - Option 1: Convert an existing markdown file to an Exec Doc
+   - Option 2: Generate a new Exec Doc from a workload description
+   - Option 3: Create descriptions for your shell script
+   - Option 4: Redact PII from your Doc
+   - Option 5: Perform security analysis on your Doc
+   - Option 6: Perform SEO optimization on your Doc
+
+2. Follow the prompts for each option:
+   - For file conversion: provide the path to your source file
+   - For generating new docs: describe the workload and optionally add reference data
+   - For script documentation: provide the path to your script and context
+   - For PII redaction: provide the path to your source document
+   - For security analysis: provide the path to the document to analyze
+   - For SEO optimization: provide the path to the document to optimize
+
+## Output Location
+
+- When generating a new Exec Doc (option 2), ADA creates a dedicated folder for the output
+- For all other operations, ADA saves output files in the same directory as the source file
+- Execution logs are saved in a centralized log.json file in the script directory
+
+## Data Sources Integration
+
+When generating a new Exec Doc, you can incorporate content from:
+- Web URLs (HTML content will be extracted)
+- Local files (content will be read directly)
+
+These sources provide additional context for more comprehensive document generation.
+
+## Advanced Features
+
+### Centralized Logging
+ADA maintains a comprehensive log of all operations in a centralized log.json file, tracking:
+- Document creation and conversion
+- Script documentation
+- PII redaction
+- Security analysis
+- SEO optimization
+- Success rates and execution times
+
+### Error Resolution System
+When errors occur during testing, ADA employs a sophisticated resolution system:
+- Analyzes error messages to determine their source
+- Uses progressive troubleshooting strategies
+- Provides specific fixes for different error patterns
+- Remembers previous errors to avoid repetitive solutions
+
+## Requirements
+
+ADA depends on the following Python packages:
+- azure-identity>=1.17.1
+- beautifulsoup4>=4.12.2
+- openai>=1.65.1
+- requests>=2.31.0
+- requests-kerberos>=0.12.0
+- requests-ntlm>=1.1.0
+- requests-toolbelt>=1.0.0
+
+## License
+
+This project is licensed under the MIT License - see the LICENSE file for details.
+ +## Contributing + +Please read CONTRIBUTING.md for details on our code of conduct and the process for submitting pull requests. diff --git a/tools/ada.py b/tools/ada.py new file mode 100644 index 000000000..7e98c38d4 --- /dev/null +++ b/tools/ada.py @@ -0,0 +1,2393 @@ +# WELCOME TO ADA - AI DOCUMENTATION ASSISTANT + +import os +import sys +import subprocess +import shutil +from importlib.metadata import version, PackageNotFoundError +import csv +import time +from datetime import datetime +from openai import AzureOpenAI +from collections import defaultdict +import re +import json +import yaml +import requests +from bs4 import BeautifulSoup +import difflib + +client = AzureOpenAI( + api_key=os.getenv("AZURE_OPENAI_API_KEY"), + api_version="2024-12-01-preview", + azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT") +) + +deployment_name = 'gpt-4.1' + +REQUIRED_PACKAGES = [ + 'openai', + 'azure-identity', + 'requests', +] + +for package in REQUIRED_PACKAGES: + try: + # Attempt to get the package version + version(package) + except PackageNotFoundError: + subprocess.check_call([sys.executable, "-m", "pip", "install", package]) + +system_prompt = """Exec Docs is a vehicle that transforms standard markdown into interactive, executable learning content, allowing code commands within the document to be run step-by-step or “one-click”. This is powered by the Innovation Engine, an open-source CLI tool that powers the execution and testing of these markdown scripts and can integrate with automated CI/CD pipelines. You are an Exec Doc writing expert. You will either write a new exec doc from scratch if no doc is attached or update an existing one if it is attached. You must adhere to the following rules while presenting your output: + +## IF YOU ARE UPDATING AN EXISTING DOC + +Ensure that every piece of information outside of code blocks – such as metadata, descriptions, comments, instructions, and any other narrative content – is preserved. The final output should be a comprehensive document that retains all correct code blocks as well as the rich contextual and descriptive details from the source doc, creating the best of both worlds. + +### Prerequisites + +Check if all prerequisites below are met before writing the Exec Doc. ***If any of the below prerequisites are not met, then either add them to the Exec Doc in progress or find another valid doc that can fulfill them. Do not move to the next step until then*** + +1. Ensure your Exec Doc is a markdown file. + + >**Note:** If you are converting an existing Azure Doc to an Exec Doc, you can either find it in your fork or copy the raw markdown content of the Azure Doc into a new markdown file in your local repo (this can be found by clicking "Raw" in the GitHub view of the Azure Doc). + +2. Ensure your Exec Doc is written with the LF line break type. + + **Example:** + + ![LF VSCode](https://github.com/MicrosoftDocs/executable-docs/assets/146123940/3501cd38-2aa9-4e98-a782-c44ae278fc21) + + >**Note:** The button will appear according to the IDE you are using. For the VS Code IDE, you can check this by clicking on the LF/CLRF button at the bottom right corner of the screen. + +3. Ensure all files that your Exec Doc references live under the same parent folder as your Exec Doc + + **Example:** + + If your Exec Doc ***my-exec-doc.md*** references a script file ***my-script.yaml*** within, the script file should be in the same folder as the Exec Doc. 
+ + ```bash + ├── master-folder + │ └── parent-folder + │ ├── my-exec-doc.md + │ └── my-script.yaml + ``` + +4. Code blocks are used to provide examples, commands, or other code snippets in Exec Docs. They are distinguished by a triple backtick (```) at the start and end of the block. + + Ensure that the Exec Doc contains at least 1 code block and every input code block's type in the Exec Doc is taken from this list: + + - bash + - azurecli + - azure-cli-interactive + - azurecli-interactive + + **Example:** + + ```bash + az group create --name $MY_RESOURCE_GROUP_NAME --location $REGION + ``` + + >**Note:** This rule does not apply to output code blocks, which are used to display the results of commands, scripts, or other operations. These blocks help in illustrating what the expected output should look like. They include, but are not limited to, the following types: _output, json, yaml, console, text, and log._ + + >**Note:** While Innovation Engine can _parse_ a code block of any type, given its current features, it can only _execute_ code blocks of the types above. So, it is important to ensure that the code blocks in your Exec Doc are of the types above. + +5. Headings are used to organize content in a document. The number of hashes indicates the level of the heading. For example, a single hash (#) denotes an h1 heading, two hashes (##) denote an h2 heading, and so on. Innovation Engine uses headings to structure the content of an Exec Doc and to provide a clear outline of the document's contents. + + Ensure there is at least one h1 heading in the Exec Doc, denoted by a single hash (#) at the start of the line. + + **Example:** + + ```markdown + # Quickstart: Deploy an Azure Kubernetes Service (AKS) cluster using Azure CLI + ``` + +### Writing Requirements + +6. Ensure that the Exec Doc does not include any commands or descriptions related to logging into Azure (e.g., `az login`) or setting the subscription ID. The user is expected to have already logged in to Azure and set their subscription beforehand. Do not include these commands or any descriptions about them in the Exec Doc. + +7. Ensure that the Exec Doc does not require any user interaction during its execution. The document should not include any commands or scripts that prompt the user for input or expect interaction with the terminal. All inputs must be predefined and handled automatically within the script. + +8. Appropriately add metadata at the start of the Exec Doc. Here are some mandatory fields: + + - title = the title of the Exec Doc + - description = the description of the Exec Doc + - ms.topic = what kind of a doc it is e.g. article, blog, etc. + - ms.date = the current date in the format MM/DD/YYYY + - author = author's GitHub username + - ms.author = author's username (e.g. Microsoft Alias) + - **ms.custom = comma-separated list of tags to identify the Exec Doc (innovation-engine is the one tag that is mandatory in this list)** + + **Example:** + + ```yaml + --- + title: 'Quickstart: Deploy an Azure Kubernetes Service (AKS) cluster using Azure CLI' + description: Learn how to quickly deploy a Kubernetes cluster and deploy an application in Azure Kubernetes Service (AKS) using Azure CLI. + ms.topic: quickstart + ms.date: 11/11/2021 + author: namanparikh + ms.author: namanaprikh + ms.custom: devx-track-azurecli, mode-api, innovation-engine, linux-related-content + --- + ``` + +9. Ensure the environment variable names are not placeholders i.e. <> but have a certain generic, useful name. 
For the location/region parameter, default to "eastus2" or "canadacentral" or "centralindia". Additionally, appropriately add descriptions below every section explaining what is happening in that section in crisp but necessary detail so that the user can learn as they go.
+
+10. Don't start and end your answer with ``` backticks!!! Don't add backticks to the metadata at the top!!!
+
+11. Ensure that any info, literally any info whether it is a comment, tag, description, etc., which is not within a code block remains unchanged. Preserve ALL details of the doc.
+
+12. Environment variables are dynamic values that store configuration settings, system paths, and other information that can be accessed throughout a doc. By using environment variables, you can separate configuration details from the code, making it easier to manage and deploy applications in an environment like Exec Docs.
+
+    Declare environment variables _as they are being used_ in the Exec Doc using the export command. This is a best practice to ensure that the variables are accessible throughout the doc.
+
+    ### Example Exec Doc 1 - Environment variables declared at the _top_ of an Exec Doc, not declared as used
+
+    **Environment Variables Section**
+
+    We are at the start of the Exec Doc and are declaring environment variables that will be used throughout the doc.
+
+    ```bash
+    export REGION="canadacentral"
+    ```
+
+    **Test Section**
+
+    We are now in the middle of the Exec Doc and we will create an AKS cluster.
+
+    ```bash
+    az aks create --resource-group MyResourceGroup --name MyAKSCluster --location $REGION
+    ```
+
+    ### Example Exec Doc 2 - Environment variables declared as used
+
+    **Test Section**
+
+    We are in the middle of the Exec Doc and we will create an AKS cluster.
+
+    ```bash
+    export REGION="canadacentral"
+    export RESOURCE_GROUP_NAME="MyResourceGroup"
+    export AKS_CLUSTER_NAME="MyAKSCluster"
+    az aks create --resource-group $RESOURCE_GROUP_NAME --name $AKS_CLUSTER_NAME --location $REGION
+    ```
+
+    >**Note:** If you are converting an existing Azure Doc to an Exec Doc and the Azure Doc does not use environment variables at all, it is an Exec Doc writing best practice to add them. Additionally, if the Azure Doc has environment variables but they are not declared as they are being used, it is recommended to update them to follow this best practice.
+
+    >**Note:** Don't have any spaces around the equal sign when declaring environment variables.
+
+13. A major component of Exec Docs is automated infrastructure deployment on the cloud. While testing the doc, if you do not update relevant environment variable names, the doc will fail when run/executed more than once as the resource group or other resources will already exist from the previous runs.
+
+    Add a random suffix at the end of _relevant_ environment variable(s). The example below shows how this would work when you are creating a resource group.
+
+    **Example:**
+
+    ```bash
+    export RANDOM_SUFFIX=$(openssl rand -hex 3)
+    export REGION="eastus"
+    az group create --name "MyResourceGroup$RANDOM_SUFFIX" --location $REGION
+    ```
+
+    >**Note:** Add a random suffix to relevant variables that are likely to be unique for each deployment, such as resource group names, VM names, and other resources that need to be uniquely identifiable. However, do not add a random suffix to variables that are constant or environment-specific, such as region, username, or configuration settings that do not change between deployments.
+ + >**Note:** You can generate your own random suffix or use the one provided in the example above. The `openssl rand -hex 3` command generates a random 3-character hexadecimal string. This string is then appended to the resource group name to ensure that the resource group name is unique for each deployment. + +14. In Exec Docs, result blocks are distinguished by a custom expected_similarity comment tag followed by a code block. These result blocks indicate to Innovation Engine what the minimum degree of similarity should be between the actual and the expected output of a code block (one which returns something in the terminal that is relevant to benchmark against). Learn More: [Result Blocks](https://github.com/Azure/InnovationEngine/blob/main/README.md#result-blocks). + + Add result block(s) below code block(s) that you would want Innovation Engine to verify i.e. code block(s) which produce an output in the terminal that is relevant to benchmark against. Follow these steps when adding a result block below a code block for the first time: + + - Check if the code block does not already have a result block below it. If it does, ensure the result block is formatted correctly, as shown in the example below, and move to the next code block. + - [Open Azure Cloudshell](https://ms.portal.azure.com/#cloudshell/) + - **[Optional]**: Set your active subscription to the one you are using to test Exec Docs. Ideally, this sub should have permissions to run commands in your tested Exec Docs. Run the following command: + + ```bash + az account set --subscription "" + ``` + - Run the command in the code block in cloudshell. If it returns an output that you would want Innovation Engine to verify, copy the output from the terminal and paste it in a new code block below the original code block. The way a result code block should be formatted has been shown below, in this case for the command [az group create --name "MyResourceGroup123" --location eastus](http://_vscodecontentref_/1). + + **Example:** + ```markdown + Results: + + + + ```output + {{ + "id": "/subscriptions/abcabc-defdef-ghighi-jkljkl/resourceGroups/MyResourceGroup123", + "location": "eastus", + "managedBy": null, + "name": "MyResourceGroup123", + "properties": {{ + "provisioningState": "Succeeded" + }}, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" + }} + ``` + ``` + - If you run into an error while executing a code block or the code block is running in an infinite loop, update the Exec Doc based on the error stack trace, restart/clear Cloudshell, and rerun the command block(s) from the start until you reach that command block. This is done to override any potential issues that may have occurred during the initial run. More guidance is given in the [FAQ section](#frequently-asked-questions-faqs) below. + + >**Note:** The expected similarity value is a percentage of similarity between 0 and 1 which specifies how closely the true output needs to match the template output given in the results block - 0 being no similarity, 1 being an exact match. If you are uncertain about the value, it is recommended to set the expected similarity to 0.3 i.e. 30% expected similarity to account for small variations. Once you have run the command multiple times and are confident that the output is consistent, you can adjust the expected similarity value accordingly. + + >**Note:** If you are executing a command in Cloudshell which references a yaml/json file, you would need to create the yaml/json file in Cloudshell and then run the command. 
This is because Cloudshell does not support the execution of commands that reference local files. You can add the file via the cat command or by creating the file in the Cloudshell editor.
+
+    >**Note:** Result blocks are not required but recommended for commands that return some output in the terminal. They help Innovation Engine verify the output of a command and act as checkpoints to ensure that the doc is moving in the right direction.
+
+15. Redacting PII from the output helps protect sensitive information from being inadvertently shared or exposed. This is crucial for maintaining privacy, complying with data protection regulations, and furthering the company's security posture.
+
+    Ensure result block(s) have all the PII (Personally Identifiable Information) stricken out from them and replaced with x's.
+
+    **Example:**
+
+    ```markdown
+    Results:
+
+    ```output
+    {{
+        "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/MyResourceGroupxxx",
+        "location": "eastus",
+        "managedBy": null,
+        "name": "MyResourceGroupxxx",
+        "properties": {{
+            "provisioningState": "Succeeded"
+        }},
+        "tags": null,
+        "type": "Microsoft.Resources/resourceGroups"
+    }}
+    ```
+    ```
+
+    >**Note:** The number of x's used to redact PII need not be the same as the number of characters in the original PII. Furthermore, it is recommended not to redact the key names in the output, only the values containing the PII (which are usually strings).
+
+    >**Note:** Here are some examples of PII in result blocks: Unique identifiers for resources, Email Addresses, Phone Numbers, IP Addresses, Credit Card Numbers, Social Security Numbers (SSNs), Usernames, Resource Names, Subscription IDs, Resource Group Names, Tenant IDs, Service Principal Names, Client IDs, Secrets and Keys.
+
+16. If you are converting an existing Azure Doc to an Exec Doc and if the existing doc contains a "Delete Resources" (or equivalent) section comprising resource/other deletion command(s), remove the code blocks in that section or remove that section entirely.
+
+    >**Note:** We remove commands from this section ***only*** in Exec Docs. This is because Innovation Engine executes all relevant command(s) that it encounters, including deleting the resources. That would be counterproductive to automated deployment of cloud infrastructure.
+
+17. If the original document lists a prerequisite resource (such as an AKS cluster, VM, storage account, etc.), you MUST NOT add any new commands to create that resource in the Exec Doc.
+
+    - **Example:** If the doc says "This article assumes you have an existing AKS cluster," do NOT add `az aks create` or any equivalent cluster creation commands. Only include steps for interacting with or managing the existing resource.
+    - This rule applies to any resource type, not just AKS. Always respect explicit prerequisites and never override them by adding creation steps.
+    - If the prerequisite is stated in any form (e.g., "Before you begin, create a resource group"), treat that resource as pre-existing and do not add creation commands for it.
+    - If you are unsure whether a resource should be created, always preserve the prerequisite as stated and avoid introducing creation commands for that resource.
+
+
+## WRITE AND ONLY GIVE THE EXEC DOC USING THE ABOVE RULES FOR THE FOLLOWING WORKLOAD: """
+
+# Add this after imports
+def print_header(text, style=None):
+    """Print a header with customized boundary symbols based on content importance.
+ + Args: + text: The header text to display + style: Symbol style or None for automatic selection + """ + # Auto-select symbol based on text content if style is None + # if style is None: + if "WELCOME" in text or "TITLE" in text.upper(): + style = "=" # Most important - main titles + elif "ERROR" in text.upper() or "FAILED" in text.upper(): + style = "!" # Errors and failures + elif "SUCCESS" in text.upper() or "COMPLETED" in text.upper(): + style = "+" # Success messages + elif "MENU" in text.upper() or "OPTIONS" in text.upper(): + style = "-" # Menu sections + elif "STEPS" in text.upper() or "PROCEDURE" in text.upper(): + style = "~" # Procedural sections + elif "NOTE" in text.upper() or "TIP" in text.upper(): + style = "*" # Notes and tips + else: + style = "·" # Default for other sections + + width = min(os.get_terminal_size().columns, 70) + border = style * width + print(f"\n{border}") + + # Center the text if it's shorter than the width + if len(text) < width - 4: + padding = (width - len(text)) // 2 + print(" " * padding + text) + else: + # If text is too long, wrap it + import textwrap + for line in textwrap.wrap(text, width=width-4): + print(f" {line}") + + print(f"{border}\n") + +def print_message(text, prefix="", indent=0, color=None): + """Print formatted message with optional prefix.""" + indent_str = " " * indent + for line in text.split("\n"): + print(f"{indent_str}{prefix}{line}") + +def install_innovation_engine(): + if shutil.which("ie") is not None: + print_message("\nInnovation Engine is already installed.") + return + print_message("Installing Innovation Engine...", prefix="🔧 ") + subprocess.check_call( + "curl -Lks https://raw.githubusercontent.com/Azure/InnovationEngine/v0.2.3/scripts/install_from_release.sh | /bin/bash -s -- v0.2.3", + shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL + ) + print_message("\nInnovation Engine installed successfully.\n") + +def get_last_error_log(): + log_file = "ie.log" + if os.path.exists(log_file): + with open(log_file, "r") as f: + lines = f.readlines() + error_index = None + for i in range(len(lines) - 1, -1, -1): + if "level=error" in lines[i]: + error_index = i + break + if error_index is not None: + return "".join(lines[error_index:]) + return "No error log found." + +def generate_script_description(script_path, context=""): + """Generate descriptions around a shell script without modifying the code.""" + if not os.path.isfile(script_path): + print_message(f"\nError: The file {script_path} does not exist.") + return None + + try: + with open(script_path, "r") as f: + script_content = f.read() + except Exception as e: + print_message(f"\nError reading script: {e}") + return None + + # Create output filename + script_name = os.path.splitext(os.path.basename(script_path))[0] + output_file = f"{script_name}_documented.md" + + print_message("\nGenerating documentation for shell script...") + + # Prepare prompt for the LLM + script_prompt = f"""Create an Exec Doc that explains this shell script in detail. + DO NOT CHANGE ANY CODE in the script. Instead: + 1. Add clear descriptions before and after each functional block + 2. Explain what each section does + 3. Format as a proper markdown document with appropriate headings and structure + 4. 
Include all the necessary metadata in the front matter + + Script context provided by user: {context} + + Here is the script content: + ``` + {script_content} + ``` + """ + + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": script_prompt} + ] + ) + + doc_content = response.choices[0].message.content + + # Save the generated documentation + try: + with open(output_file, "w") as f: + f.write(doc_content) + print_message(f"\nScript documentation saved to: {output_file}") + return output_file + except Exception as e: + print_message(f"\nError saving documentation: {e}") + return None + +def redact_pii_from_doc(doc_path): + """Redact PII from result blocks in an Exec Doc.""" + if not os.path.isfile(doc_path): + print_message(f"\nError: The file {doc_path} does not exist.") + return None + + try: + with open(doc_path, "r") as f: + doc_content = f.read() + except Exception as e: + print_message(f"\nError reading document: {e}") + return None + + # Create output filename + doc_name = os.path.splitext(os.path.basename(doc_path))[0] + output_file = f"{doc_name}_redacted.md" + + print_message("\nRedacting PII from document...") + + # Use the LLM to identify and redact PII + redaction_prompt = """Redacting PII from the output helps protect sensitive information from being inadvertently shared or exposed. This is crucial for maintaining privacy, complying with data protection regulations, and furthering the company's security posture. + + Ensure result block(s) have all the PII (Personally Identifiable Information) stricken out from them and replaced with x’s. + + **Example:** + + ```markdown + Results: + + + + ```output + {{ + "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/MyResourceGroupxxx", + "location": "eastus", + "managedBy": null, + "name": "MyResourceGroupxxx", + "properties": {{ + "provisioningState": "Succeeded" + }}, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" + }} + ``` + ``` + + >**Note:** The number of x's used to redact PII need not be the same as the number of characters in the original PII. Furthermore, it is recommended not to redact the key names in the output, only the values containing the PII (which are usually strings). + + >**Note:** Here are some examples of PII in result blocks: Unique identifiers for resources, Email Addresses, Phone Numbers, IP Addresses, Credit Card Numbers, Social Security Numbers (SSNs), Usernames, Resource Names, Subscription IDs, Resource Group Names, Tenant IDs, Service Principal Names, Client IDs, Secrets and Keys. + + Document content: + """ + + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": "You are an AI specialized in PII redaction. 
Either redact the PII or return the document as is - nothing els is acceptable."}, + {"role": "user", "content": redaction_prompt + "\n\n" + doc_content} + ] + ) + + redacted_content = response.choices[0].message.content + + # Save the redacted document + try: + with open(output_file, "w") as f: + f.write(redacted_content) + print_message(f"\nRedacted document saved to: {output_file}") + return output_file + except Exception as e: + print_message(f"\nError saving redacted document: {e}") + return None + +def generate_dependency_files(doc_path): + """Extract and generate dependency files referenced in an Exec Doc.""" + if not os.path.isfile(doc_path): + print_message(f"\nError: The file {doc_path} does not exist.") + return False, [] + + try: + with open(doc_path, "r") as f: + doc_content = f.read() + except Exception as e: + print_message(f"\nError reading document: {e}") + return False, [] + + # Directory where the doc is located + doc_dir = os.path.dirname(doc_path) or "." + + print_message("\nAnalyzing document for dependencies...") + + # First, detect file creation patterns in the document to avoid conflicts + file_creation_patterns = [ + # Cat heredoc to a file + (r'cat\s*<<\s*[\'"]?(EOF|END)[\'"]?\s*>\s*([^\s;]+)', 1), + # Echo content to a file + (r'echo\s+.*?>\s*([^\s;]+)', 0), + # Tee command + (r'tee\s+([^\s;]+)', 0) + ] + + doc_created_files = [] + for pattern, group_idx in file_creation_patterns: + matches = re.findall(pattern, doc_content, re.DOTALL) + for match in matches: + if isinstance(match, tuple): + filename = match[group_idx] + else: + filename = match + doc_created_files.append(filename) + + if doc_created_files: + print_message("\nDetected file creation commands in document:") + for file in doc_created_files: + print_message(f" - {file}") + + # Enhanced prompt for better dependency file identification + dependency_prompt = """Analyze this Exec Doc and identify ANY files that the user is instructed to create. + + Look specifically for: + 1. Files where the doc says "Create a file named X" or similar instructions + 2. Files that are referenced in commands (e.g., kubectl apply -f filename.yaml) + 3. YAML files (configuration, templates, manifests) + 4. JSON files (configuration, templates, API payloads) + 5. Shell scripts (.sh files) + 6. Terraform files (.tf or .tfvars) + 7. Any other files where content is provided and meant to be saved separately + + IMPORTANT: Include files even if their full content is provided in the document! + If the doc instructs the user to create a file and provides its content, this IS a dependency file. + Look for patterns like "create the following file" or "save this content to filename.xyz". + + For each file you identify: + 1. Extract the exact filename with its extension + 2. Use the exact content provided in the document + 3. 
Format your response as a JSON list + """ + + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": "You are an AI specialized in extracting and generating dependency files."}, + {"role": "user", "content": dependency_prompt + "\n\n" + doc_content} + ] + ) + + created_dep_files = [] + + try: + # Extract the JSON part from the response with improved robustness + response_text = response.choices[0].message.content + + # Find JSON content between triple backticks with more flexible pattern matching + json_match = re.search(r'```(?:json)?(.+?)```', response_text, re.DOTALL) + if json_match: + # Clean the extracted JSON content + json_content = json_match.group(1).strip() + try: + dependency_list = json.loads(json_content) + except json.JSONDecodeError: + # Try removing any non-JSON text at the beginning or end + json_content = re.search(r'(\[.+?\])', json_content, re.DOTALL) + if json_content: + dependency_list = json.loads(json_content.group(1)) + else: + raise ValueError("Could not extract valid JSON from response") + else: + # Try to parse the entire response as JSON + try: + dependency_list = json.loads(response_text) + except json.JSONDecodeError: + # Last resort: look for anything that looks like a JSON array + array_match = re.search(r'\[(.*?)\]', response_text.replace('\n', ''), re.DOTALL) + if array_match: + try: + dependency_list = json.loads('[' + array_match.group(1) + ']') + except: + raise ValueError("Could not extract valid JSON from response") + else: + raise ValueError("Response did not contain valid JSON") + + if not dependency_list: + print_message("\nNo dependency files identified.") + return True, [] + + # Filter out dependency files that have inline creation commands in the document + filtered_deps = [] + for dep in dependency_list: + filename = dep.get("filename") + if not filename: + continue + + if filename in doc_created_files: + print_message(f"\nWARNING: File '{filename}' is both created in document and identified as a dependency.") + print_message(f" - Skipping dependency management for this file to avoid conflicts.") + continue + + filtered_deps.append(dep) + + # Create each dependency file with type-specific handling + created_files = [] + for dep in filtered_deps: + filename = dep.get("filename") + content = dep.get("content") + file_type = dep.get("type", "").lower() + + if not filename or not content: + continue + + file_path = os.path.join(doc_dir, filename) + + # Check if file already exists + if os.path.exists(file_path): + print_message(f"\nFile already exists: {filename} - Skipping") + # Load content from existing file + try: + with open(file_path, "r") as f: + existing_content = f.read() + created_dep_files.append({ + "filename": filename, + "path": file_path, + "type": file_type, + "content": existing_content # Include content + }) + except Exception as e: + print_message(f"\nWarning: Could not read content from {filename}: {e}") + created_dep_files.append({ + "filename": filename, + "path": file_path, + "type": file_type + }) + continue + + # Validate and format content based on file type + try: + if filename.endswith('.json') or file_type == 'json': + # Validate JSON + try: + parsed = json.loads(content) + content = json.dumps(parsed, indent=2) # Pretty-print_message JSON + except json.JSONDecodeError: + print_message(f"\nWarning: Content for {filename} is not valid JSON. 
Saving as plain text.") + + elif filename.endswith('.yaml') or filename.endswith('.yml') or file_type == 'yaml': + # Validate YAML + try: + parsed = yaml.safe_load(content) + content = yaml.dump(parsed, default_flow_style=False) # Pretty-print_message YAML + except yaml.YAMLError: + print_message(f"\nWarning: Content for {filename} is not valid YAML. Saving as plain text.") + + elif filename.endswith('.tf') or filename.endswith('.tfvars') or file_type == 'terraform': + # Just store terraform files as-is + pass + + elif filename.endswith('.sh') or file_type == 'shell': + # Ensure shell scripts are executable + is_executable = True + + # Write the file + with open(file_path, "w") as f: + f.write(content) + + # Make shell scripts executable if needed + if (filename.endswith('.sh') or file_type == 'shell') and 'is_executable' in locals() and is_executable: + os.chmod(file_path, os.stat(file_path).st_mode | 0o111) # Add executable bit + + created_files.append(filename) + created_dep_files.append({ + "filename": filename, + "path": file_path, + "type": file_type, + "content": content + }) + except Exception as e: + print_message(f"\nError creating {filename}: {e}") + + if created_files: + print_message(f"\nCreated {len(created_files)} dependency files: {', '.join(created_files)}") + else: + print_message("\nNo new dependency files were created.") + + return True, created_dep_files + except Exception as e: + print_message(f"\nError generating dependency files: {e}") + print_message("\nResponse from model was not valid JSON. Raw response:") + return False, [] + +# Add this function after generate_dependency_files function (approximately line 609) + +def transform_document_for_dependencies(doc_path, dependency_files): + """Remove file creation commands from document when using dependency files.""" + if not dependency_files: + return False + + try: + with open(doc_path, "r") as f: + doc_content = f.read() + + original_content = doc_content + modified = False + + for dep_file in dependency_files: + filename = dep_file["filename"] + + # Pattern to match cat/EOF blocks for file creation + cat_pattern = re.compile( + r'```(?:bash|azurecli|azure-cli-interactive|azurecli-interactive)\s*\n' + r'(.*?cat\s*<<\s*[\'"]?(EOF|END)[\'"]?\s*>\s*' + re.escape(filename) + r'.*?EOF.*?)' + r'\n```', + re.DOTALL + ) + + # Replace with a reference to the external file + if cat_pattern.search(doc_content): + replacement = f"```bash\n# Using external file: {filename}\n```\n\n" + doc_content = cat_pattern.sub(replacement, doc_content) + modified = True + print_message(f"\nTransformed document to use external {filename} instead of inline creation") + + # Handle other file creation patterns (echo, tee) + echo_pattern = re.compile( + r'```(?:bash|azurecli|azure-cli-interactive|azurecli-interactive)\s*\n' + r'(.*?echo\s+.*?>\s*' + re.escape(filename) + r'.*?)' + r'\n```', + re.DOTALL + ) + if echo_pattern.search(doc_content): + replacement = f"```bash\n# Using external file: {filename}\n```\n\n" + doc_content = echo_pattern.sub(replacement, doc_content) + modified = True + + if modified: + with open(doc_path, "w") as f: + f.write(doc_content) + print_message("\nDocument transformed to use external dependency files") + return True + return False + except Exception as e: + print_message(f"\nError transforming document: {e}") + return False + +def update_dependency_file(file_info, error_message, main_doc_path): + """Update a dependency file based on error message.""" + filename = file_info["filename"] + file_path = 
file_info["path"] + file_type = file_info["type"] + + print_message(f"\nUpdating dependency file: {filename} based on error...") + + try: + with open(file_path, "r") as f: + file_content = f.read() + + with open(main_doc_path, "r") as f: + doc_content = f.read() + + # Prompt for fixing the dependency file + fix_prompt = f"""The following dependency file related to the Exec Doc is causing errors: + + File: {filename} + Type: {file_type} + Error: {error_message} + + Here is the current content of the file: + + {file_content} + + Here is the main Exec Doc for context: + + {doc_content} + + Please fix the issue in the dependency file. Return ONLY the corrected file content, nothing else. + """ + + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": "You are an AI specialized in fixing technical issues in configuration and code files."}, + {"role": "user", "content": fix_prompt} + ] + ) + + updated_content = response.choices[0].message.content + + # Remove any markdown formatting that might have been added + updated_content = re.sub(r'^```.*$', '', updated_content, flags=re.MULTILINE) + updated_content = re.sub(r'^```$', '', updated_content, flags=re.MULTILINE) + updated_content = updated_content.strip() + + # Validate the updated content based on file type + if filename.endswith('.json') or file_type == 'json': + try: + parsed = json.loads(updated_content) + updated_content = json.dumps(parsed, indent=2) # Pretty-print_message JSON + except json.JSONDecodeError: + print_message(f"\nWarning: Updated content for {filename} is not valid JSON.") + + elif filename.endswith('.yaml') or filename.endswith('.yml') or file_type == 'yaml': + try: + parsed = yaml.safe_load(updated_content) + updated_content = yaml.dump(parsed, default_flow_style=False) # Pretty-print_message YAML + except yaml.YAMLError: + print_message(f"\nWarning: Updated content for {filename} is not valid YAML.") + + # Write the updated content to the file + with open(file_path, "w") as f: + f.write(updated_content) + + print_message(f"\nUpdated dependency file: {filename}") + return True + except Exception as e: + print_message(f"\nError updating dependency file {filename}: {e}") + return False + +def analyze_error(error_log, dependency_files=[]): + """Analyze error log to determine if issue is in main doc or dependency files.""" + if not dependency_files: + return {"type": "main_doc", "file": None} + + for dep_file in dependency_files: + filename = dep_file["filename"] + # Check if error mentions the dependency file name + if filename in error_log: + return { + "type": "dependency_file", + "file": dep_file, + "message": error_log + } + + # If no specific dependency file is mentioned, check for patterns + error_patterns = [ + r"Error: open (.*?): no such file or directory", + r"couldn't find file (.*?)( |$|\n)", + r"failed to read (.*?):( |$|\n)", + r"file (.*?) 
not found", + r"YAML|yaml parsing error", + r"JSON|json parsing error", + r"invalid format in (.*?)( |$|\n)" + ] + + for pattern in error_patterns: + matches = re.search(pattern, error_log, re.IGNORECASE) + if matches and len(matches.groups()) > 0: + file_mentioned = matches.group(1) + for dep_file in dependency_files: + if dep_file["filename"] in file_mentioned: + return { + "type": "dependency_file", + "file": dep_file, + "message": error_log + } + + # Default to main doc if no specific dependency file issues found + return {"type": "main_doc", "file": None} + +def remove_backticks_from_file(file_path): + with open(file_path, "r") as f: + lines = f.readlines() + + if lines and "```" in lines[0]: + lines = lines[1:] + + if lines and "```" in lines[-1]: + lines = lines[:-1] + + # Remove backticks before and after the metadata section + if lines and "---" in lines[0]: + for i in range(1, len(lines)): + if "---" in lines[i]: + if "```" in lines[i + 1]: + lines = lines[:i + 1] + lines[i + 2:] + break + + with open(file_path, "w") as f: + f.writelines(lines) + +def setup_output_folder(input_type, input_name, title=None): + """Create a folder to store all iterations of the document.""" + if title: + # Use the title if provided (cleaner folder name) + base_name = title.replace(' ', '_').replace(':', '').replace(';', '').replace('/', '_') + base_name = re.sub(r'[^\w\-_]', '', base_name) # Remove special chars + else: + # Fallback to old naming scheme if title not available + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + if input_type == 'file': + base_name = os.path.splitext(os.path.basename(input_name))[0] + elif input_type == 'workload_description': + base_name = "_".join(input_name.split()[:3]) + else: + base_name = "exec_doc" + base_name = f"{timestamp}_{input_type}_{base_name}" + + # Handle duplicate folder names + folder_name = base_name + counter = 1 + while os.path.exists(folder_name): + folder_name = f"{base_name}_{counter}" + counter += 1 + + # Create the folder at the script's location + os.makedirs(folder_name, exist_ok=True) + + return folder_name + +def check_existing_log(input_path=None): + """Check if global log.json exists at the script level. 
+ + Args: + input_path: Optional path (no longer needed for logging) + + Returns: + Tuple of (exists, log_path, existing_data) + exists: Boolean indicating if log.json exists + log_path: Path to the log file + existing_data: Dictionary containing the existing log data + """ + # Get the directory where the script is located + script_dir = os.path.dirname(os.path.realpath(__file__)) + log_file_path = os.path.join(script_dir, "log.json") + + # Check if log.json exists in the script directory + if os.path.isfile(log_file_path): + try: + with open(log_file_path, 'r') as f: + existing_data = json.load(f) + return True, log_file_path, existing_data + except Exception as e: + print_message(f"\nWarning: Found log.json but couldn't read it: {e}") + + return False, log_file_path, None + +def calculate_success_rate(log_data): + """Calculate success rate for doc creation/conversion attempts.""" + entries = log_data.get("doc_creation", []) + log_data.get("doc_conversion", []) + if not entries: + return 0 + success_count = sum(1 for entry in entries if entry.get("Result") == "Success") + return round(success_count / len(entries), 2) + +def calculate_total_execution_time(log_data): + """Sum up execution time across all operations.""" + total = 0 + for section in log_data: + if section != "info" and isinstance(log_data[section], list): + total += sum(entry.get("Execution Time (in seconds)", 0) for entry in log_data[section]) + return total + +def update_progress_log(output_folder, new_data, input_type, user_intent=None, existing_data=None): + """Update the JSON progress log with the new structure.""" + # Get the directory where the script is located + script_dir = os.path.dirname(os.path.realpath(__file__)) + log_file = os.path.join(script_dir, "log.json") + + # Map input_type to appropriate section name + section_map = { + 'file': 'doc_conversion', + 'workload_description': 'doc_creation', + 'shell_script': 'script_documentation', + 'pii_redaction': 'pii_redaction', + 'security_check': 'security_analysis', + 'seo_optimization': 'seo_optimization' + } + + section_name = section_map.get(input_type, 'other_operations') + + # Start with a clean structure + if not existing_data or not isinstance(existing_data, dict): + # Initialize brand new log structure + log_data = { + "info": { + "Creation Date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "Last Modified Date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "Total Operations": 1, # Starting with this operation + "Success Rate": 0, # No previous data + "Operation Summary": { + "doc_creation": 0, + "doc_conversion": 0, + "script_documentation": 0, + "security_analysis": 0, + "pii_redaction": 0, + "seo_optimization": 0 + }, + "Total Execution Time (in seconds)": 0 # No previous data + }, + section_name: [] # Initialize the current section + } + # Update the operation count for this type + log_data["info"]["Operation Summary"][section_name] = 1 + else: + # Use existing structure + log_data = existing_data + + # Ensure info section exists with proper structure + if "info" not in log_data: + log_data["info"] = { + "Creation Date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "Last Modified Date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "Total Operations": 0, + "Success Rate": 0, + "Operation Summary": { + "doc_creation": 0, + "doc_conversion": 0, + "script_documentation": 0, + "security_analysis": 0, + "pii_redaction": 0, + "seo_optimization": 0 + }, + "Total Execution Time (in seconds)": 0 + } + + # Add project folder information to each entry + 
for entry in new_data: + entry["Project Folder"] = output_folder + # Add user intent if provided + if user_intent: + entry["User Intent"] = user_intent + + # Create section if it doesn't exist + if section_name not in log_data: + log_data[section_name] = [] + + # Add new data to the appropriate section + if isinstance(new_data, list): + log_data[section_name].extend(new_data) + else: + log_data[section_name].append(new_data) + + # Update metrics in info section + log_data["info"]["Last Modified Date"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + log_data["info"]["Total Operations"] = sum(len(log_data.get(section, [])) for section in log_data if section != "info") + log_data["info"]["Success Rate"] = calculate_success_rate(log_data) + + for section in section_map.values(): + if section in log_data: + log_data["info"]["Operation Summary"][section] = len(log_data[section]) + + log_data["info"]["Total Execution Time (in seconds)"] = calculate_total_execution_time(log_data) + + # Write updated log to file with pretty formatting + with open(log_file, 'w') as f: + json.dump(log_data, f, indent=4) + +def collect_iteration_data(input_type, user_input, output_file, attempt, errors, start_time, success): + """Collect data for a single iteration.""" + return { + 'Timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + 'Type': input_type, + 'Input': user_input, + 'Output': output_file, + 'Attempt Number': attempt, + 'Errors Encountered': errors, + 'Execution Time (in seconds)': round(time.time() - start_time), # Rounded to nearest second + 'Result': "Success" if success else "Failure" + } + +def generate_title_from_description(description, display=False): + """Generate a title for the Exec Doc based on the workload description.""" + + title_prompt = """Create a concise, descriptive title for an Executable Document (Exec Doc) based on the following workload description. + The title should: + 1. Be clear and informative + 2. Start with an action verb (Deploy, Create, Configure, etc.) when appropriate + 3. Mention the main Azure service(s) involved + 4. Be formatted like a typical Azure quickstart or tutorial title + 5. Not exceed 10 words + + Return ONLY the title text, nothing else. 
+ + Workload description: + """ + + try: + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": "You are an AI specialized in creating concise, descriptive titles."}, + {"role": "user", "content": title_prompt + description} + ] + ) + + title = response.choices[0].message.content.strip() + # Remove any quotes, backticks or other formatting that might be included + title = title.strip('"\'`') + + # Only print the header if display is True + if display: + print_header(f"Title: {title}", "=") + + return title + except Exception as e: + print_message(f"\nError generating title: {e}") + return "Azure Executable Documentation Guide" # Default fallback title + +def perform_security_check(doc_path): + """Perform a comprehensive security vulnerability check on an Exec Doc.""" + if not os.path.isfile(doc_path): + print_message(f"\nError: The file {doc_path} does not exist.") + return None + + try: + with open(doc_path, "r") as f: + doc_content = f.read() + except Exception as e: + print_message(f"\nError reading document: {e}") + return None + + # Create output filename + doc_name = os.path.splitext(os.path.basename(doc_path))[0] + output_file = f"{doc_name}_security_report.md" + + print_message("\nPerforming comprehensive security vulnerability analysis...") + + # Use the LLM to analyze security vulnerabilities + security_prompt = """Conduct a thorough, state-of-the-art security vulnerability analysis of this Exec Doc. Analyze both static aspects (code review) and dynamic aspects (runtime behavior). + + Focus on: + 1. Authentication and authorization vulnerabilities + 2. Potential for privilege escalation + 3. Resource exposure risks + 4. Data handling and privacy concerns + 5. Network security considerations + 6. Input validation vulnerabilities + 7. Command injection risks + 8. Cloud-specific security threats + 9. Compliance issues with security best practices + 10. Secret management practices + + Structure your report with the following sections: + 1. Executive Summary - Overall risk assessment + 2. Methodology - How the analysis was performed + 3. Findings - Detailed description of each vulnerability found + 4. Recommendations - Specific remediation steps for each issue + 5. Best Practices - General security improvements + + For each vulnerability found, include: + - Severity (Critical, High, Medium, Low) + - Location in code + - Description of the vulnerability + - Potential impact + - Recommended fix with code example where appropriate + + Use the OWASP Top 10 and cloud security best practices as frameworks for your analysis. + Format the output as a professional Markdown document with appropriate headings, tables, and code blocks. 
+ + Document content: + """ + + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": "You are an AI specialized in security vulnerability assessment and report generation."}, + {"role": "user", "content": security_prompt + "\n\n" + doc_content} + ] + ) + + report_content = response.choices[0].message.content + + # Save the security report + try: + with open(output_file, "w") as f: + f.write(report_content) + print_message(f"\nSecurity analysis report saved to: {output_file}") + return output_file + except Exception as e: + print_message(f"\nError saving security report: {e}") + return None + +def perform_seo_check(doc_path, checklist_path="seo-checklist.md"): + """Perform an SEO optimization check on an Exec Doc using the SEO checklist.""" + if not os.path.isfile(doc_path): + print_message(f"\nError: The file {doc_path} does not exist.") + return None + + if not os.path.isfile(checklist_path): + print_message(f"\nError: The SEO checklist file {checklist_path} does not exist.") + return None + + try: + with open(doc_path, "r") as f: + doc_content = f.read() + + with open(checklist_path, "r") as f: + checklist_content = f.read() + except Exception as e: + print_message(f"\nError reading files: {e}") + return None + + # Create output filename + doc_name = os.path.splitext(os.path.basename(doc_path))[0] + output_file = f"{doc_name}_seo_optimized.md" + + print_message("\nPerforming SEO optimization check...") + + # Use the LLM to analyze and optimize the document for SEO + seo_prompt = """You are an SEO optimization expert. Analyze and optimize the provided document according to the SEO checklist. + + For each item in the checklist: + 1. Check if the document meets the criteria + 2. If not, optimize the document to meet the criteria + 3. Comment on the changes you made + + When optimizing: + - Preserve the document's original meaning and technical accuracy + - Make sure the document flows naturally and reads well + - Only change what needs to be changed for SEO purposes + + Provide your output as the fully optimized document. Return ONLY the updated document, nothing else. 
+ + SEO Checklist: + + {checklist_content} + + Document to optimize: + + {doc_content} + """ + + seo_prompt = seo_prompt.format( + checklist_content=checklist_content, + doc_content=doc_content + ) + + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": "You are an AI specialized in SEO optimization for technical documentation."}, + {"role": "user", "content": seo_prompt} + ] + ) + + optimized_content = response.choices[0].message.content + + # Save the optimized document + try: + with open(output_file, "w") as f: + f.write(optimized_content) + print_message(f"\nSEO optimized document saved to: {output_file}") + return output_file + except Exception as e: + print_message(f"\nError saving optimized document: {e}") + return None + +def analyze_user_intent(user_input, input_type): + """Analyze the user's intent based on their input.""" + if input_type == 'file': + # For file input, we'll analyze the file content + try: + with open(user_input, "r") as f: + file_content = f.read()[:1000] # Read first 1000 chars for analysis + prompt = f"Analyze this document beginning and summarize what the user is trying to do in one concise sentence:\n\n{file_content}" + except: + return "Convert an existing document to an executable format" + else: + # For workload descriptions, analyze the description + prompt = f"Analyze the following user request and summarize their core intent in one concise sentence:\n\n\"{user_input}\"" + + try: + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": "You analyze user requests and extract the core intent."}, + {"role": "user", "content": prompt + "\n\nStart with 'User intends to...' and keep it short."} + ] + ) + + intent = response.choices[0].message.content.strip() + # Remove any quotes or formatting + intent = intent.strip('"\'`') + print_message(f"\nUser intent: {intent}") + return intent + except Exception as e: + print_message(f"\nError analyzing user intent: {e}") + return "Execute commands related to Azure resources" # Default fallback + +def generate_script_description_with_content(script_path, context="", output_file_path=None): + """Generate descriptions around a shell script without modifying the code with custom output path.""" + if not os.path.isfile(script_path): + print_message(f"\nError: The file {script_path} does not exist.") + return None + + try: + with open(script_path, "r") as f: + script_content = f.read() + except Exception as e: + print_message(f"\nError reading script: {e}") + return None + + # Create default output filename if not provided + if not output_file_path: + script_name = os.path.splitext(os.path.basename(script_path))[0] + output_file_path = f"{script_name}_documented.md" + + # Prepare prompt for the LLM + script_prompt = f"""Create an Exec Doc that explains this shell script in detail. + DO NOT CHANGE ANY CODE in the script. Instead: + 1. Add clear descriptions before and after each functional block + 2. Explain what each section does + 3. Format as a proper markdown document with appropriate headings and structure + 4. 
Include all the necessary metadata in the front matter + + Script context provided by user: {context} + + Here is the script content: + ``` + {script_content} + ``` + """ + + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": script_prompt} + ] + ) + + doc_content = response.choices[0].message.content + + # Save the generated documentation + try: + with open(output_file_path, "w") as f: + f.write(doc_content) + remove_backticks_from_file(output_file_path) + return doc_content + except Exception as e: + print_message(f"\nError saving documentation: {e}") + return None + +def redact_pii_from_doc_with_path(doc_path, output_file_path=None): + """Redact PII from result blocks in an Exec Doc with custom output path.""" + if not os.path.isfile(doc_path): + print_message(f"\nError: The file {doc_path} does not exist.") + return None + + try: + with open(doc_path, "r") as f: + doc_content = f.read() + except Exception as e: + print_message(f"\nError reading document: {e}") + return None + + # Create default output filename if not provided + if not output_file_path: + doc_name = os.path.splitext(os.path.basename(doc_path))[0] + output_file_path = f"{doc_name}_redacted.md" + + # Use the LLM to identify and redact PII + redaction_prompt = """Redacting PII from the output helps protect sensitive information from being inadvertently shared or exposed. This is crucial for maintaining privacy, complying with data protection regulations, and furthering the company's security posture. + + Ensure result block(s) have all the PII (Personally Identifiable Information) stricken out from them and replaced with x's. + + **Example:** + + ```markdown + Results: + + + + ```output + {{ + "id": "/subscriptions/xxxxx-xxxxx-xxxxx-xxxxx/resourceGroups/MyResourceGroupxxx", + "location": "eastus", + "managedBy": null, + "name": "MyResourceGroupxxx", + "properties": {{ + "provisioningState": "Succeeded" + }}, + "tags": null, + "type": "Microsoft.Resources/resourceGroups" + }} + ``` + ``` + + >**Note:** The number of x's used to redact PII need not be the same as the number of characters in the original PII. Furthermore, it is recommended not to redact the key names in the output, only the values containing the PII (which are usually strings). + + >**Note:** Here are some examples of PII in result blocks: Unique identifiers for resources, Email Addresses, Phone Numbers, IP Addresses, Credit Card Numbers, Social Security Numbers (SSNs), Usernames, Resource Names, Subscription IDs, Resource Group Names, Tenant IDs, Service Principal Names, Client IDs, Secrets and Keys. + + Document content: + """ + + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": "You are an AI specialized in PII redaction. 
Either redact the PII or return the document as is - nothing els is acceptable."}, + {"role": "user", "content": redaction_prompt + "\n\n" + doc_content} + ] + ) + + redacted_content = response.choices[0].message.content + + # Save the redacted document + try: + with open(output_file_path, "w") as f: + f.write(redacted_content) + remove_backticks_from_file(output_file_path) + return redacted_content + except Exception as e: + print_message(f"\nError saving redacted document: {e}") + return None + +def perform_security_check_with_path(doc_path, output_file_path=None): + """Perform a comprehensive security vulnerability check on an Exec Doc with custom output path.""" + if not os.path.isfile(doc_path): + print_message(f"\nError: The file {doc_path} does not exist.") + return None + + try: + with open(doc_path, "r") as f: + doc_content = f.read() + except Exception as e: + print_message(f"\nError reading document: {e}") + return None + + # Create default output filename if not provided + if not output_file_path: + doc_name = os.path.splitext(os.path.basename(doc_path))[0] + output_file_path = f"{doc_name}_security_report.md" + + # Use the LLM to analyze security vulnerabilities + security_prompt = """Conduct a thorough, state-of-the-art security vulnerability analysis of this Exec Doc. Analyze both static aspects (code review) and dynamic aspects (runtime behavior). + + Focus on: + 1. Authentication and authorization vulnerabilities + 2. Potential for privilege escalation + 3. Resource exposure risks + 4. Data handling and privacy concerns + 5. Network security considerations + 6. Input validation vulnerabilities + 7. Command injection risks + 8. Cloud-specific security threats + 9. Compliance issues with security best practices + 10. Secret management practices + + Structure your report with the following sections: + 1. Executive Summary - Overall risk assessment + 2. Methodology - How the analysis was performed + 3. Findings - Detailed description of each vulnerability found + 4. Recommendations - Specific remediation steps for each issue + 5. Best Practices - General security improvements + + For each vulnerability found, include: + - Severity (Critical, High, Medium, Low) + - Location in code + - Description of the vulnerability + - Potential impact + - Recommended fix with code example where appropriate + + Use the OWASP Top 10 and cloud security best practices as frameworks for your analysis. + Format the output as a professional Markdown document with appropriate headings, tables, and code blocks. 
+ + Document content: + """ + + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": "You are an AI specialized in security vulnerability assessment and report generation."}, + {"role": "user", "content": security_prompt + "\n\n" + doc_content} + ] + ) + + report_content = response.choices[0].message.content + + # Save the security report + try: + with open(output_file_path, "w") as f: + f.write(report_content) + remove_backticks_from_file(output_file_path) + return report_content + except Exception as e: + print_message(f"\nError saving security report: {e}") + return None + +def perform_seo_check_with_path(doc_path, checklist_path="seo-checklist.md", output_file_path=None): + """Perform an SEO optimization check on an Exec Doc using the SEO checklist with custom output path.""" + if not os.path.isfile(doc_path): + print_message(f"\nError: The file {doc_path} does not exist.") + return None + + if not os.path.isfile(checklist_path): + print_message(f"\nError: The SEO checklist file {checklist_path} does not exist.") + return None + + try: + with open(doc_path, "r") as f: + doc_content = f.read() + + with open(checklist_path, "r") as f: + checklist_content = f.read() + except Exception as e: + print_message(f"\nError reading files: {e}") + return None + + # Create default output filename if not provided + if not output_file_path: + doc_name = os.path.splitext(os.path.basename(doc_path))[0] + output_file_path = f"{doc_name}_seo_optimized.md" + + # Use the LLM to analyze and optimize the document for SEO + seo_prompt = """You are an SEO optimization expert. Analyze and optimize the provided document according to the SEO checklist. + + For each item in the checklist: + 1. Check if the document meets the criteria + 2. If not, optimize the document to meet the criteria + 3. Comment on the changes you made + + When optimizing: + - Preserve the document's original meaning and technical accuracy + - Make sure the document flows naturally and reads well + - Only change what needs to be changed for SEO purposes + + Provide your output as the fully optimized document. Return ONLY the updated document, nothing else. 
+ + SEO Checklist: + + {checklist_content} + + Document to optimize: + + {doc_content} + """ + + seo_prompt = seo_prompt.format( + checklist_content=checklist_content, + doc_content=doc_content + ) + + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": "You are an AI specialized in SEO optimization for technical documentation."}, + {"role": "user", "content": seo_prompt} + ] + ) + + optimized_content = response.choices[0].message.content + + # Save the optimized document + try: + with open(output_file_path, "w") as f: + f.write(optimized_content) + remove_backticks_from_file(output_file_path) + return optimized_content + except Exception as e: + print_message(f"\nError saving optimized document: {e}") + return None + +# Add this function to get user feedback +def get_user_feedback(document_path): + """Get user feedback by allowing direct edits or text input.""" + # Extract attempt number from filename for better messaging + attempt_info = "" + if "attempt_" in document_path: + attempt_num = document_path.split("attempt_")[1].split("_")[0] + result = "successful" if "success" in document_path else "failed" + attempt_info = f"Attempt #{attempt_num} ({result})" + + print_header(f"FEEDBACK REQUESTED FOR {attempt_info}", "-") + print_message(f"Document location: {document_path}") + print_message("\nYou can provide feedback in two ways:") + print_message("1. Edit the document directly in your editor, then return here", prefix=" ✏️ ") + print_message("2. Type your suggestions below", prefix=" 💬 ") + + # Save original state to detect changes + with open(document_path, "r") as f: + original_content = f.read() + + # Get text feedback if any + feedback = input("\n>> Your feedback (or press Enter to keep going): ") + + # Check if file was modified + with open(document_path, "r") as f: + current_content = f.read() + + if current_content != original_content: + print_message("\n✅ Document changes detected and will be incorporated!") + # Restore original for proper AI processing + with open(document_path, "w") as f: + f.write(original_content) + # Return the edited content as feedback + return f"I've updated the document. 
Here is my revised version:\n\n{current_content}" + + # Always return CLI feedback, even if only text + return feedback if feedback.strip() else None + +def get_content_from_url(url): + """Extract content from a URL.""" + try: + response = requests.get(url, timeout=10) + response.raise_for_status() + + # Parse HTML content + soup = BeautifulSoup(response.text, 'html.parser') + + # Remove script and style elements + for script in soup(["script", "style"]): + script.extract() + + # Get text content + text = soup.get_text(separator='\n', strip=True) + + # Clean up text + lines = (line.strip() for line in text.splitlines()) + chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) + text = '\n'.join(chunk for chunk in chunks if chunk) + + return text + except Exception as e: + print_message(f"Error fetching content from URL {url}: {e}", color="red") + return f"[Failed to fetch content from {url}]" + +def get_content_from_file(file_path): + """Extract content from a local file.""" + try: + with open(file_path, 'r', encoding='utf-8') as file: + return file.read() + except Exception as e: + print_message(f"Error reading file {file_path}: {e}", color="red") + return f"[Failed to read file {file_path}]" + +def collect_data_sources(): + """Collect data sources from the user.""" + + choice = input("\nWould you like to add data sources the AI should use to generate the doc? (y/n): ").lower().strip() + + if choice != 'y': + return "" + + sources = [] + print_message("\nEnter data sources (URLs or local file paths) one per line. When finished, enter a blank line:") + + line_num = 1 + while True: + source = input(f"\n{line_num}. ").strip() + if not source: + break + + # Detect if it's a URL or file path + if source.startswith(('http://', 'https://')): + print_message("") + print_message(f"Fetching content from URL: {source}...", prefix="🔗 ") + content = get_content_from_url(source) + sources.append(f"--- Content from URL: {source} ---\n{content}\n") + else: + if os.path.exists(source): + print_message(f"Reading file: {source}...", prefix="📄 ") + content = get_content_from_file(source) + sources.append(f"\n--- Content from file: {source} ---\n{content}\n") + else: + print_message(f"File not found: {source}", color="red") + + line_num += 1 + + if sources: + print_message(f"\nCollected content from {len(sources)} source(s).", prefix="✓ ") + return "\n\n".join(sources) + else: + print_message("\nNo valid sources provided.", color="yellow") + return "" + +def requires_aks_cluster(doc_path): + """ + Determine if the Exec Doc requires an existing AKS cluster as a prerequisite. + If 'az aks create' is present, ask the LLM for clarification. + If not present, assume AKS cluster is a prerequisite. + """ + try: + with open(doc_path, "r") as f: + doc_content = f.read() + # Simple string match for 'az aks create' (case-insensitive) + if "az aks create" not in doc_content.lower(): + aks_prompt = f""" +You are an expert in Azure and Kubernetes documentation. Given the following markdown document, answer with ONLY 'yes' or 'no' (no punctuation, no explanation): Does this document require an existing Azure Kubernetes Service (AKS) cluster as a prerequisite (i.e., does it assume the cluster is already created and available for use, rather than creating it as part of the steps)? Only answer 'yes' or 'no'. 
+ +Document: +--- +{doc_content} +--- +""" + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": "You are an expert in Azure and Kubernetes documentation."}, + {"role": "user", "content": aks_prompt} + ] + ) + answer = response.choices[0].message.content.strip().lower() + print(answer) + return answer.startswith("y") + else: + # If 'az aks create' is present, assume AKS cluster is not a prerequisite + return False + except Exception: + return False + +def extract_aks_env_vars(doc_path): + """Use LLM to extract AKS-related environment variable names from the Exec Doc.""" + var_map = { + "resource_group": "RESOURCE_GROUP_NAME", + "cluster_name": "AKS_CLUSTER_NAME", + "region": "REGION" + } + try: + with open(doc_path, "r") as f: + doc_content = f.read() + aks_var_prompt = """ +You are an expert in Azure and Kubernetes documentation. Given the following markdown document, extract the actual environment variable names used for: +1. Resource group name +2. AKS cluster name +3. Region + +Return your answer as a JSON object with the following keys: +- resource_group +- cluster_name +- region + +If any variable is not found, use the default values: +- resource_group: RESOURCE_GROUP_NAME +- cluster_name: AKS_CLUSTER_NAME +- region: REGION + +ONLY return the JSON object, nothing else. + +Document: +--- +{doc} +--- +""".format(doc=doc_content[:6000]) # Limit to first 6000 chars for prompt size + + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": "You are an expert in Azure and Kubernetes documentation."}, + {"role": "user", "content": aks_var_prompt} + ] + ) + import json + answer = response.choices[0].message.content.strip() + # Try to parse the JSON from the LLM response + var_map_llm = json.loads(answer) + # Fallback to defaults if any key is missing + for k in var_map: + if not var_map_llm.get(k): + var_map_llm[k] = var_map[k] + return var_map_llm + except Exception: + return var_map + +# Replace the menu display in main() function +def main(): + while True: + print_header("WELCOME TO ADA - AI DOCUMENTATION ASSISTANT", "=") + print_message("This tool helps you write and troubleshoot Executable Documents efficiently!\n") + + print_header("MENU OPTIONS", "-") + print_message("1. Convert file to Exec Doc", prefix=" 📄 ") + print_message("2. Generate new Exec Doc from scratch", prefix=" 🔍 ") + print_message("3. Create descriptions for your shell script", prefix=" 📝 ") + print_message("4. Redact PII from your Doc", prefix=" 🔒 ") + print_message("5. Give security analysis report on your Doc", prefix=" 🛡️ ") + print_message("6. Perform SEO optimization check on your Doc", prefix=" 📊 ") + print_message("\nEnter 1-6 to select an option or any other key to exit.") + + choice = input("\n>> Your choice: ") + + + if choice not in ["1", "2", "3", "4", "5", "6"]: + print_message("\nThank you for using ADA! Goodbye!") + break + + if choice == "1": + user_input = input("\nEnter the path to your markdown file: ") + if not os.path.isfile(user_input) or not user_input.endswith('.md'): + print_message("\nInvalid file path or file type. Please provide a valid markdown file.") + continue + + # Add new option for interactive mode + interactive_mode = input("\nEnable interactive mode (you will be prompted for feedback after each step)? 
(y/n): ").lower() == 'y' + + input_type = 'file' + with open(user_input, "r") as f: + input_content = f.read() + input_content = f"CONVERT THE FOLLOWING EXISTING DOCUMENT INTO AN EXEC DOC. THIS IS A CONVERSION TASK, NOT CREATION FROM SCRATCH. DON'T EXPLAIN WHAT YOU ARE DOING BEHIND THE SCENES INSIDE THE DOC. PRESERVE ALL ORIGINAL CONTENT, STRUCTURE, AND NARRATIVE OUTSIDE OF CODE BLOCKS:\n\n{input_content}" + # We'll generate dependency files later in the process + dependency_files = [] + generate_deps = input("\nMake new files referenced in the doc for its execution? (y/n): ").lower() == 'y' + elif choice == "2": + user_input = input("\nDescribe your workload for the new Exec Doc: ") + if not user_input: + print_message("\nInvalid input. Please provide a workload description.") + continue + + workload_description = user_input.strip() + + # Ask for additional data sources + reference_data = collect_data_sources() + + # Add reference data to the workload description if available + if reference_data: + print_message("\nReference data will be incorporated into document generation.", prefix="📚 ") + user_input = f"{workload_description}\n\nREFERENCE DATA:\n{reference_data}" + else: + user_input = workload_description + + # Add new option for interactive mode + interactive_mode = input("\nEnable interactive mode (you will be prompted for feedback after each step)? (y/n): ").lower() == 'y' + + input_type = 'workload_description' + input_content = user_input + dependency_files = [] + generate_deps = True + elif choice == "3": + user_input = input("\nEnter the path to your shell script: ") + context = input("\nProvide additional context for the script (optional): ") + if not os.path.isfile(user_input): + print_message("\nInvalid file path. Please provide a valid shell script.") + continue + input_type = 'shell_script' + + # Get user intent + user_intent = analyze_user_intent(user_input, input_type) + + # Check for existing log.json + log_exists, log_path, existing_data = check_existing_log() + + if log_exists: + print_message(f"\nFound existing progress log. Will append results.") + else: + print_message(f"\nCreating new progress log.") + + # Create a new folder for outputs + output_folder = os.path.dirname(user_input) or "." 
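+            # (The "new folder" here is simply the script's own directory, or the current directory if no path was given; nothing new is created on disk.)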
+ print_message(f"\nAll files will be saved to: {output_folder}") + + # Initialize tracking + all_iterations_data = [] + start_time = time.time() + + # Generate documentation + print_message("\nGenerating documentation for shell script...") + output_file_name = os.path.join(output_folder, f"{os.path.splitext(os.path.basename(user_input))[0]}_documented.md") + + # Store output in the same directory as the source script + output_file_name = f"{os.path.splitext(user_input)[0]}_documented.md" + + # Call the function with modified path + output_content = generate_script_description_with_content(user_input, context, output_file_name) + + # Create iteration data + iteration_data = collect_iteration_data( + input_type, + user_input, + output_file_name, + 1, # First attempt + "", # No errors + start_time, + True # Assume success + ) + all_iterations_data.append(iteration_data) + + if log_exists: + update_progress_log(output_folder, all_iterations_data, input_type, user_intent, existing_data) + else: + update_progress_log(output_folder, all_iterations_data, input_type, user_intent) + + print_message(f"\nScript documentation saved to: {output_file_name}") + continue + elif choice == "4": + user_input = input("\nEnter the path to your Exec Doc for PII redaction: ") + if not os.path.isfile(user_input) or not user_input.endswith('.md'): + print_message("\nInvalid file path or file type. Please provide a valid markdown file.") + continue + input_type = 'pii_redaction' + + # Get user intent + user_intent = analyze_user_intent(user_input, input_type) + + # Check for existing log.json + log_exists, log_path, existing_data = check_existing_log() + + if log_exists: + print_message(f"\nFound existing progress log. Will append results.") + else: + print_message(f"\nCreating new progress log.") + + # Create output folder + doc_title = f"Documentation_for_{os.path.basename(user_input)}" + output_folder = os.path.dirname(user_input) or "." + + # Initialize tracking + all_iterations_data = [] + start_time = time.time() + + # Perform redaction + print_message("\nRedacting PII from document...") + + # Store output in the same directory as the source doc + output_file_name = f"{os.path.splitext(user_input)[0]}_redacted.md" + + # Call with modified path + output_content = redact_pii_from_doc_with_path(user_input, output_file_name) + + # Create iteration data + iteration_data = collect_iteration_data( + input_type, + user_input, + output_file_name, + 1, # First attempt + "", # No errors + start_time, + True # Assume success + ) + all_iterations_data.append(iteration_data) + + if log_exists: + update_progress_log(output_folder, all_iterations_data, input_type, user_intent, existing_data) + else: + update_progress_log(output_folder, all_iterations_data, input_type, user_intent) + + print_message(f"\nRedacted document saved to: {output_file_name}") + continue + elif choice == "5": + user_input = input("\nEnter the path to your Exec Doc for security analysis: ") + if not os.path.isfile(user_input) or not user_input.endswith('.md'): + print_message("\nInvalid file path or file type. Please provide a valid markdown file.") + continue + input_type = 'security_check' + + # Get user intent + user_intent = analyze_user_intent(user_input, input_type) + + # Check for existing log.json + log_exists, log_path, existing_data = check_existing_log() + + if log_exists: + print_message(f"\nFound existing progress log. 
Will append results.") + else: + print_message(f"\nCreating new progress log.") + + # Create a new folder for outputs + output_folder = os.path.dirname(user_input) or "." + print_message(f"\nAll files will be saved to: {output_folder}") + + # Initialize tracking + all_iterations_data = [] + start_time = time.time() + + # Perform security check + print_message("\nPerforming comprehensive security vulnerability analysis...") + + # Store output in the same directory as the source doc + output_file_name = f"{os.path.splitext(user_input)[0]}_security_report.md" + + # Call with modified path + output_content = perform_security_check_with_path(user_input, output_file_name) + + # Create iteration data + iteration_data = collect_iteration_data( + input_type, + user_input, + output_file_name, + 1, # First attempt + "", # No errors + start_time, + True # Assume success + ) + all_iterations_data.append(iteration_data) + + if log_exists: + update_progress_log(output_folder, all_iterations_data, input_type, user_intent, existing_data) + else: + update_progress_log(output_folder, all_iterations_data, input_type, user_intent) + + print_message(f"\nSecurity analysis complete. Report saved to: {output_file_name}") + continue + elif choice == "6": + user_input = input("\nEnter the path to your Exec Doc for SEO optimization: ") + checklist_path = input("\nEnter the path to the SEO checklist (default: seo-checklist.md): ") or "seo-checklist.md" + + if not os.path.isfile(user_input) or not user_input.endswith('.md'): + print_message(f"\nError: {user_input} is not a valid markdown file.") + continue + + input_type = 'seo_optimization' + + # Get user intent + user_intent = analyze_user_intent(user_input, input_type) + + # Check for existing log.json + log_exists, log_path, existing_data = check_existing_log() + + if log_exists: + print_message(f"\nFound existing progress log. Will append results.") + else: + print_message(f"\nCreating new progress log.") + + # Create a new folder for outputs + output_folder = os.path.dirname(user_input) or "." + print_message(f"\nAll files will be saved to: {output_folder}") + + # Initialize tracking + all_iterations_data = [] + start_time = time.time() + + # Perform SEO check + print_message("\nPerforming SEO optimization check...") + + # Store output in the same directory as the source doc + output_file_name = f"{os.path.splitext(user_input)[0]}_seo_optimized.md" + + # Call with modified path + output_content = perform_seo_check_with_path(user_input, checklist_path, output_file_name) + + # Create iteration data + iteration_data = collect_iteration_data( + input_type, + user_input, + output_file_name, + 1, # First attempt + "", # No errors + start_time, + True # Assume success + ) + all_iterations_data.append(iteration_data) + + if log_exists: + update_progress_log(output_folder, all_iterations_data, input_type, user_intent, existing_data) + else: + update_progress_log(output_folder, all_iterations_data, input_type, user_intent) + + print_message(f"\nSEO optimized document saved to: {output_file_name}") + continue + else: + print_message("\nInvalid choice. 
Exiting.") + continue + + # Generate title first if it's a workload description + if input_type == 'workload_description': + doc_title = generate_title_from_description(user_input, display=True) + else: + doc_title = os.path.splitext(os.path.basename(user_input))[0] + + # Analyze user intent + user_intent = analyze_user_intent(user_input, input_type) + + # Check for existing log.json + log_exists, log_path, existing_data = check_existing_log() + + if log_exists: + print_message(f"\nFound existing progress log. Will append results.") + else: + print_message(f"\nCreating new progress log.") + + # Create a new folder only for option 2 (workload description) + if input_type == 'workload_description': + output_folder = setup_output_folder(input_type, user_input, doc_title) + print_message(f"\nAll files will be saved to: {output_folder}") + else: + # For other options, use the source file's directory + output_folder = os.path.dirname(user_input) or "." + + # Initialize tracking + all_iterations_data = [] + + install_innovation_engine() + + max_attempts = 11 + attempt = 1 + # if input_type == 'file': + # output_file = f"{os.path.splitext(user_input)[0]}_converted.md" + # else: + # output_file = f"{generate_title_from_description(user_input)}_ai_generated.md" + + start_time = time.time() + errors_encountered = [] + errors_text = "" # Initialize errors_text here + success = False + dependency_files_generated = False + additional_instruction = "" + + while attempt <= max_attempts: + iteration_start_time = time.time() + iteration_errors = [] + made_dependency_change = False + output_file = os.path.join(output_folder, f"attempt_{attempt}.md") + if attempt == 1: + print_header(f"Attempt {attempt}: Generating Exec Doc", "-") + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": input_content} + ] + ) + output_file_content = response.choices[0].message.content + + with open(output_file, "w") as f: + f.write(output_file_content) + + # Generate dependency files after first creation + if generate_deps and not dependency_files_generated: + _, dependency_files = generate_dependency_files(output_file) + dependency_files_generated = True + + # Add this new line to transform the document after dependency generation + if dependency_files: + transform_document_for_dependencies(output_file, dependency_files) + else: + print_header(f"Attempt {attempt}: Fixing Exec Doc", "-") + + # Analyze if the error is in the main doc or in dependency files + error_analysis = analyze_error(errors_text, dependency_files) + + if error_analysis["type"] == "dependency_file" and error_analysis["file"]: + # If error is in a dependency file, try to fix it + dep_file = error_analysis["file"] + print_message(f"\nDetected issue in dependency file: {dep_file['filename']}") + update_dependency_file(dep_file, error_analysis["message"], output_file) + made_dependency_change = True # Set the flag + else: + # If error is in main doc or unknown, update the main doc + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": input_content}, + {"role": "assistant", "content": output_file_content}, + {"role": "user", "content": f"The following error(s) have occurred during testing:\n{errors_text}\n{additional_instruction}\n\nPlease carefully analyze these errors and make necessary corrections to the document to prevent them from happening again. 
Try to find different solutions if the same errors keep occurring. \nGiven that context, please think hard and don't hurry. I want you to correct the converted document in ALL instances where this error has been or can be found. Then, correct ALL other errors apart from this that you see in the doc. ONLY GIVE THE UPDATED DOC, NOTHING ELSE"} + ] + ) + output_file_content = response.choices[0].message.content + + with open(output_file, "w") as f: + f.write(output_file_content) + + # Check if we need to regenerate dependency files after updating main doc + if generate_deps and dependency_files_generated: + # Regenerate dependency files if major changes were made to the main doc + _, updated_dependency_files = generate_dependency_files(output_file) + if updated_dependency_files: + dependency_files = updated_dependency_files + + remove_backticks_from_file(output_file) + + aks_prereq = requires_aks_cluster(output_file) + if aks_prereq: + print_header(f"Running Innovation Engine using an existing AKS cluster since its a prerequisite to run this doc", "-") + var_names = extract_aks_env_vars(output_file) + ie_cmd = [ + "ie", "execute", output_file, + "--var", f"{var_names['resource_group']}=myAKSResourceGroup0de552", + "--var", f"{var_names['cluster_name']}=myAKSCluster0de552", + "--var", f"{var_names['region']}=canadacentral" + ] + else: + print_header(f"Running Innovation Engine tests", "-") + ie_cmd = ["ie", "test", output_file] + try: + result = subprocess.run(ie_cmd, capture_output=True, text=True, timeout=660) + except subprocess.TimeoutExpired: + print_message("\nThe 'ie test' command timed out after 11 minutes.") + errors_encountered.append("The 'ie test' command timed out after 11 minutes.") + attempt += 1 + continue # Proceed to the next attempt + + if result.returncode == 0: + print_message("All tests passed successfully!", prefix="✅ ") + success = True + + # Update the iteration file + iteration_file = os.path.join(output_folder, f"attempt_{attempt}_success.md") + os.rename(output_file, iteration_file) # ⬅️ move, don't duplicate + output_file = iteration_file + with open(iteration_file, "w") as f: + f.write(output_file_content) + + # Collect iteration data + iteration_data = collect_iteration_data( + input_type, + user_input, + iteration_file, + attempt, + "", # No errors in successful run + iteration_start_time, + True + ) + all_iterations_data.append(iteration_data) + + print_header(f"Producing Exec Doc...", "-") + + if input_type == 'file': + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": input_content}, + {"role": "assistant", "content": output_file_content}, + {"role": "user", "content": f"Take the working converted Exec Doc and merge it with the original source document provided for conversion as needed. Ensure that every piece of information outside of code blocks – such as metadata, descriptions, comments, instructions, and any other narrative content – is preserved. The final output should be a comprehensive document that retains all correct code blocks as well as the rich contextual and descriptive details from the source doc, creating the best of both worlds. 
ONLY GIVE THE UPDATED DOC, NOTHING ELSE"} + ] + ) + output_file_content = response.choices[0].message.content + + iteration_file = os.path.join(output_folder, f"attempt_{attempt}_{'success' if success else 'failure'}.md") + with open(iteration_file, "w") as f: + f.write(output_file_content) + with open(output_file, "w") as f: + f.write(output_file_content) + + # Generate dependency files for successful docs if not already done + if (input_type == 'file' or input_type == 'workload_description') and not dependency_files_generated and generate_deps: + print_message("\nGenerating dependency files for the successful document...") + _, dependency_files = generate_dependency_files(output_file) + + remove_backticks_from_file(output_file) + break + else: + error_log = get_last_error_log() + errors_encountered.append(error_log.strip()) # Keep for overall tracking + iteration_errors.append(error_log.strip()) # For this iteration only + errors_text = "\n\n ".join(errors_encountered) + iteration_errors_text = "\n\n ".join(iteration_errors) + + # Process and categorize error messages + error_counts = defaultdict(int) + # Extract the core error message - focus on the actual error type + error_key = "" + for line in error_log.strip().split('\n'): + if 'Error:' in line: + error_key = line.strip() + break + + if not error_key and error_log.strip(): + error_key = error_log.strip().split('\n')[0] # Use first line if no clear error + + # Store this specific error type and count occurrences + if error_key: + error_counts[error_key] += 1 + for prev_error in errors_encountered[:-1]: # Check previous errors + if error_key in prev_error: + error_counts[error_key] += 1 + + # Progressive strategies based on error repetition + strategies = [ + "Look carefully at the exact error message and fix that specific issue.", + "Simplify the code block causing the error. Break it into smaller, simpler steps.", + "Remove the result block from the code block causing the error.", + "Try a completely different command or approach that achieves the same result.", + "Fundamentally reconsider this section. Replace it with the most basic, reliable approach possible.", + "Remove the problematic section entirely and rebuild it from scratch with a minimalist approach." + ] + + # Determine which strategy to use based on error count + if error_key in error_counts: + strategy_index = min(error_counts[error_key] - 1, len(strategies) - 1) + current_strategy = strategies[strategy_index] + + additional_instruction = f""" + Error '{error_key}' has occurred {error_counts[error_key]} times. + + NEW STRATEGY: {current_strategy} + + Previous approaches aren't working. Make a significant change following this strategy. + Focus on reliability over complexity. Remember to provide valid JSON output where needed. 
+ """ + else: + additional_instruction = "" + + print_message(f"\nError: {error_log.strip()}") + + # Update the iteration file + iteration_file = os.path.join(output_folder, f"attempt_{attempt}_failure.md") + os.rename(output_file, iteration_file) # ⬅️ move, don't duplicate + output_file = iteration_file + with open(iteration_file, "w") as f: + f.write(output_file_content) + + # Collect iteration data + iteration_data = collect_iteration_data( + input_type, + user_input, + iteration_file, + attempt, + iteration_errors_text, # Only errors from this iteration + iteration_start_time, + False + ) + all_iterations_data.append(iteration_data) + + if 'interactive_mode' in locals() and interactive_mode: + feedback = get_user_feedback(iteration_file) + if feedback: + print_message("\nIncorporating your feedback for the next attempt...") + + # If the user edited the doc, feedback will start with "I've updated the document..." + if feedback.startswith("I've updated the document. Here is my revised version:"): + # Extract the revised content + revised_content = feedback.split("Here is my revised version:", 1)[1].strip() + # Compute the diff between previous and revised content + diff = '\n'.join(difflib.unified_diff( + output_file_content.splitlines(), + revised_content.splitlines(), + fromfile='before.md', + tofile='after.md', + lineterm='' + )) + # Use the diff as context for the LLM + feedback_prompt = ( + "The user has directly edited the document. " + "Here is the unified diff between the previous and revised version:\n\n" + f"{diff}\n\n" + "Update the document to incorporate these changes, ensuring all Exec Doc requirements and formatting rules are still met. " + "ONLY GIVE THE UPDATED DOC, NOTHING ELSE." + ) + # Use the revised content as the new output_file_content for next run + output_file_content = revised_content + else: + # CLI feedback: pass as explicit instruction + feedback_prompt = ( + "Please incorporate the following feedback into the document while maintaining all Exec Doc requirements and formatting rules:\n\n" + f"{feedback}\n\n" + "ONLY GIVE THE UPDATED DOC, NOTHING ELSE." + ) + + # Call the LLM with feedback context + response = client.chat.completions.create( + model=deployment_name, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": input_content}, + {"role": "assistant", "content": output_file_content}, + {"role": "user", "content": feedback_prompt} + ] + ) + output_file_content = response.choices[0].message.content + + # Save the updated content back to the file + with open(iteration_file, "w") as f: + f.write(output_file_content) + + print_message("\nFeedback incorporated. 
Running tests with your changes...") + else: + iteration_feedback = "" + + # Only increment attempt if we didn't make a dependency change + if not made_dependency_change: + attempt += 1 + success = False + + if log_exists: + update_progress_log(output_folder, all_iterations_data, input_type, user_intent, existing_data) + else: + update_progress_log(output_folder, all_iterations_data, input_type, user_intent) + + # Replace this section after the while loop + if success: + # Don't create a duplicate file if attempt was successful - just copy/rename the last one + last_success_file = os.path.join(output_folder, f"attempt_{attempt}_success.md") + # final_file = os.path.join(output_folder, f"FINAL_OUTPUT_success.md") + # shutil.copy2(last_success_file, final_file) + # Update output_file to point to final file + output_file = last_success_file + else: + # For failures, create a new final file + final_file = os.path.join(output_folder, f"FINAL_OUTPUT_failure_final.md") + with open(final_file, "w") as f: + f.write(output_file_content) + # Update output_file to point to final file + output_file = final_file + + # # Update output_file variable to point to the final file + # output_file = final_file + + if attempt > max_attempts: + print_message(f"\n{'#'*40}\nMaximum attempts reached without passing all tests.\n{'#'*40}") + + end_time = time.time() + execution_time = end_time - start_time + + print_message(f"\nThe updated file is stored at: {output_file}\n") + +if __name__ == "__main__": + main() diff --git a/tools/requirements.txt b/tools/requirements.txt new file mode 100644 index 000000000..f55b87e47 --- /dev/null +++ b/tools/requirements.txt @@ -0,0 +1,8 @@ +azure-identity>=1.17.1 +beautifulsoup4>=4.12.2 +openai>=1.65.1 +requests>=2.31.0 +requests-kerberos>=0.12.0 +requests-ntlm>=1.1.0 +requests-toolbelt>=1.0.0 +PyYAML>=5.4.1 diff --git a/tools/seo-checklist.md b/tools/seo-checklist.md new file mode 100644 index 000000000..c84f833a3 --- /dev/null +++ b/tools/seo-checklist.md @@ -0,0 +1,119 @@ +--- +title: SEO Checklist for Documentation +description: Use this SEO checklist as a useful final review when you're developing new content or publishing updates to current content. +ms.date: 07/27/2023 +author: ps0394 +ms.author: paulsanders +ms.topic: contributor-guide +ms.service: learn +ms.custom: internal-contributor-guide +--- + +# SEO checklist + +This checklist is a short reference to the basics of on-page SEO for contributors to the Learn platform. Review this checklist prior to publishing new or updated content once you do your keyword research and establish your user intent. + +## URL + +The page URL displays on the search engine results page (SERP) and contributes to both rank and relevance. The URL should: + +> [!div class="checklist"] +> * Include the primary keyword. +> * Follow the URL naming convention for your site. +> * Should be 75 characters max using lowercase letters, numbers, and hyphens. +> * Once chosen, it shouldn't be changed as it affects tracking and could send conflicting signals to search engines. + +Learn more about [how to optimize URLs](/help/platform/seo-optimize-urls). + +## Meta title + +The meta title is displayed on the SERP and browser bar heading. It has the greatest impact on search rank and click-through-rate (CTR). The meta title isn't the same as the H1. The meta title should: + +> [!div class="checklist"] +> * Include the primary keyword. +> * Be written in title case (capitalize every word except for words like a, an, of, the, etc.) 
+> * Be 30-65 characters in length. (The entire title is the metadata title + the metadata titleSuffix + the suffix "| Microsoft Learn." Some teams use titleSuffix to add the product name. If your team doesn't use titleSuffix, then add the product brand name into the metadata title.)
+> * Show enough information for users to determine relevance.
+> * Describe a specific scenario or benefit.
+
+Learn more about [how to optimize meta titles](/help/platform/seo-meta-title).
+
+## Meta description
+
+The meta description is a short block of text (usually one to three sentences) that gives users a preview of what your page is about. It should be a summary that entices users to click. The meta description allows the search engine and user to understand the page content. The meta description should:
+
+> [!div class="checklist"]
+> * Include the primary keyword.
+> * Be between 120 and 165 characters including spaces.
+> * Describe the specific scenario and/or benefit of the article.
+> * Entice users to click through to the page.
+> * Include a call-to-action to improve the click-through-rate.
+
+Learn more about [how to optimize meta descriptions](/help/platform/seo-meta-description).
+
+## Main heading (H1)
+
+The H1 is the main heading at the top of the article. It's the second most important text string for search rank and relevance. It also helps crawlers and copilots make sense of the page content. The H1 isn't the same as the meta title. The H1 should:
+
+> [!div class="checklist"]
+> * Include the primary keyword.
+> * Be unique and specific (although it's acceptable to copy the meta title).
+
+Learn more about [how to optimize main headings](/help/platform/seo-main-headings).
+
+## Introduction
+
+The introduction is the first paragraph of the article. It should explain what the article is about and show the benefit to the user. It also helps crawlers and copilots make sense of the page content. The introduction should:
+
+> [!div class="checklist"]
+> * Include the primary keyword in the first or second sentence.
+> * Describe what the article is about early in the paragraph.
+> * Describe the specific scenario and/or benefit of the article.
+> * Be clear and concise.
+
+Learn more about [how to optimize introductions](/help/platform/seo-page-content-structure).
+
+## Subheadings (H2-H3)
+
+H2s divide the primary sections on a page. Search engines often display H2s as extra links below the meta description. They also help crawlers and copilots make sense of the page content. Subheadings should:
+
+> [!div class="checklist"]
+> * Include secondary keywords.
+> * Be descriptive of a section's content.
+> * Use a heading hierarchy (H2-H3) without skipping a level.
+
+Learn more about [how to optimize subheadings](/help/platform/seo-page-content-structure).
+
+## Image alt text
+
+Although alt text is primarily an accessibility feature, it can also improve SEO. Alt text can get a page one result in the image carousel, even if your article isn't on page one of the search results otherwise. Alt text should:
+
+> [!div class="checklist"]
+> * Include the primary keyword and/or secondary keywords when possible.
+> * Describe the contents of the image clearly and concisely.
+> * Be between 40 and 150 characters.
+> * Increase accessibility for users.
+> * Avoid language like "image of" or "picture of."
+
+Learn more about [how to optimize image alt text](/help/platform/seo-page-content-structure).
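+
+For example, alt text that follows this guidance might look like the following (the screenshot subject and filename are illustrative placeholders, not taken from a real article):
+
+```markdown
+![The Create a resource group pane in the Azure portal, with the Review + create button highlighted.](media/create-resource-group-portal.png)
+```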
+
+## Image filenames
+
+Like alt text, image filenames can get a page one result in the image carousel, even if your article isn't on page one of the search results otherwise. Image filenames should:
+
+> [!div class="checklist"]
+> * Include the primary keyword and/or secondary keywords.
+> * Use letters, numbers, and hyphens only.
+> * Be a maximum of 80 characters.
+> * Describe the image.
+
+Learn more about [how to optimize image filenames](/help/platform/seo-page-content-structure).
+
+## Search Results Preview
+
+The Learn Authoring Pack for Visual Studio Code includes a Search Results Preview to help you verify that your title and description are helpful to users when returned in search results.
+
+> [!div class="checklist"]
+> * Access the Command Palette with **ALT + M**.
+> * Select **Search Results Preview**.
+
+Learn more about the [anatomy of a webpage](/help/platform/seo-anatomy-webpage).
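+
+## Worked example
+
+As a quick reference, here is a hypothetical front matter block that satisfies the meta title and meta description guidance above. The title, description, and date are placeholders, and other required metadata is omitted for brevity:
+
+```markdown
+---
+title: Create a Linux Virtual Machine in the Azure Portal
+description: Learn how to create a Linux virtual machine in the Azure portal. Follow this quickstart to deploy the VM and connect to it in minutes.
+ms.date: 07/27/2023
+---
+```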