diff --git a/build/linux/installer/datafiles/base_container.data b/build/linux/installer/datafiles/base_container.data
index bdacf647d..fc50075ea 100644
--- a/build/linux/installer/datafiles/base_container.data
+++ b/build/linux/installer/datafiles/base_container.data
@@ -287,6 +287,8 @@
 chmod 666 /var/opt/microsoft/docker-cimprov/log/arc_k8s_cluster_identity.log
 touch /var/opt/microsoft/docker-cimprov/log/fluentd.log
 chmod 666 /var/opt/microsoft/docker-cimprov/log/fluentd.log
+touch /var/opt/microsoft/docker-cimprov/log/testing-podinventory.json
+chmod 666 /var/opt/microsoft/docker-cimprov/log/testing-podinventory.json

 %Postuninstall_10
 # If we're an upgrade, skip all of this cleanup
diff --git a/kubernetes/linux/Dockerfile b/kubernetes/linux/Dockerfile
index 1ae7bef61..b0ceda4ae 100644
--- a/kubernetes/linux/Dockerfile
+++ b/kubernetes/linux/Dockerfile
@@ -17,7 +17,7 @@
 ENV KUBE_CLIENT_BACKOFF_BASE 1
 ENV KUBE_CLIENT_BACKOFF_DURATION 0
 ENV RUBY_GC_HEAP_OLDOBJECT_LIMIT_FACTOR 0.9
 RUN /usr/bin/apt-get update && /usr/bin/apt-get install -y libc-bin wget openssl curl sudo python-ctypes init-system-helpers net-tools rsyslog cron vim dmidecode apt-transport-https gnupg && rm -rf /var/lib/apt/lists/*
-COPY setup.sh main.sh defaultpromenvvariables defaultpromenvvariables-rs defaultpromenvvariables-sidecar mdsd.xml envmdsd $tmpdir/
+COPY kubeclient-4.9.2.gem setup.sh main.sh defaultpromenvvariables defaultpromenvvariables-rs defaultpromenvvariables-sidecar mdsd.xml envmdsd $tmpdir/
 WORKDIR ${tmpdir}
 # copy docker provider shell bundle to use the agent image
diff --git a/kubernetes/linux/dockerbuild/existingClusterOnboarding.json b/kubernetes/linux/dockerbuild/existingClusterOnboarding.json
new file mode 100644
index 000000000..73c8398dd
--- /dev/null
+++ b/kubernetes/linux/dockerbuild/existingClusterOnboarding.json
@@ -0,0 +1,44 @@
+{
+  "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
+  "contentVersion": "1.0.0.0",
+  "parameters": {
+    "aksResourceId": {
+      "type": "string",
+      "metadata": {
+        "description": "AKS Cluster Resource ID"
+      }
+    },
+    "aksResourceLocation": {
+      "type": "string",
+      "metadata": {
+        "description": "Location of the AKS resource e.g. \"East US\""
+      }
+    },
+    "workspaceResourceId": {
+      "type": "string",
+      "metadata": {
+        "description": "Azure Monitor Log Analytics Resource ID"
+      }
+    }
+  },
+  "resources": [
+    {
+      "name": "[split(parameters('aksResourceId'),'/')[8]]",
+      "type": "Microsoft.ContainerService/managedClusters",
+      "location": "[parameters('aksResourceLocation')]",
+      "apiVersion": "2018-03-31",
+      "properties": {
+        "mode": "Incremental",
+        "id": "[parameters('aksResourceId')]",
+        "addonProfiles": {
+          "omsagent": {
+            "enabled": false,
+            "config": {
+              "logAnalyticsWorkspaceResourceID": "[parameters('workspaceResourceId')]"
+            }
+          }
+        }
+      }
+    }
+  ]
+}
\ No newline at end of file
diff --git a/kubernetes/linux/dockerbuild/existingClusterParam.json b/kubernetes/linux/dockerbuild/existingClusterParam.json
new file mode 100644
index 000000000..6fdcc601e
--- /dev/null
+++ b/kubernetes/linux/dockerbuild/existingClusterParam.json
@@ -0,0 +1,15 @@
+{
+  "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#",
+  "contentVersion": "1.0.0.0",
+  "parameters": {
+    "aksResourceId": {
+      "value": "/subscriptions/b4b0cbab-64dc-42bc-9d95-ea7c8d0cf188/resourcegroups/khushi-watch-test/providers/Microsoft.ContainerService/managedClusters/khushi-watch-test"
+    },
+    "aksResourceLocation": {
+      "value": "West US 2"
+    },
+    "workspaceResourceId": {
+      "value": "/subscriptions/b4b0cbab-64dc-42bc-9d95-ea7c8d0cf188/resourcegroups/defaultresourcegroup-scus/providers/microsoft.operationalinsights/workspaces/defaultworkspace-b4b0cbab-64dc-42bc-9d95-ea7c8d0cf188-scus"
+    }
+  }
+}
\ No newline at end of file
diff --git a/kubernetes/linux/dockerbuild/khushi-watch-test b/kubernetes/linux/dockerbuild/khushi-watch-test
new file mode 100644
index 000000000..69729d1ec
--- /dev/null
+++ b/kubernetes/linux/dockerbuild/khushi-watch-test
@@ -0,0 +1,20 @@
+apiVersion: v1
+clusters:
+- cluster:
+    certificate-authority-data:
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUU2RENDQXRDZ0F3SUJBZ0lRWDdub2hxOWthLzY4OHBzSHh0b1RHekFOQmdrcWhraUc5dzBCQVFzRkFEQU4KTVFzd0NRWURWUVFERXdKallUQWdGdzB5TVRBM01UWXlNekl4TVRoYUdBOHlNRFV4TURjeE5qSXpNekV4T0ZvdwpEVEVMTUFrR0ExVUVBeE1DWTJFd2dnSWlNQTBHQ1NxR1NJYjNEUUVCQVFVQUE0SUNEd0F3Z2dJS0FvSUNBUUMvCmR1aE1wZkxhWURuTHoyUjNrVW55M2hwbHREdGQ4cmlvakE1ZUZteXUveUJZcUl5Qi9LUDNMQWd4ajVwLzRtYlQKT2RqT0pseElKcW8vRlVGdU51b1E3YkRpSlVpZ0I3UVVQVVl0dVZCTUtFUHpuZk8yUFB1VXY3UVloZjNXMmpQegp1cUNGWkFRVnZUeDZmVTZLRjNhNkt1aC9pSkZLY2c1YnZGUVpnOWVNSy91QjlBdXZUSEx0aS9VRUJjWmM5UXBICm9hbklsUGo1N3dVV01zV25veWxRTndwSk1WczE4aDhaMStYS1pYakkvN0dVRTZhaEZwTVdmaU5GWDgybGNlbEQKQ0NwdzZSL3NiRyt1dXRuRURXQVpPRGFSOERpdFRBc3hyUHJMNHdDWXI5d05tY0tSQ3JwUXFsb3p6WFNGZGN5bApwbXZDMExYOVUwSGdZOURMMTZHaDUxbHRrSnN0dS9velhCd1BSM1JrdVVJZ3Q3WmpkOERSTmtlbVVvdGlyZHkvCm16VTJmb2hpSWE0a3VKR1JjVnR0RHZ0eVFSYTNPMzNHMzNQRVJTbkxCblpHSzRHMkVMbTBxTDh1WVFOZ1IveHoKeWlTdmozSlUrOVZoRFplaFlyOWNMWXFuSXRLci84anhVSFozU3RkNjhpSkFvQUJwZzFFWFRDc3pxamdxckJnagpLaG5YNHFCTlpQVmVUZjNrQjVkZ0VsZlJtMXIvcGsxblpFQmMrQllkcjBoSmNKTGgzSEYyR2hCM1BLc1NwUXE0CmVibVhQalZmanNxeFhSYVN2MTdzUTloSmhHRXN6WlBHU29SUjA1b3UwaU04ZHF3M0hjTnI4NzJuYW5uK2pXK24KTS9EaWxrSnJRY25WUVJLc1IxVkJ2ZnhKNjJzOFA2Ynl6V2FWL3NMNm1RSURBUUFCbzBJd1FEQU9CZ05WSFE4QgpBZjhFQkFNQ0FxUXdEd1lEVlIwVEFRSC9CQVV3QXdFQi96QWRCZ05WSFE0RUZnUVVCRG1ORE9VczRrVE9OLzlVClI3bkQ0MGNnR1VVd0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dJQkFEdGZ6M2pjV2lNeE1zaDIyT3I1RWV2R1pTRU0Kd0JDZmQ0QWMwUXlENkRmVkdaUFlwZmU3aDIzV1dKUWs5T05ncUszdFJhY1FUTTA0N2R3WUJGL3FTOEU2M3pKSQpINmZxL1lwQW9adHpZRkdaZTU1bUFaWWFQTlZWeDdvaHdoMzV2RTdFY0JaZ3JuS1pUN0EzOXlOWHRka2F2cThYCmhHQzdMV1lIMUxjQlhxckxHZlkvL1FpNnYraXdzZWlFdDkvUnRrSjhJK0VFd2JTWU9EazY5Z1lhSUtyK2RJaDgKbXdZM0d3VTZJUXl6MkFqb3pCaEcxbzQ1dzh1K1dKOTVyZEtrS0lEVG0ySTBNMEQ3dWhnZ0ZDT2srY25sOWNtSQozNkdqM2hIMlNFWC9Ea0tKN0orZ1o4bG1rc01HbGh3bGZ3K3FqRXVkVEQ4bCs3a1QrWWpMbEl3dTZRVkhvOGxWCkwyZ3diTG9tTzNJdU9IdHo4VUkxNVRJYSt4QmNsNDVSek1ZWWo0NHcvY2JiZStxUDE5THFQSUZXaXl1OG51UE4KUVhtcUtVR2wyOE83RmtteWFOckpuQURKNmZ5YU5nY21Bb1RrSU1oSk5IMHd2UkNjcTBGaXdJNkFYSGhEczJXYQpDNUkvS0c4NDg3dnlIRUJON3RhcldLN2RMMEhjcWt5cGl4cVkyNERRSnZLVUlIdUlHVk5sMGtza1Z4OWRKekxOCnkrd25SWndUTEVxRkNLSlMzU1F4eFl5Z0xLd09uZGVGTG1TSG1vbGJYM3IzYllDdkZLQ3BlNFA2V1Iwb0M1b20KOGZvMUpwLzI4SnF3K091RTJQZkFhRGFBaFNtK2tmb3V3WnIyWUNHSm50RjFTOEdlSFQxanFFRkJSWnp1UWd3eQp3R3cySnB0cjFEdmJvWmhaCi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K + server: https://khushi-watch-test-dns-9d093a4f.hcp.westus2.azmk8s.io:443 + name: khushi-watch-test +contexts: +- context: + cluster: khushi-watch-test + user: clusterUser_khushi-watch-test_khushi-watch-test + name: khushi-watch-test +current-context: khushi-watch-test +kind: Config +preferences: {} +users: +- name: clusterUser_khushi-watch-test_khushi-watch-test + user: + client-certificate-data: 
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUZIakNDQXdhZ0F3SUJBZ0lSQVArdktyZDVaRms2ajFycDVCS01adjh3RFFZSktvWklodmNOQVFFTEJRQXcKRFRFTE1Ba0dBMVVFQXhNQ1kyRXdIaGNOTWpFd056RTJNak15TVRFNFdoY05Nak13TnpFMk1qTXpNVEU0V2pBdwpNUmN3RlFZRFZRUUtFdzV6ZVhOMFpXMDZiV0Z6ZEdWeWN6RVZNQk1HQTFVRUF4TU1iV0Z6ZEdWeVkyeHBaVzUwCk1JSUNJakFOQmdrcWhraUc5dzBCQVFFRkFBT0NBZzhBTUlJQ0NnS0NBZ0VBcDhoV0piY3F4emFrejkrZjdRdG0KV0xvVzB3RFo0czlacmlDM25xZ2ZzaFBiYkMwSWlCdGxtQ05lYU1pRTVXeDBnby90VXQ5VUtvUlAwbzQ0Rlh2SApEZGVhU1J1OEllZnh2K1VUOXpkR0VwZDc4ejFBdlF3djhKYysyUjk5b2RnbWtYS0ZXeVhZb0UxRXpSMk9mbVVJCnVOVnNlNldoUHdCbFhNWXZmL0IwYU0rcHg0QzNZOGxnT2p4QTdybFRYYW1pNUlIemxZclQ0aVdtbHI1c05LczAKbkJkVWpicGtsdngxZk5DalR4S0tRaGFNRGdMc1dUUXBaVjI1b0FzYXhIcUx0Z0g3TytZaGZ2bnBJNUxLV3JNego1by9TQ3Vya0phK1RyWnVTbnJKWmgvR21NK09wczhXT1NJYVJpcEtLZlVtajNONzlUeEhkWTJVdThpNnZiTS9MCjhHMVZVYThRV3BnZ0wrbmtSWWRZQmdjSENKVjJZRUovSnlmSm9oK3c0RFF1aUV5RmZyNndmWlpKZGNnRlVRcTgKQllJZ29HNnZGQ3hMNEZwRW5ueWJHcFBLemczVUtmVmxCMGJ0Zkw0TjRZM3ZPYzhPeDgrWHZBWFMzSDdXV1I2TQpUeTFvMGxua1RRYW1uUVo1U2Y4TFU0OERHU2R5NFIzUTdiNW9UTU5TYmVHc0t0WXlxYURPQjE4WW0zTE9CQ0dvCnU4VE4vTFlsaFRHY0NSVklTOFE2SEoyYUZ0L1Z6b04zbXZlYitsUDF3LzNNaTltYjIyOTFsQ25hUG9pcE1memUKdFpoVUNOUURmUGpVRlFyS0dKODNhVzZkTWRRTVY0dXNzWG1Dbno5NXlWeEx6eEtRSVEzM0RKYWhVNlQ1VUtxRQpqY3ZqY09MWmFTdDE5Z21DMXh5QlF4OENBd0VBQWFOV01GUXdEZ1lEVlIwUEFRSC9CQVFEQWdXZ01CTUdBMVVkCkpRUU1NQW9HQ0NzR0FRVUZCd01DTUF3R0ExVWRFd0VCL3dRQ01BQXdId1lEVlIwakJCZ3dGb0FVQkRtTkRPVXMKNGtUT04vOVVSN25ENDBjZ0dVVXdEUVlKS29aSWh2Y05BUUVMQlFBRGdnSUJBRS9XNDdtU21PS0ZSYmJQV01aVQppTmROdmpzOVQ5QTVSc25WYk9uKzN5WFdGSk9pM0JucFBlOHNRc094MTlva0c4K2pIN2lpZWFmOW1SNEtUKzFJCjNMSy9uMWRNTVUzZXRTdE9zcmpob2ZYS3dNbVB3YklDYXAwaGs1T2w5Wk0xaFVIWEtGWGVMME9qQ2piVG5wa0gKYzdTd0JEd1NLcS83bDhYaWdYN29wclFxRTZUNFRLMWI5SDFQQ1JjZWw0UFJZZ0lwTXdHTWZOckFuK2F3dkJpVApZTmpudXljdEpHdXlXT2xGYVl6V281Skh3b2JSQUpJaG9sN1RFckM5Z28yUlpTL1dwSXNYbHhLREFaSnNmUGFkCmsrSmQzNlV6eU1QRnNvUHZqUWhlajlIVnl0Wk8rY1piYVlYbkd1eWx0U3Y2dVErUTd2cEtJVmoxTEN5cVJUL0kKWkxjM2ovMTNUNGFwLzExKytOaHRYNmtRM0JJRDdQS2VXd2YxeFF2NVRPTExIbG5VMTQ0WXI3b1NqK1QzWEdCSAp0bENqdHF5clFxMERZaitGaVhyb3JuUGJRMFFybG0zVE5lTVRLbktUSzdWMDl4a2FMRW8xalgrOHkxMzkwUzVMCk03bWRVTmlzS1A4T09EUTNMSVVETWdYRWEwWFFxdnJmMnJlVG9pVFhwZlJaK3JZU0RMVitsT3VoYzhlSFN6aVQKWXE0dDhJMlplMHdUUzdrTkhiMzlPYVVHYTc1Y2ZEdHZBcWxNWUk3UXhkU3hVYXNFd2Y0NytwUXdMZEl0UWFXcgp3cFZ2dk5nNnBIZEdVbG1VQVJ0dEhNMnR5YWhRNERXVmFoM0lubFlDYzRqVEtuNGc1aldxZkhBNWFsOEtNTWhQCmpNVWFVRGMzVkN5RGFacHRDeWRHQUgrLwotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== + client-key-data: 
LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlKS0FJQkFBS0NBZ0VBcDhoV0piY3F4emFrejkrZjdRdG1XTG9XMHdEWjRzOVpyaUMzbnFnZnNoUGJiQzBJCmlCdGxtQ05lYU1pRTVXeDBnby90VXQ5VUtvUlAwbzQ0Rlh2SERkZWFTUnU4SWVmeHYrVVQ5emRHRXBkNzh6MUEKdlF3djhKYysyUjk5b2RnbWtYS0ZXeVhZb0UxRXpSMk9mbVVJdU5Wc2U2V2hQd0JsWE1ZdmYvQjBhTStweDRDMwpZOGxnT2p4QTdybFRYYW1pNUlIemxZclQ0aVdtbHI1c05LczBuQmRVamJwa2x2eDFmTkNqVHhLS1FoYU1EZ0xzCldUUXBaVjI1b0FzYXhIcUx0Z0g3TytZaGZ2bnBJNUxLV3JNejVvL1NDdXJrSmErVHJadVNuckpaaC9HbU0rT3AKczhXT1NJYVJpcEtLZlVtajNONzlUeEhkWTJVdThpNnZiTS9MOEcxVlVhOFFXcGdnTCtua1JZZFlCZ2NIQ0pWMgpZRUovSnlmSm9oK3c0RFF1aUV5RmZyNndmWlpKZGNnRlVRcThCWUlnb0c2dkZDeEw0RnBFbm55YkdwUEt6ZzNVCktmVmxCMGJ0Zkw0TjRZM3ZPYzhPeDgrWHZBWFMzSDdXV1I2TVR5MW8wbG5rVFFhbW5RWjVTZjhMVTQ4REdTZHkKNFIzUTdiNW9UTU5TYmVHc0t0WXlxYURPQjE4WW0zTE9CQ0dvdThUTi9MWWxoVEdjQ1JWSVM4UTZISjJhRnQvVgp6b04zbXZlYitsUDF3LzNNaTltYjIyOTFsQ25hUG9pcE1memV0WmhVQ05RRGZQalVGUXJLR0o4M2FXNmRNZFFNClY0dXNzWG1Dbno5NXlWeEx6eEtRSVEzM0RKYWhVNlQ1VUtxRWpjdmpjT0xaYVN0MTlnbUMxeHlCUXg4Q0F3RUEKQVFLQ0FnQUhyaXdEWGZjZlYya0Qrd0NmSVQ1MklVNmFLaGZQUWg2ZzBlNlYzS3hXd29IdzJiN2lQQjdTY0F3SwpUK09GZlFsNFVJNVVsTlNOZmJFSnVtam0wdHV4em9USmcvT0F1ZFZmSzJWV2s3a3BjTFhEMUxINTlXemNYcEFKCjhGOFg0WVVpYzFPWGNJd1NDbmR6ekQ2Um1wNWpsNkYzcDRWU0ZQcU8zS09mLzZuVWdtMExMT0U0T0NlbmdzcVcKSXZXbCsvWHc3K0h1bm9SRWZlUzZVYzB5UEFRVWdSemx2L3FLenRPeCt2cit5Nko0ay8rbFJJejlLRjRjdmNXWgpoZWlieGVCUFhKZmJqaFZLY3JZeFlxN1FxQk5nSW9WQzZxMjI2K3FlcjlodVcwdXQ1V251UmlXbWpReU9WVnFrCk5VRVJxeVZOSHdnSmJvL3IxTjNwU0NuUG1WT0Vnd0t2Qk1iN24zZFE2ME1TZHIxVkNKOVl5ZkI3TGlVODVOdHYKY01CWWt3Y1FocWxPR1o0WlZJa2VjaWxvZnBmY2lJVWlzWmJpMTlsUnB4SHBoRzUvR0N3K3p4VVJ2U0xud0Rqcgo2Zy81ZCthWjQ0bFlSSzIwS09OQ003VnA1dHRnZHJwcmtnK211cldBZ0tIcThJQTB6KzhzZlVYWDNXUFoyelVTCnppb2NGcmlmdXhENm1aMEFHZEhRaFdqK1dMSEpYdnJtaENtYmFsUEk5NllVVWRxZlo0RHdkL1ZZcXlLOEI2TjkKM0RFVnI4SlhKU1piUFV1MEZDcS80amV2R29JekJPVHBrTVU1RGtSUlA4ODgwY1ZLMGoxNmwxYXhiajNYb3hVVQpuWmQ1UDhSMWx6dTNONzdYUHNydXBPRDloUmJXRDFiQnVKN0E4aER0bWlQZ3hwTUxRUUtDQVFFQXkrWmZHMWJqClpnNUZqcWhOclR2dDVENmlmWmorUkkrL0YyYlpLdjdsdVRRRkgwemkvazhMSk44VFQ2b3ZYQTFHM29vQ1ZNZnAKNUZEYmphL3l1azJKejFwcCtiZ1pYeHdwRUdpSm0vRjZRVGQvM2Q0RzhGL1hnQnZIb3FHSXZCTzRFRlBCcTFlTgppRGNCMEUzdnZTWEM5bTFsWGgvSEVHczYySVg2bG5Nd2tYbUcrRTZMT0NTRTNQcTFUMDQxaWdUOTNYTmIzQnZxCm1YWWNBeGlzZVJMUGY3aExpNVVaNTJ1ekJLUDJ1OHg0R2lEOVlPQm5oc1g2dnBIUVExOWozUjlLMm52U3ppRjMKMWlTWVVLbWhIU0xwY3RHY0ZzYTdRYWJvVklpdGYzOWEzRHl4dHZXNndPOEx1TWVGRnMzdytqR3JrbTVIZ1BjZgpMY1A0T203T2xOa2JmUUtDQVFFQTBxZHZqUkYrMWxlRHZJUFhuSy9wTk5kTUNza21DTko1b25MTllKTWxNMVJTCk4velBZbjg3bXRsbnBzb3QrR0ZXS0pEaHkwTHlhUk4zZERacHJvRWxzOFVnNUt5NFZiZTV3OFloKzR5K0FrMFQKM3lwSTVDdUllNFpaTndpbEF5RkVWTkNhTXJaU3Y1eGpaR0pZQU1aa05mRE5XU3NSbFowUEkycFhYRDFCVnA5UQo4djVDWG5UTVZrTSs4NVZVN3dKMHl3QU1IRDdqVGM1RGVnaGZJT3NlZzVIc3RBTTJSbXc3dHU5YVVlL0t5eGVDCjZPY2NOMGhtT3VuZ2lPakE5MXYxWm5nV0lxaUZMZFVUUG9YS0JsWlRwbSt5QUJCTC92M0dWYm5NcUdzcnFlV3kKaS9ZQ1ZoSlVXcGZPa2t2djVUTTJXU0hKTGZYejU3Q25oL1Jodi80RHl3S0NBUUVBbGVlYWlJa0t5ZDg0T0RkNApWQ0k1TlpMdTk1UGhiQnFhak9QcVNYZjVBSDVFUGN6VEhkQ1RDdHFPWWdWbXFEQ0NwOTJpOVIyODBVUzVCYVFUCnVmQ0RudFNFRVRuT1BXU0F0RFdHNWdWVXNsblJRaGFYMTJVL0ZFcFlMVExCU2pUZEgvUFQ4TnAvaldPVGk4ZWcKdDlqcFN5OEdWWHJiYVRETXBKOXJxZXlxQ21ua1Z1MjM0T0RJWllaVWdpZW5xUDhlZlE3d3ZCUXlGLzBEZnl0RwpzZ1NvVk9memNuMTkrK3ZzUXo4Z2lOVThmMGs5djFsOEExUE9rd2kwcXJPWXdkcmR0MTlOam9xQWhnbUZpZGdNClFWV0hlQWl1enZmd2Q4WDBEU0luSEJOUGc1ZUR1RlZVcGIrWlNKSTNRSnJMemNWeXRFY3JmcDh3WnY4cW9oc1EKK2RIY0tRS0NBUUIydGF5RFRzeXZkWG5qamxpL0Q5TFgyRXdkOStUYksrRW56cEkyVGpXMGkxd1orUG5WZytYUApDWjBEdlFQUzZPZG96Tnl4Y3ZTd2lpdlM3YWI0bEFidkc3UEJxaVBuQ2paQlFUSjlVMzd3UlFkaHg2NTBCcVJQCkdCTEdsTFNJNHdKaTJYdE1BTHI1QitScStaQ21QeWJSenZ
XcHZqK1dsSCtuY1pIeGhRT0JFUjdKRU1mTit2djcKME1GMCt6NTF3bWlXelZ3RnZ0clJTZDkwc2FzS0hmV0FKNGZBbWQ3SUtNNHQ5UXh0Q0RpNzRtLy9WOTNxdVg0Qwp6VEdmZGFyb3VvVzhUUWdNVi9OVk5MSTNsYVdYbEtabk1LS2FycFJsQ2hYdTBWbW52MTFIelEybEdlbVBINTB4CndFTEU1czQxMTNwVkdGa0s4WU9WbVZPUHBnUS84YUdwQW9JQkFFa256SzNMeElSRmowVHB4TlJaSEp1NVNVdHkKTGRGaDQwd3kyUjNJWS9SSGpBbmx1TTBmUUQydjN1ZTY1VHhaenY4Y2tiZ1pKTkE0eGxTd2M0a3U5M3pkSkxYZwpLZzZPTU52cWZBbW9nMzkvekV3cEZ2MEZsdXpYTWdEdjVaSk5EYm0yMjhMaFJaQ1E3TFdKNWZMZXREY1BVaFRVCmd0UW9WVGZ6Zm5jTVMwa3IzQzdFSnQxam91UXMvNFhrVlEvZTkweTVkNTkwRGV5YmRSNnlDMWt5bU9sWVZJWDgKK0drdkxuZnFpMnZRZzdVR1hNTEY1LzRRNjZYRWl3NzVxcEdDTUFTdnNSa216L2FWYkpFNzNvR2dnZ1BGc3ZOOQpVcEFuR1NqaUFpd2Z5RmJ5d0NRV2hnUG1SbDhXV2xjTStZM0QxL1lLTElTVU5xQURnMGZXVXdkYVhFMD0KLS0tLS1FTkQgUlNBIFBSSVZBVEUgS0VZLS0tLS0K
+    token: 71b6ec454624539f8cf4a07c38d4c0efb015da0d5a9720766ab1e1d7c68c5920906350d2f4745a395a55abc5620c5f0002124c2f870358b61670078f137b5aa9
diff --git a/kubernetes/linux/kubeclient-4.9.2.gem b/kubernetes/linux/kubeclient-4.9.2.gem
new file mode 100644
index 000000000..dc158c6e9
Binary files /dev/null and b/kubernetes/linux/kubeclient-4.9.2.gem differ
diff --git a/kubernetes/linux/setup.sh b/kubernetes/linux/setup.sh
index ad7cc2232..1d3b68521 100644
--- a/kubernetes/linux/setup.sh
+++ b/kubernetes/linux/setup.sh
@@ -52,7 +52,12 @@
 sudo apt-get install ruby2.6 ruby2.6-dev gcc make -y
 gem install fluentd -v "1.12.2" --no-document
 fluentd --setup ./fluent
 gem install gyoku iso8601 --no-doc
-
+# kubeclient gem
+sudo apt-get install libmagickwand-dev -y
+# sudo gem install kubeclient --no-document
+sudo gem install kubeclient-4.9.2.gem
+# mmap2 gem
+sudo gem install mmap2
 rm -f $TMPDIR/docker-cimprov*.sh
 rm -f $TMPDIR/azure-mdsd*.deb
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent-watch.yaml
similarity index 93%
rename from kubernetes/omsagent.yaml
rename to kubernetes/omsagent-watch.yaml
index 617c81f38..b4d2cd144 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent-watch.yaml
@@ -333,8 +333,8 @@ metadata:
 type: Opaque
 data:
   #BASE64 ENCODED (Both WSID & KEY) INSIDE DOUBLE QUOTE ("")
-  WSID: "VALUE_WSID"
-  KEY: "VALUE_KEY"
+  WSID: "NDAzMWRlYjctNzliZS00NTgxLThhMzAtNDdiY2FhMzU3YmE2Cg=="
+  KEY: "WUZQZG9FR2ttYlQzVkJwWm85VWkyUlJLR2ErVkVETE9sQWJTMTdzOEVyVXpHejZPWER2TVc0bVhoYzlPb0lVZEc2OTdjSitFbDBmVlRyT0x3SWpFVVE9PQo="
 ---
 apiVersion: apps/v1
 kind: DaemonSet
@@ -368,8 +368,8 @@ spec:
             value: "3"
       containers:
         - name: omsagent
-          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06112021"
-          imagePullPolicy: IfNotPresent
+          image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:khushi-test-73"
+          imagePullPolicy: Always
           resources:
             limits:
               cpu: 500m
@@ -380,12 +380,14 @@ spec:
           env:
             # azure devops pipeline uses AKS_RESOURCE_ID and AKS_REGION hence ensure to uncomment these
            - name: AKS_RESOURCE_ID
-             value: "VALUE_AKS_RESOURCE_ID_VALUE"
+             value: "/subscriptions/b4b0cbab-64dc-42bc-9d95-ea7c8d0cf188/resourcegroups/khushi-watch-test/providers/Microsoft.ContainerService/managedClusters/khushi-watch-test"
            - name: AKS_REGION
-             value: "VALUE_AKS_RESOURCE_REGION_VALUE"
+             value: "West US 2"
            # this used for e2e test and setting this just emits some additional log statements which used for the e2e tests
            - name: ISTEST
              value: "true"
+           - name: USEMMAP
+             value: "true"
            #Uncomment below two lines for ACS clusters and set the cluster names manually. Also comment out the above two lines for ACS clusters
            #- name: ACS_RESOURCE_NAME
            #  value: "my_acs_cluster_name"
@@ -446,8 +448,8 @@ spec:
             timeoutSeconds: 15
         #Only in sidecar scraping mode
        - name: omsagent-prometheus
-         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06112021"
-         imagePullPolicy: IfNotPresent
+         image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:khushi-test-73"
+         imagePullPolicy: Always
          resources:
            limits:
              cpu: 500m
@@ -458,9 +460,9 @@ spec:
          env:
            # azure devops pipeline uses AKS_RESOURCE_ID and AKS_REGION hence ensure to uncomment these
            - name: AKS_RESOURCE_ID
-             value: "VALUE_AKS_RESOURCE_ID_VALUE"
+             value: "/subscriptions/b4b0cbab-64dc-42bc-9d95-ea7c8d0cf188/resourcegroups/khushi-watch-test/providers/Microsoft.ContainerService/managedClusters/khushi-watch-test"
            - name: AKS_REGION
-             value: "VALUE_AKS_RESOURCE_REGION_VALUE"
+             value: "West US 2"
            #Uncomment below two lines for ACS clusters and set the cluster names manually. Also comment out the above two lines for ACS clusters
            #- name: ACS_RESOURCE_NAME
            #  value: "my_acs_cluster_name"
@@ -589,8 +591,8 @@ spec:
      serviceAccountName: omsagent
      containers:
        - name: omsagent
-         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06112021"
-         imagePullPolicy: IfNotPresent
+         image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:khushi-test-73"
+         imagePullPolicy: Always
          resources:
            limits:
              cpu: 1
@@ -600,12 +602,14 @@ spec:
              memory: 250Mi
          env:
            - name: AKS_RESOURCE_ID
-             value: "VALUE_AKS_RESOURCE_ID_VALUE"
+             value: "/subscriptions/b4b0cbab-64dc-42bc-9d95-ea7c8d0cf188/resourcegroups/khushi-watch-test/providers/Microsoft.ContainerService/managedClusters/khushi-watch-test"
            - name: AKS_REGION
-             value: "VALUE_AKS_RESOURCE_REGION_VALUE"
+             value: "West US 2"
            # this used for e2e test and setting this just emits some additional log statements which used for the e2e tests
            - name: ISTEST
-             value: "true"
+             value: "true"
+           - name: USEMMAP
+             value: "true"
            # Uncomment below two lines for ACS clusters and set the cluster names manually. Also comment out the above two lines for ACS clusters
            #- name: ACS_RESOURCE_NAME
            #  value: "my_acs_cluster_name"
@@ -651,15 +655,15 @@ spec:
            - mountPath: /etc/config/osm-settings
              name: osm-settings-vol-config
              readOnly: true
-         livenessProbe:
-           exec:
-             command:
-               - /bin/bash
-               - -c
-               - /opt/livenessprobe.sh
-           initialDelaySeconds: 60
-           periodSeconds: 60
-           timeoutSeconds: 15
+         # livenessProbe:
+         #   exec:
+         #     command:
+         #       - /bin/bash
+         #       - -c
+         #       - /opt/livenessprobe.sh
+         #   initialDelaySeconds: 60
+         #   periodSeconds: 60
+         #   timeoutSeconds: 15
      affinity:
        nodeAffinity:
          # affinity to schedule on to ephemeral os node if its available
@@ -679,6 +683,10 @@ spec:
                  operator: In
                  values:
                    - linux
+                - key: watch-test
+                  operator: In
+                  values:
+                    - watch
                - key: type
                  operator: NotIn
                  values:
@@ -769,9 +777,9 @@ spec:
          env:
            # azure devops pipeline uses AKS_RESOURCE_ID and AKS_REGION hence ensure to uncomment these
            - name: AKS_RESOURCE_ID
-             value: "VALUE_AKS_RESOURCE_ID_VALUE"
+             value: "/subscriptions/b4b0cbab-64dc-42bc-9d95-ea7c8d0cf188/resourcegroups/khushi-watch-test/providers/Microsoft.ContainerService/managedClusters/khushi-watch-test"
            - name: AKS_REGION
-             value: "VALUE_AKS_RESOURCE_REGION_VALUE"
+             value: "West US 2"
            #- name: ACS_RESOURCE_NAME
            #  value: "my_acs_cluster_name"
            - name: CONTROLLER_TYPE
diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb
index 5598602cd..dccb4be79 100644
--- a/source/plugins/ruby/in_kube_podinventory.rb
+++ b/source/plugins/ruby/in_kube_podinventory.rb
@@ -3,6 +3,7 @@
 require 'fluent/plugin/input'
+
 module Fluent::Plugin
   require_relative "podinventory_to_mdm"
@@ -20,6 +21,8 @@ def initialize
       require "yajl"
       require "set"
      require "time"
+      require "kubeclient"
+      require "mmap/mmap"
      require_relative "kubernetes_container_inventory"
      require_relative "KubernetesApiClient"
@@ -33,6 +36,8 @@ def initialize
      @PODS_CHUNK_SIZE = 0
      @PODS_EMIT_STREAM_BATCH_SIZE = 0
+      @watchRestartCount = 0
+
      @podCount = 0
      @serviceCount = 0
      @controllerSet = Set.new []
@@ -40,7 +45,11 @@ def initialize
      @controllerData = {}
      @podInventoryE2EProcessingLatencyMs = 0
      @podsAPIE2ELatencyMs = 0
-
+
+      @noticeHash = {}
+      @collection_version = ""
+      @podInventoryHash = {}
+
      @kubeperfTag = "oneagent.containerInsights.LINUX_PERF_BLOB"
      @kubeservicesTag = "oneagent.containerInsights.KUBE_SERVICES_BLOB"
      @containerInventoryTag = "oneagent.containerInsights.CONTAINER_INVENTORY_BLOB"
@@ -74,12 +83,28 @@ def start
          $log.warn("in_kube_podinventory::start: setting to default value since got PODS_EMIT_STREAM_BATCH_SIZE nil or empty")
          @PODS_EMIT_STREAM_BATCH_SIZE = 200
        end
+
+        # create kubernetes watch client
+        ssl_options = {
+          ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
+          verify_ssl: OpenSSL::SSL::VERIFY_PEER,
+        }
+        timeouts = {
+          open: 60, # default setting (in seconds)
+          read: nil # read never times out
+        }
+        getTokenStr = "Bearer " + KubernetesApiClient.getTokenStr
+        auth_options = { bearer_token: KubernetesApiClient.getTokenStr }
+        @KubernetesWatchClient = Kubeclient::Client.new("https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}/api/", "v1", ssl_options: ssl_options, auth_options: auth_options, as: :parsed, timeouts: timeouts)
+        @Watcher = nil
        $log.info("in_kube_podinventory::start: PODS_EMIT_STREAM_BATCH_SIZE @ #{@PODS_EMIT_STREAM_BATCH_SIZE}")
        @finished = false
        @condition = ConditionVariable.new
        @mutex = Mutex.new
-        @thread = Thread.new(&method(:run_periodic))
+        @watchthread = Thread.new(&method(:watch))
+        @runthread = Thread.new(&method(:run_periodic))
        @@podTelemetryTimeTracker = DateTime.now.to_time.to_i
+        @@WatcherTimeTracker = DateTime.now.to_time.to_i
      end
    end
@@ -89,11 +114,68 @@ def shutdown
          @finished = true
          @condition.signal
        }
-        @thread.join
+        @watchthread.join
+        @runthread.join
        super # This super must be at the end of shutdown method
      end
    end

+    def populate_podinventory_cache(podInventory)
+      batchTime = Time.now.utc.iso8601
+      serviceRecords = @serviceRecords
+      begin
+        if !podInventory["items"].nil? && !podInventory["items"].empty?
+          podInventory["items"].each do |item|
+            # Extract needed fields using getPodInventoryRecords and create a hash mapping uid -> record
+            podInventoryRecords = getPodInventoryRecords(item, serviceRecords, batchTime)
+            podInventoryRecords.each { |record|
+              uid = record["PodUid"]
+              @podInventoryHash[uid] = record
+            }
+          end
+        end
+      rescue => exception
+        $log.info("in_kube_podinventory::populate_podinventory_cache : populating podInventoryHash failed.")
+        $log.debug_backtrace(exception.backtrace)
+      end
+    end
+
+    def watch
+      loop do
+        enumerate
+        serviceRecords = @serviceRecords
+        begin
+          @Watcher = @KubernetesWatchClient.watch_pods(resource_version: @collection_version, timeoutSeconds: 300, as: :parsed)
+          @Watcher.each do |notice|
+            $log.info("in_kube_podinventory::watch : inside watch pods! collection version: #{@collection_version}.")
+            if !notice.nil? && !notice.empty?
+              $log.info("in_kube_podinventory::watch : notice was not null and not empty.")
+              if notice["type"] == "ERROR"
+                $log.info("in_kube_podinventory::watch : notice type was error. restarting watch.")
+                break
+              end
+              item = notice["object"]
+              batchTime = Time.now.utc.iso8601
+              # Construct record with necessary fields (same fields as getPodInventoryRecords)
+              records = getPodInventoryRecords(item, serviceRecords, batchTime)
+              record = records.first()
+              record["NoticeType"] = notice["type"]
+              # $log.info("in_kube_podinventory::watch : record looks like: #{record}")
+              @mutex.synchronize {
+                @noticeHash[item["metadata"]["uid"]] = record
+              }
+              $log.info("in_kube_podinventory::watch : number of items in noticeHash = #{@noticeHash.size}")
+            end
+          end
+        rescue => exception
+          $log.warn("in_kube_podinventory::watch : watch events session got broken and re-establishing the session.")
+          $log.debug_backtrace(exception.backtrace)
+        end
+        sleep 1
+        @watchRestartCount += 1
+      end
+    end
+
    def enumerate(podList = nil)
      begin
        podInventory = podList
@@ -105,9 +187,10 @@ def enumerate(podList = nil)
        @controllerData = {}
        currentTime = Time.now
        batchTime = currentTime.utc.iso8601
-        serviceRecords = []
+        @serviceRecords = []
        @podInventoryE2EProcessingLatencyMs = 0
-        podInventoryStartTime = (Time.now.to_f * 1000).to_i
+        podInventoryStartTime = (Time.now.to_f * 1000).to_i
+        @podInventoryHash = {}

        # Get services first so that we dont need to make a call for very chunk
        $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}")
@@ -121,9 +204,9 @@
          $log.info("in_kube_podinventory::enumerate:End:Parsing services data using yajl @ #{Time.now.utc.iso8601}")
          serviceInfo = nil
          # service inventory records much smaller and fixed size compared to serviceList
-          serviceRecords = KubernetesApiClient.getKubeServicesInventoryRecords(serviceList, batchTime)
+          @serviceRecords = KubernetesApiClient.getKubeServicesInventoryRecords(serviceList, batchTime)
          # updating for telemetry
-          @serviceCount += serviceRecords.length
+          @serviceCount += @serviceRecords.length
          serviceList = nil
        end
@@ -134,25 +217,37 @@ def enumerate(podList = nil)
        continuationToken = nil
        $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}")
        continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}")
+        if !podInventory["metadata"].nil? && !podInventory["metadata"].empty? && !podInventory["metadata"]["resourceVersion"].nil? && !podInventory["metadata"]["resourceVersion"].empty?
+          @collection_version = podInventory["metadata"]["resourceVersion"]
+        end
+        $log.info("in_kube_podinventory::enumerate : Received collection version: #{@collection_version}")
        $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}")
        podsAPIChunkEndTime = (Time.now.to_f * 1000).to_i
        @podsAPIE2ELatencyMs = (podsAPIChunkEndTime - podsAPIChunkStartTime)
        if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?)
          $log.info("in_kube_podinventory::enumerate : number of pod items :#{podInventory["items"].length} from Kube API @ #{Time.now.utc.iso8601}")
-          parse_and_emit_records(podInventory, serviceRecords, continuationToken, batchTime)
+          populate_podinventory_cache(podInventory)
+          parse_and_emit_records(podInventory, @serviceRecords, continuationToken, batchTime)
        else
          $log.warn "in_kube_podinventory::enumerate:Received empty podInventory"
        end
        #If we receive a continuation token, make calls, process and flush data until we have processed all data
        while (!continuationToken.nil? && !continuationToken.empty?)
+          $log.info("in_kube_podinventory::enumerate : continuation token is not null and not empty")
          podsAPIChunkStartTime = (Time.now.to_f * 1000).to_i
          continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}&continue=#{continuationToken}")
+          # need to add collection resource version here
+          if !podInventory["metadata"].nil? && !podInventory["metadata"].empty? && !podInventory["metadata"]["resourceVersion"].nil? && !podInventory["metadata"]["resourceVersion"].empty?
+            @collection_version = podInventory["metadata"]["resourceVersion"]
+          end
+          $log.info("in_kube_podinventory::enumerate : continuation token was not null. received collection version: #{@collection_version}")
          podsAPIChunkEndTime = (Time.now.to_f * 1000).to_i
          @podsAPIE2ELatencyMs = @podsAPIE2ELatencyMs + (podsAPIChunkEndTime - podsAPIChunkStartTime)
          if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?)
$log.info("in_kube_podinventory::enumerate : number of pod items :#{podInventory["items"].length} from Kube API @ #{Time.now.utc.iso8601}") - parse_and_emit_records(podInventory, serviceRecords, continuationToken, batchTime) + populate_podinventory_cache(podInventory) + parse_and_emit_records(podInventory, @serviceRecords, continuationToken, batchTime) else $log.warn "in_kube_podinventory::enumerate:Received empty podInventory" end @@ -161,34 +256,8 @@ def enumerate(podList = nil) @podInventoryE2EProcessingLatencyMs = ((Time.now.to_f * 1000).to_i - podInventoryStartTime) # Setting these to nil so that we dont hold memory until GC kicks in podInventory = nil - serviceRecords = nil - - # Adding telemetry to send pod telemetry every 5 minutes - timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference / 60 - if (timeDifferenceInMinutes >= 5) - telemetryFlush = true - end - - # Flush AppInsights telemetry once all the processing is done - if telemetryFlush == true - telemetryProperties = {} - telemetryProperties["Computer"] = @@hostName - telemetryProperties["PODS_CHUNK_SIZE"] = @PODS_CHUNK_SIZE - telemetryProperties["PODS_EMIT_STREAM_BATCH_SIZE"] = @PODS_EMIT_STREAM_BATCH_SIZE - ApplicationInsightsUtility.sendCustomEvent("KubePodInventoryHeartBeatEvent", telemetryProperties) - ApplicationInsightsUtility.sendMetricTelemetry("PodCount", @podCount, {}) - ApplicationInsightsUtility.sendMetricTelemetry("ServiceCount", @serviceCount, {}) - telemetryProperties["ControllerData"] = @controllerData.to_json - ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", @controllerSet.length, telemetryProperties) - if @winContainerCount > 0 - telemetryProperties["ClusterWideWindowsContainersCount"] = @winContainerCount - ApplicationInsightsUtility.sendCustomEvent("WindowsContainerInventoryEvent", telemetryProperties) - end - ApplicationInsightsUtility.sendMetricTelemetry("PodInventoryE2EProcessingLatencyMs", @podInventoryE2EProcessingLatencyMs, telemetryProperties) - ApplicationInsightsUtility.sendMetricTelemetry("PodsAPIE2ELatencyMs", @podsAPIE2ELatencyMs, telemetryProperties) - @@podTelemetryTimeTracker = DateTime.now.to_time.to_i - end + # TODO: commenting next line for watch + # @serviceRecords = nil rescue => errorStr $log.warn "in_kube_podinventory::enumerate:Failed in enumerate: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) @@ -373,6 +442,176 @@ def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batc end #begin block end end + def parse_and_emit_merge_updates(podInventoryRecords) + currentTime = Time.now + emitTime = Fluent::Engine.now + batchTime = currentTime.utc.iso8601 + eventStream = Fluent::MultiEventStream.new + containerInventoryStream = Fluent::MultiEventStream.new + kubePerfEventStream = Fluent::MultiEventStream.new + insightsMetricsEventStream = Fluent::MultiEventStream.new + @@istestvar = ENV["ISTEST"] + + continuationToken = nil + emittedPodCount = 0 + + begin #begin block start + timeDifference = (DateTime.now.to_time.to_i - @@WatcherTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= 5) + $log.info("parse_and_emit_merge_updates::resetting watcher to handle api server timeout :#{Time.now.utc.iso8601}") + @@WatcherTimeTracker = DateTime.now.to_time.to_i + if !@Watcher.nil? 
+            @Watcher.finish
+          end
+        end
+
+        # Getting windows nodes from kubeapi
+        winNodes = KubernetesApiClient.getWindowsNodesArray
+        podInventoryRecords.each do |uid, record|
+          if !record.nil?
+            record["CollectionTime"] = batchTime
+            eventStream.add(emitTime, record) if record
+            @inventoryToMdmConvertor.process_pod_inventory_record(record)
+          end
+        end
+
+        if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && eventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE
+          $log.info("in_kube_podinventory::parse_and_emit_merge_updates: number of pod inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}")
+          if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
+            $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
+          end
+          router.emit_stream(@tag, eventStream) if eventStream
+          emittedPodCount += eventStream.count
+          # Updating value for AppInsights telemetry
+          @podCount += emittedPodCount
+          eventStream = Fluent::MultiEventStream.new
+        end
+
+        if eventStream.count > 0
+          $log.info("in_kube_podinventory::parse_and_emit_merge_updates: number of pod inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}")
+          router.emit_stream(@tag, eventStream) if eventStream
+          if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
+            $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
+          end
+          emittedPodCount += eventStream.count
+          # Updating value for AppInsights telemetry
+          @podCount += emittedPodCount
+          eventStream = nil
+        end
+
+        $log.info("parse_and_emit_merge_updates:: emittedPodCount = #{emittedPodCount}")
+      rescue => errorStr
+        $log.warn "Failed in parse_and_emit_merge_updates pod inventory: #{errorStr}. backtrace: #{errorStr.backtrace}"
+        $log.debug_backtrace(errorStr.backtrace)
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+      end #begin block end
+    end
+
+    def merge_updates
+      begin
+        $log.info("in_kube_podinventory::merge_updates: number of updates in notice hash #{@noticeHash.size} @#{Time.now.utc.iso8601}")
+        telemetryFlush = false
+        currentTime = Time.now
+        batchTime = currentTime.utc.iso8601
+        uidList = []
+
+        # Get services first so that we dont need to make a call for very chunk
+        $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}")
+        serviceInfo = KubernetesApiClient.getKubeResourceInfo("services")
+        # serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo("services").body)
+        $log.info("in_kube_podinventory::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}")
+
+        if !serviceInfo.nil?
+          $log.info("in_kube_podinventory::enumerate:Start:Parsing services data using yajl @ #{Time.now.utc.iso8601}")
+          serviceList = Yajl::Parser.parse(StringIO.new(serviceInfo.body))
+          $log.info("in_kube_podinventory::enumerate:End:Parsing services data using yajl @ #{Time.now.utc.iso8601}")
+          serviceInfo = nil
+          # service inventory records much smaller and fixed size compared to serviceList
+          @serviceRecords = KubernetesApiClient.getKubeServicesInventoryRecords(serviceList, batchTime)
+          # updating for telemetry
+          @serviceCount += @serviceRecords.length
+          serviceList = nil
+        end
+
+        @mutex.synchronize {
+          noticeHashLockStartTime = (Time.now.to_f * 1000).to_i
+          @noticeHash.each do |uid, record|
+            uidList.append(uid)
+            case record["NoticeType"]
+            when "ADDED"
+              @podInventoryHash[uid] = record
+            when "MODIFIED"
+              if @podInventoryHash[uid].nil?
+                @podInventoryHash[uid] = record
+              else
+                # TODO: remove below log statement later
+                $log.info("in_kube_podinventory::merge_updates : pure modify case")
+                @podInventoryHash[uid] = record
+              end
+            when "DELETED"
+              if @podInventoryHash.key?(uid)
+                @podInventoryHash.delete(uid)
+              else
+                $log.info("in_kube_podinventory::merge_updates: key did not exist in hash so unable to delete.")
+              end
+            when "BOOKMARK"
+              $log.info("in_kube_podinventory::merge_updates: received a BOOKMARK event.")
+            else
+              $log.info("in_kube_podinventory::merge_updates: something went wrong and didn't enter any cases for switch, notice type was #{record["NoticeType"]}")
+            end
+          end
+
+          # remove all looked at uids from the noticeHash
+          uidList.each do |uid|
+            @noticeHash.delete(uid)
+          end
+          # TODO: copy noticeHash to tempHash and use tempHash to loop through so we dont lock on it for a long time
+
+          noticeHashLockTotalTime = ((Time.now.to_f * 1000).to_i - noticeHashLockStartTime)
+          $log.info("in_kube_podinventory::merge_updates : notice hash lock total time = #{noticeHashLockTotalTime}")
+          $log.info("in_kube_podinventory::merge_updates : number of pods in @podInventoryHash = #{@podInventoryHash.size}, size = ")
+        }
+        parse_and_emit_merge_updates(@podInventoryHash)
+
+        # Adding telemetry to send pod telemetry every 5 minutes
+        timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs
+        timeDifferenceInMinutes = timeDifference / 60
+        if (timeDifferenceInMinutes >= 5)
+          telemetryFlush = true
+        end
+
+        # Flush AppInsights telemetry once all the processing is done
+        if telemetryFlush == true
+          telemetryProperties = {}
+          telemetryProperties["Computer"] = @@hostName
+          telemetryProperties["PODS_CHUNK_SIZE"] = @PODS_CHUNK_SIZE
+          telemetryProperties["PODS_EMIT_STREAM_BATCH_SIZE"] = @PODS_EMIT_STREAM_BATCH_SIZE
+          telemetryProperties["WatchRestartCount"] = @watchRestartCount
+          # reset watchRestartCount
+          @watchRestartCount = 0
+          ApplicationInsightsUtility.sendCustomEvent("KubePodInventoryHeartBeatEvent", telemetryProperties)
+          ApplicationInsightsUtility.sendMetricTelemetry("PodCount", @podCount, {})
+          # TODO: fix serviceCount => cant get actual value unless we watch for it or call it every minute
+          # get service count in merge_updates so it happens every minute
+          ApplicationInsightsUtility.sendMetricTelemetry("ServiceCount", @serviceCount, {})
+          telemetryProperties["ControllerData"] = @controllerData.to_json
+          ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", @controllerSet.length, telemetryProperties)
+          if @winContainerCount > 0
+            telemetryProperties["ClusterWideWindowsContainersCount"] = @winContainerCount
+            ApplicationInsightsUtility.sendCustomEvent("WindowsContainerInventoryEvent", telemetryProperties)
+          end
+          ApplicationInsightsUtility.sendMetricTelemetry("PodInventoryE2EProcessingLatencyMs", @podInventoryE2EProcessingLatencyMs, telemetryProperties)
+          ApplicationInsightsUtility.sendMetricTelemetry("PodsAPIE2ELatencyMs", @podsAPIE2ELatencyMs, telemetryProperties)
+          @@podTelemetryTimeTracker = DateTime.now.to_time.to_i
+        end
+      rescue => errorStr
+        $log.warn "in_kube_podinventory::merge_updates:Failed in merge_updates: #{errorStr}"
+        $log.debug_backtrace(errorStr.backtrace)
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+      end
+    end
+
    def run_periodic
      @mutex.lock
      done = @finished
@@ -392,11 +631,11 @@ def run_periodic
      @mutex.unlock
      if !done
        begin
-          $log.info("in_kube_podinventory::run_periodic.enumerate.start #{Time.now.utc.iso8601}")
-          enumerate
-          $log.info("in_kube_podinventory::run_periodic.enumerate.end #{Time.now.utc.iso8601}")
+          $log.info("in_kube_podinventory::run_periodic.merge_updates.start #{Time.now.utc.iso8601}")
+          merge_updates
+          $log.info("in_kube_podinventory::run_periodic.merge_updates.end #{Time.now.utc.iso8601}")
        rescue => errorStr
-          $log.warn "in_kube_podinventory::run_periodic: enumerate Failed to retrieve pod inventory: #{errorStr}"
+          $log.warn "in_kube_podinventory::run_periodic: merge_updates Failed to retrieve pod inventory: #{errorStr}"
          ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
        end
      end
@@ -650,4 +889,4 @@ def getServiceNameFromLabels(namespace, labels, serviceRecords)
      return serviceName
    end
  end # Kube_Pod_Input
-end # module
+end # module
\ No newline at end of file