From d66d5b9205ca57a4b38149a4a420a0f4fa8dde8b Mon Sep 17 00:00:00 2001 From: Jay Patel <78554593+jarpat@users.noreply.github.com> Date: Fri, 29 Apr 2022 12:35:02 -0400 Subject: [PATCH 1/3] (IAC-422) Update EKS Module (#122) * wip - initial version bumps * initial working draft * WIP code changes for EKS module 17 to 18 upgrade * working version, pared down and hardcoded to a single node group * switch to EKS managed nodegroup and map more variables to v18 module * working version with cluster SG/R fixed * handle all taints cases and fixed typo default node group name * add custom launch template name * rename worker_groups to node_groups for consistency with AWS * update Terraform version to 1.1.6 * revert all versions except aws provider required by EKS module * Update TFv1.0.0 in readme and remove extraneous default_node_group call * removed output variable no longer supported * conditionally create SG rule for BYO SecurityGroup * Update docs Co-authored-by: Mano Meenaksh --- Dockerfile | 0 README.md | 2 +- locals.tf | 118 ++++++++++++++++++++++++++++++++++++---------------- main.tf | 94 +++++++++++++++++++++++++++++------------ outputs.tf | 3 -- security.tf | 1 + versions.tf | 2 +- 7 files changed, 152 insertions(+), 68 deletions(-) mode change 100644 => 100755 Dockerfile mode change 100644 => 100755 locals.tf mode change 100644 => 100755 main.tf mode change 100644 => 100755 outputs.tf diff --git a/Dockerfile b/Dockerfile old mode 100644 new mode 100755 diff --git a/README.md b/README.md index 46c13e2b..041915e2 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ This project contains Terraform scripts to provision the AWS cloud infrastructur >- Amazon VPC and Security Group >- Managed Amazon Elastic Kubernetes Service (EKS) - >- Self-managed node groups with required labels and taints + >- Amazon EKS managed node groups with required labels and taints >- Infrastructure to deploy the SAS Viya CAS server in SMP or MPP mode >- Amazon Elastic Block Storage (EBS) 
for NFS >- Amazon Elastic File System (EFS) diff --git a/locals.tf b/locals.tf old mode 100644 new mode 100755 index eb04f6d2..9aa62c9f --- a/locals.tf +++ b/locals.tf @@ -5,7 +5,6 @@ locals { security_group_id = var.security_group_id == null ? aws_security_group.sg[0].id : data.aws_security_group.sg[0].id cluster_security_group_id = var.cluster_security_group_id == null ? aws_security_group.cluster_security_group.0.id : var.cluster_security_group_id workers_security_group_id = var.workers_security_group_id == null ? aws_security_group.workers_security_group.0.id : var.workers_security_group_id - cluster_name = "${var.prefix}-eks" # CIDRs @@ -31,47 +30,92 @@ locals { kubeconfig_path = var.iac_tooling == "docker" ? "/workspace/${local.kubeconfig_filename}" : local.kubeconfig_filename kubeconfig_ca_cert = data.aws_eks_cluster.cluster.certificate_authority.0.data - # Mapping node_pools to worker_groups - default_node_pool = [ - { - name = "default" - instance_type = var.default_nodepool_vm_type - root_volume_size = var.default_nodepool_os_disk_size - root_volume_type = var.default_nodepool_os_disk_type - root_iops = var.default_nodepool_os_disk_iops - asg_desired_capacity = var.default_nodepool_node_count - asg_min_size = var.default_nodepool_min_nodes - asg_max_size = var.default_nodepool_max_nodes - kubelet_extra_args = "--node-labels=${replace(replace(jsonencode(var.default_nodepool_labels), "/[\"\\{\\}]/", ""), ":", "=")} --register-with-taints=${join(",", var.default_nodepool_taints)}" - additional_userdata = (var.default_nodepool_custom_data != "" ? 
file(var.default_nodepool_custom_data) : "") - metadata_http_endpoint = var.default_nodepool_metadata_http_endpoint - metadata_http_tokens = var.default_nodepool_metadata_http_tokens - metadata_http_put_response_hop_limit = var.default_nodepool_metadata_http_put_response_hop_limit - + # Mapping node_pools to node_groups + default_node_pool = { + default = { + name = "default" + instance_types = [var.default_nodepool_vm_type] + block_device_mappings = { + xvda = { + device_name = "/dev/xvda" + ebs = { + volume_type = var.default_nodepool_os_disk_type + volume_size = var.default_nodepool_os_disk_size + iops = var.default_nodepool_os_disk_iops + } + } + } + desired_size = var.default_nodepool_node_count + min_size = var.default_nodepool_min_nodes + max_size = var.default_nodepool_max_nodes + taints = { for i, taint in var.default_nodepool_taints : "default-${i}"=> { + "key" = split("=", taint)[0], + "value"= split(":", split("=", taint)[1])[0], + "effect"=length(regexall(":No", taint)) > 0 ? upper(replace(split(":", split("=", taint)[1])[1], "No", "NO_")) : upper(replace(split(":", split("=", taint)[1])[1], "No", "_NO_")) + } + } + labels = var.default_nodepool_labels + # User data + bootstrap_extra_args = "--kubelet-extra-args '--node-labels=${replace(replace(jsonencode(var.default_nodepool_labels), "/[\"\\{\\}]/", ""), ":", "=")} --register-with-taints=${join(",", var.default_nodepool_taints)} ' " + post_bootstrap_user_data = (var.default_nodepool_custom_data != "" ? file(var.default_nodepool_custom_data) : "") + metadata_options = { + http_endpoint = var.default_nodepool_metadata_http_endpoint + http_tokens = var.default_nodepool_metadata_http_tokens + http_put_response_hop_limit = var.default_nodepool_metadata_http_put_response_hop_limit + } + # Launch Template + create_launch_template = true + launch_template_name = "${local.cluster_name}-default-lt" + launch_template_use_name_prefix = true + tags = var.autoscaling_enabled ? 
merge(var.tags, { /* cluster-autoscaler auto-discovery: each discovery key is itself the tag name */ "k8s.io/cluster-autoscaler/${local.cluster_name}" = "owned", "k8s.io/cluster-autoscaler/enabled" = "true" }) : var.tags } - ] + } - user_node_pool = [ - for np_key, np_value in var.node_pools : - { - name = np_key - instance_type = np_value.vm_type - root_volume_size = np_value.os_disk_size - root_volume_type = np_value.os_disk_type - root_iops = np_value.os_disk_iops - asg_desired_capacity = var.autoscaling_enabled ? np_value.min_nodes == 0 ? 1 : np_value.min_nodes : np_value.min_nodes # TODO - Remove when moving to managed nodes - asg_min_size = np_value.min_nodes - asg_max_size = np_value.max_nodes - kubelet_extra_args = "--node-labels=${replace(replace(jsonencode(np_value.node_labels), "/[\"\\{\\}]/", ""), ":", "=")} --register-with-taints=${join(",", np_value.node_taints)}" - additional_userdata = (np_value.custom_data != "" ? file(np_value.custom_data) : "") - metadata_http_endpoint = np_value.metadata_http_endpoint - metadata_http_tokens = np_value.metadata_http_tokens - metadata_http_put_response_hop_limit = np_value.metadata_http_put_response_hop_limit + user_node_pool = { # one EKS managed node group definition per var.node_pools entry, keyed by pool name + for key, np_value in var.node_pools : + key => { + name = key + instance_types = [np_value.vm_type] + disk_size = np_value.os_disk_size + block_device_mappings = { + xvda = { + device_name = "/dev/xvda" + ebs = { + volume_type = np_value.os_disk_type + volume_size = np_value.os_disk_size + iops = np_value.os_disk_iops + } + } + } + desired_size = var.autoscaling_enabled ? np_value.min_nodes == 0 ? 
1 : np_value.min_nodes : np_value.min_nodes # TODO - Remove when moving to managed nodes + min_size = np_value.min_nodes + max_size = np_value.max_nodes + # AWS EKS Taints - https://docs.aws.amazon.com/eks/latest/userguide/node-taints-managed-node-groups.html + taints ={ for i, taint in np_value.node_taints: "${key}-${i}"=> { # to handle multiple taints, add index i to key for uniqueness + "key" = split("=", taint)[0], + "value"= split(":", split("=", taint)[1])[0], + "effect"=length(regexall(":No", taint)) > 0 ? upper(replace(split(":", split("=", taint)[1])[1], "No", "NO_")) : upper(replace(split(":", split("=", taint)[1])[1], "No", "_NO_")) # NoSchedule -> NO_SCHEDULE, NoExecute -> NO_EXECUTE, PreferNoSchedule -> PREFER_NO_SCHEDULE + } + } + labels = np_value.node_labels + # User data + bootstrap_extra_args = "--kubelet-extra-args '--node-labels=${replace(replace(jsonencode(np_value.node_labels), "/[\"\\{\\}]/", ""), ":", "=")} --register-with-taints=${join(",", np_value.node_taints)}' " + post_bootstrap_user_data = (np_value.custom_data != "" ? file(np_value.custom_data) : "") + metadata_options = { # IMDS settings come from this pool's own node_pools entry + http_endpoint = np_value.metadata_http_endpoint + http_tokens = np_value.metadata_http_tokens + http_put_response_hop_limit = np_value.metadata_http_put_response_hop_limit + } + # Launch Template + create_launch_template = true + launch_template_name = "${local.cluster_name}-${key}-lt" + launch_template_use_name_prefix = true + tags = var.autoscaling_enabled ? merge(var.tags, { /* cluster-autoscaler auto-discovery: each discovery key is itself the tag name */ "k8s.io/cluster-autoscaler/${local.cluster_name}" = "owned", "k8s.io/cluster-autoscaler/enabled" = "true" }) : var.tags } - ] + } # Merging the default_node_pool into the work_groups node pools - worker_groups = concat(local.default_node_pool, local.user_node_pool) + node_groups = merge(local.default_node_pool, local.user_node_pool) # PostgreSQL postgres_servers = var.postgres_servers == null ? 
{} : { for k, v in var.postgres_servers : k => merge( var.postgres_server_defaults, v, )} diff --git a/main.tf b/main.tf old mode 100644 new mode 100755 index 6b50edf4..045b87a5 --- a/main.tf +++ b/main.tf @@ -5,12 +5,12 @@ # provider "aws" { - region = var.location - profile = var.aws_profile - shared_credentials_file = var.aws_shared_credentials_file - access_key = var.aws_access_key_id - secret_key = var.aws_secret_access_key - token = var.aws_session_token + region = var.location + profile = var.aws_profile + shared_credentials_file = var.aws_shared_credentials_file + access_key = var.aws_access_key_id + secret_key = var.aws_secret_access_key + token = var.aws_session_token } data "aws_eks_cluster" "cluster" { @@ -80,37 +80,79 @@ module "vpc" { # EKS Setup - https://github.com/terraform-aws-modules/terraform-aws-eks module "eks" { source = "terraform-aws-modules/eks/aws" - version = "17.1.0" + version = "18.7.1" cluster_name = local.cluster_name cluster_version = var.kubernetes_version + cluster_enabled_log_types = [] # disable cluster control plane logging + create_cloudwatch_log_group = false cluster_endpoint_private_access = true - cluster_create_endpoint_private_access_sg_rule = true # NOTE: If true cluster_endpoint_private_access_cidrs must always be set - cluster_endpoint_private_access_sg = [local.security_group_id] - cluster_endpoint_private_access_cidrs = local.cluster_endpoint_private_access_cidrs cluster_endpoint_public_access = var.cluster_api_mode == "public" ? true : false cluster_endpoint_public_access_cidrs = local.cluster_endpoint_public_access_cidrs - write_kubeconfig = false - subnets = module.vpc.private_subnets + + subnet_ids = module.vpc.private_subnets vpc_id = module.vpc.vpc_id tags = var.tags enable_irsa = var.autoscaling_enabled - - manage_worker_iam_resources = var.workers_iam_role_name == null ? true : false - workers_role_name = var.workers_iam_role_name - manage_cluster_iam_resources = var.cluster_iam_role_name == null ? 
true : false - cluster_iam_role_name = var.cluster_iam_role_name - worker_create_security_group = false - worker_security_group_id = local.workers_security_group_id - cluster_create_security_group = false + ################################################################################ + # Cluster Security Group + ################################################################################ + create_cluster_security_group = false # v17: cluster_create_security_group cluster_security_group_id = local.cluster_security_group_id + # Extend cluster security group rules. NOTE(review): create_cluster_security_group is false above; confirm the v18 module still applies additional rules to a BYO security group + cluster_security_group_additional_rules = { + egress_nodes_ephemeral_ports_tcp = { + description = "To node 1025-65535" + protocol = "tcp" + from_port = 1025 + to_port = 65535 + type = "egress" + source_node_security_group = true + } + } + + ################################################################################ + # Node Security Group + ################################################################################ + create_node_security_group = false #v17: worker_create_security_group + node_security_group_id = local.workers_security_group_id #v17: worker_security_group_id + # Extend node-to-node security group rules. NOTE(review): create_node_security_group is false above; confirm these rules are applied to the BYO workers security group + node_security_group_additional_rules = { + ingress_self_all = { + description = "Node to node all ports/protocols" + protocol = "-1" + from_port = 0 + to_port = 0 + type = "ingress" + self = true + } + egress_all = { + description = "Node all egress" + protocol = "-1" + from_port = 0 + to_port = 0 + type = "egress" + cidr_blocks = ["0.0.0.0/0"] + ipv6_cidr_blocks = ["::/0"] + } + } - workers_group_defaults = { - tags = var.autoscaling_enabled ? 
[ { key = "k8s.io/cluster-autoscaler/${local.cluster_name}", value = "owned", propagate_at_launch = true }, { key = "k8s.io/cluster-autoscaler/enabled", value = "true", propagate_at_launch = true} ] : null - metadata_http_tokens = "required" - metadata_http_put_response_hop_limit = 1 - iam_instance_profile_name = var.workers_iam_role_name + ################################################################################ + # Handle BYO IAM policy + ################################################################################ + create_iam_role = var.cluster_iam_role_name == null ? true : false # v17: manage_cluster_iam_resources + iam_role_name = var.cluster_iam_role_name # v17: cluster_iam_role_name. NOTE(review): when create_iam_role is false the v18 module may expect iam_role_arn rather than iam_role_name - confirm the BYO cluster role path + iam_role_additional_policies = [ + "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" + ] + + ## Use this to define any values that are common and applicable to all Node Groups + eks_managed_node_group_defaults = { + create_security_group = false + vpc_security_group_ids = [local.workers_security_group_id] } - worker_groups = local.worker_groups + + ## Any individual Node Group customizations should go here + eks_managed_node_groups = local.node_groups } module "autoscaling" { diff --git a/outputs.tf b/outputs.tf old mode 100644 new mode 100755 index 85ade237..3401e2ef --- a/outputs.tf +++ b/outputs.tf @@ -8,9 +8,6 @@ output "kube_config" { sensitive = true } -output "worker_iam_role_arn" { - value = module.eks.worker_iam_role_arn -} output "cluster_iam_role_arn" { value = module.eks.cluster_iam_role_arn } diff --git a/security.tf b/security.tf index 71099cfc..4954f519 100644 --- a/security.tf +++ b/security.tf @@ -21,6 +21,7 @@ resource "aws_security_group" "sg" { resource "aws_security_group_rule" "vms" { count = ( length(local.vm_public_access_cidrs) > 0 + && var.security_group_id == null && ( (var.create_jump_public_ip && var.create_jump_vm ) || (var.create_nfs_public_ip && var.storage_type == "standard") ) diff --git a/versions.tf b/versions.tf index 
a062a753..b1f6bf38 100644 --- a/versions.tf +++ b/versions.tf @@ -3,7 +3,7 @@ terraform { required_providers { aws = { source = "hashicorp/aws" - version = "3.43.0" + version = "3.72.0" } random = { source = "hashicorp/random" From 4a7b4541f4216dc27dac4bde91b58669a30a0975 Mon Sep 17 00:00:00 2001 From: manoatsas <32555022+manoatsas@users.noreply.github.com> Date: Fri, 29 Apr 2022 18:21:01 -0400 Subject: [PATCH 2/3] (IAC-502) support processor type in node pools (#132) * wip - initial version bumps * initial working draft * WIP code changes for EKS module 17 to 18 upgrade * working version, pared down and hardcoded to a single node group * switch to EKS managed nodegroup and map more variables to v18 module * working version with cluster SG/R fixed * handle all taints cases and fixed typo default node group name * add custom launch template name * rename worker_groups to node_groups for consistency with AWS * update Terraform version to 1.1.6 * revert all versions except aws provider required by EKS module * Update TFv1.0.0 in readme and remove extraneous default_node_group call * removed output variable no longer supported * conditionally create SG rule for BYO SecurityGroup * Update docs * initial draft to support CPU type * add and update example tfvars with cpu_type * VM type for GPU, doc updates Co-authored-by: Jay Patel --- Dockerfile | 1 - docs/CONFIG-VARS.md | 3 +- examples/sample-input-byo.tfvars | 4 + examples/sample-input-connect.tfvars | 5 + examples/sample-input-custom-data.tfvars | 4 + examples/sample-input-gpu.tfvars | 130 +++++++++++++++++++++++ examples/sample-input-ha.tfvars | 4 + examples/sample-input-minimal.tfvars | 2 + examples/sample-input.tfvars | 4 + locals.tf | 3 +- variables.tf | 5 + 11 files changed, 162 insertions(+), 3 deletions(-) create mode 100644 examples/sample-input-gpu.tfvars diff --git a/Dockerfile b/Dockerfile index 9d39bfb4..f63e7e97 100755 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,6 @@ RUN yum -y install git openssh jq 
which \ && chmod 755 ./kubectl /viya4-iac-aws/docker-entrypoint.sh \ && mv ./kubectl /usr/local/bin/kubectl \ && chmod g=u -R /etc/passwd /etc/group /viya4-iac-aws \ - && git config --system --add safe.directory /viya4-iac-aws \ && terraform init ENV TF_VAR_iac_tooling=docker diff --git a/docs/CONFIG-VARS.md b/docs/CONFIG-VARS.md index 58267e3b..24942243 100644 --- a/docs/CONFIG-VARS.md +++ b/docs/CONFIG-VARS.md @@ -233,7 +233,8 @@ Additional node pools can be created separately from the default node pool. This |
Name
|
Description
|
Type
|
Default
|
Notes
| | :--- | :--- | :--- | :--- | :--- | -| vm_type | Type of the node pool VMs | string | | | +| vm_type | Type of the node pool VMs | string | | https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-types.html | +| cpu_type | Processor type CPU/GPU | string | AL2_x86_64| [AMI type](https://docs.aws.amazon.com/eks/latest/APIReference/API_Nodegroup.html#AmazonEKS-Type-Nodegroup-amiType) – Choose Amazon Linux 2 (AL2_x86_64) for Linux non-GPU instances, Amazon Linux 2 GPU Enabled (AL2_x86_64_GPU) for Linux GPU instances| | os_disk_type | Disk type for node pool VMs | string | | `gp2` or `io1` | | os_disk_size | Disk size for node pool VMs in GB | number | | | | os_disk_iops | Amount of provisioned [IOPS](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-io-characteristics.html) | number | | For `io1`, you MUST set the value to your desired IOPS value. Reference [Amazon EBS volume types](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html) for details on values based on the `os_disk_type` selected.| diff --git a/examples/sample-input-byo.tfvars b/examples/sample-input-byo.tfvars index 6a3b1af5..167ff88d 100644 --- a/examples/sample-input-byo.tfvars +++ b/examples/sample-input-byo.tfvars @@ -50,6 +50,7 @@ storage_type = "standard" node_pools = { cas = { "vm_type" = "m5.2xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -66,6 +67,7 @@ node_pools = { }, compute = { "vm_type" = "m5.8xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -83,6 +85,7 @@ node_pools = { }, stateless = { "vm_type" = "m5.4xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -99,6 +102,7 @@ node_pools = { }, stateful = { "vm_type" = "m5.4xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 diff --git a/examples/sample-input-connect.tfvars 
b/examples/sample-input-connect.tfvars index 1488e7ff..4e1c86de 100644 --- a/examples/sample-input-connect.tfvars +++ b/examples/sample-input-connect.tfvars @@ -40,6 +40,7 @@ storage_type = "standard" node_pools = { cas = { "vm_type" = "m5.2xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -56,6 +57,7 @@ node_pools = { }, compute = { "vm_type" = "m5.8xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -73,6 +75,7 @@ node_pools = { }, connect = { "vm_type" = "m5.8xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -90,6 +93,7 @@ node_pools = { }, stateless = { "vm_type" = "m5.4xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -106,6 +110,7 @@ node_pools = { }, stateful = { "vm_type" = "m5.4xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 diff --git a/examples/sample-input-custom-data.tfvars b/examples/sample-input-custom-data.tfvars index a7e14807..c13ad084 100644 --- a/examples/sample-input-custom-data.tfvars +++ b/examples/sample-input-custom-data.tfvars @@ -40,6 +40,7 @@ storage_type = "standard" node_pools = { cas = { "vm_type" = "i3.8xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -56,6 +57,7 @@ node_pools = { }, compute = { "vm_type" = "m5.8xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -73,6 +75,7 @@ node_pools = { }, stateless = { "vm_type" = "m5.4xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -89,6 +92,7 @@ node_pools = { }, stateful = { "vm_type" = "m5.4xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 diff --git a/examples/sample-input-gpu.tfvars b/examples/sample-input-gpu.tfvars new file mode 
100644 index 00000000..068c9885 --- /dev/null +++ b/examples/sample-input-gpu.tfvars @@ -0,0 +1,130 @@ +# !NOTE! - These are only a subset of the variables in CONFIG-VARS.md provided +# as examples. Customize this file to add any variables from CONFIG-VARS.md whose +# default values you want to change. + +# **************** REQUIRED VARIABLES **************** +# These required variables' values MUST be provided by the User +prefix = "" +location = "" # e.g., "us-east-1" +# **************** REQUIRED VARIABLES **************** + +# !NOTE! - Without specifying your CIDR block access rules, ingress traffic +# to your cluster will be blocked by default. + +# ************** RECOMMENDED VARIABLES *************** +default_public_access_cidrs = [] # e.g., ["123.45.6.89/32"] +ssh_public_key = "~/.ssh/id_rsa.pub" +# ************** RECOMMENDED VARIABLES *************** + +# Tags for all tagable items in your cluster. +tags = { } # e.g., { "key1" = "value1", "key2" = "value2" } + +# Postgres config - By having this entry a database server is created. If you do not +# need an external database server remove the 'postgres_servers' +# block below. 
+postgres_servers = { + default = {}, +} + +## Cluster config +kubernetes_version = "1.21" +default_nodepool_node_count = 2 +default_nodepool_vm_type = "m5.2xlarge" +default_nodepool_custom_data = "" + +## General +efs_performance_mode = "maxIO" +storage_type = "standard" + +## Cluster Node Pools config +node_pools = { + cas = { + "vm_type" = "m5.2xlarge" + "cpu_type" = "AL2_x86_64" + "os_disk_type" = "gp2" + "os_disk_size" = 200 + "os_disk_iops" = 0 + "min_nodes" = 1 + "max_nodes" = 5 + "node_taints" = ["workload.sas.com/class=cas:NoSchedule"] + "node_labels" = { + "workload.sas.com/class" = "cas" + } + "custom_data" = "" + "metadata_http_endpoint" = "enabled" + "metadata_http_tokens" = "required" + "metadata_http_put_response_hop_limit" = 1 + }, + gpu_cas = { + "vm_type" = "p2.8xlarge" + "cpu_type" = "AL2_x86_64_GPU" + "os_disk_type" = "gp2" + "os_disk_size" = 200 + "os_disk_iops" = 0 + "min_nodes" = 1 + "max_nodes" = 5 + "node_taints" = ["nvidia.com/gpu=present:NoSchedule"] + "node_labels" = { + "workload.sas.com/class" = "cas" + } + "custom_data" = "" + "metadata_http_endpoint" = "enabled" + "metadata_http_tokens" = "required" + "metadata_http_put_response_hop_limit" = 1 + }, + compute = { + "vm_type" = "m5.8xlarge" + "cpu_type" = "AL2_x86_64" + "os_disk_type" = "gp2" + "os_disk_size" = 200 + "os_disk_iops" = 0 + "min_nodes" = 1 + "max_nodes" = 5 + "node_taints" = ["workload.sas.com/class=compute:NoSchedule"] + "node_labels" = { + "workload.sas.com/class" = "compute" + "launcher.sas.com/prepullImage" = "sas-programming-environment" + } + "custom_data" = "" + "metadata_http_endpoint" = "enabled" + "metadata_http_tokens" = "required" + "metadata_http_put_response_hop_limit" = 1 + }, + stateless = { + "vm_type" = "m5.4xlarge" + "cpu_type" = "AL2_x86_64" + "os_disk_type" = "gp2" + "os_disk_size" = 200 + "os_disk_iops" = 0 + "min_nodes" = 1 + "max_nodes" = 5 + "node_taints" = ["workload.sas.com/class=stateless:NoSchedule"] + "node_labels" = { + 
"workload.sas.com/class" = "stateless" + } + "custom_data" = "" + "metadata_http_endpoint" = "enabled" + "metadata_http_tokens" = "required" + "metadata_http_put_response_hop_limit" = 1 + }, + stateful = { + "vm_type" = "m5.4xlarge" + "cpu_type" = "AL2_x86_64" + "os_disk_type" = "gp2" + "os_disk_size" = 200 + "os_disk_iops" = 0 + "min_nodes" = 1 + "max_nodes" = 3 + "node_taints" = ["workload.sas.com/class=stateful:NoSchedule"] + "node_labels" = { + "workload.sas.com/class" = "stateful" + } + "custom_data" = "" + "metadata_http_endpoint" = "enabled" + "metadata_http_tokens" = "required" + "metadata_http_put_response_hop_limit" = 1 + } +} + +# Jump Server +create_jump_vm = true diff --git a/examples/sample-input-ha.tfvars b/examples/sample-input-ha.tfvars index 4da71d33..59a0a72f 100644 --- a/examples/sample-input-ha.tfvars +++ b/examples/sample-input-ha.tfvars @@ -43,6 +43,7 @@ storage_type = "ha" node_pools = { cas = { "vm_type" = "i3.8xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -59,6 +60,7 @@ node_pools = { }, compute = { "vm_type" = "m5.8xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -76,6 +78,7 @@ node_pools = { }, stateless = { "vm_type" = "m5.4xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -92,6 +95,7 @@ node_pools = { }, stateful = { "vm_type" = "m5.4xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 diff --git a/examples/sample-input-minimal.tfvars b/examples/sample-input-minimal.tfvars index 038b6210..91a2287f 100644 --- a/examples/sample-input-minimal.tfvars +++ b/examples/sample-input-minimal.tfvars @@ -41,6 +41,7 @@ cluster_node_pool_mode = "minimal" node_pools = { cas = { "vm_type" = "r5.xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -57,6 +58,7 @@ node_pools = { }, generic = { "vm_type" 
= "m5.2xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 diff --git a/examples/sample-input.tfvars b/examples/sample-input.tfvars index bf5eed43..9a3174cd 100644 --- a/examples/sample-input.tfvars +++ b/examples/sample-input.tfvars @@ -40,6 +40,7 @@ storage_type = "standard" node_pools = { cas = { "vm_type" = "m5.2xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -56,6 +57,7 @@ node_pools = { }, compute = { "vm_type" = "m5.8xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -73,6 +75,7 @@ node_pools = { }, stateless = { "vm_type" = "m5.4xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -89,6 +92,7 @@ node_pools = { }, stateful = { "vm_type" = "m5.4xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 diff --git a/locals.tf b/locals.tf index 9aa62c9f..7cbae0ee 100755 --- a/locals.tf +++ b/locals.tf @@ -75,7 +75,8 @@ locals { for key, np_value in var.node_pools : key => { name = key - instance_types = [np_value.vm_type] + instance_types = [np_value.vm_type] + ami_type = np_value.cpu_type disk_size = np_value.os_disk_size block_device_mappings = { xvda = { diff --git a/variables.tf b/variables.tf index f397699a..21abb56d 100644 --- a/variables.tf +++ b/variables.tf @@ -183,6 +183,7 @@ variable node_pools { description = "Node pool definitions" type = map(object({ vm_type = string + cpu_type = string os_disk_type = string os_disk_size = number os_disk_iops = number @@ -199,6 +200,7 @@ variable node_pools { default = { cas = { "vm_type" = "m5.2xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -215,6 +217,7 @@ variable node_pools { }, compute = { "vm_type" = "m5.8xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -232,6 
+235,7 @@ variable node_pools { }, stateless = { "vm_type" = "m5.4xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 @@ -248,6 +252,7 @@ variable node_pools { }, stateful = { "vm_type" = "m5.4xlarge" + "cpu_type" = "AL2_x86_64" "os_disk_type" = "gp2" "os_disk_size" = 200 "os_disk_iops" = 0 From f30519cd3a5cae9859671c213155ba1ad7344d7b Mon Sep 17 00:00:00 2001 From: Jay Patel Date: Tue, 10 May 2022 10:31:45 -0400 Subject: [PATCH 3/3] fix Dockerfile --- Dockerfile | 1 + 1 file changed, 1 insertion(+) mode change 100755 => 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile old mode 100755 new mode 100644 index f63e7e97..9d39bfb4 --- a/Dockerfile +++ b/Dockerfile @@ -15,6 +15,7 @@ RUN yum -y install git openssh jq which \ && chmod 755 ./kubectl /viya4-iac-aws/docker-entrypoint.sh \ && mv ./kubectl /usr/local/bin/kubectl \ && chmod g=u -R /etc/passwd /etc/group /viya4-iac-aws \ + && git config --system --add safe.directory /viya4-iac-aws \ && terraform init ENV TF_VAR_iac_tooling=docker