diff --git a/CHANGELOG.md b/CHANGELOG.md index dfdb7133..8c071d39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## [1.0.83] - 2023-12-04 + + - [SSR Automation Documents] Centralize/modularize SSR's SSM automation documents + ## [1.0.82] - 2023-12-01 - [CloudFront Geo Restriction] Mark country codes as nonsensitive in terraform. diff --git a/ssr-automation-documents/README.md b/ssr-automation-documents/README.md new file mode 100644 index 00000000..5cfd382e --- /dev/null +++ b/ssr-automation-documents/README.md @@ -0,0 +1,18 @@ +SSR Automation Documents +=================== + +A set of Terraform modules for creating AWS Systems Manager (SSM) automation documents. + +## Usage + +```hcl +resource "aws_sns_topic" "my_cool_topic" { + name = "my-cool-topic" + display_name = "My Super Cool SNS Topic" +} + +module "scan_ecr_images" { + source = "github.com/massgov/mds-terraform-common//ssr-automation-documents/scan-ecr-image?ref=1.x" + default_alerting_topic = aws_sns_topic.my_cool_topic.arn +} +``` \ No newline at end of file diff --git a/ssr-automation-documents/check-tableau-license/main.tf b/ssr-automation-documents/check-tableau-license/main.tf new file mode 100644 index 00000000..6c0e7536 --- /dev/null +++ b/ssr-automation-documents/check-tableau-license/main.tf @@ -0,0 +1,18 @@ +locals { + region = coalesce(var.region, data.aws_region.current.name) +} + +data "aws_region" "current" {} + +resource "aws_ssm_document" "ssr_check_tableau_license" { + name = "SSR-CheckTableauLicenses" + document_format = "YAML" + document_type = "Automation" + content = templatefile( + "${path.module}/templates/check_tableau_licenses.yml", + { + region = local.region + alerts_topic_arn = var.default_alerting_topic + } + ) +} \ No newline at end of file diff --git a/ssr-automation-documents/check-tableau-license/outputs.tf b/ssr-automation-documents/check-tableau-license/outputs.tf new file mode 100644 index 00000000..229a7b56 --- /dev/null +++ 
b/ssr-automation-documents/check-tableau-license/outputs.tf @@ -0,0 +1,7 @@ +output "document_arn" { + value = aws_ssm_document.ssr_check_tableau_license.arn +} + +output "latest_document_version" { + value = aws_ssm_document.ssr_check_tableau_license.latest_version +} \ No newline at end of file diff --git a/ssr-automation-documents/check-tableau-license/templates/check_tableau_licenses.yml b/ssr-automation-documents/check-tableau-license/templates/check_tableau_licenses.yml new file mode 100644 index 00000000..52c3c4d4 --- /dev/null +++ b/ssr-automation-documents/check-tableau-license/templates/check_tableau_licenses.yml @@ -0,0 +1,146 @@ +description: |- + ### Purpose + Checks the number of days til tableau license expires + ### Usage + #### Parameters + - `AutomationAssumeRole` - (required, string); ARN of role to assume while performing this automation + - `InstanceIds` - (required, list); EC2 instances on which automation should check licenses + - `AlertsTopicArn` - (optional, string); the ARN of the SNS Topic to which results should be pushed (default: `${alerts_topic_arn}`) + - `DaysTilLicenseExpirationThreshold` - (optional, number); number of days left until tableau license expires before automation should send an alert (default: 7) +schemaVersion: '0.3' +assumeRole: '{{AutomationAssumeRole}}' +parameters: + InstanceIds: + type: 'List' + AlertsTopicArn: + type: String + default: ${alerts_topic_arn} + DaysTilLicenseExpirationThreshold: + type: Integer + default: 7 + AutomationAssumeRole: + type: 'AWS::IAM::Role::Arn' +mainSteps: + - name: PerformLicenseCheck + action: 'aws:runCommand' + onFailure: 'step:AlertOnError' + inputs: + DocumentName: AWS-RunShellScript + InstanceIds: + - '{{InstanceIds}}' + ServiceRoleArn: '{{AutomationAssumeRole}}' + MaxErrors: '1' + Parameters: + commands: + - sudo su + - 'if [[ -z $TABLEAU_SERVER_DATA_DIR_VERSION ]]; then source /etc/opt/tableau/tableau_server/environment.bash; fi' + - 
/opt/tableau/tableau_server/packages/customer-bin.$TABLEAU_SERVER_DATA_DIR_VERSION/tsm license list | grep -oP "\d\d?\/\d\d?\/\d\d" + outputs: + - Name: commandId + Selector: $.CommandId + Type: String + - name: EvaluateLicenseCheckOutput + action: 'aws:executeScript' + onFailure: 'step:AlertOnError' + inputs: + Runtime: python3.8 + Handler: check_license_expiration + Script: | + import boto3 + from datetime import datetime, timedelta + + ssm_client = boto3.client('ssm') + + def check_license_expiration(events, context): # builds the alert text and a 'True'/'False' send_message flag from the run-command results + instance_ids = events["instance_ids"] + license_list_command_id = events["license_list_command_id"] + days_til_license_expiration_threshold = events["days_til_license_expiration_threshold"] + + now = datetime.utcnow() # naive UTC timestamp, so the "UTC" label in the message below is accurate + + send_message = False + message_parts = [ + f"Automated Tableau license check was performed at {now.strftime('%d %b %Y %H:%M')} UTC.", + "Results:" + ] + + for instance_id in instance_ids: + license_list_command_invocation = ssm_client.get_command_invocation( + CommandId=license_list_command_id, + InstanceId=instance_id + ) + + send_message_for_instance = False + instance_message_parts = [] + + if (license_list_command_invocation["Status"] != "Success"): + send_message_for_instance = True + command_url = f"https://${region}.console.aws.amazon.com/systems-manager/run-command/{license_list_command_id}?region=${region}" + instance_message_parts = [ + f"- Command to check Tableau license status on EC2 instance `{instance_id}` " + + f"unexpectedly returned a `{license_list_command_invocation['Status']}` status. 
" + + f"(Command results: {command_url})" + ] + else: + license_list_command_output = license_list_command_invocation["StandardOutputContent"] + + [expiration_date_string, *_] = license_list_command_output.split("\n") # only the first date printed by the grep in PerformLicenseCheck is used + expiration_date = datetime.strptime(expiration_date_string, "%m/%d/%y") + + send_message_for_instance = expiration_date <= (now + timedelta(days=days_til_license_expiration_threshold)) + if (send_message_for_instance): + instance_message_parts = [ + f"- Tableau license on EC2 instance `{instance_id}` expires on {expiration_date.date().strftime('%b %-d %Y')}", + ] + + send_message = send_message or send_message_for_instance + message_parts.extend(instance_message_parts) + + message = "\n".join(message_parts) + + return { + "message": message, + "send_message": "True" if send_message else "False" + } + InputPayload: + instance_ids: '{{InstanceIds}}' + license_list_command_id: '{{ PerformLicenseCheck.commandId}}' + days_til_license_expiration_threshold: '{{DaysTilLicenseExpirationThreshold}}' + outputs: + - Name: message + Selector: $.Payload.message + Type: String + - Name: sendMessage + Selector: $.Payload.send_message + Type: String + - name: BranchOnLicenseCheck + action: 'aws:branch' + inputs: + Choices: + - NextStep: NotifyResults + Variable: '{{EvaluateLicenseCheckOutput.sendMessage}}' + StringEquals: 'True' + Default: Exit + - name: NotifyResults + action: 'aws:executeAutomation' + isEnd: true + inputs: + DocumentName: AWS-PublishSNSNotification + RuntimeParameters: + TopicArn: '{{AlertsTopicArn}}' + Message: '{{EvaluateLicenseCheckOutput.message}}' + - name: Exit + action: 'aws:sleep' + isEnd: true + inputs: + Duration: PT5S + - name: AlertOnError + action: 'aws:executeAutomation' + inputs: + DocumentName: AWS-PublishSNSNotification + DocumentVersion: $LATEST + RuntimeParameters: + TopicArn: '{{AlertsTopicArn}}' + Message: 'There was an error running `SSR-CheckTableauLicenses` automation' + description: Alert + isEnd: true diff --git 
a/ssr-automation-documents/check-tableau-license/variables.tf b/ssr-automation-documents/check-tableau-license/variables.tf new file mode 100644 index 00000000..99360caf --- /dev/null +++ b/ssr-automation-documents/check-tableau-license/variables.tf @@ -0,0 +1,10 @@ +variable "region" { + type = string + description = "Region where automation should be performed. Defaults to provider-configured region" + default = null +} + +variable "default_alerting_topic" { + type = string + description = "Default SNS topic to use for alerting" +} diff --git a/ssr-automation-documents/check-tableau-license/versions.tf b/ssr-automation-documents/check-tableau-license/versions.tf new file mode 100644 index 00000000..4cfb7a59 --- /dev/null +++ b/ssr-automation-documents/check-tableau-license/versions.tf @@ -0,0 +1,9 @@ +terraform { + required_version = ">= 0.13" + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 4.0" + } + } +} diff --git a/ssr-automation-documents/clean-rds-cluster-snapshots/main.tf b/ssr-automation-documents/clean-rds-cluster-snapshots/main.tf new file mode 100644 index 00000000..9fd30b54 --- /dev/null +++ b/ssr-automation-documents/clean-rds-cluster-snapshots/main.tf @@ -0,0 +1,18 @@ +locals { + region = coalesce(var.region, data.aws_region.current.name) +} + +data "aws_region" "current" {} + +resource "aws_ssm_document" "ssr_clean_up_rds_cluster_snapshots" { + name = "SSR-CleanUpRDSClusterSnapshots" + document_format = "YAML" + document_type = "Automation" + content = templatefile( + "${path.module}/templates/clean_up_rds_cluster_snapshots.yml", + { + region = local.region + alerts_topic_arn = var.default_alerting_topic + } + ) +} diff --git a/ssr-automation-documents/clean-rds-cluster-snapshots/outputs.tf b/ssr-automation-documents/clean-rds-cluster-snapshots/outputs.tf new file mode 100644 index 00000000..d1f9405b --- /dev/null +++ b/ssr-automation-documents/clean-rds-cluster-snapshots/outputs.tf @@ -0,0 +1,7 @@ +output "document_arn" 
{ + value = aws_ssm_document.ssr_clean_up_rds_cluster_snapshots.arn +} + +output "latest_document_version" { + value = aws_ssm_document.ssr_clean_up_rds_cluster_snapshots.latest_version +} \ No newline at end of file diff --git a/ssr-automation-documents/clean-rds-cluster-snapshots/templates/clean_up_rds_cluster_snapshots.yml b/ssr-automation-documents/clean-rds-cluster-snapshots/templates/clean_up_rds_cluster_snapshots.yml new file mode 100644 index 00000000..9d990c7f --- /dev/null +++ b/ssr-automation-documents/clean-rds-cluster-snapshots/templates/clean_up_rds_cluster_snapshots.yml @@ -0,0 +1,104 @@ +description: |- + ### Purpose + This automation performs an RDS snapshot cleanup + ### Usage + #### Parameters + - `AutomationAssumeRole` - (required, string) role to assume while performing this automation + - `DBClusterIdentifier` - (required, string) identifier of parent RDS cluster + - `RetentionPeriodDays` - (optional, Integer) number of days after which snapshots should be cleaned up (default: 90) + - `AlertsTopicArn` - (optional, string) the ARN of the SNS topic to alert in case of failure (default: `${alerts_topic_arn}`) +schemaVersion: '0.3' +assumeRole: '{{AutomationAssumeRole}}' +parameters: + AutomationAssumeRole: + type: 'AWS::IAM::Role::Arn' + description: Role to assume + DBClusterIdentifier: + type: String + description: Identifier of the parent RDS cluster + RetentionPeriodDays: + type: Integer + default: 90 + description: 'number of days after which snapshots should be cleaned up ' + AlertsTopicArn: + type: String + default: '${alerts_topic_arn}' +mainSteps: + - name: DeleteDbClusterSnapshots + action: 'aws:executeScript' + description: |- + ## DeleteDbClusterSnapshots + + Accepts an RDS cluster identifier and a retention period and deletes cluster snapshots + outside the retention period. 
If all snapshots are stale, the most recent will be + retained + timeoutSeconds: 300 + onFailure: 'step:AlertOnError' + onCancel: Abort + inputs: + Runtime: python3.8 + Handler: clean_up_db_snapshots + Script: |- + import boto3 + from datetime import date + + rds_client = boto3.client('rds', region_name='${region}') + + def clean_up_db_snapshots(events, context): # deletes stale manual cluster snapshots and returns the identifiers it deleted + db_cluster_identifier = events["db_cluster_identifier"] + retention_period_days = events["retention_period_days"] + + snapshot_pages = rds_client.get_paginator("describe_db_cluster_snapshots").paginate( # a single describe call returns only one page of results; the paginator follows the Marker + SnapshotType="manual", + DBClusterIdentifier=db_cluster_identifier, + IncludeShared=False, + IncludePublic=False + ) + + today = date.today() + deleted_snapshot_identifiers = [] + + snapshots_ascending = sorted( + [snapshot for page in snapshot_pages for snapshot in page["DBClusterSnapshots"]], + key=( + lambda snapshot: + snapshot["SnapshotCreateTime"] if "SnapshotCreateTime" in snapshot + else snapshot["ClusterCreateTime"] + ) + ) + + for snapshot in snapshots_ascending: + if snapshot["Status"] != "available": + continue + time_delta = today - snapshot["SnapshotCreateTime"].date() + if time_delta.days < retention_period_days: + continue + if not len(deleted_snapshot_identifiers) < len(snapshots_ascending) - 1: + # leave at least one snapshot, preferring the most recently created one + break + + snapshot_identifier = snapshot["DBClusterSnapshotIdentifier"] + rds_client.delete_db_cluster_snapshot( + DBClusterSnapshotIdentifier=snapshot_identifier + ) + deleted_snapshot_identifiers.append(snapshot_identifier) + + return {'deleted_snapshot_identifiers': deleted_snapshot_identifiers} + InputPayload: + db_cluster_identifier: '{{DBClusterIdentifier}}' + retention_period_days: '{{RetentionPeriodDays}}' + outputs: + - Name: deletedClusterSnapshotIdentifiers + Selector: $.Payload.deleted_snapshot_identifiers + Type: StringList + isEnd: true + - name: AlertOnError + action: 'aws:executeAutomation' + inputs: + DocumentName: AWS-PublishSNSNotification + DocumentVersion: $LATEST 
+ RuntimeParameters: + TopicArn: '{{AlertsTopicArn}}' + Message: 'Error running `SSR-CleanUpRDSClusterSnapshots` automation: failed to delete `{{DBClusterIdentifier}}` snapshot(s)' + description: Alert # failure path: publishes the error message to the alerts topic + isEnd: true diff --git a/ssr-automation-documents/clean-rds-cluster-snapshots/variables.tf b/ssr-automation-documents/clean-rds-cluster-snapshots/variables.tf new file mode 100644 index 00000000..99360caf --- /dev/null +++ b/ssr-automation-documents/clean-rds-cluster-snapshots/variables.tf @@ -0,0 +1,10 @@ +variable "region" { + type = string + description = "Region where automation should be performed. Defaults to provider-configured region" + default = null +} + +variable "default_alerting_topic" { + type = string + description = "Default SNS topic to use for alerting" +} diff --git a/ssr-automation-documents/clean-rds-cluster-snapshots/versions.tf b/ssr-automation-documents/clean-rds-cluster-snapshots/versions.tf new file mode 100644 index 00000000..6362d6df --- /dev/null +++ b/ssr-automation-documents/clean-rds-cluster-snapshots/versions.tf @@ -0,0 +1,10 @@ + +terraform { + required_version = ">= 0.13" + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 4.0" + } + } +} diff --git a/ssr-automation-documents/clean-rds-snapshots/main.tf b/ssr-automation-documents/clean-rds-snapshots/main.tf new file mode 100644 index 00000000..545c440e --- /dev/null +++ b/ssr-automation-documents/clean-rds-snapshots/main.tf @@ -0,0 +1,18 @@ +locals { + region = coalesce(var.region, data.aws_region.current.name) +} + +data "aws_region" "current" {} + +resource "aws_ssm_document" "ssr_clean_up_rds_snapshots" { + name = "SSR-CleanUpRDSSnapshots" + document_format = "YAML" + document_type = "Automation" + content = templatefile( + "${path.module}/templates/clean_up_rds_snapshots.yml", + { + region = local.region + alerts_topic_arn = var.default_alerting_topic + } + ) +} diff --git a/ssr-automation-documents/clean-rds-snapshots/outputs.tf 
b/ssr-automation-documents/clean-rds-snapshots/outputs.tf new file mode 100644 index 00000000..f7701663 --- /dev/null +++ b/ssr-automation-documents/clean-rds-snapshots/outputs.tf @@ -0,0 +1,7 @@ +output "document_arn" { + value = aws_ssm_document.ssr_clean_up_rds_snapshots.arn +} + +output "latest_document_version" { + value = aws_ssm_document.ssr_clean_up_rds_snapshots.latest_version +} \ No newline at end of file diff --git a/ssr-automation-documents/clean-rds-snapshots/templates/clean_up_rds_snapshots.yml b/ssr-automation-documents/clean-rds-snapshots/templates/clean_up_rds_snapshots.yml new file mode 100644 index 00000000..f8825a0a --- /dev/null +++ b/ssr-automation-documents/clean-rds-snapshots/templates/clean_up_rds_snapshots.yml @@ -0,0 +1,102 @@ +description: |- + ### Purpose + This automation performs an RDS snapshot cleanup + ### Usage + #### Parameters + - `AutomationAssumeRole` - (required, string) role to assume while performing this automation + - `DBInstanceIdentifier` - (required, string) identifier of parent RDS instance + - `RetentionPeriodDays` - (optional, Integer) number of days after which snapshots should be cleaned up (default: 90) + - `AlertsTopicArn` - (optional, string) the ARN of the SNS topic to alert in case of failure (default: `${alerts_topic_arn}`) +schemaVersion: '0.3' +assumeRole: '{{AutomationAssumeRole}}' +parameters: + AutomationAssumeRole: + type: 'AWS::IAM::Role::Arn' + description: Role to assume + DBInstanceIdentifier: + type: String + description: Identifier of the parent RDS instance + RetentionPeriodDays: + type: Integer + default: 90 + description: 'number of days after which snapshots should be cleaned up ' + AlertsTopicArn: + type: String + default: '${alerts_topic_arn}' +mainSteps: + - name: DeleteDbSnapshots + action: 'aws:executeScript' + description: |- + ## DeleteDbSnapshots + + Accepts an RDS instance identifier and a retention period and deletes snapshots + outside the retention period. 
If all snapshots are stale, the most recent will be + retained + timeoutSeconds: 300 + onFailure: 'step:AlertOnError' + onCancel: Abort + inputs: + Runtime: python3.8 + Handler: clean_up_db_snapshots + Script: |- + import boto3 + from datetime import date + + rds_client = boto3.client('rds', region_name='${region}') + + def clean_up_db_snapshots(events, context): # deletes stale manual instance snapshots and returns the identifiers it deleted + db_instance_identifier = events["db_instance_identifier"] + retention_period_days = events["retention_period_days"] + + snapshot_pages = rds_client.get_paginator("describe_db_snapshots").paginate( # a single describe call returns only one page of results; the paginator follows the Marker + SnapshotType="manual", + DBInstanceIdentifier=db_instance_identifier, + IncludeShared=False, + IncludePublic=False + ) + + today = date.today() + deleted_snapshot_identifiers = [] + + snapshots_ascending = sorted( + [snapshot for page in snapshot_pages for snapshot in page['DBSnapshots']], + key=( + lambda snapshot: + snapshot["SnapshotCreateTime"] if "SnapshotCreateTime" in snapshot + else snapshot["InstanceCreateTime"] + ) + ) + + for snapshot in snapshots_ascending: + if snapshot["Status"] != "available": + continue + time_delta = today - snapshot["SnapshotCreateTime"].date() + if time_delta.days < retention_period_days: + continue + if not len(deleted_snapshot_identifiers) < len(snapshots_ascending) - 1: + # leave at least one snapshot, preferring the most recently created one + break + + snapshot_identifier = snapshot["DBSnapshotIdentifier"] + rds_client.delete_db_snapshot(DBSnapshotIdentifier=snapshot_identifier) + deleted_snapshot_identifiers.append(snapshot_identifier) + + return {'deleted_snapshot_identifiers': deleted_snapshot_identifiers} + InputPayload: + db_instance_identifier: '{{DBInstanceIdentifier}}' + retention_period_days: '{{RetentionPeriodDays}}' + outputs: + - Name: deletedSnapshotIdentifiers + Selector: $.Payload.deleted_snapshot_identifiers + Type: StringList + isEnd: true + - name: AlertOnError + action: 'aws:executeAutomation' + inputs: + DocumentName: AWS-PublishSNSNotification + DocumentVersion: $LATEST + RuntimeParameters: + TopicArn: 
'{{AlertsTopicArn}}' + Message: 'Error running `SSR-CleanUpRDSSnapshots` automation: failed to delete `{{DBInstanceIdentifier}}` snapshot(s)' + description: Alert + isEnd: true diff --git a/ssr-automation-documents/clean-rds-snapshots/variables.tf b/ssr-automation-documents/clean-rds-snapshots/variables.tf new file mode 100644 index 00000000..99360caf --- /dev/null +++ b/ssr-automation-documents/clean-rds-snapshots/variables.tf @@ -0,0 +1,10 @@ +variable "region" { + type = string + description = "Region where automation should be performed. Defaults to provider-configured region" + default = null +} + +variable "default_alerting_topic" { + type = string + description = "Default SNS topic to use for alerting" +} diff --git a/ssr-automation-documents/clean-rds-snapshots/versions.tf b/ssr-automation-documents/clean-rds-snapshots/versions.tf new file mode 100644 index 00000000..6362d6df --- /dev/null +++ b/ssr-automation-documents/clean-rds-snapshots/versions.tf @@ -0,0 +1,10 @@ + +terraform { + required_version = ">= 0.13" + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 4.0" + } + } +} diff --git a/ssr-automation-documents/perform-tableau-maintenance/main.tf b/ssr-automation-documents/perform-tableau-maintenance/main.tf new file mode 100644 index 00000000..94c243a3 --- /dev/null +++ b/ssr-automation-documents/perform-tableau-maintenance/main.tf @@ -0,0 +1,11 @@ +resource "aws_ssm_document" "ssr_run_tableau_maintenance" { + name = "SSR-RunTableauMaintenance" + document_format = "YAML" + document_type = "Automation" + content = templatefile( + "${path.module}/templates/run_tableau_maintenance.yml", + { + alerts_topic_arn = var.default_alerting_topic + } + ) +} \ No newline at end of file diff --git a/ssr-automation-documents/perform-tableau-maintenance/output.tf b/ssr-automation-documents/perform-tableau-maintenance/output.tf new file mode 100644 index 00000000..8d307416 --- /dev/null +++ 
b/ssr-automation-documents/perform-tableau-maintenance/output.tf @@ -0,0 +1,7 @@ +output "document_arn" { + value = aws_ssm_document.ssr_run_tableau_maintenance.arn +} + +output "latest_document_version" { + value = aws_ssm_document.ssr_run_tableau_maintenance.latest_version +} \ No newline at end of file diff --git a/ssr-automation-documents/perform-tableau-maintenance/templates/run_tableau_maintenance.yml b/ssr-automation-documents/perform-tableau-maintenance/templates/run_tableau_maintenance.yml new file mode 100644 index 00000000..63c449fb --- /dev/null +++ b/ssr-automation-documents/perform-tableau-maintenance/templates/run_tableau_maintenance.yml @@ -0,0 +1,72 @@ +description: |- + ### Purpose + Runs `tsm maintenance cleanup` command + ### Usage + #### Parameters + - `AutomationAssumeRole` - (required, string); ARN of role to assume while performing this automation + - `InstanceIds` - (required, list); EC2 instances on which automation should perform maintenance + - `AlertsTopicArn` - (optional, string); the ARN of the SNS Topic to which results should be pushed (default: `${alerts_topic_arn}`) +schemaVersion: '0.3' +assumeRole: '{{AutomationAssumeRole}}' +parameters: + InstanceIds: + type: 'List' + AlertsTopicArn: + type: String + default: '${alerts_topic_arn}' + AutomationAssumeRole: + type: 'AWS::IAM::Role::Arn' +mainSteps: + - name: DescribeInstances + action: 'aws:executeAwsApi' + onFailure: 'step:AlertOnError' + inputs: + Service: ec2 + Api: DescribeInstances + InstanceIds: + - '{{InstanceIds}}' + outputs: + - Name: instanceId + Selector: '$.Reservations[0].Instances[0].InstanceId' + Type: String + - name: PerformTableauMaintenance + action: 'aws:runCommand' + onFailure: 'step:AlertOnError' + inputs: + DocumentName: AWS-RunShellScript + InstanceIds: + - '{{InstanceIds}}' + ServiceRoleArn: '{{AutomationAssumeRole}}' + MaxErrors: '1' + Parameters: + commands: + - sudo su + - 'if [[ -z $TABLEAU_SERVER_DATA_DIR_VERSION ]]; then source 
/etc/opt/tableau/tableau_server/environment.bash; fi' + - /opt/tableau/tableau_server/packages/customer-bin.$TABLEAU_SERVER_DATA_DIR_VERSION/tsm maintenance cleanup + outputs: + - Name: commandId + Selector: $.CommandId + Type: String + - name: WaitForMaintenanceCommand + action: 'aws:waitForAwsResourceProperty' + onFailure: 'step:AlertOnError' + inputs: + Service: ssm + Api: GetCommandInvocation + PropertySelector: $.StatusDetails + DesiredValues: + - Success + CommandId: '{{PerformTableauMaintenance.commandId}}' + InstanceId: '{{DescribeInstances.instanceId}}' + timeoutSeconds: 90 + isEnd: true + - name: AlertOnError + action: 'aws:executeAutomation' + inputs: + DocumentName: AWS-PublishSNSNotification + DocumentVersion: $LATEST + RuntimeParameters: + TopicArn: '{{AlertsTopicArn}}' + Message: 'Error running `SSR-RunTableauMaintenance` automation: `tsm maintenance cleanup` command failed on at least one EC2 instance' + description: Alert + isEnd: true \ No newline at end of file diff --git a/ssr-automation-documents/perform-tableau-maintenance/variables.tf b/ssr-automation-documents/perform-tableau-maintenance/variables.tf new file mode 100644 index 00000000..9022aab0 --- /dev/null +++ b/ssr-automation-documents/perform-tableau-maintenance/variables.tf @@ -0,0 +1,4 @@ +variable "default_alerting_topic" { + type = string + description = "Default SNS topic to use for alerting" +} diff --git a/ssr-automation-documents/perform-tableau-maintenance/versions.tf b/ssr-automation-documents/perform-tableau-maintenance/versions.tf new file mode 100644 index 00000000..4cfb7a59 --- /dev/null +++ b/ssr-automation-documents/perform-tableau-maintenance/versions.tf @@ -0,0 +1,9 @@ +terraform { + required_version = ">= 0.13" + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 4.0" + } + } +} diff --git a/ssr-automation-documents/scan-ecr-image/main.tf b/ssr-automation-documents/scan-ecr-image/main.tf new file mode 100644 index 00000000..a3e06eeb --- 
/dev/null +++ b/ssr-automation-documents/scan-ecr-image/main.tf @@ -0,0 +1,22 @@ +locals { + region = coalesce(var.region, data.aws_region.current.name) + account_id = coalesce(var.account_id, data.aws_caller_identity.current.account_id) +} + +data "aws_region" "current" {} + +data "aws_caller_identity" "current" {} + +resource "aws_ssm_document" "ssr_scan_ecr_image" { + name = "SSR-ScanECRImage" + document_format = "YAML" + document_type = "Automation" + content = templatefile( + "${path.module}/templates/scan_ecr_image_document.yml", + { + region = local.region + account_id = local.account_id + alerts_topic_arn = var.default_alerting_topic + } + ) +} diff --git a/ssr-automation-documents/scan-ecr-image/outputs.tf b/ssr-automation-documents/scan-ecr-image/outputs.tf new file mode 100644 index 00000000..d42d5da7 --- /dev/null +++ b/ssr-automation-documents/scan-ecr-image/outputs.tf @@ -0,0 +1,7 @@ +output "document_arn" { + value = aws_ssm_document.ssr_scan_ecr_image.arn +} + +output "latest_document_version" { + value = aws_ssm_document.ssr_scan_ecr_image.latest_version +} \ No newline at end of file diff --git a/ssr-automation-documents/scan-ecr-image/templates/scan_ecr_image_document.yml b/ssr-automation-documents/scan-ecr-image/templates/scan_ecr_image_document.yml new file mode 100644 index 00000000..c27a196f --- /dev/null +++ b/ssr-automation-documents/scan-ecr-image/templates/scan_ecr_image_document.yml @@ -0,0 +1,164 @@ +description: |- + ### Purpose + This automation initiates an image scan for a particular ECR repository and image tag and publishes the result to the specified SNS Topic + ### Usage + #### Parameters + - `AutomationAssumeRole` - (required) role to assume while performing this automation + - `RepositoryName` - (string, required); the ECR repository containing the target image + - `AlertsTopicArn` - (string, optional); the ARN of the SNS Topic to which results should be pushed (default: `${alerts_topic_arn}`) + - `ImageTag` - (string, 
optional); the target image tag (default: `latest`) + - `AlertingSeverityLevelThreshold` - (string, optional); if vulnerabilities are found at this level or above, an alert will be sent to `AlertsTopicArn` (default: `HIGH`) +schemaVersion: '0.3' +assumeRole: '{{AutomationAssumeRole}}' +parameters: + RepositoryName: + type: String + description: Name of the ECR repository + AlertsTopicArn: + type: String + default: '${alerts_topic_arn}' + description: ARN of SNS topic + ImageTag: + type: String + default: latest + description: ECR tag to use when initiating scan + AutomationAssumeRole: + type: 'AWS::IAM::Role::Arn' + description: Role to assume + AlertingSeverityLevelThreshold: + type: String + default: HIGH + description: Severity level threshold after which an alert will be sent + allowedValues: + - INFORMATIONAL + - LOW + - MEDIUM + - HIGH + - CRITICAL +mainSteps: + - name: StartImageScan + action: 'aws:executeAwsApi' + onFailure: 'step:AlertOnError' + inputs: + Service: ecr + Api: StartImageScan + repositoryName: '{{RepositoryName}}' + imageId: + imageTag: '{{ImageTag}}' + - name: Wait + action: 'aws:waitForAwsResourceProperty' + onFailure: 'step:AlertOnError' + inputs: + Service: ecr + Api: DescribeImageScanFindings + PropertySelector: '$.imageScanStatus.status' + DesiredValues: + - COMPLETE + repositoryName: '{{RepositoryName}}' + imageId: + imageTag: '{{ImageTag}}' + timeoutSeconds: 30 + maxAttempts: 10 + - name: GetImageScanResults + action: 'aws:executeAwsApi' + onFailure: 'step:AlertOnError' + inputs: + Service: ecr + Api: DescribeImageScanFindings + repositoryName: '{{RepositoryName}}' + imageId: + imageTag: '{{ImageTag}}' + outputs: + - Name: severityFindings + Selector: '$.imageScanFindings.findingSeverityCounts' + Type: StringMap + - Name: imageDigest + Selector: '$.imageId.imageDigest' + Type: String + - name: TransformResults + action: 'aws:executeScript' + onFailure: 'step:AlertOnError' + inputs: + Handler: handler + Runtime: python3.8 + 
InputPayload: + repository_name: '{{RepositoryName}}' + image_digest: '{{GetImageScanResults.imageDigest}}' + severity_findings: '{{GetImageScanResults.severityFindings}}' + alerting_severity_level_threshold: '{{AlertingSeverityLevelThreshold}}' + Script: |- + from datetime import datetime + + SORTED_SEVERITY_LEVELS = [ + 'INFORMATIONAL', + 'LOW', + 'MEDIUM', + 'HIGH', + 'CRITICAL' + ] + + def handler(events, context): # formats the scan summary and decides whether it should be sent, as a 'True'/'False' string + alerting_severity_level_threshold = events["alerting_severity_level_threshold"] + repository_name = events["repository_name"] + image_digest = events["image_digest"] + severity_findings = events["severity_findings"] + + now = datetime.utcnow() + scan_results_uri = f'https://${region}.console.aws.amazon.com/ecr/repositories/private/${account_id}/{repository_name}/_/image/{image_digest}/scan-results?region=${region}' + + message = "\n".join([ + f'An automated ECR scan on {repository_name}:{image_digest} was performed at {now.strftime("%d %b %Y %H:%M")} UTC.', + 'Number of findings, by severity category:', + *[f'- {level.title()}: {severity_findings.get(level, 0)}' for level in SORTED_SEVERITY_LEVELS], # a level may be absent from the map (the threshold check below allows for that), so report it as 0 + "\n", + f'The full results of the scan can be found here: {scan_results_uri}', + ]) + + threshold_index = SORTED_SEVERITY_LEVELS.index(alerting_severity_level_threshold) + target_severities = SORTED_SEVERITY_LEVELS[threshold_index::] + send_message = any([ + True for severity in target_severities if (severity in severity_findings and severity_findings[severity] > 0) + ]) + + return { + 'send_message': 'True' if send_message else 'False', + 'message': message + } + outputs: + - Name: message + Selector: '$.Payload.message' + Type: String + - Name: sendMessage + Type: String + Selector: '$.Payload.send_message' + - name: CheckTransformedResults + action: 'aws:branch' + inputs: + Choices: + - NextStep: NotifyResults + Variable: '{{TransformResults.sendMessage}}' + StringEquals: 'True' + Default: Exit + - name: NotifyResults + action: 'aws:executeAutomation' + 
isEnd: true + inputs: + DocumentName: AWS-PublishSNSNotification + RuntimeParameters: + TopicArn: '{{AlertsTopicArn}}' + Message: '{{TransformResults.message}}' + - name: Exit + action: 'aws:sleep' + isEnd: true + inputs: + Duration: PT5S + - name: AlertOnError + action: 'aws:executeAutomation' + inputs: + DocumentName: AWS-PublishSNSNotification + DocumentVersion: $LATEST + RuntimeParameters: + TopicArn: '{{AlertsTopicArn}}' + Message: 'There was an error running the `SSR-ScanECRImage` automation on {{RepositoryName}}:{{ImageTag}}' + description: Alert + isEnd: true diff --git a/ssr-automation-documents/scan-ecr-image/variables.tf b/ssr-automation-documents/scan-ecr-image/variables.tf new file mode 100644 index 00000000..96702a45 --- /dev/null +++ b/ssr-automation-documents/scan-ecr-image/variables.tf @@ -0,0 +1,16 @@ +variable "region" { + type = string + description = "Region where automation should be performed. Defaults to provider-configured region" + default = null +} + +variable "account_id" { + type = string + description = "AWS account where automation should be performed. Defaults to provider-configured account ID" + default = null +} + +variable "default_alerting_topic" { + type = string + description = "Default SNS topic to use for alerting" +} diff --git a/ssr-automation-documents/scan-ecr-image/versions.tf b/ssr-automation-documents/scan-ecr-image/versions.tf new file mode 100644 index 00000000..4cfb7a59 --- /dev/null +++ b/ssr-automation-documents/scan-ecr-image/versions.tf @@ -0,0 +1,9 @@ +terraform { + required_version = ">= 0.13" + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 4.0" + } + } +}