Skip to content

Commit d6c4fcf

Browse files
authored
Merge pull request #4690 from GSA-TTS/main
2 parents 07d0b7d + d122cc5 commit d6c4fcf

16 files changed

+254
-22
lines changed

.github/workflows/deploy-application.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ jobs:
7777
cf_password: ${{ secrets.CF_PASSWORD }}
7878
cf_org: gsa-tts-oros-fac
7979
cf_space: ${{ env.space }}
80-
command: cf run-task gsa-fac -k 7G -m 3G --name deploy_backup --command "./fac-backup-util.sh v0.1.10 deploy_backup" --wait
80+
command: cf run-task gsa-fac -k 7G -m 3G --name deploy_backup --command "./fac-backup-util.sh v0.1.11 deploy_backup" --wait
8181

8282
- name: Deploy Preview to cloud.gov
8383
if: ${{ inputs.environment == 'preview' }}
@@ -124,5 +124,5 @@ jobs:
124124
secrets: inherit
125125
with:
126126
environment: ${{ inputs.environment }}
127-
util_version: "v0.1.10"
127+
util_version: "v0.1.11"
128128
backup_operation: "check_tables"

.github/workflows/fac-backup-scheduler.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,6 @@ jobs:
1919
secrets: inherit
2020
with:
2121
environment: ${{ matrix.environment.name }}
22-
util_version: "v0.1.10"
22+
util_version: "v0.1.11"
2323
backup_operation: "scheduled_backup"
2424

.github/workflows/fac-backup-util-scheduled.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
---
22
name: Backup the database with fac-backup-utility
33
### Common Commands:
4-
# ./fac-backup-util.sh v0.1.10 scheduled_backup
5-
# ./fac-backup-util.sh v0.1.10 daily_backup
4+
# ./fac-backup-util.sh v0.1.11 scheduled_backup
5+
# ./fac-backup-util.sh v0.1.11 daily_backup
66
on:
77
workflow_call:
88
inputs:

.github/workflows/fac-backup-util.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
---
22
name: Backup the database with fac-backup-utility
33
### Common Commands:
4-
# ./fac-backup-util.sh v0.1.10 initial_backup
5-
# ./fac-backup-util.sh v0.1.10 deploy_backup
4+
# ./fac-backup-util.sh v0.1.11 initial_backup
5+
# ./fac-backup-util.sh v0.1.11 deploy_backup
66
on:
77
workflow_dispatch:
88
inputs:

.github/workflows/fac-check-tables-scheduler.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,5 @@ jobs:
2020
secrets: inherit
2121
with:
2222
environment: ${{ matrix.environment.name }}
23-
util_version: "v0.1.10"
23+
util_version: "v0.1.11"
2424
backup_operation: "check_tables"

.github/workflows/fac-check-tables.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
---
22
name: Check existing tables in an environment
33
### Common Commands:
4-
# ./fac-backup-util.sh v0.1.10 check_tables
4+
# ./fac-backup-util.sh v0.1.11 check_tables
55
on:
66
workflow_dispatch:
77
inputs:

backend/config/settings.py

+9-8
Original file line numberDiff line numberDiff line change
@@ -359,15 +359,16 @@
359359
)
360360

361361
elif service["instance_name"] == "backups":
362+
# Backups AWS S3 bucket for the app's backup files
362363
s3_creds = service["credentials"]
363-
# Used for backing up the database https://django-dbbackup.readthedocs.io/en/master/storage.html#id2
364-
DBBACKUP_STORAGE = "storages.backends.s3boto3.S3Boto3Storage"
365-
DBBACKUP_STORAGE_OPTIONS = {
366-
"access_key": s3_creds["access_key_id"],
367-
"secret_key": s3_creds["secret_access_key"],
368-
"bucket_name": s3_creds["bucket"],
369-
"default_acl": "private", # type: ignore
370-
}
364+
365+
AWS_BACKUPS_ACCESS_KEY_ID = s3_creds["access_key_id"]
366+
AWS_BACKUPS_SECRET_ACCESS_KEY = s3_creds["secret_access_key"]
367+
AWS_BACKUPS_STORAGE_BUCKET_NAME = s3_creds["bucket"]
368+
AWS_S3_BACKUPS_REGION_NAME = s3_creds["region"]
369+
AWS_S3_BACKUPS_ENDPOINT = s3_creds["endpoint"]
370+
AWS_S3_BACKUPS_ENDPOINT_URL = f"https://{AWS_S3_BACKUPS_ENDPOINT}"
371+
AWS_PRIVATE_DEFAULT_ACL = "private"
371372

372373
# secure headers
373374
MIDDLEWARE.append("csp.middleware.CSPMiddleware")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
from django.core.management.base import BaseCommand
2+
from dissemination.models import MigrationInspectionRecord
3+
from config.settings import ENVIRONMENT, GSA_MIGRATION
4+
from django.db.models import Q
5+
6+
import logging
7+
8+
9+
logger = logging.getLogger(__name__)
10+
logger.setLevel(logging.WARNING)
11+
12+
13+
class Command(BaseCommand):
14+
help = """
15+
Replace 'GSA_MIGRATION' with '' in policies_content and rate_content
16+
of census_data in a note in dissemination_migrationinspectionrecord
17+
18+
Usage:
19+
manage.py update_migrationinspectionrecord
20+
--year <audit year>
21+
"""
22+
23+
def add_arguments(self, parser):
24+
parser.add_argument(
25+
"--year", help="Year(2016 through 2022)", type=str, default="2022"
26+
)
27+
28+
def is_year_invalid(self, year):
29+
valid_years = ["2016", "2017", "2018", "2019", "2020", "2021", "2022"]
30+
return year not in valid_years
31+
32+
def handle(self, *args, **options):
33+
if ENVIRONMENT not in [
34+
"LOCAL",
35+
"DEVELOPMENT",
36+
"PREVIEW",
37+
"STAGING",
38+
"PRODUCTION",
39+
]:
40+
print(f"Environment is not as expected, ENVIRONMENT={ENVIRONMENT}")
41+
return
42+
43+
year = options.get("year")
44+
if self.is_year_invalid(year):
45+
print(
46+
f"Invalid year {year}. Expecting 2016 / 2017 / 2018 / 2019 / 2020 / 2021 / 2022"
47+
)
48+
return
49+
50+
migrationinspectionrecords = MigrationInspectionRecord.objects.filter(
51+
Q(audit_year=year)
52+
)
53+
print(f"Count of {year} submissions: {len(migrationinspectionrecords)}")
54+
55+
count = 0
56+
for migrationinspectionrecord in migrationinspectionrecords:
57+
notes = []
58+
is_updated = False
59+
for note in migrationinspectionrecord.note:
60+
if (
61+
note[0]["transformation_functions"][0]
62+
== "xform_missing_notes_records"
63+
) & (note[0]["census_data"][0]["value"] == GSA_MIGRATION):
64+
note[0]["census_data"][0]["value"] = ""
65+
is_updated = True
66+
notes += [note]
67+
if is_updated:
68+
migrationinspectionrecord.note = notes
69+
migrationinspectionrecord.save()
70+
count += 1
71+
72+
print("Number of records updated = ", count)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Usage:
2+
# Do a delete: python manage.py delete_stale_backups --days X --delete true
3+
# List objects: python manage.py delete_stale_backups --days X
4+
5+
import boto3
6+
from datetime import datetime, timezone, timedelta
7+
from django.conf import settings
8+
from django.core.management.base import BaseCommand
9+
import sys
10+
11+
12+
class Command(BaseCommand):
13+
def add_arguments(self, parser):
14+
parser.add_argument(
15+
"--days",
16+
type=int,
17+
required=True,
18+
help="Max age a key(file) in days can have before we want to delete it. Value must be (14) or greater.",
19+
)
20+
parser.add_argument(
21+
"--delete",
22+
required=False,
23+
default=False,
24+
help="True/False. Actually do a delete. If not specified, just list the keys found that match.",
25+
)
26+
27+
def handle(self, *args, **options):
28+
days = options["days"]
29+
delete = options["delete"]
30+
31+
if days < 14:
32+
print(
33+
"Days cannot less than 14 to prevent up-to-date backups from being deleted. Exiting..."
34+
)
35+
sys.exit(1)
36+
37+
s3_client = boto3.client(
38+
"s3",
39+
aws_access_key_id=settings.AWS_BACKUPS_ACCESS_KEY_ID,
40+
aws_secret_access_key=settings.AWS_BACKUPS_SECRET_ACCESS_KEY,
41+
endpoint_url=settings.AWS_S3_BACKUPS_ENDPOINT_URL,
42+
)
43+
44+
paginator = s3_client.get_paginator("list_objects_v2")
45+
pages = paginator.paginate(
46+
Bucket=settings.AWS_BACKUPS_STORAGE_BUCKET_NAME, Prefix="backups/"
47+
)
48+
49+
delete_older_than = datetime.now(timezone.utc) - timedelta(days=days)
50+
total_count = 0
51+
for page in pages:
52+
if "Contents" in page:
53+
for obj in page["Contents"]:
54+
55+
# Get the last modified date of the object
56+
last_modified = obj["LastModified"]
57+
58+
# If the object is older than one week, delete it
59+
# s3_client.delete_object(Bucket=settings.AWS_STORAGE_BUCKET_NAME, Key=f"backups/{item.file.name}")
60+
if delete:
61+
if last_modified < delete_older_than:
62+
print(
63+
f"Deleting {obj['Key']} last modified on {last_modified}"
64+
)
65+
s3_client.delete_object(
66+
Bucket=settings.AWS_BACKUPS_STORAGE_BUCKET_NAME,
67+
Key=obj["Key"],
68+
)
69+
total_count += 1
70+
else:
71+
print(
72+
f"Object {obj['Key']} younger than {delete_older_than}. Not deleting."
73+
)
74+
else:
75+
print(
76+
f"Delete not sent. {obj['Key']} was last modified on {last_modified}"
77+
)
78+
else:
79+
print("No objects found in the bucket.")
80+
print(f"Total number of objects deleted: {total_count}")

docs/backups_and_restores.md

+5-5
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ Information regarding the fac-backup-utility can be found [at the repository](ht
3434
Database backups occur in the following ways:
3535
1. An initial backup, where a backup has not been run in the target environment. This input of `initial_backup` is important, as when it does the `db_to_db` command, it will not truncate the target table, as the table does not exist in the destination database.
3636
```bash
37-
./fac-backup-util.sh v0.1.10 initial_backup
37+
./fac-backup-util.sh v0.1.11 initial_backup
3838
# Curl the utility
3939
# Install AWS
4040
# DB to S3 table dump (backups)
@@ -44,7 +44,7 @@ Database backups occur in the following ways:
4444

4545
2. A deploy backup, where the `db_to_db` function is not called. This is a standard backup strategy before the application deploys, to ensure the s3 contents of the primary s3 are sync'd to the backups bucket, and a table dump is stored in the backups bucket.
4646
```bash
47-
./fac-backup-util.sh v0.1.10 deploy_backup
47+
./fac-backup-util.sh v0.1.11 deploy_backup
4848
# Curl the utility
4949
# Install AWS
5050
# DB to S3 table dump (backups)
@@ -53,7 +53,7 @@ Database backups occur in the following ways:
5353

5454
3. A scheduled backup is run every two hours, across each environment, ensuring that we have a clean backup in s3, rds, and the bucket contents are in sync.
5555
```bash
56-
./fac-backup-util.sh v0.1.10 scheduled_backup
56+
./fac-backup-util.sh v0.1.11 scheduled_backup
5757
# Curl the utility
5858
# Install AWS
5959
# DB to S3 table dump (fac-db -> backups)
@@ -66,7 +66,7 @@ Restoring from backups can be run via workflow, from designated individuals. The
6666

6767
1. S3 Restore takes a `operation-mm-DD-HH` input (ex `scheduled-06-04-10`), and is required for the backups to be restored. The utility looks in `s3://${bucket}/backups/operation-mm-DD-HH/` for its table dumps, and without supplying the target backups, it will not restore. Once it does a `--data-only` restoration, it will then sync the files from the backups bucket to the application bucket. We do this to ensure the contents of the application bucket are up to date, relative to the data in the database. We know that if we use the latest folder in `/backups/` then the contents of the s3 are the latest available, from the prior backup.
6868
```bash
69-
./fac-restore-util.sh v0.1.10 s3_restore scheduled-06-04-10
69+
./fac-restore-util.sh v0.1.11 s3_restore scheduled-06-04-10
7070
# Curl the utility
7171
# Install AWS
7272
# DB to S3 table dump (backups -> fac-db) [Truncate target table before --data-only pg_restore]
@@ -81,7 +81,7 @@ daily-mm-dd
8181

8282
2. Database to database restoration also can occur as well, using `psql` to dump the tables from the cold store database to the live database.
8383
```bash
84-
./fac-restore-util.sh v0.1.10 db_restore
84+
./fac-restore-util.sh v0.1.11 db_restore
8585
# Curl the utility
8686
# Install AWS
8787
# DB to DB table dump (fac-snapshot-db -> fac-db) [Truncate target table before dump]

docs/deleting-backups.md

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
### Informational document regarding Management Command "delete_stale_backups"
2+
3+
The purpose of this document is to highlight examples for when a developer wishes to delete stale backups from the s3 bucket `backups`.
4+
5+
**Warning:** This command is classified as a destructive command, and should only be run after receiving confirmation from members of the team, and after putting a formal announcement in the developer slack channel. It is advised to take a formal backup of the environment after this command is run, just as an extra precaution.
6+
7+
#### Information:
8+
The management command is located here: [delete_stale_backups.py](../backend/support/management/commands/delete_stale_backups.py). This command accepts two inputs. `--days` & `--delete`.
9+
- The value of `--days` must be greater than or equal to `14` (`--days 14`)
10+
- The value of `--delete` is required to actually perform the delete, and is a boolean (`--delete true`)
11+
- The full command to perform a delete will look like this:
12+
`python manage.py delete_stale_backups --days 14 --delete true`
13+
14+
#### How to perform a delete
15+
1. Login to cloud.gov `cf login -a api.fr.cloud.gov --sso`
16+
2. Select the target environment if you have not done so after successful authentication `cf t -s <env>`
17+
3. Open a new terminal and tail the logs `cf logs gsa-fac | grep "delete_stale_backups"`
18+
4. Run the command via tasks:
19+
`cf run-task gsa-fac -k 2G -m 3G --name delete_stale_backups --command "python manage.py delete_stale_backups --days 14 --delete true" --wait`
20+
5. Wait for the command to finish.
21+
6. Navigate to [The backup environment action](https://github.com/GSA-TTS/FAC/actions/workflows/fac-backup-util.yml) and perform a backup with the following inputs or alternatively, navigate to [the scheduled backup action](https://github.com/GSA-TTS/FAC/actions/workflows/fac-backup-scheduler.yml) and run.
22+
```sh
23+
branch: main
24+
environment: <env where backups were just deleted (dev/staging/prod)>
25+
version: v0.1.11
26+
operation: on_demand_backup
27+
```
28+
29+
#### Operation outputs examples (Fail):
30+
```
31+
~$ python manage.py delete_stale_backups --days 13
32+
Days cannot less than 14 to prevent up-to-date backups from being deleted. Exiting...
33+
~$
34+
35+
~$ python manage.py delete_stale_backups --days 0 --delete true
36+
Days cannot less than 14 to prevent up-to-date backups from being deleted. Exiting...
37+
~$
38+
39+
~$ python manage.py delete_stale_backups --days 14 --delete true
40+
Object backups/on-demand/02-04-13/public-audit_access.dump younger than 2025-01-22 18:44:02.406263+00:00. Not deleting.
41+
Object backups/on-demand/02-04-13/public-audit_deletedaccess.dump younger than 2025-01-22 18:44:02.406263+00:00. Not deleting.
42+
[...]
43+
```
44+
45+
#### Operation outputs example (Pass):
46+
```
47+
~$ python manage.py delete_stale_backups --days 14 --delete true
48+
49+
Deleting backups/on-demand/02-03-19/public-audit_access.dump last modified on 2025-01-22 18:44:02.406263+00:00
50+
Deleting backups/on-demand/02-03-19/public-audit_deletedaccess.dump last modified on 2025-01-22 18:44:02.406263+00:00
51+
[...]
52+
```
53+

terraform/sandbox/sandbox.tf

+11
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ module "sandbox" {
88
login_client_id = var.login_client_id
99
login_secret_key = var.login_secret_key
1010
branch_name = var.branch_name
11+
backups_s3_id = module.sandbox-backups-bucket.bucket_id
1112

1213
database_plan = "medium-gp-psql"
1314
https_proxy_instances = 1
@@ -17,3 +18,13 @@ module "sandbox" {
1718
}
1819
)
1920
}
21+
22+
module "sandbox-backups-bucket" {
23+
source = "github.com/gsa-tts/terraform-cloudgov//s3?ref=v1.1.0"
24+
25+
cf_org_name = var.cf_org_name
26+
cf_space_name = "sandbox"
27+
name = "backups"
28+
s3_plan_name = "basic"
29+
tags = ["s3"]
30+
}

terraform/shared/modules/app/app.tf

+4
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ resource "cloudfoundry_app" "fac_app" {
7474
service_instance = var.public_s3_id
7575
}
7676

77+
service_binding {
78+
service_instance = var.backups_s3_id
79+
}
80+
7781
service_binding {
7882
service_instance = var.db_id
7983
}

terraform/shared/modules/app/variables.tf

+5
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ variable "public_s3_id" {
4848
description = "the full string of the public s3 resource id"
4949
}
5050

51+
variable "backups_s3_id" {
52+
type = string
53+
description = "the full string of the backups s3 resource id"
54+
}
55+
5156
variable "db_id" {
5257
type = string
5358
description = "the full string of the core db resource id"

terraform/shared/modules/sandbox/app.tf

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ module "fac-app" {
1212
new_relic_creds_id = cloudfoundry_user_provided_service.credentials.id
1313
private_s3_id = module.s3-private.bucket_id
1414
public_s3_id = module.s3-public.bucket_id
15+
backups_s3_id = var.backups_s3_id
1516
db_id = module.database.instance_id
1617
backup_db_id = module.snapshot-database.instance_id
1718
app_instances = 1

0 commit comments

Comments
 (0)