Skip to content

Commit 38f6c43

Browse files
committed
Merge pull request #1 from mgoodfellow/hash-previous-backup-output
Write a hash file and handle updates etc.
2 parents a0ee263 + d8daa64 commit 38f6c43

7 files changed

Lines changed: 169 additions & 26 deletions

File tree

README.rst

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ It supports the following features:
1717
- Plan based backups
1818
- Custom command run pre-backup
1919
- Storing to S3
20+
- Calculating MD5 hashes of the backup set to avoid uploading duplicate backup sets
2021
- Emailing the result of the backup plans
2122
- Python standard logging framework
2223

@@ -54,6 +55,7 @@ file
5455
"AWS_REGION": "this is a region",
5556
"EMAIL_FROM": "[email protected]",
5657
"EMAIL_TO": "[email protected]",
58+
"HASH_CHECK_FILE": "plan_hashes.txt",
5759
"Plans": [
5860
{
5961
"Name": "MySQL Backup",
@@ -108,11 +110,34 @@ Run the backup tool using the following method:
108110
109111
s3backup.run_plans()
110112
111-
See ``test.py`` in the ``src`` folder for an example.
113+
See ``test.py`` for an example.
114+
115+
File Hashing
116+
------------
117+
118+
After a backup set is created, an MD5 hash is calculated for it. This is then compared against a previously calculated
119+
hash for that particular plan name.
120+
121+
**NOTE:** Do not edit the generated HASH_CHECK_FILE by hand!
122+
123+
Finally, be aware of a "gotcha" - the hashes are keyed on the *plan name* - therefore changing the plan name will
124+
cause the backup script to think it needs to upload a new backup set.
125+
126+
Emails
127+
------
128+
129+
An email will be sent after each plan runs. The email will either report a success or a failure. In the event
130+
of a success, it will be reported if there was a new uploaded backup set (and the file name), otherwise it will
131+
state that no changes were detected and no upload was made.
132+
133+
If there was a failure while running the backup, the exception message will be emailed, and the logs can be
134+
referred to for further information.
112135

113136
Future Improvements
114137
-------------------
115138

116139
These are some of the planned future improvements:
117140

118141
- Run multiple pre-backup commands (by providing an array)
142+
- Allow custom format strings for the output files (instead of the default date/time format)
143+
- Modification of the glob2 library to allow hidden files to be included

S3Backup/__init__.py

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
class S3BackupTool:
3333

34-
def __init__(self, config_file="config.json", log_file="s3backup.log"):
34+
def __init__(self, config_file="config.json"):
3535
logger.info('Initialising...')
3636

3737
try:
@@ -52,17 +52,42 @@ def run_plans(self):
5252
logger.info('Executing plan %d of %d', counter, len(self.PLANS))
5353

5454
try:
55-
plan.run()
56-
self.__send_status_email(plan, True)
55+
updated, output_file = plan.run()
56+
self.__send_success_email(plan, updated, output_file)
5757
except Exception, e:
5858
logger.error('Failed to run plan: %s', e)
59-
self.__send_status_email(plan, False, e)
59+
self.__send_failure_email(plan, e)
6060

6161
counter += 1
6262

6363
logger.info('Finished running backup plans')
6464

65-
def __send_status_email(self, plan, success, exception=None):
65+
def __send_success_email(self, plan, updated, output_file):
    # Report a successful plan run. The body states whether a new backup
    # set was actually uploaded or the previous one was still current.
    run_at = strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())

    if updated:
        outcome = 'The backup set had changed, so a new backup was uploaded: %s' % output_file
    else:
        outcome = 'The backup set had not changed. No new backup uploaded'

    body = 'The backup plan, %s, run at %s was SUCCESSFUL\n\n' % (plan.name, run_at) + outcome

    self.__send_status_email('[S3-Backup] [SUCCESS] - Plan: %s' % plan.name, body)
78+
79+
def __send_failure_email(self, plan, exception):
    # Report a failed plan run, including the exception text so the
    # recipient can triage without digging through the logs first.
    run_at = strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())

    body = 'The backup plan, %s, run at %s was a FAILURE\n\n' % (plan.name, run_at)
    body += '\n\nDetailed failure information:\n\n%s' % exception

    self.__send_status_email('[S3-Backup] [FAILURE] - Plan: %s' % plan.name, body)
89+
90+
def __send_status_email(self, subject, body):
6691
if self.CONFIGURATION['EMAIL_FROM'] is None or self.CONFIGURATION['EMAIL_TO'] is None:
6792
logger.debug('Email not provided, so status update not sent')
6893
return
@@ -72,25 +97,13 @@ def __send_status_email(self, plan, success, exception=None):
7297
aws_access_key_id=self.CONFIGURATION['AWS_KEY'],
7398
aws_secret_access_key=self.CONFIGURATION['AWS_SECRET'])
7499

75-
result = 'SUCCESS'
76-
if not success:
77-
result = 'FAILURE'
78-
79-
body = 'The backup plan, %s, run at %s was %s' % (
80-
plan.name,
81-
strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()),
82-
result)
83-
84-
if exception is not None:
85-
body += '\n\nDetailed failure information:\n\n%s' % exception
86-
87100
try:
88101
conn.send_email(
89102
self.CONFIGURATION['EMAIL_FROM'],
90-
'[S3-Backup] [%s] - Plan: %s' % (result, plan.name),
103+
subject,
91104
body,
92105
[self.CONFIGURATION['EMAIL_TO']])
93106
except Exception, e:
94-
logger.error('Failed to send email to {0:s} regarding plan: {1:s}'.format(self.CONFIGURATION['EMAIL_TO'],
95-
plan.name),
107+
logger.error('Failed to send email to {0:s} with subject {1:s}'.format(self.CONFIGURATION['EMAIL_TO'],
108+
subject),
96109
e)

S3Backup/config_loader.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
logger = logging.getLogger(name='config_loader')
3030

31-
required_root_values = ['AWS_KEY', 'AWS_SECRET', 'AWS_BUCKET', 'AWS_REGION', 'Plans']
31+
required_root_values = ['AWS_KEY', 'AWS_SECRET', 'AWS_BUCKET', 'AWS_REGION', 'HASH_CHECK_FILE', 'Plans']
3232
optional_root_values = ['EMAIL_FROM', 'EMAIL_TO']
3333

3434
def config_setup(config_file):
@@ -39,6 +39,7 @@ def config_setup(config_file):
3939
'AWS_SECRET': '',
4040
'AWS_BUCKET': '',
4141
'AWS_REGION': '',
42+
'HASH_CHECK_FILE': '',
4243
'EMAIL_FROM': None,
4344
'EMAIL_TO': None
4445
}

S3Backup/hash_file.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import hashlib
2+
import os
3+
from shutil import move
4+
from tempfile import mkstemp
5+
6+
BLOCKSIZE=65535
7+
8+
def find_hash(hash_file, plan_name):
    """Return the stored hash for *plan_name* from *hash_file*, or None.

    The hash file is a plain-text file of ``name=hash`` lines, one per
    plan. Returns None when the file does not exist or the plan has no
    entry (including when a line is malformed).
    """
    filename = os.path.normpath(hash_file)
    if os.path.isfile(filename):
        # Use a context manager so the handle is closed deterministically
        # (the original left an unreferenced open file to the GC).
        with open(filename, 'r') as plan_hashes:
            for line in plan_hashes:
                parts = line.strip().split('=')
                if len(parts) == 2 and parts[0] == plan_name:
                    return parts[1]

    return None
19+
20+
def update_hash(hash_file, plan_name, hash_value):
    """Create or update the ``plan_name=hash_value`` entry in *hash_file*.

    If the hash file does not exist it is created with the single entry.
    Otherwise the file is rebuilt through a temporary file so the entry
    for *plan_name* is replaced (or appended if absent) while all other
    plans' entries are preserved.
    """
    filename = os.path.normpath(hash_file)

    # Shortcut: no existing file, just write the single entry.
    if not os.path.isfile(filename):
        with open(filename, 'w') as new_file:
            new_file.write('%s=%s\n' % (plan_name, hash_value))
        return

    # Otherwise rebuild the file via a temp file so a crash mid-write
    # cannot corrupt the existing hash file.
    fh, abs_path = mkstemp()
    try:
        is_written = False

        # Wrap the mkstemp descriptor directly instead of opening the
        # temp path a second time (the original held the raw descriptor
        # open until the end of the function, and leaked it on error).
        with os.fdopen(fh, 'w') as new_file:
            with open(filename, 'r') as old_file:
                for line in old_file:
                    parts = line.strip().split('=')
                    if parts[0] == plan_name:
                        # Replace the existing entry for this plan.
                        is_written = True
                        new_file.write('%s=%s\n' % (plan_name, hash_value))
                    else:
                        new_file.write(line)

            # The plan had no entry yet - append it.
            if not is_written:
                new_file.write('%s=%s\n' % (plan_name, hash_value))

        # Swap the rebuilt file into place (consistently using the
        # normalised path, unlike the original's mixed usage).
        os.remove(filename)
        move(abs_path, filename)
    except Exception:
        # Don't leave the temporary file behind on failure.
        if os.path.exists(abs_path):
            os.remove(abs_path)
        raise
56+
57+
def calc_hash(filename, blocksize=65535):
    """Return the hex MD5 digest of *filename*.

    The file is read in chunks of *blocksize* bytes (default matches the
    module-level BLOCKSIZE) so large backup sets never need to fit in
    memory. The resulting digest is independent of the chunk size.
    """
    hasher = hashlib.md5()
    with open(filename, 'rb') as afile:
        buf = afile.read(blocksize)
        while len(buf) > 0:
            hasher.update(buf)
            buf = afile.read(blocksize)
    return hasher.hexdigest()

S3Backup/plan.py

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from zipfile import ZipFile
3030
import time
3131
import boto.ses
32+
from S3Backup import hash_file
3233

3334
required_plan_values = ['Name', 'Src', 'OutputPrefix']
3435
optional_plan_values = ['Command']
@@ -52,6 +53,8 @@ def __init__(self, raw_plan, configuration):
5253
self.command = None
5354
self.output_file = '%s_%s.zip' % (raw_plan['OutputPrefix'], time.strftime("%Y-%m-%d_%H-%M-%S"))
5455

56+
self.new_hash = None
57+
5558
if 'Command' in raw_plan:
5659
self.command = raw_plan['Command']
5760

@@ -63,7 +66,9 @@ def run(self):
6366
The plan is run in the following order:
6467
1) (if applicable) Run the external command provided
6568
2) Zip source file(s) to destination file
66-
3) Upload destination file to S3 bucket
69+
3) Perform hash check to see if there are any changes (which would require an upload)
70+
4) Upload destination file to S3 bucket
71+
5) Update hash file with new hash
6772
"""
6873
logger.info('Running plan "%s"', self.name)
6974

@@ -74,12 +79,24 @@ def run(self):
7479
# 2) Zip the source file to the destination file
7580
self.__zip_files()
7681

77-
# 3) Upload destination file to S3 bucket
82+
updated = False
83+
7884
try:
79-
self.__upload()
85+
# 3) Perform hash check to see if there are any changes (which would require an upload)
86+
if not self.__hash_check():
87+
# 4) Upload destination file to S3 bucket
88+
self.__upload()
89+
90+
# 5) Update hash file with new hash
91+
self.__update_hash()
92+
93+
updated = True
94+
8095
finally:
8196
self.__cleanup()
8297

98+
return updated, self.output_file
99+
83100
def __run_command(self):
84101
logger.info('Executing custom command...')
85102

@@ -137,6 +154,27 @@ def __upload(self):
137154
logger.error('Failed to upload backup file to S3: %s', e)
138155
raise
139156

157+
def __hash_check(self):
    # Compare the hash of the freshly-built backup set against the hash
    # recorded for this plan on a previous run. True means nothing
    # changed, so no upload is required. Side effect: stores the fresh
    # hash in self.new_hash for __update_hash to persist later.
    previous_hash = hash_file.find_hash(self.CONFIGURATION['HASH_CHECK_FILE'], self.name)

    if previous_hash is None:
        logger.debug('No previous hash found for plan %s', self.name)
    else:
        logger.debug('Got a previous hash for plan %s of %s', self.name, previous_hash)

    self.new_hash = hash_file.calc_hash(self.output_file)
    logger.debug('New hash for plan %s of %s', self.name, self.new_hash)

    return previous_hash == self.new_hash
170+
171+
def __update_hash(self):
    # Persist the hash computed by __hash_check. If no hash was computed
    # there is nothing to record, so log and bail out.
    if self.new_hash is None:
        logger.error('Could not update hash as no hash was found')
        return

    hash_file.update_hash(self.CONFIGURATION['HASH_CHECK_FILE'], self.name, self.new_hash)
177+
140178
def __cleanup(self):
141179
logger.info('Cleaning up temporary file: %s', self.output_file)
142180
try:

TODO.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Features to be added
22

33
1. Ability to choose custom format strings on output file (rather than automatically appending date/time)
4-
2. Ability to run multiple commands
4+
2. Ability to run multiple commands
5+
3. Modify the glob2 library to support hidden files

config.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"AWS_REGION": "this is a region",
66
"EMAIL_FROM": "[email protected]",
77
"EMAIL_TO": "[email protected]",
8+
"HASH_CHECK_FILE": "plan_hashes.txt",
89
"Plans": [
910
{
1011
"Name": "MySQL Backup",

0 commit comments

Comments
 (0)