Skip to content

Commit

Permalink
Add a diff option for bills to show file changes before writing updates to disk
Browse files Browse the repository at this point in the history
  • Loading branch information
JoshData committed May 14, 2022
1 parent cb52513 commit 27b1243
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 20 deletions.
23 changes: 19 additions & 4 deletions tasks/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,10 @@ def process_bill(bill_id, options):
# Convert and write out data.json and data.xml.
utils.write(
json.dumps(bill_data, indent=2, sort_keys=True),
os.path.dirname(fdsys_xml_path) + '/data.json')
os.path.dirname(fdsys_xml_path) + '/data.json',
{
"diff": options.get("diff")
})

from bill_info import create_govtrack_xml
with open(os.path.dirname(fdsys_xml_path) + '/data.xml', 'wb') as xml_file:
Expand All @@ -151,7 +154,10 @@ def process_bill(bill_id, options):
# file under a new path.
utils.write(
utils.read(_path_to_billstatus_file(bill_id).replace(".xml", "-lastmod.txt")),
os.path.join(os.path.dirname(fdsys_xml_path), "data-fromfdsys-lastmod.txt"))
os.path.join(os.path.dirname(fdsys_xml_path), "data-fromfdsys-lastmod.txt"),
{
"diff": options.get("diff")
})

return {
"ok": True,
Expand Down Expand Up @@ -294,9 +300,18 @@ def reparse_actions(bill_id, options):

wrote_any = False

if options.get("diff"):
confirmer = utils.show_diff_ask_ok
else:
# If no --diff is given, just check that
# the content hasn't changed --- don't bother
# writing out anything with identical content.
def confirmer(source, revised, fn):
return source != revised

# Write new data.json file.
revised = json.dumps(bill_data, indent=2, sort_keys=True)
if utils.show_diff_ask_ok(source, revised, data_json_fn):
if confirmer(source, revised, data_json_fn):
utils.write(revised, data_json_fn)
wrote_any = True

Expand All @@ -306,7 +321,7 @@ def reparse_actions(bill_id, options):
with open(data_xml_fn, 'r') as xml_file:
source = xml_file.read()
revised = create_govtrack_xml(bill_data, options)
if utils.show_diff_ask_ok(source, revised.decode("utf8"), data_xml_fn):
if confirmer(source, revised.decode("utf8"), data_xml_fn):
with open(data_xml_fn, 'wb') as xml_file:
xml_file.write(revised)
wrote_any = True
Expand Down
27 changes: 11 additions & 16 deletions tasks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,31 +347,26 @@ def write(content, destination, options={}):
if options.get("diff"):
# Instead of writing the file, do a comparison with what's on disk
# to test any changes. But be nice and replace any update date with
# what's in the previous file so we avoid spurious changes. Use
# how updated_at appears in the JSON and in the XML.
# what's in the previous file so we avoid spurious changes in the
# diff. Use how updated_at appears in the JSON and in the XML.
if os.path.exists(destination):
with open(destination) as f:
existing_content = f.read()
source = f.read()
revised = content
for pattern in ('"updated_at": ".*?"', 'updated=".*?"'):
m1 = re.search(pattern, existing_content)
m2 = re.search(pattern, content)
m1 = re.search(pattern, source)
m2 = re.search(pattern, revised)
if m1 and m2:
content = content.replace(m2.group(0), m1.group(0))
revised = revised.replace(m2.group(0), m1.group(0))

# Avoid writing to disk and spawning `diff` by checking if
# the files match in memory.
if content == existing_content:
if revised == source:
return

# Shell `diff` and let it display output directly to the console.
# Write `content` to disk first so diff can see it. Maybe more
# efficient to pipe?
fn = "/tmp/congress-changed-file"
with open(fn, 'w') as f:
f.write(content)
os.system("diff -u %s %s" % (destination, fn))
os.unlink(fn)
return
if not show_diff_ask_ok(source, revised, destination):
# User cancelled save.
return

# Save the content to disk.
mkdir_p(os.path.dirname(destination))
Expand Down

0 comments on commit 27b1243

Please sign in to comment.