Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for detecting 'ordered reported' and bills passed under suspension by rule #284

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 40 additions & 16 deletions tasks/bill_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,32 @@ def parse_bill_action(action_dict, prev_status, bill_id, title):
if new_status:
status = new_status

m = re.search(r"Pursuant to .* the following bills passed under suspension of the rules: (.*)\.$", line, re.I)
if m:
# The list should certainly include this bill, but was it passed "as amended"?
as_amended = None
bill_list = m.group(1)
bill_list = bill_list.replace("and the following resolution was agreed to under suspension of the rules: ", "")
bill_list = bill_list.replace("and the following resolutions were agreed to under suspension of the rules: ", "")
bill_list = bill_list.replace("and ", "")
bill_list = re.split(r"\s*(?:;|,(?! as amended))\s*", bill_list)
for bill_item in bill_list:
bill_item = bill_item.lower().replace(".", "").replace(" ", "").split(",")
if bill_item[0] == (bill_type + number):
as_amended = len(bill_item) > 1
if as_amended is None: raise ValueError("Did not find bill in list: " + line)

vote_type = "vote" if (bill_type[0] == "h") else "vote2"
pass_fail = "pass"
action["type"] = "vote"
action["vote_type"] = vote_type
action["how"] = "by special rule"
action["where"] = "h"
action["result"] = pass_fail
new_status = new_status_after_vote(vote_type, pass_fail == "pass", "h", bill_type, False, as_amended, title, prev_status)
if new_status:
status = new_status

# House motions to table adversely dispose of a pending matter, if agreed to. An agreed-to "motion to table the measure",
# which is very infrequent, kills the legislation. If not agreed to, nothing changes. So this regex only captures
# agreed-to motions to table.
Expand All @@ -860,7 +886,7 @@ def parse_bill_action(action_dict, prev_status, bill_id, title):

# In order to classify this as resulting in the same thing as regular failed vote on passage, new_status_after_vote
# needs to know if this was a vote in the originating chamber or not.
if prev_status == "INTRODUCED" or bill_id.startswith("hres"):
if prev_status in ("INTRODUCED", "REPORTED") or bill_id.startswith("hres"):
vote_type = "vote"
elif False:
vote_type = "vote2"
Expand Down Expand Up @@ -999,27 +1025,25 @@ def parse_bill_action(action_dict, prev_status, bill_id, title):
if new_status:
status = new_status

# PSEUDO-REPORTING (because GovTrack did this, but should be changed)

# TODO: Make a new status for this as pre-reported.
m = re.search(r"Placed on (the )?([\w ]+) Calendar( under ([\w ]+))?[,\.] Calendar No\. (\d+)\.|Committee Agreed to Seek Consideration Under Suspension of the Rules|Ordered to be Reported", line, re.I)
# Useless. But GovTrack has had it.
m = re.search(r"Placed on (the )?([\w ]+) Calendar( under ([\w ]+))?[,\.] Calendar No\. (\d+)\.", line, re.I)
if m != None:
# TODO: This makes no sense.
if prev_status in ("INTRODUCED", "REFERRED"):
status = "REPORTED"

action["type"] = "calendar"

# TODO: Useless. But good for GovTrack compatibility.
if m.group(2): # not 'Ordered to be Reported'
action["calendar"] = m.group(2)
action["under"] = m.group(4)
action["number"] = m.group(5)
action["calendar"] = m.group(2)
action["under"] = m.group(4)
action["number"] = m.group(5)

# COMMITTEE ACTIONS

# Ordered Reported (because GovTrack did this, but maybe should be changed to not combine with actual reported bills)
m = re.search(r"Ordered to be Reported|Committee Agreed to Seek Consideration Under Suspension of the Rules", line, re.I)
if m != None:
action["type"] = "ordered-reported"
if prev_status in ("INTRODUCED", "REFERRED"):
status = "REPORTED"

# reported
m = re.search(r"Committee on (.*)\. Reported by", line, re.I)
m = re.search(r"Committee on (.*)\. (Original measure )?[Rr]eported (to Senate )?by", line, re.I)
if m != None:
action["type"] = "reported"
action["committee"] = m.group(1)
Expand Down
78 changes: 57 additions & 21 deletions tasks/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ def run(options):
if bill_id:
to_fetch = bill_id.split(",")
else:
if options.get("matching_action_regex"):
options["matching_action_regex"] = re.compile(options["matching_action_regex"])

to_fetch = get_bills_to_process(options)

if not to_fetch:
Expand Down Expand Up @@ -64,6 +67,12 @@ def filter_ints(seq):
# Not an integer.
continue
congresses = sorted(filter_ints(os.listdir(get_data_path())))

# If we're reprocessing actions, start with the 93rd Congress.
# Before that we may have bill data from other sources that don't
# conform to the usual action parsing logic.
if options.get("reparse_actions"):
congresses = filter(lambda c : c >= 93, congresses)
else:
congresses = sorted([int(c) for c in options['congress'].split(',')])

Expand All @@ -74,28 +83,42 @@ def filter_ints(seq):

# walk through all bill types in that congress
# (sort by bill type so that we proceed in a stable order each run)

bill_types = [bill_type for bill_type in os.listdir(get_data_path(congress)) if not bill_type.startswith(".")]
path = get_data_path(congress)
if not os.path.exists(path): continue
bill_types = [bill_type for bill_type in os.listdir(path) if not bill_type.startswith(".")]

for bill_type in sorted(bill_types):

# walk through each bill in that congress and bill type
# (sort by bill number so that we proceed in a normal order)

bills = [bill for bill in os.listdir(get_data_path(congress, bill_type)) if not bill.startswith(".")]
path = get_data_path(congress, bill_type)
if not os.path.exists(path): continue
bills = [bill for bill in os.listdir(path) if not bill.startswith(".")]
for bill_type_and_number in sorted(
bills,
key = lambda x : int(x.replace(bill_type, ""))
):

bill_id = bill_type_and_number + "-" + congress

if options.get("matching_action_regex"):
# Include bills that have an action that matches a regular expression.
fn = get_data_path(congress, bill_type, bill_type_and_number, "data.json")
if os.path.exists(fn):
with open(fn) as f:
bill = json.load(f)
for action in bill['actions']:
if action.get('text') and options["matching_action_regex"].search(action['text']):
yield bill_id
continue # don't check modification dates

fn = get_data_path(congress, bill_type, bill_type_and_number, govinfo.FDSYS_BILLSTATUS_FILENAME)
if os.path.exists(fn):
# The GovInfo.gov bulk data file exists. Does our JSON data
# file need to be updated?
bulkfile_lastmod = utils.read(fn.replace(".xml", "-lastmod.txt"))
parse_lastmod = utils.read(get_data_path(congress, bill_type, bill_type_and_number, "data-fromfdsys-lastmod.txt"))
if bulkfile_lastmod != parse_lastmod or options.get("force"):
bill_id = bill_type_and_number + "-" + congress
yield bill_id

def process_bill(bill_id, options):
Expand All @@ -115,7 +138,10 @@ def process_bill(bill_id, options):
# Convert and write out data.json and data.xml.
utils.write(
json.dumps(bill_data, indent=2, sort_keys=True),
os.path.dirname(fdsys_xml_path) + '/data.json')
os.path.dirname(fdsys_xml_path) + '/data.json',
{
"diff": options.get("diff")
})

from bill_info import create_govtrack_xml
with open(os.path.dirname(fdsys_xml_path) + '/data.xml', 'wb') as xml_file:
Expand All @@ -128,7 +154,10 @@ def process_bill(bill_id, options):
# file under a new path.
utils.write(
utils.read(_path_to_billstatus_file(bill_id).replace(".xml", "-lastmod.txt")),
os.path.join(os.path.dirname(fdsys_xml_path), "data-fromfdsys-lastmod.txt"))
os.path.join(os.path.dirname(fdsys_xml_path), "data-fromfdsys-lastmod.txt"),
{
"diff": options.get("diff")
})

return {
"ok": True,
Expand Down Expand Up @@ -239,42 +268,50 @@ def process_amendments(bill_id, bill_amendments, options):
def reparse_actions(bill_id, options):
# Load an existing bill status JSON file.
data_json_fn = output_for_bill(bill_id, 'json')
if not os.path.exists(data_json_fn):
return {
"ok": True,
"saved": False,
"reason": "no file",
}
source = utils.read(data_json_fn)
bill_data = json.loads(source)

# Munge data.
from bill_info import parse_bill_action
title = bill_info.current_title_for(bill_data['titles'], 'official')
old_status = None
old_status = "INTRODUCED"
for action in bill_data['actions']:
new_action, new_status = parse_bill_action(action, old_status, bill_id, title)
if new_status:
old_status = new_status
action['status'] = new_status
elif 'status' in action:
del action['status']
# clear out deleted keys
for key in ('vote_type', 'how', 'where', 'result', 'roll', 'suspension', 'calendar', 'under', 'number', 'committee', 'pocket', 'law', 'congress'):
if key in action and key not in new_action:
del action['key']
del action[key]
action.update(new_action)

status, status_date = bill_info.latest_status(bill_data['actions'], bill_data['introduced_at'])
bill_data['status'] = status
bill_data['status_at'] = status_date

# Show user a diff on the console to accept changes.
def show_diff_ask_ok(source, revised, fn):
if source == revised: return False # nothing to do
def split_lines(s): return [l+"\n" for l in s.split("\n")]
import sys
from difflib import unified_diff
sys.stdout.writelines(unified_diff(split_lines(source), split_lines(revised), fromfile=fn, tofile=fn))
return input("Apply change? (y/n) ").strip() == "y"

wrote_any = False

if options.get("diff"):
confirmer = utils.show_diff_ask_ok
else:
# If no --diff is given, just check that
# the content hasn't changed --- don't bother
# writing out anything with identical content.
def confirmer(source, revised, fn):
return source != revised

# Write new data.json file.
revised = json.dumps(bill_data, indent=2, sort_keys=True)
if show_diff_ask_ok(source, revised, data_json_fn):
if confirmer(source, revised, data_json_fn):
utils.write(revised, data_json_fn)
wrote_any = True

Expand All @@ -284,7 +321,7 @@ def split_lines(s): return [l+"\n" for l in s.split("\n")]
with open(data_xml_fn, 'r') as xml_file:
source = xml_file.read()
revised = create_govtrack_xml(bill_data, options)
if show_diff_ask_ok(source, revised.decode("utf8"), data_xml_fn):
if confirmer(source, revised.decode("utf8"), data_xml_fn):
with open(data_xml_fn, 'wb') as xml_file:
xml_file.write(revised)
wrote_any = True
Expand All @@ -294,4 +331,3 @@ def split_lines(s): return [l+"\n" for l in s.split("\n")]
"saved": wrote_any,
"reason": "no changes or changes skipped by user",
}

40 changes: 24 additions & 16 deletions tasks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,31 +347,26 @@ def write(content, destination, options={}):
if options.get("diff"):
# Instead of writing the file, do a comparison with what's on disk
# to test any changes. But be nice and replace any update date with
# what's in the previous file so we avoid spurious changes. Use
# how updated_at appears in the JSON and in the XML.
# what's in the previous file so we avoid spurious changes in the
# diff. Use how updated_at appears in the JSON and in the XML.
if os.path.exists(destination):
with open(destination) as f:
existing_content = f.read()
source = f.read()
revised = content
for pattern in ('"updated_at": ".*?"', 'updated=".*?"'):
m1 = re.search(pattern, existing_content)
m2 = re.search(pattern, content)
m1 = re.search(pattern, source)
m2 = re.search(pattern, revised)
if m1 and m2:
content = content.replace(m2.group(0), m1.group(0))
revised = revised.replace(m2.group(0), m1.group(0))

# Avoid writing to disk and spawning `diff` by checking if
# the files match in memory.
if content == existing_content:
if revised == source:
return

# Shell `diff` and let it display output directly to the console.
# Write `content` to disk first so diff can see it. Maybe more
# efficient to pipe?
fn = "/tmp/congress-changed-file"
with open(fn, 'w') as f:
f.write(content)
os.system("diff -u %s %s" % (destination, fn))
os.unlink(fn)
return
if not show_diff_ask_ok(source, revised, destination):
# User cancelled save.
return

# Save the content to disk.
mkdir_p(os.path.dirname(destination))
Expand All @@ -382,6 +377,19 @@ def write(content, destination, options={}):
f.write(content)
f.close()


def show_diff_ask_ok(source, revised, fn):
    """Show a unified diff of a pending change and ask whether to apply it.

    Both texts are normalized the same way before comparison: any run of
    whitespace immediately preceding a newline is removed (this drops
    trailing spaces and, because the run may itself contain newlines, also
    collapses blank lines). If the normalized texts are equal, nothing is
    printed and no prompt is shown.

    Args:
        source: The existing text.
        revised: The proposed replacement text.
        fn: Label used for both the "from" and "to" file names in the diff
            header.

    Returns:
        False if the normalized texts are identical or the user declines;
        True if the user answers "y" or "yes" (case-insensitive, surrounding
        whitespace ignored).
    """
    import sys
    from difflib import unified_diff

    # Old files had trailing spaces; apply the identical normalization to
    # both sides so whitespace-only differences are not reported.
    source = re.sub(r"\s*\n", "\n", source)
    revised = re.sub(r"\s*\n", "\n", revised)
    if source == revised:
        return False  # nothing to do

    def split_lines(s):
        # unified_diff expects newline-terminated lines; re-append the
        # separator that str.split removed.
        return [line + "\n" for line in s.split("\n")]

    sys.stdout.writelines(
        unified_diff(
            split_lines(source),
            split_lines(revised),
            fromfile=fn,
            tofile=fn,
        )
    )

    # Accept "y"/"yes" in any letter case; previously only a lowercase "y"
    # counted, so "Y" silently discarded the change.
    answer = input("Apply change? (y/n) ").strip().lower()
    return answer in ("y", "yes")


def write_json(data, destination):
return write(
json.dumps(data,
Expand Down