forked from deedy/edgar
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdump_to_csv.py
More file actions
90 lines (77 loc) · 3.87 KB
/
dump_to_csv.py
File metadata and controls
90 lines (77 loc) · 3.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import sqlite3
import json
import csv
from typing import List, Dict
from schema import Executive, Education
def dump_to_csv(output_file: str = 'executives.csv'):
"""Dump executive data to CSV with expanded education fields"""
db_path = 'def14a_filings/filings.db'
with sqlite3.connect(db_path) as conn:
# Get all executive records with company names
rows = conn.execute("""
SELECT c.name as company_name, e.cik, e.filing_date, e.data
FROM executive_data e
JOIN companies c ON e.cik = c.cik
ORDER BY c.name, e.filing_date DESC
""").fetchall()
if not rows:
print("No executive data found")
return
# Define base fieldnames
fieldnames = [
'company_name', 'cik', 'filing_date',
'name', 'age', 'current_role',
'past_roles', 'compensation_salary',
'compensation_stock', 'compensation_bonus',
'compensation_other', 'compensation_total',
'compensation_year', 'start_date',
'board_member', 'committee_memberships',
'other_board_memberships', 'notable_achievements',
# Add education fields for up to 3 degrees
'education1_degree', 'education1_field', 'education1_university', 'education1_year',
'education2_degree', 'education2_field', 'education2_university', 'education2_year',
'education3_degree', 'education3_field', 'education3_university', 'education3_year'
]
with open(output_file, 'w', newline='', encoding='utf-8') as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
for company_name, cik, filing_date, data_json in rows:
data = json.loads(data_json)
# Convert lists to strings
past_roles = '; '.join(data.get('past_roles', []))
committee_memberships = '; '.join(data.get('committee_memberships', []))
other_board_memberships = '; '.join(data.get('other_board_memberships', []))
# Base row data
row = {
'company_name': company_name,
'cik': cik,
'filing_date': filing_date,
'name': data.get('name'),
'age': data.get('age'),
'current_role': data.get('current_role'),
'past_roles': past_roles,
'compensation_salary': data.get('compensation_salary'),
'compensation_stock': data.get('compensation_stock'),
'compensation_bonus': data.get('compensation_bonus'),
'compensation_other': data.get('compensation_other'),
'compensation_total': data.get('compensation_total'),
'compensation_year': data.get('compensation_year'),
'start_date': data.get('start_date'),
'board_member': data.get('board_member'),
'committee_memberships': committee_memberships,
'other_board_memberships': other_board_memberships,
'notable_achievements': data.get('notable_achievements')
}
# Add education data
education_list = data.get('education', [])
for i, edu in enumerate(education_list[:3], 1): # Limit to 3 degrees
row.update({
f'education{i}_degree': edu.get('degree'),
f'education{i}_field': edu.get('field'),
f'education{i}_university': edu.get('university'),
f'education{i}_year': edu.get('year')
})
writer.writerow(row)
print(f"Wrote {len(rows)} executives to {output_file}")
if __name__ == "__main__":
dump_to_csv()