-
Notifications
You must be signed in to change notification settings - Fork 0
/
money.py
165 lines (125 loc) · 5.32 KB
/
money.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
"""
Parses and processes ING statement csv files.
"""
import csv
import datetime
import glob
import re
import logging
class IngStatementParser(object):
    """Parse ING statement CSV exports into ``Statement`` objects.

    The export is read as a semicolon-delimited CSV file with a header row;
    ``parse`` looks rows up by the column names ``Bedrag``, ``Omschrijving``,
    ``Boekingsdatum``, ``Rekening tegenpartij``, ``Naam van de rekening``,
    ``Rekeningnummer``, ``Munteenheid``, ``Detail van de omzet`` and
    ``Bericht``.
    """

    def __init__(self):
        pass

    def parse(self, statement_file):
        """Yield one ``Statement`` for every row that carries an amount.

        Args:
            statement_file: Path of the semicolon-delimited CSV file to read.

        Yields:
            Statement: Built from the row's whitespace-normalised description,
            the amount (decimal comma converted to a float), the booking date
            parsed as ``dd/mm/YYYY``, the counterparty account, an ``Account``
            for the row's own account, the currency, and the
            whitespace-normalised details and message.

        Raises:
            KeyError: If one of the expected columns is missing from the file.
            ValueError: If the amount or the booking date cannot be parsed.
        """
        # newline='' is what the csv module documents for files handed to a
        # reader, so quoted fields with embedded newlines are parsed correctly.
        with open(statement_file, newline='') as csv_file:
            reader = csv.DictReader(csv_file, delimiter=';')
            for row in reader:
                # Compare by value, not identity: ``is not ''`` depends on
                # string interning and triggers a SyntaxWarning on modern
                # Python. Rows without an amount are skipped.
                if not row['Bedrag']:
                    continue

                raw_description = ' '.join(row['Omschrijving'].split())

                description = self.__strip_amount_from_detail(raw_description)
                description = self.__strip_date_from_detail(description)
                description = self.__strip_hour_from_detail(description)
                if self.__is_visa_statement(description):
                    description = 'VISA Payment'
                else:
                    parts = description.split('-')
                    description = parts[0]
                    if len(parts) > 2:
                        description = description + parts[2]
                    description_extra = ' '.join(parts[3:])
                    # Extracted but not passed on to the Statement yet.
                    recipient = self.__extract_pre_country_details(description_extra)
                    country = self.__extract_country_code(description_extra)

                # NOTE(review): the cleaned-up description computed above is
                # discarded here and the raw, whitespace-normalised text is
                # what ends up in the Statement. The placement of this reset
                # relative to the if/else above is assumed - confirm whether
                # the cleaned value (and recipient/country) should be used.
                description = raw_description

                yield Statement(
                    description,
                    float(row['Bedrag'].replace(',', '.')),
                    datetime.datetime.strptime(row['Boekingsdatum'], "%d/%m/%Y"),
                    row['Rekening tegenpartij'],
                    Account(row['Naam van de rekening'], row['Rekeningnummer']),
                    row['Munteenheid'],
                    ' '.join(row['Detail van de omzet'].split()),
                    ' '.join(row['Bericht'].split()),
                )

    @staticmethod
    def __extract_country_code(detail):
        """Return the first whitespace-delimited run of three capitals, or ''.

        Args:
            detail: Text to search.

        Returns:
            The three upper-case letters of the first match (without the
            surrounding whitespace), or an empty string when nothing matches.
        """
        match_country = re.search(r'\s([A-Z][A-Z][A-Z])\s', detail)
        if match_country:
            return match_country.group(1)
        return ''

    @staticmethod
    def __extract_pre_country_details(detail):
        """Return the part of *detail* that precedes the first country code.

        The whole of *detail* is returned when it contains no
        whitespace-delimited run of three capital letters.
        """
        return re.split(r'\s[A-Z][A-Z][A-Z]\s', detail, maxsplit=1)[0]

    @staticmethod
    def __strip_hour_from_detail(detail):
        """Replace every ``<digits><any char><digits> uur`` run with a space.

        The separator between the digit groups is an unescaped ``.`` and so
        matches any single character; presumably this is meant to cover both
        ``12.30 uur`` and ``12:30 uur`` - verify before tightening it.
        """
        return re.sub(r'[0-9]+.[0-9]+ uur', ' ', detail)

    @staticmethod
    def __strip_date_from_detail(detail):
        """Replace every ``<digits>/<digits>`` run in *detail* with a space."""
        return re.sub(r'[0-9]+/[0-9]+', ' ', detail)

    @staticmethod
    def __strip_amount_from_detail(detail):
        """Replace every ``- <digits>,<digits>`` run in *detail* with a space."""
        return re.sub(r'- [0-9]+,[0-9]+', ' ', detail)

    @staticmethod
    def __is_visa_statement(detail):
        """Return True when *detail* starts with the ``BCC-ING`` prefix."""
        return detail.startswith('BCC-ING')

    def __convert_visa_detail(self, detail):
        """Placeholder with no implementation yet; always returns ``None``."""
        pass
class StatementParserFactory(object):
    """Hands out statement parser instances selected by a type name."""

    def __init__(self):
        pass

    def create_parser(self, parser_type):
        """Return a new parser for *parser_type*.

        Args:
            parser_type: Name of the parser to build; only ``'ing'`` is known.

        Returns:
            A fresh ``IngStatementParser`` when *parser_type* is ``'ing'``.

        Raises:
            NotImplementedError: For every other *parser_type*.
        """
        # Reject unknown names first so the supported case reads straight through.
        if parser_type != 'ing':
            raise NotImplementedError('No such parser exists')
        return IngStatementParser()
class Account(object):
    """An account identified by a display name and an account number."""

    def __init__(self, name, number):
        self.name = name
        self.number = number

    def __str__(self):
        """Render the account as ``<name> - <number>``."""
        return '{} - {}'.format(self.name, self.number)
class Statement(object):
    """One statement line: what was booked, when, for whom and on which account."""

    def __init__(self, description, amount, timestamp, recipient, account, currency='Eur', details='', message=''):
        # Plain value holder: every argument is stored under its own name.
        self.description = description
        self.amount = amount
        self.timestamp = timestamp
        self.recipient = recipient
        self.account = account
        self.currency = currency
        self.details = details
        self.message = message

    def __str__(self):
        """Render the statement as pipe-separated columns.

        The amount is shown as an absolute value followed by the currency in
        parentheses; the remaining columns are recipient, description,
        timestamp, account, details and message, in that order.
        """
        columns = (
            '{} ({})'.format(abs(self.amount), self.currency),
            self.recipient,
            self.description,
            self.timestamp,
            self.account,
            self.details,
            self.message,
        )
        return ' | '.join(format(column) for column in columns)
def get_years(statements):
    """Return the set of distinct ``timestamp.year`` values in *statements*."""
    distinct_years = set()
    for entry in statements:
        distinct_years.add(entry.timestamp.year)
    return distinct_years
def get_months(statements):
    """Return the set of distinct ``timestamp.month`` values in *statements*."""
    distinct_months = set()
    for entry in statements:
        distinct_months.add(entry.timestamp.month)
    return distinct_months
def get_statements_grouped_per_year_and_month(statements, years, months):
    """Group *statements* into a ``{year: {month: [statement, ...]}}`` mapping.

    Every combination of *years* and *months* gets a bucket, even when no
    statement falls into it. Statements whose year or month is not among the
    requested ones are left out. Within a bucket the statements keep the
    order in which they appear in *statements*.

    Args:
        statements: Iterable of objects exposing ``timestamp.year`` and
            ``timestamp.month``. It is traversed exactly once, so a one-shot
            iterator or generator works as well as a list.
        years: Iterable of years to create buckets for.
        months: Iterable of months to create buckets for.

    Returns:
        dict: Nested ``{year: {month: list}}`` mapping.
    """
    # months is walked once per year, so pin it down in case it is an iterator.
    months = tuple(months)

    grouped_statements = {}
    for year in years:
        grouped_statements[year] = {}
        for month in months:
            logging.debug("Collecting statements for %s - %s", year, month)
            grouped_statements[year][month] = []

    # One pass over the statements instead of rescanning them for every
    # (year, month) pair.
    for statement in statements:
        timestamp = statement.timestamp
        bucket = grouped_statements.get(timestamp.year, {}).get(timestamp.month)
        if bucket is not None:
            bucket.append(statement)
    return grouped_statements
def main():
    """Parse every ``*.csv`` file in the working directory and log a sample.

    All files are read with the ``'ing'`` parser, the resulting statements
    are grouped per year and month, and the statements booked in January 2016
    are written to the root logger at DEBUG level.
    """
    logging.basicConfig(format='%(asctime)-15s %(funcName)s %(levelname)s %(message)s')
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    # One parser serves all files; nothing in it is tied to a single file.
    parser = StatementParserFactory().create_parser('ing')
    statements = []
    for csv_path in glob.glob("*.csv"):
        logger.info("Processing file %s", csv_path)
        statements.extend(parser.parse(csv_path))

    years = get_years(statements)
    months = get_months(statements)
    grouped_statements = get_statements_grouped_per_year_and_month(statements, years, months)

    # Buckets exist only for years/months that occur in the parsed files, so
    # look January 2016 up defensively instead of raising KeyError when the
    # data holds nothing for it.
    for statement in grouped_statements.get(2016, {}).get(1, []):
        logger.debug(statement)


if __name__ == '__main__':
    main()