forked from bradysalz/mint-sankey
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsankey_gen.py
334 lines (262 loc) · 10.1 KB
/
sankey_gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
#!/usr/bin/env python3.6
"""Sankey diagram generator for budgeting 'fun'
See the README for more info.
"""
import csv
from datetime import datetime
import typing
from enum import Enum
from typing import Dict, List
import toml
from transaction import Transaction
class TransactionType(Enum):
    """Which kind of transaction a filter should keep.

    DEBIT keeps only debits, CREDIT keeps only credits, and BOTH disables
    the debit/credit check altogether.
    """
    # Plain strings on purpose: a trailing comma after the literal would
    # silently turn the member value into a one-element tuple.
    DEBIT = "debit"
    CREDIT = "credit"
    BOTH = ""
def parse_csv(fname: str) -> List[Transaction]:
    """Parse a CSV file into a list of transactions

    The first row is treated as a header and discarded. The file is read
    as ISO-8859-1.

    Args:
        fname: filename

    Returns:
        Each data row as a Transaction stored in a list; an empty list if
        the file is empty or holds nothing but the header row
    """
    transactions = []
    # newline='' lets the csv module do its own newline handling, which is
    # what the csv documentation asks for when opening files for a reader.
    with open(fname, 'r', encoding='ISO-8859-1', newline='') as csvfile:
        csv_reader = csv.reader(csvfile)
        # Skip the header row; the default keeps a completely empty file
        # from raising StopIteration.
        next(csv_reader, None)
        for row in csv_reader:
            t = Transaction()
            t.load_from_csv(row)
            transactions.append(t)
    return transactions
def add_paystub(f: typing.IO,
                earnings: float,
                pretax_vals: Dict,
                *,
                scale: float = 2,
                use_percent: bool = False) -> int:
    """Write the income and pretax SankeyMatic lines for a paystub

    Each emitted line has the form:
        {Source} [{Amount}] {Type}

    Args:
        f: output file
        earnings: earnings figure the pretax items are subtracted from
        pretax_vals: dictionary mapping each pretax item to its value
        scale: factor applied to earnings and to every pretax value
        use_percent: write percentages of earnings instead of absolute vals

    Returns:
        scaled earnings left after all scaled pretax items, as an int
    """
    remaining = earnings * scale

    opening_amount = int(100) if use_percent else int(remaining)
    f.write(f'Spending [{opening_amount}] Wages\n')

    # Ascending sort walked backwards: largest item first, and items with
    # equal values come out in the reverse of their dictionary order.
    ascending = sorted(pretax_vals.items(), key=lambda item: item[1])
    for item_name, item_value in reversed(ascending):
        if use_percent:
            shown = int(100 * item_value / earnings)
        else:
            shown = int(item_value * scale)
        f.write(f'Wages [{shown}] {item_name}\n')
        remaining -= item_value * scale

    # In percent mode the scale factor is divided back out again.
    closing_amount = (int(100 * remaining / earnings / scale)
                      if use_percent else int(remaining))
    f.write(f'Wages [{closing_amount}] Total Income\n')

    return int(remaining)
def filter_transactions(transactions: List[Transaction], start_date: datetime,
                        end_date: datetime, vendors: List[str],
                        categories: List[str],
                        use_labels: bool,
                        transaction_type: TransactionType = TransactionType.DEBIT) -> List[Transaction]:
    """Drop transactions by date window, vendor, category and type

    Args:
        transactions: list of all transactions
        start_date: transactions dated before this are dropped
        end_date: transactions dated after this are dropped
        vendors: transactions from any of these vendors are dropped
        categories: transactions in any of these categories are dropped
        use_labels: also drop a transaction whose label is in categories
        transaction_type: DEBIT keeps only debits, CREDIT keeps only
            credits, BOTH keeps either

    Returns:
        The transactions that survive every check, in their input order
    """

    def _is_kept(txn) -> bool:
        # Both ends of the date window are inclusive.
        if txn.date < start_date or txn.date > end_date:
            return False
        if txn.vendor in vendors:
            return False
        if use_labels and txn.label in categories:
            return False
        if txn.category in categories:
            return False
        # Neither test below can fire for BOTH, so it passes straight through.
        if transaction_type is TransactionType.DEBIT and not txn.debit:
            return False
        if transaction_type is TransactionType.CREDIT and txn.debit:
            return False
        return True

    return [txn for txn in transactions if _is_kept(txn)]
def summarize_transactions(transactions: List[Transaction], use_labels: bool,
                           threshold: int) -> Dict[str, int]:
    """Bundle transactions up by category and calculate total amount per

    Debits add their amount to the category total; everything else
    subtracts it, so a category made up of credits ends up negative.

    Args:
        transactions: list of all transactions
        use_labels: if True, uses labels instead of categories if they exist
        threshold: minimum amount for a category
            a category whose total is strictly between 0 and the threshold
            is folded into "Misc"; zero and negative totals are left alone

    Returns:
        dict of category name, category value pairs
    """
    category_sums = {}
    for t in transactions:
        if use_labels and t.label != '':
            category = t.label
        else:
            category = t.category
        amount = t.amount if t.debit else 0 - t.amount
        category_sums[category] = category_sums.get(category, 0) + amount

    misc_amt = 0
    # Iterate over a snapshot of the keys because entries are popped.
    for name in list(category_sums):
        if 0 < category_sums[name] < threshold:
            misc_amt += category_sums.pop(name)

    if misc_amt:
        # A real "Misc" category that stayed above the threshold (or went
        # negative) is still in the dict: add to it instead of replacing it.
        category_sums['Misc'] = category_sums.get('Misc', 0) + misc_amt

    return category_sums
def add_income_transactions(f: typing.IO, transactions: List[Transaction],
                            config: Dict) -> int:
    """Total the credit transactions that fall inside the configured period

    Args:
        f: output file (nothing is written to it at present; the
            per-category income lines are disabled)
        transactions: list of all transactions
        config: config file

    Returns:
        the negated sum of the summarized credit categories
    """
    date_format = '%m/%d/%Y'
    period_start = datetime.strptime(config['time']['start_date'], date_format)
    period_end = datetime.strptime(config['time']['end_date'], date_format)

    settings = config['transactions']
    credits_only = filter_transactions(
        transactions=transactions,
        start_date=period_start,
        end_date=period_end,
        vendors=settings['ignore_vendors'],
        categories=settings['ignore_categories'],
        use_labels=settings['prefer_labels'],
        transaction_type=TransactionType.CREDIT)

    per_category = summarize_transactions(
        transactions=credits_only,
        use_labels=settings['prefer_labels'],
        threshold=settings['category_threshold'])

    # Credits are summarized as negative amounts, so flip the sign.
    return 0 - sum(per_category.values())
def _build_flows(summed_categories: Dict, category_groups: Dict) -> Dict:
    """Turn per-category totals into a {(source, target): amount} table."""
    # Largest amount first. Sort-then-reverse (rather than reverse=True)
    # keeps the ordering of equal amounts the same as before.
    ranked = sorted(summed_categories.items(), key=lambda kv: kv[1])
    ranked.reverse()

    flows = {}
    grouped = set()
    expenditure = 0

    for group, members in category_groups.items():
        group_total = 0
        for name, value in ranked:
            # A category named exactly like the group counts towards it.
            if name == group:
                group_total += value
                grouped.add(name)
            for member in members:
                if member == name:
                    grouped.add(name)
                    group_total += value
                    flows[group, member] = value
        # Always account for the group: its categories are marked as
        # grouped above, so skipping a net-negative group would drop their
        # amounts from the totals entirely.
        expenditure += group_total
        if group_total != 0:
            flows['Total Income', group] = group_total

    for name, value in ranked:
        if name in grouped:
            continue
        flows['Total Income', name] = value
        expenditure += value

    savings = 0 - expenditure
    if savings < 0:
        flows['From Savings', 'Total Income'] = 0 - savings
    else:
        flows['Total Income', 'To Savings'] = savings
    return flows


def add_transactions(f: typing.IO, transactions: List[Transaction],
                     take_home: int, config: Dict):
    """Generate SankeyMatic strings from filtered transactions

    Debits and credits inside the configured period are summarized per
    category, rolled up into the groups listed under config['categories'],
    and written one flow per line as `{source} [{amount}] {target}`.
    Negative amounts are written with source and target swapped so the
    printed amount is positive.

    Args:
        f: output file
        transactions: list of all transactions
        take_home: total take home pay for the period (currently unused;
            kept so existing callers keep working)
        config: config file
    """
    start_date = datetime.strptime(config['time']['start_date'], '%m/%d/%Y')
    end_date = datetime.strptime(config['time']['end_date'], '%m/%d/%Y')
    category_groups = config['categories']

    filt_trans = filter_transactions(
        transactions=transactions,
        start_date=start_date,
        end_date=end_date,
        vendors=config['transactions']['ignore_vendors'],
        categories=config['transactions']['ignore_categories'],
        use_labels=config['transactions']['prefer_labels'],
        transaction_type=TransactionType.BOTH)

    summed_categories = summarize_transactions(
        transactions=filt_trans,
        use_labels=config['transactions']['prefer_labels'],
        threshold=config['transactions']['category_threshold'])

    flows = _build_flows(summed_categories, category_groups)

    ordered = sorted(flows.items(), key=lambda kv: kv[1])
    ordered.reverse()
    for (source, target), value in ordered:
        # Zero stays in the forward direction so an exactly balanced
        # period reads "Total Income [0] To Savings", not the reverse.
        if value >= 0:
            f.write(f'{source} [{value}] {target}\n')
        else:
            f.write(f'{target} [{0 - value}] {source}\n')
def main(*, config_file: str = None):
    """Generate the SankeyMatic-formatted data

    Args:
        config_file: path of the TOML config to load. When omitted,
            'config.toml' is used, falling back to 'config-sample.toml'
            if that file cannot be opened.
    """
    if config_file:
        config_handle = open(config_file, 'r')
    else:
        try:
            config_handle = open('config.toml', 'r')
        except IOError:
            config_handle = open('config-sample.toml', 'r')
    # Close the handle even if the TOML fails to parse.
    with config_handle:
        config = toml.load(config_handle)

    paths = config['paths']
    if paths['use_custom_input']:
        transactions = parse_csv(paths['input_file'])
    else:
        transactions = parse_csv('data/transactions.csv')

    if paths['use_custom_output']:
        fname = paths['output_path']
    else:
        fname = 'output.txt'

    # The paystub-based flow (add_paystub) is not used here; the take-home
    # figure is derived from the credit transactions instead.
    with open(fname, 'w') as output_file:
        take_home = add_income_transactions(output_file, transactions, config)
        add_transactions(output_file, transactions, take_home, config)


if __name__ == "__main__":
    main()