-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPR_Senators.py
144 lines (116 loc) · 4.13 KB
/
PR_Senators.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import numpy as np
import pandas as pd
import RandomSVD as SVD
import matplotlib.pyplot as plt
# Plan:
# check for string "Fecha", which appears right after each senator's name
# For each senator make a row
# For each row, scan the lines until we find one that is all digits (a bill number),
# then associate the next vote line with that bill's column.
# Then give each type of vote a number
# -1 for en contra, 1 for a favor, 0 otherwise
def vote_to_num(text):
    """Convert a vote label to a numeric score.

    Args:
        text: The vote label as read from the roll-call file, e.g.
            "A favor", "En contra", "Ausente" or "Abstenido".

    Returns:
        1 for "A favor", -1 for "En contra", and 0 for anything else.
    """
    # read_txt recognises vote lines by substring match and only strips
    # CR/LF, so the label can arrive with surrounding spaces or tabs; an
    # exact comparison would then silently score a real vote as 0.
    label = text.strip() if isinstance(text, str) else text
    if label == "A favor":
        return 1
    if label == "En contra":
        return -1
    return 0
# Removes blank lines from the txt file.
# Context managers guarantee both handles are closed even if reading or
# writing fails, and `lines` stays a reusable list rather than a one-shot
# filter iterator that is empty after the first pass.
with open("PRSenate1.txt", "r") as f:
    lines = [line for line in f if not line.isspace()]
with open("noblanks.txt", "w") as f:
    f.write("".join(lines))
# Makes a list of senators.
# A senator's name is the line immediately before each line containing
# "Fecha". Every entry is stored as [[name]]: the inner list is the
# senator's header (the party is added to it further down) and vote
# records are later appended to the outer list.
senators = []
previous = ""
with open("noblanks.txt", "r") as f:
    for line in f:
        line = line.removesuffix("\n")
        if "Fecha" in line:
            senators.append([[previous.removeprefix("Votante : ")]])
        # Remember this line so it can serve as the name if the next one
        # turns out to be a "Fecha" line.
        previous = line
# Adding political party to each senator.
# Party membership is keyed by the senator's position in `senators`
# (i.e. the order in which the names were found in the file).
_PARTY_GROUPS = (
    ("PNP", (0, 2, 3, 6, 7, 9, 10, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 28, 29)),
    ("PPD", (1, 5, 8, 11, 17, 25, 26)),
    ("PIP", (4,)),
    ("indep.", (27,)),
)
_party_of = {idx: party for party, members in _PARTY_GROUPS for idx in members}
for i, senator in enumerate(senators):
    # Positions without a listed party are left untouched.
    if i in _party_of:
        senator[0] = senator[0] + [_party_of[i]]
# NOTE: this binds a second name to the same list object; it is not a copy.
senators_ = senators
print(senators_)
# Party label of every senator, in the same order as `senators`.
parties = [senator[0][1] for senator in senators]
votes_per_sen = []
# records each senators vote for each bill
def read_txt(x, y):
    """Build the senator-by-bill vote matrix from PRSenate<x>.txt .. PRSenate<y-1>.txt.

    For every senator the files are scanned from the first line containing
    the senator's name up to the next line containing "Votante". Within
    that section each all-digit line is queued as a bill number, and each
    line containing "Ausente", "A favor", "En contra" or "Abstenido" is
    paired with the oldest queued bill number and scored by vote_to_num.

    Args:
        x: First file number to read (inclusive).
        y: File number to stop at (exclusive).

    Returns:
        A float numpy array with one row per senator (in the order of the
        module-level senator list) and one column per distinct bill number
        (in order of first appearance). Entries are the vote scores; bills
        with no recorded vote for a senator stay 0.

    Side effects:
        Prints the collected per-senator records, the labelled DataFrame
        and the matrix shape.
    """
    # Work on a fresh copy of the senator headers. Binding the module-level
    # list directly would append votes to the shared records, so a second
    # call would silently include the votes gathered by the first.
    senators = [[list(entry[0])] for entry in senators_]
    vote_markers = ("Ausente", "A favor", "En contra", "Abstenido")

    for file in range(x, y):
        # Read each file once instead of reopening it for every senator.
        with open("PRSenate" + str(file) + ".txt", "r") as f:
            file_lines = [raw.replace("\r", "").replace("\n", "") for raw in f]

        for record in senators:
            sen_name = record[0][0]
            pending_bills = []
            passed_senator = False
            for l in file_lines:
                # The next "Votante" line after this senator's own section
                # marks the start of another senator: stop scanning.
                if passed_senator and "Votante" in l:
                    break
                if sen_name in l:
                    passed_senator = True
                if not passed_senator:
                    continue
                if l.isnumeric():
                    pending_bills.append(l)
                # A vote line is matched with the oldest unmatched bill number.
                if pending_bills and any(marker in l for marker in vote_markers):
                    record.append([pending_bills.pop(0), vote_to_num(l)])
    print(senators)

    # Figuring out the numbers of the distinct bill numbers:
    # map each bill number to a column index in order of first appearance.
    billNums = {}
    for record in senators:
        for bill, _vote in record[1:]:
            billNums.setdefault(bill, len(billNums))

    # Creating the matrix/table, sized from the actual senator count
    # rather than a hard-coded 30.
    myData = np.zeros([len(senators), len(billNums)])
    for row, record in enumerate(senators):
        for bill, vote in record[1:]:
            myData[row, billNums[bill]] = vote

    # Label rows with senator names and columns with bill numbers in one
    # step instead of renaming one label at a time.
    df = pd.DataFrame(
        myData,
        index=[record[0][0] for record in senators],
        columns=list(billNums),
    )
    print(df)
    print(np.shape(myData))
    return myData
# Plots error of truncated SVD for each rank.
def plot_err(year, d):
    """Plot the relative error of low-rank approximations of ``d``.

    For k = 1 .. rank(d) - 1 the matrix is factored with
    ``SVD.rsvd(d, k, 5, 1)``, rebuilt as ``U @ diag(S) @ V``, and the
    relative error ``norm(d - Ak) / norm(d)`` is stored at index k - 1.
    The final entry (for k = rank(d)) is never computed and stays 0.

    Args:
        year: Label used in the plot title ("Error <year>").
        d: 2-D numeric array to approximate.

    Returns:
        None. The error curve is shown with matplotlib.
    """
    # matrix_rank and norm(d) do not change inside the loop; compute once.
    rank = np.linalg.matrix_rank(d)
    d_norm = np.linalg.norm(d)
    error = np.zeros([rank, 1])
    for k in range(1, rank):
        # NOTE(review): the meaning of the trailing 5 and 1 is defined by
        # RandomSVD.rsvd, which is not visible here - confirm there.
        U, S, V = SVD.rsvd(d, k, 5, 1)
        # V is multiplied without a transpose, as in the original code.
        Ak = U @ np.diag(S) @ V
        error[k - 1] = np.linalg.norm(d - Ak) / d_norm
    plt.plot(error)
    plt.title("Error " + str(year))
    plt.show()
# returns U and Sigma
def u_sig(d):
    """Return the first two factors of ``SVD.rsvd`` at the full rank of ``d``.

    Args:
        d: 2-D numeric array to factor.

    Returns:
        A two-element list ``[U, S]`` taken from the three values returned
        by ``SVD.rsvd(d, rank, 5, 1)``; the third value is discarded.
    """
    full_rank = np.linalg.matrix_rank(d)
    left, sigma, _ = SVD.rsvd(d, full_rank, 5, 1)
    return [left, sigma]