-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSimpleLinearRegression.py
More file actions
101 lines (76 loc) · 2.74 KB
/
SimpleLinearRegression.py
File metadata and controls
101 lines (76 loc) · 2.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import pickle
class SimpleLinearRegression:
    """Univariate linear regression y = theta_0 + theta_1 * x fitted by gradient descent.

    Training min-max normalizes both x and y to [0, 1] for numeric stability,
    then denormalizes the learned parameters so ``predict`` works in the
    original units of the data.
    """

    def __init__(self):
        # theta_0: bias, theta_1: slope, r2_score: R^2 of the last fit.
        self.theta_0 = 0.0
        self.theta_1 = 0.0
        self.r2_score = 0.0

    def fit(self, x: np.ndarray, y: np.ndarray, learning_rate=0.1, iterations=10000, precision=1e-6, visualizer=False):
        """Fit theta_0 and theta_1 to (x, y) with batch gradient descent.

        Args:
            x: 1-D array of input values.
            y: 1-D array of target values, same length as ``x``.
            learning_rate: descent step size (applied on normalized data).
            iterations: maximum number of descent steps.
            precision: stop early once both parameter updates are below this.
            visualizer: if True, live-plot the fitted line with matplotlib.

        Raises:
            ValueError: if the lengths differ, or either array has no variance
                (min == max would make the normalization divide by zero).
        """
        if len(x) != len(y):
            raise ValueError("The dataset is invalid. Their length differ")
        norm_theta_0 = 0.0
        norm_theta_1 = 0.0
        # Min-max normalize so the fixed learning rate behaves consistently
        # regardless of the data's original scale.
        max_x = max(x)
        min_x = min(x)
        if max_x == min_x:
            raise ValueError("The dataset is invalid. The input data has no variance.")
        x = (x - min_x) / (max_x - min_x)
        max_y = max(y)
        min_y = min(y)
        if max_y == min_y:
            raise ValueError("The dataset is invalid. The output data has no variance.")
        y = (y - min_y) / (max_y - min_y)
        m = float(len(x))
        if visualizer:
            # One-time plot setup: data scatter plus the current fit line.
            x_line = np.array([min(x), max(x)])
            y_line = x_line * norm_theta_1 + norm_theta_0
            plt.ion()
            fig = plt.figure()
            ax = fig.add_subplot(111)
            ax.plot(x, y, 'bx')
            line, = ax.plot(x_line, y_line, 'r')
        for _ in range(iterations):
            # Skip redrawing once the user closes the figure window.
            if visualizer and plt.fignum_exists(fig.number):
                y_line = x_line * norm_theta_1 + norm_theta_0
                line.set_ydata(y_line)
                fig.canvas.draw()
                fig.canvas.flush_events()
            y_pred = norm_theta_0 + (norm_theta_1 * x)
            # MSE gradient; both parameters are updated simultaneously from
            # the same prediction, as gradient descent requires.
            tmp_theta_0 = learning_rate / m * np.sum(y_pred - y)
            tmp_theta_1 = learning_rate / m * np.sum((y_pred - y) * x)
            norm_theta_0 -= tmp_theta_0
            norm_theta_1 -= tmp_theta_1
            if abs(tmp_theta_0) < precision and abs(tmp_theta_1) < precision:
                break  # converged: both updates are negligible
        # R^2 on the normalized data; R^2 is invariant under the linear
        # rescaling used above, so this equals the score in original units.
        y_mean = y.mean()
        y_pred = norm_theta_0 + (norm_theta_1 * x)
        self.r2_score = 1 - (np.sum((y - y_pred) ** 2) / np.sum((y - y_mean) ** 2))
        # Map the parameters learned on normalized data back to original units.
        self.theta_1 = norm_theta_1 * (max_y - min_y) / (max_x - min_x)
        # Fixed: original had a duplicated "self.theta_0 = self.theta_0 =" assignment.
        self.theta_0 = norm_theta_0 * (max_y - min_y) + min_y - self.theta_1 * min_x

    def predict(self, x: float) -> float:
        """Return the predicted y for ``x`` in original (denormalized) units."""
        return self.theta_0 + (self.theta_1 * x)

    def import_coef(self, file_name: str = '.coefs.pkl'):
        """Load theta_0, theta_1 and r2_score from a pickle file (in that order).

        SECURITY NOTE: pickle can execute arbitrary code during load — only
        use this on coefficient files you created yourself.
        """
        try:
            with open(file_name, 'rb') as file:
                self.theta_0 = pickle.load(file)
                self.theta_1 = pickle.load(file)
                self.r2_score = pickle.load(file)
        # Narrowed from a bare except: only I/O and deserialization failures
        # are expected here; anything else (e.g. KeyboardInterrupt) propagates.
        except (OSError, pickle.UnpicklingError, EOFError):
            print("No coefficients found, please train the model first or provide a valid file.")

    def export_coef(self, file_name: str = '.coefs.pkl'):
        """Write theta_0, theta_1 and r2_score to a pickle file (in that order)."""
        try:
            with open(file_name, 'wb') as file:
                pickle.dump(self.theta_0, file)
                pickle.dump(self.theta_1, file)
                pickle.dump(self.r2_score, file)
        # Narrowed from a bare except: only file/serialization errors expected.
        except (OSError, pickle.PicklingError):
            print("Error: Could not export to file.")