# LINEAR REGRESSION
This project forecasts electricity demand (load) in Germany using renewable energy data (solar and wind). Two forecasting techniques were implemented: Linear Regression and an LSTM-based Recurrent Neural Network (RNN). Pearson and Spearman correlation coefficients were computed to quantify the relationships between renewable generation and load demand.
- Python
- Spyder IDE
- Pandas
- NumPy
- Matplotlib
- Scikit-learn
- TensorFlow / Keras
- Data preprocessing and cleaning
- Time-series forecasting using Linear Regression
- Deep learning forecasting using LSTM
- Model evaluation using prediction errors
- Correlation analysis between load, solar, and wind energy data
The LSTM model showed better forecasting performance compared to Linear Regression due to its ability to capture temporal patterns in time-series data.
Disha Harwalkar
M.Sc. Electrical & Electronic Engineering
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as seabornInstance
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn import metrics
from sklearn.preprocessing import scale
from collections import Counter
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
# File name the scripts were originally written against; resolved relative to
# the current working directory.
DEFAULT_CSV_PATH = (
    'time_series - 60min (5) - time_series - 60min (5) - '
    'time_series - 60min (5) - time_series - 60min (5).csv'
)


def import_data(path=DEFAULT_CSV_PATH):
    """Load the hourly time-series CSV into a DataFrame.

    Args:
        path: Location of the CSV file. Defaults to ``DEFAULT_CSV_PATH`` so
            existing zero-argument calls keep reading the same file.

    Returns:
        The file's contents as a ``pandas.DataFrame``, unmodified.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    return pd.read_csv(path)
# --- Load and prepare the series --------------------------------------------
result = import_data()

# Parse the timestamps once; the parsed column is what the series is indexed by.
result['Datee'] = pd.to_datetime(result['Date'])

data = result.loc[:, ['Dataa']]
# Index by the parsed timestamps rather than the raw 'Date' strings so the
# plot gets a real time axis. The index keeps the name 'Date'.
data = data.set_index(result['Datee'].rename('Date'))
# Anything that cannot be parsed as a number becomes NaN.
data['Dataa'] = pd.to_numeric(data['Dataa'], downcast='float', errors='coerce')

# Create the sized figure first and draw into it. Calling plt.figure() after
# data.plot() would only open a second, empty figure.
fig, ax = plt.subplots(figsize=(15, 10))
data.plot(ax=ax)
fig.tight_layout()

# --- Scale the target to [0, 1] ---------------------------------------------
values = data['Dataa'].values.reshape(-1, 1)
values = values.astype('float32')
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)
normalized = pd.DataFrame({'normalized': scaled.flatten()})

# --- Fit a linear regression of the scaled series on 'time' -----------------
# NOTE(review): assumes the CSV has a numeric 'time' column - confirm.
X = result['time'].values.reshape(-1, 1)
y = normalized['normalized'].values.reshape(-1, 1)

# The 'coerce' step above can introduce NaN targets, which LinearRegression
# rejects. Drop incomplete rows from both arrays so they stay aligned.
valid_rows = ~(pd.isna(X).ravel() | pd.isna(y).ravel())
X, y = X[valid_rows], y[valid_rows]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
regressor = LinearRegression()
regressor.fit(X_train, y_train)  # training the algorithm
y_pred = regressor.predict(X_test)

# --- Compare predictions with the held-out values ---------------------------
df = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})

# Bar chart of the first 25 test samples.
df1 = df.head(25)
df1.plot(kind='bar', figsize=(16, 10))
plt.grid(which='major', linestyle='-', linewidth='0.5', color='green')
plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black')
plt.show()

# Test points with the fitted line on top.
plt.scatter(X_test, y_test, color='gray')
plt.plot(X_test, y_pred, color='red', linewidth=2)
plt.show()

print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
print(df)
# ARIMA
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from pandas.plotting import register_matplotlib_converters
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller

# File name the scripts were originally written against; resolved relative to
# the current working directory.
DEFAULT_CSV_PATH = (
    'time_series - 60min (5) - time_series - 60min (5) - '
    'time_series - 60min (5) - time_series - 60min (5).csv'
)

# The series is min-max scaled to [0, 1], so it can contain exact zeros.
# Values are clipped to this floor before np.log to avoid -inf.
LOG_FLOOR = 1e-6


def import_data(path=DEFAULT_CSV_PATH):
    """Load the hourly time-series CSV into a DataFrame.

    Args:
        path: Location of the CSV file. Defaults to ``DEFAULT_CSV_PATH`` so
            existing zero-argument calls keep reading the same file.

    Returns:
        The file's contents as a ``pandas.DataFrame``, unmodified.
    """
    return pd.read_csv(path)


def _report_adf(series):
    """Run the augmented Dickey-Fuller test on *series* and print a summary.

    Returns:
        The raw result returned by ``adfuller``.
    """
    adf_result = adfuller(series)
    print('ADF Statistic: {}'.format(adf_result[0]))
    print('p-value: {}'.format(adf_result[1]))
    print('Critical Values:')
    for key, value in adf_result[4].items():
        print('\t{}: {}'.format(key, value))
    return adf_result


def get_stationarity(timeseries, window=12, column='normalized'):
    """Plot rolling statistics of *timeseries* and print its ADF test summary.

    Args:
        timeseries: DataFrame holding the series to inspect.
        window: Number of samples in the rolling mean / standard deviation.
        column: Column of ``timeseries`` passed to the Dickey-Fuller test.
    """
    # rolling statistics
    rolling_mean = timeseries.rolling(window=window).mean()
    rolling_std = timeseries.rolling(window=window).std()

    # rolling statistics plot, on its own figure so it does not pile onto
    # whatever was drawn last
    plt.figure()
    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(rolling_mean, color='red', label='Rolling Mean')
    plt.plot(rolling_std, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)

    # Dickey-Fuller test
    _report_adf(timeseries[column])


# --- Load and prepare the series --------------------------------------------
result = import_data()
result['Datee'] = pd.to_datetime(result['Date'])

data = result.loc[:, ['Dataa']]
# Index by the parsed timestamps rather than the raw 'Date' strings so the
# plot gets a real time axis. The index keeps the name 'Date'.
data = data.set_index(result['Datee'].rename('Date'))
# Anything that cannot be parsed as a number becomes NaN.
# NOTE(review): NaNs are not removed here and would reach adfuller - confirm
# the input file has no unparseable readings.
data['Dataa'] = pd.to_numeric(data['Dataa'], downcast='float', errors='coerce')

# Create the sized figure first and draw into it. Calling plt.figure() after
# data.plot() would only open a second, empty figure.
fig, ax = plt.subplots(figsize=(15, 10))
data.plot(ax=ax)
fig.tight_layout()

# --- Scale to [0, 1] and split 80/20 in time order --------------------------
values = data['Dataa'].values.reshape(-1, 1)
values = values.astype('float32')
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)
normalized = pd.DataFrame({'normalized': scaled.flatten()})

split_at = int(0.8 * len(normalized))
train = normalized[:split_at]
test = normalized[split_at:]

# --- Stationarity check on the last 20 % ------------------------------------
# `result` is rebound from the raw DataFrame to the ADF result, as before.
result = _report_adf(test['normalized'])

# Log-transform the test slice; clip first so a scaled value of 0 does not
# become -inf.
df_log = np.log(test.clip(lower=LOG_FLOOR))
plt.figure()
plt.plot(df_log)
import sklearn.metrics as metrics

# First-difference the log series and inspect its stationarity.
df_log_shift = df_log - df_log.shift()
df_log_shift.dropna(inplace=True)
get_stationarity(df_log_shift)

# NOTE(review): the decomposition is computed but not used below. Newer
# statsmodels releases renamed `freq` to `period` - confirm the installed
# version before changing the keyword.
decomposition = seasonal_decompose(df_log, freq=3)

# NOTE(review): `statsmodels.tsa.arima_model.ARIMA` and `fit(disp=...)` are the
# legacy API, which newer statsmodels releases no longer provide. The
# replacement, `statsmodels.tsa.arima.model.ARIMA`, returns fitted values on a
# different scale, so the reconstruction below would need reworking when
# migrating.
model = ARIMA(df_log, order=(2, 1, 2))
results = model.fit(disp=-1)

# Differenced series against the model's fitted values, on its own figure.
plt.figure()
plt.plot(df_log_shift, label='Differenced log series')
plt.plot(results.fittedvalues, color='red', label='ARIMA fitted values')
plt.legend(loc='best')

# Rebuild log-scale predictions: start every point at the first observed log
# value and add the running sum of the fitted values (treated as differences).
predictions_ARIMA_diff = pd.Series(results.fittedvalues, copy=True)
predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()
predictions_ARIMA_log = pd.Series(df_log['normalized'].iloc[0], index=df_log.index)
predictions_ARIMA_log = predictions_ARIMA_log.add(predictions_ARIMA_diff_cumsum, fill_value=0)
# Undo the log transform to get back to the normalized scale.
predictions_ARIMA = np.exp(predictions_ARIMA_log)

# Actual vs. predicted on the normalized scale, kept apart from the
# log-difference plot above because the scales differ.
plt.figure()
plt.plot(test, label='Actual')
plt.plot(predictions_ARIMA, label='ARIMA prediction')
plt.legend(loc='best')

print('Mean Squared Error:', metrics.mean_squared_error(test, predictions_ARIMA))
print(test)
print(predictions_ARIMA)

# Keep the figures open when this runs as a plain script.
plt.show()
# RNN
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.metrics as metrics
# NOTE(review): this binding of `tf` is immediately replaced by the next line.
import tensorflow.compat.v1 as tf
import tensorflow as tf
from matplotlib import rc
from pandas.plotting import register_matplotlib_converters
from pylab import rcParams
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from tensorflow import keras

# File name the scripts were originally written against; resolved relative to
# the current working directory.
DEFAULT_CSV_PATH = (
    'time_series - 60min (5) - time_series - 60min (5) - '
    'time_series - 60min (5) - time_series - 60min (5).csv'
)


def import_data(path=DEFAULT_CSV_PATH):
    """Load the hourly time-series CSV into a DataFrame.

    Args:
        path: Location of the CSV file. Defaults to ``DEFAULT_CSV_PATH`` so
            existing zero-argument calls keep reading the same file.

    Returns:
        The file's contents as a ``pandas.DataFrame``, unmodified.
    """
    return pd.read_csv(path)


def create_dataset(X, y, time_steps=1):
    """Build sliding-window samples for sequence models.

    For each start position ``i`` the sample is rows ``i .. i+time_steps-1``
    of ``X`` and the target is ``y`` at position ``i + time_steps``.

    Args:
        X: DataFrame of input features, in time order.
        y: Series of target values, positionally aligned with ``X``.
        time_steps: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(Xs, ys)`` of NumPy arrays holding the windows and their
        targets. Both are empty when ``len(X) <= time_steps``.
    """
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X.iloc[i:i + time_steps].values)
        ys.append(y.iloc[i + time_steps])
    return np.array(Xs), np.array(ys)


# --- Load and prepare the series --------------------------------------------
result = import_data()
result['Datee'] = pd.to_datetime(result['Date'])

data = result.loc[:, ['Dataa']]
# Index by the parsed timestamps rather than the raw 'Date' strings so the
# plot gets a real time axis. The index keeps the name 'Date'.
data = data.set_index(result['Datee'].rename('Date'))
# Anything that cannot be parsed as a number becomes NaN.
# NOTE(review): NaNs are not removed here and would reach the network -
# confirm the input file has no unparseable readings.
data['Dataa'] = pd.to_numeric(data['Dataa'], downcast='float', errors='coerce')

# Create the sized figure first and draw into it. Calling plt.figure() after
# data.plot() would only open a second, empty figure.
fig, ax = plt.subplots(figsize=(15, 10))
data.plot(ax=ax)
fig.tight_layout()

# --- Scale to [0, 1] and split 80/20 in time order --------------------------
values = data['Dataa'].values.reshape(-1, 1)
values = values.astype('float32')
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)
normalized = pd.DataFrame({'normalized': scaled.flatten()})

split_at = int(0.8 * len(normalized))
train = normalized[:split_at]
test = normalized[split_at:]
print(len(train), len(test))

# --- Turn each split into (window, next value) pairs ------------------------
time_steps = 1
X_train, y_train = create_dataset(train, train.normalized, time_steps)
X_test, y_test = create_dataset(test, test.normalized, time_steps)
print(X_train.shape, y_train.shape)

# One LSTM layer feeding a single dense output unit.
model = keras.Sequential([
    keras.layers.LSTM(units=128),
    keras.layers.Dense(units=1),
])
model.compile(
    loss='mean_squared_error',
    optimizer=keras.optimizers.Adam(0.001),
)

# shuffle=False keeps the samples in time order during training; the last
# 10 % of the training windows are held out for validation.
history = model.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=16,
    validation_split=0.1,
    verbose=1,
    shuffle=False,
)

# Predict the test windows and compare with the true next values.
y_pred = model.predict(X_test)
print(len(y_pred))

df = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})
print(df)
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
# NOTE(review): the statements below look like fragments of a different script
# (they write per-animal 'conditioning.csv' files). They reference names that
# are not defined anywhere in the visible source - MH75, MH76, MH34, MH35,
# path, df_cond, animal - and several statements are run together on one
# line, so they cannot execute as written. Left untouched; confirm whether
# they belong in this file.
conditioning = pd.DataFrame(MH75) conditioning = conditioning.T
export_csv = conditioning.to_csv (path+'/analysis files/MH075/conditioning.csv', header=True) MH76.append(df_cond[animal])
conditioning = pd.DataFrame(MH76) conditioning = conditioning.T
export_csv = conditioning.to_csv (path+'/analysis files/MH076/conditioning.csv', header=True)
if '34' in animal: MH34.append(df_cond[animal])
cond = pd.DataFrame(MH34) cond = cond.T
export_csv = cond.to_csv (path+'/analysis files/MH034/conditioning.csv', header=True) cond = pd.DataFrame(MH35) cond = cond.T
export_csv = cond.to_csv (path+'/analysis files/MH035/conditioning.csv', header=True)