|
8 | 8 | from sklearn.base import clone |
9 | 9 | from sklearn.metrics import mean_absolute_error |
10 | 10 | from sklearn.model_selection import train_test_split |
| 11 | +from database import get_db_connection |
11 | 12 | from config import DB_FILE, MODEL_FILE |
12 | 13 | from utils import calculate_sun_elevation |
13 | 14 |
|
def build_training_data():
    """Build the training matrix and target vector from ``daily_stats``.

    Reads every row of ``daily_stats`` that has a non-NULL ``kwh`` value, in
    ascending ``day`` order, and turns each row into one feature vector.

    Feature columns, in order:
        sin_day, cos_day, clouds, temperature, sun_elevation,
        prev_kwh, rolling_avg, daylight, sunshine

    Returns:
        tuple: ``(X, y)`` as NumPy arrays, where ``X`` holds one feature row
        per day and ``y`` the matching ``kwh`` values.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        # Query includes the daylight/sunshine duration columns.
        cursor.execute("""
            SELECT day, kwh, avg_clouds, avg_temp, daylight_duration, sunshine_duration
            FROM daily_stats
            WHERE kwh IS NOT NULL
            ORDER BY day
        """)
        rows = cursor.fetchall()
    finally:
        # Release the connection even if the query fails.
        conn.close()

    X, y, kwh_history = [], [], []

    for day_str, kwh, clouds, temp, daylight, sunshine in rows:
        date = datetime.datetime.strptime(day_str, "%Y-%m-%d")
        day_of_year = date.timetuple().tm_yday

        # Seasonal features: day of year encoded on a circle.
        sin_day = math.sin(2 * math.pi * day_of_year / 365)
        cos_day = math.cos(2 * math.pi * day_of_year / 365)
        sun_elev = calculate_sun_elevation(date)

        # Lag features: the previous row's yield and the mean of the last
        # seven rows. Until seven rows exist, the rolling average falls back
        # to the previous value (0 for the very first row).
        prev_kwh = kwh_history[-1] if kwh_history else 0
        rolling_avg = sum(kwh_history[-7:]) / 7 if len(kwh_history) >= 7 else prev_kwh

        # NULL weather values are replaced by 0.
        # NOTE(review): the original comment states daylight and sunshine
        # are in seconds - confirm against the data source.
        X.append([
            sin_day,
            cos_day,
            clouds or 0,
            temp or 0,
            sun_elev,
            prev_kwh,
            rolling_avg,
            daylight or 0,
            sunshine or 0,
        ])
        y.append(kwh)
        kwh_history.append(kwh)

    return np.array(X), np.array(y)
37 | 59 |
|
def train_model(min_samples=8):
    """Train the forecast models and persist them to ``MODEL_FILE``.

    Three regressors are fitted on the data from ``build_training_data()``:
    a random forest for the expected value and two quantile gradient-boosting
    models (alpha 0.1 and 0.9) for the lower and upper bound. The last 20 %
    of the rows (no shuffling, so chronological order is kept) are held out
    to compute the mean absolute error of the main model.

    Args:
        min_samples: Minimum number of training rows required. Defaults to
            8, the value used so far; the original author's note suggests
            raising it to about 15 now that there are nine features.

    Returns:
        The fitted ``RandomForestRegressor``, or ``None`` when fewer than
        ``min_samples`` rows are available (nothing is written then).
    """
    X, y = build_training_data()
    if len(X) < min_samples:
        print(f"⚠️ Nicht genug Trainingsdaten ({len(X)}/{min_samples}).")
        return None

    # Must list the columns in the same order build_training_data() emits them.
    feature_names = [
        "sin_day", "cos_day", "clouds", "temperature",
        "sun_elevation", "prev_kwh", "rolling_avg",
        "daylight", "sunshine",
    ]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # 1. Main model (expected value).
    model = RandomForestRegressor(n_estimators=300, random_state=42)
    model.fit(X_train, y_train)

    # 2. Lower quantile (worst case).
    model_low = GradientBoostingRegressor(loss='quantile', alpha=0.1, n_estimators=300, random_state=42)
    model_low.fit(X_train, y_train)

    # 3. Upper quantile (best case).
    model_high = GradientBoostingRegressor(loss='quantile', alpha=0.9, n_estimators=300, random_state=42)
    model_high.fit(X_train, y_train)

    mae = mean_absolute_error(y_test, model.predict(X_test))

    # Persist everything as one bundle.
    joblib.dump({
        "model": model,
        "model_low": model_low,
        "model_high": model_high,
        "mae": mae,
        "feature_names": feature_names,
    }, MODEL_FILE)

    print(f"✅ Modell trainiert mit {len(feature_names)} Features | MAE: {round(mae,3)}")
    return model
68 | 100 |
|
def load_or_train_model():
    """Return the persisted model bundle, training a new one if needed.

    The bundle stored in ``MODEL_FILE`` is reused when it can be loaded and
    lists exactly nine feature names. Otherwise (file missing, unreadable,
    or written by an older version with a different feature set) the models
    are retrained and the freshly written bundle is loaded.

    Returns:
        The bundle dict written by ``train_model()`` (keys ``model``,
        ``model_low``, ``model_high``, ``mae``, ``feature_names``), or
        ``None`` when training was not possible.
    """
    expected_feature_count = 9

    if os.path.exists(MODEL_FILE):
        try:
            bundle = joblib.load(MODEL_FILE)
        except Exception as exc:
            # A corrupt or incompatible file must not stop the program;
            # report the reason and fall through to retraining.
            print(f"⚠️ Modell konnte nicht geladen werden ({exc}), trainiere neu...")
        else:
            # Only reuse bundles that were trained on the current feature set.
            feature_names = bundle.get("feature_names") if isinstance(bundle, dict) else None
            if feature_names is not None and len(feature_names) == expected_feature_count:
                return bundle
            print("🔄 Altes Modell erkannt, trainiere neu mit 9 Features...")

    # train_model() returns only the main regressor, so reload the bundle it
    # wrote to give callers the same dict on every path.
    if train_model() is None:
        return None
    return joblib.load(MODEL_FILE)
| 113 | + |
0 commit comments