Add daylight and sunshine duration as ML features in training and API routes

jacquesbach · jacquesbach · commit 26c06ba71358 · 2026-03-13T17:15:49.000+01:00
diff --git a/app.py b/app.py
@@ -25,7 +25,7 @@
     init_db()
 
     # 🔥 EINMAL ausführen, danach wieder auskommentieren!
-    force_rebuild_daily_stats()
+    # force_rebuild_daily_stats()
 
     self_heal_daily_stats()
     
diff --git a/ml_logic.py b/ml_logic.py
@@ -8,66 +8,106 @@
 from sklearn.base import clone
 from sklearn.metrics import mean_absolute_error
 from sklearn.model_selection import train_test_split
+from database import get_db_connection
 from config import DB_FILE, MODEL_FILE
 from utils import calculate_sun_elevation
 
 def build_training_data():
-    conn = sqlite3.connect(DB_FILE)
+    conn = get_db_connection()
     c = conn.cursor()
-    c.execute("SELECT day, kwh, avg_clouds, avg_temp FROM daily_stats WHERE kwh IS NOT NULL ORDER BY day")
+    # Query extended with the new columns (daylight_duration, sunshine_duration)
+    c.execute("""
+        SELECT day, kwh, avg_clouds, avg_temp, daylight_duration, sunshine_duration 
+        FROM daily_stats 
+        WHERE kwh IS NOT NULL 
+        ORDER BY day
+    """)
     rows = c.fetchall()
     conn.close()
 
     X, y, kwh_history = [], [], []
 
-    for day_str, kwh, clouds, temp in rows:
+    for day_str, kwh, clouds, temp, daylight, sunshine in rows:
         date = datetime.datetime.strptime(day_str, "%Y-%m-%d")
         day_of_year = date.timetuple().tm_yday
+        
+        # Temporal features: day of year encoded as sin/cos, plus sun elevation
         sin_day = math.sin(2 * math.pi * day_of_year / 365)
         cos_day = math.cos(2 * math.pi * day_of_year / 365)
         sun_elev = calculate_sun_elevation(date)
+        
+        # Lag features (what happened yesterday?); 0 when there is no history yet
         prev_kwh = kwh_history[-1] if kwh_history else 0
         rolling_avg = sum(kwh_history[-7:]) / 7 if len(kwh_history) >= 7 else prev_kwh
 
-        X.append([sin_day, cos_day, clouds or 0, temp or 0, sun_elev, prev_kwh, rolling_avg])
+        # Feature vector with the new values (daylight & sunshine in seconds); NULLs become 0
+        X.append([
+            sin_day, 
+            cos_day, 
+            clouds or 0, 
+            temp or 0, 
+            sun_elev, 
+            prev_kwh, 
+            rolling_avg,
+            daylight or 0,
+            sunshine or 0
+        ])
         y.append(kwh)
         kwh_history.append(kwh)
 
     return np.array(X), np.array(y)
 
 def train_model():
     X, y = build_training_data()
+    # With more features we should have at least 10-15 days of data for a first training run
     if len(X) < 8: #15!!!
-        print("⚠️ Nicht genug Trainingsdaten.")
+        print(f"⚠️ Nicht genug Trainingsdaten ({len(X)}/8).") #15!!!
         return None
 
-    feature_names = ["sin_day", "cos_day", "clouds", "temperature", "sun_elevation", "prev_kwh", "rolling_avg"]
+    # Extended feature list; order must match the columns built in build_training_data()
+    feature_names = [
+        "sin_day", "cos_day", "clouds", "temperature", 
+        "sun_elevation", "prev_kwh", "rolling_avg", 
+        "daylight", "sunshine"
+    ]
+    
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
 
-    # 1. RandomForestRegressor für den Erwartungswert)
+    # 1. Main model (random forest); this is the only object returned to the caller
     model = RandomForestRegressor(n_estimators=300, random_state=42)
     model.fit(X_train, y_train)
     
-    # 2. Unteres Quantil (z.B. 10% Perzentil - "Worst Case") mit GradientBoostingRegressor
-    model_low = GradientBoostingRegressor(loss='quantile', alpha=0.1, n_estimators=300, random_state=42) # 0.1 entspricht dem 10. Perzentil
+    # 2. Lower quantile (worst case): quantile loss with alpha=0.1
+    model_low = GradientBoostingRegressor(loss='quantile', alpha=0.1, n_estimators=300, random_state=42)
     model_low.fit(X_train, y_train)
 
-    # 3. Oberes Quantil (z.B. 90% Perzentil - "Best Case") mit GradientBoostingRegressor
-    model_high = GradientBoostingRegressor(loss='quantile', alpha=0.9, n_estimators=300, random_state=42) # 0.9 entspricht dem 90. Perzentil
+    # 3. Upper quantile (best case): quantile loss with alpha=0.9
+    model_high = GradientBoostingRegressor(loss='quantile', alpha=0.9, n_estimators=300, random_state=42)
     model_high.fit(X_train, y_train)
 
     mae = mean_absolute_error(y_test, model.predict(X_test))
     
     joblib.dump({
-        "model": model, "model_low": model_low, "model_high": model_high,
-        "mae": mae, "feature_names": feature_names
+        "model": model, 
+        "model_low": model_low, 
+        "model_high": model_high,
+        "mae": mae, 
+        "feature_names": feature_names
     }, MODEL_FILE)
 
-    print(f"✅ Modell trainiert | MAE: {round(mae,3)}")
+    print(f"✅ Modell trainiert mit {len(feature_names)} Features | MAE: {round(mae,3)}")
     return model
 
 def load_or_train_model():
+    """Return the saved model bundle dict, retraining if it is missing, stale or unreadable; None if training is not possible."""
     if os.path.exists(MODEL_FILE):
-        return joblib.load(MODEL_FILE)
-    train_model()
-    return joblib.load(MODEL_FILE)
+        try:
+            bundle = joblib.load(MODEL_FILE)
+            if len(bundle["feature_names"]) == 9:  # bundle matches the current 9-feature vector
+                return bundle
+            print("🔄 Altes Modell erkannt, trainiere neu mit 9 Features...")
+        except Exception as e:
+            print("Model load error:", e)
+    # train_model() returns only the bare estimator, so reload the bundle it just saved.
+    return joblib.load(MODEL_FILE) if train_model() is not None else None
+
diff --git a/routes.py b/routes.py
@@ -808,8 +808,8 @@ def forecast():
     c.execute("SELECT valid_from, price FROM prices ORDER BY valid_from DESC")
     prices = [{"date": r[0], "price": r[1]} for r in c.fetchall()]
     conn.close()
-
-    for date_str, cloud, temp in forecast_data:
+    
+    for date_str, cloud, temp, daylight, sunshine in forecast_data:
 
         date = datetime.datetime.strptime(date_str, "%Y-%m-%d")
         doy = date.timetuple().tm_yday
@@ -820,13 +820,18 @@ def forecast():
 
         prev_kwh = last_rows[-1] if last_rows else 0
         rolling_avg = sum(last_rows[-7:]) / 7 if len(last_rows) >= 7 else prev_kwh
-
-        X = np.array([[sin_day, cos_day,
-                       cloud or 0,
-                       temp or 0,
-                       sun_elev,
-                       prev_kwh,
-                       rolling_avg]])
+        
+        X = np.array([[
+            sin_day, 
+            cos_day, 
+            cloud or 0, 
+            temp or 0, 
+            sun_elev, 
+            prev_kwh, 
+            rolling_avg,
+            daylight or 0,
+            sunshine or 0
+        ]])
 
         median = max(float(model.predict(X)[0]), 0)
         lower = max(float(model_low.predict(X)[0]), 0)
@@ -927,16 +932,8 @@ def feature_importance():
 
     model_bundle = load_or_train_model()
     model = model_bundle["model"]
-
-    feature_names = [
-        "sin_day",
-        "cos_day",
-        "clouds",
-        "temperature",
-        "sun_elevation",
-        "prev_kwh",
-        "rolling_avg"
-    ]
+    
+    feature_names = model_bundle["feature_names"]
 
     importances = model.feature_importances_
 
@@ -975,7 +972,7 @@ def shap_values():
 
     results = []
 
-    for date_str, cloud, temp in forecast_data:
+    for date_str, cloud, temp, daylight, sunshine in forecast_data:
 
         date = datetime.datetime.strptime(date_str, "%Y-%m-%d")
         doy = date.timetuple().tm_yday
@@ -987,9 +984,19 @@ def shap_values():
         
         prev_kwh = last_rows[-1] if last_rows else 0
         rolling_avg = sum(last_rows[-7:]) / 7 if len(last_rows) >= 7 else prev_kwh
-        
-        X = np.array([[sin_day, cos_day, cloud or 0, temp or 0, sun_elev, prev_kwh, rolling_avg]])
-        
+
+        X = np.array([[
+            sin_day, 
+            cos_day, 
+            cloud or 0, 
+            temp or 0, 
+            sun_elev, 
+            prev_kwh, 
+            rolling_avg,
+            daylight or 0,
+            sunshine or 0
+        ]])
+
         prediction = float(model.predict(X)[0])
         
         shap_vals = explainer.shap_values(X)[0]
@@ -1012,7 +1019,6 @@ def shap_values():
 
     return jsonify(results)
 
-
 @api_bp.route('/api/shap-summary')
 def shap_summary():
 
diff --git a/utils.py b/utils.py
@@ -67,15 +67,22 @@ def get_weather_forecast(days=7):
             f"https://api.open-meteo.com/v1/forecast"
             f"?latitude={LATITUDE}"
             f"&longitude={LONGITUDE}"
-            f"&daily=cloud_cover_mean,temperature_2m_mean"
+            f"&daily=cloud_cover_mean,temperature_2m_mean,daylight_duration,sunshine_duration"
             f"&timezone=Europe/Berlin"
+            f"&forecast_days={days}"
         )
         r = requests.get(url, timeout=5)
         data = r.json().get("daily", {})
+        
         dates = data.get("time", [])
         clouds = data.get("cloud_cover_mean", [])
         temps = data.get("temperature_2m_mean", [])
-        return list(zip(dates[:days], clouds[:days], temps[:days]))
+        daylight = data.get("daylight_duration", [])
+        sunshine = data.get("sunshine_duration", [])
+        
+        # Wir geben nun 5 Werte pro Tag zurück
+        return list(zip(dates, clouds, temps, daylight, sunshine))
+        
     except Exception as e:
         print("Forecast Weather Error:", e)
-        return []
+        return []