Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,18 @@ on:
push:
branches:
- main
pull_request:
branches:
- main
- 2023ac05362

jobs:
build:
runs-on: ubuntu-latest
permissions:
contents: write # Needed to push back updated VERSION file
if: >
github.ref == 'refs/heads/main' ||
(github.ref == 'refs/heads/2023ac05362' &&
contains(join(github.event.commits.*.added, ','), '.pkl') ||
contains(join(github.event.commits.*.modified, ','), '.pkl') )

steps:
- name: Checkout code
Expand Down
Binary file not shown.
13 changes: 9 additions & 4 deletions feature_store/housing_feature_repo/feature_repo/data/registry.db
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ __dummy_idJhousing_feature_repo
u
U
location)A specific housing location in California"house_idJhousing_feature_repo
ܩ������ ��������1"$19581b39-c512-40c3-84e9-a104cdc8efd7* ֦������2�
ܩ������ ��������1"$2ff5a961-d2f2-4ebc-a7f6-004d11861d6c* �������.2�
�
location_featureshousing_feature_repolocation"

Expand All @@ -16,13 +16,18 @@ U
AveBedrms"
Latitude"
MedHouseVal2����:�event_timestampZdata/housing_features.parquet�1feast.infra.offline_stores.file_source.FileSource�data/housing_features.parquet�The raw housing data source� ��������" ��������@b
house_id�
ܩ������ ֦��؞ބ
house_id�
ܩ������ �������-
�Ƣ�ȣ�� ��������
�������� �������
������� ��������
�������� ������ð
������ð �����R�
������ð �����
����� Ӯ���ū�
Ӯ���ū� ��������
�������� �������e
�������e ��������
�������� �����ӄ�R�
�
,feast.infra.online_stores.sqlite.SqliteTable"�
�/media/gaurav/ubuntudata/SemThree/MLops/Assigment1/MLOps_Group_49/feature_store/housing_feature_repo/feature_repo/data/online_store.db&housing_feature_repo_location_featuresb�event_timestampZdata/housing_features.parquet�1feast.infra.offline_stores.file_source.FileSource�data/housing_features.parquet�housing_feature_repo�The raw housing data source� ۩������" ܩ������6
Expand Down
2 changes: 1 addition & 1 deletion new_data/housing.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
8.3252,41.0,6.984126984126984,1.0238095238095237,322.0,2.5555555555555554,37.88,-122.23,4.526
89.3252,41.0,6.984126984126984,1.0238095238095237,322.0,2.5555555555555554,37.88,-122.23,4.526
8.3014,21.0,6.238137082601054,0.9718804920913884,2401.0,2.109841827768014,37.86,-122.22,3.585
7.2574,52.0,8.288135593220339,1.073446327683616,496.0,2.8022598870056497,37.85,-122.24,3.521
5.6431,52.0,5.8173515981735155,1.0730593607305936,558.0,2.547945205479452,37.85,-122.25,3.413
Expand Down
20,641 changes: 20,641 additions & 0 deletions new_data/one.csv

Large diffs are not rendered by default.

60 changes: 60 additions & 0 deletions retraining/watchfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import time
import subprocess
import os
import threading
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

# Configuration
# Folder polled for newly dropped training-data files (relative to this
# script's working directory — run from retraining/).
FOLDER_TO_WATCH = "../new_data/"
# Command used to trigger retraining via the webhook server
# (webhookServer.py listening on port 5001).
CURL_COMMAND = [
    "curl", "-X", "POST", "http://localhost:5001/retrain"  # Your Flask retrain endpoint
]

# Lock to prevent overlapping retraining: watchdog may fire on_created
# from its worker thread while a previous retrain is still running.
retrain_lock = threading.Lock()

def file_fully_written(file_path, check_interval=1, checks=3):
    """Return True once the file's size is stable, i.e. fully written.

    Polls ``os.path.getsize`` until the size is unchanged for ``checks``
    consecutive polls spaced ``check_interval`` seconds apart. Returns
    False if the file disappears while polling (e.g. a temporary file
    removed before it finished writing).

    Fix vs. the original: the loop slept one extra ``check_interval``
    AFTER the final confirming poll, delaying the caller for a full
    interval for no reason. We now only sleep between polls.
    """
    last_size = -1
    stable_count = 0
    while stable_count < checks:
        try:
            current_size = os.path.getsize(file_path)
        except FileNotFoundError:
            return False
        if current_size == last_size:
            stable_count += 1
        else:
            # Size changed — restart the stability count.
            stable_count = 0
        last_size = current_size
        # Sleep only if another poll is still needed.
        if stable_count < checks:
            time.sleep(check_interval)
    return True

class WatcherHandler(FileSystemEventHandler):
    """Triggers a retraining webhook call for each new file that appears."""

    def on_created(self, event):
        # Directory creation events are not training data — ignore them.
        if event.is_directory:
            return

        file_path = event.src_path
        print(f"New file detected: {file_path}")

        # Wait until the writer has finished; bail out if the file vanished.
        if not file_fully_written(file_path):
            print(f"File disappeared before completion: {file_path}")
            return

        print(f"File complete: {file_path}")
        with retrain_lock:  # Ensure only one retrain at a time
            print("Triggering retraining...")
            subprocess.run(CURL_COMMAND)
            print("Retraining finished.")

if __name__ == "__main__":
    # Wire the handler to the watched folder and start the background
    # observer thread; watchdog delivers events from its own threads.
    observer = Observer()
    observer.schedule(WatcherHandler(), FOLDER_TO_WATCH, recursive=False)
    observer.start()
    print(f"Monitoring folder: {FOLDER_TO_WATCH}")

    # Keep the main thread alive until the user interrupts with Ctrl-C.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
16 changes: 13 additions & 3 deletions retraining/webhookServer.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
from flask import Flask, request
import subprocess
import threading

# Flask app exposing the /retrain webhook (see watchfile.py for the caller).
app = Flask(__name__)
# Serializes retraining: only one retrain.py run may be active at a time.
lock = threading.Lock()

@app.route("/retrain", methods=["POST"])
def retrain():
    """Run retrain.py synchronously, allowing only one run at a time.

    Returns 429 when a retraining run is already in progress, 200 when
    retrain.py exits 0, and 500 (with the exit code) otherwise.
    """
    # Atomically claim the lock. The previous `if lock.locked(): ...`
    # followed by `with lock:` was a check-then-act race: two requests
    # could both see the lock free, and the loser would then BLOCK and
    # queue a second retrain instead of getting a 429.
    if not lock.acquire(blocking=False):
        return "Retraining already in progress. Try again later.", 429  # Too Many Requests

    try:
        print("Retrain webhook received!")
        result = subprocess.run(["python", "retrain.py"])

        if result.returncode == 0:
            return "Retraining completed successfully", 200
        else:
            return f"Retraining failed with code {result.returncode}", 500
    finally:
        lock.release()

if __name__ == "__main__":
    # Bind to all interfaces so the file watcher (possibly another
    # host/container) can reach the retrain endpoint on port 5001.
    app.run(host='0.0.0.0', port=5001)