Skip to content

Make database timestamps timezone-aware #160

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions whitebox/cron_tasks/monitoring_metrics.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import datetime
from datetime import datetime, timezone
import pandas as pd
import time
from sqlalchemy import create_engine
Expand Down Expand Up @@ -260,7 +260,7 @@ async def run_calculate_metrics_pipeline():
last_report_time = (
last_report.timestamp
if last_report
else round_timestamp(datetime.utcnow(), "1D")
else round_timestamp(datetime.now(timezone.utc), "1D")
)

unused_inference_rows_in_db = await get_unused_model_inference_rows(
Expand Down
15 changes: 12 additions & 3 deletions whitebox/cron_tasks/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import itertools
import pandas as pd
import datetime
import pytz
from sqlalchemy.orm import Session
from whitebox import crud
from whitebox.schemas.inferenceRow import InferenceRow
Expand Down Expand Up @@ -43,7 +44,10 @@ async def group_inference_rows_by_timestamp(
for x in dict_inference_rows:
new_obj = {**x}
new_obj["timestamp"] = change_timestamp(
x["timestamp"], last_time, granularity_amount, granularity_type
x["timestamp"],
last_time,
granularity_amount,
granularity_type,
)
updated_inferences_dict.append(new_obj)

Expand Down Expand Up @@ -159,7 +163,10 @@ def change_timestamp(
(E.g. a timestamp 2023-03-03 12:33:25.34432 when granularity is set to 2D and the previous group's timestamp is \
2023-03-03 00:00:00 will be converted into 2023-03-05 00:00:00)"""

timestamp_in_seconds = round_timestamp(timestamp, granularity_type).timestamp()
timestamp_utc_timezone = timestamp.replace(tzinfo=pytz.UTC)
timestamp_in_seconds = round_timestamp(
timestamp_utc_timezone, granularity_type
).timestamp()

granularity_in_seconds = convert_granularity_to_secs(
granularity_amount, granularity_type
Expand All @@ -175,7 +182,9 @@ def change_timestamp(
time_difference + 1
) * granularity_in_seconds + start_time_in_seconds

new_timestamp = datetime.datetime.fromtimestamp(new_timestamp_in_seconds)
new_timestamp = datetime.datetime.fromtimestamp(
new_timestamp_in_seconds
).astimezone(datetime.timezone.utc)

return new_timestamp

Expand Down
6 changes: 3 additions & 3 deletions whitebox/entities/Alert.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class Alert(Base):
model_monitor_id = Column(
String, ForeignKey("model_monitors.id", ondelete="CASCADE")
)
timestamp = Column(DateTime)
timestamp = Column(DateTime(timezone=True))
description = Column(String)
created_at = Column(DateTime)
updated_at = Column(DateTime)
created_at = Column(DateTime(timezone=True))
updated_at = Column(DateTime(timezone=True))
4 changes: 2 additions & 2 deletions whitebox/entities/DatasetRow.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ class DatasetRow(Base):
model_id = Column(String, ForeignKey("models.id", ondelete="CASCADE"))
nonprocessed = Column(JSON)
processed = Column(JSON)
created_at = Column(DateTime)
updated_at = Column(DateTime)
created_at = Column(DateTime(timezone=True))
updated_at = Column(DateTime(timezone=True))
6 changes: 3 additions & 3 deletions whitebox/entities/DriftingMetric.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ class DriftingMetric(Base):

id = Column(String, primary_key=True, unique=True, default=generate_uuid)
model_id = Column(String, ForeignKey("models.id", ondelete="CASCADE"))
timestamp = Column(DateTime)
timestamp = Column(DateTime(timezone=True))
concept_drift_summary = Column(JSON)
data_drift_summary = Column(JSON)
created_at = Column(DateTime)
updated_at = Column(DateTime)
created_at = Column(DateTime(timezone=True))
updated_at = Column(DateTime(timezone=True))
6 changes: 3 additions & 3 deletions whitebox/entities/Inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ class InferenceRow(Base):

id = Column(String, primary_key=True, unique=True, default=generate_uuid)
model_id = Column(String, ForeignKey("models.id", ondelete="CASCADE"))
timestamp = Column(DateTime)
timestamp = Column(DateTime(timezone=True))
nonprocessed = Column(JSON)
processed = Column(JSON)
is_used = Column(Boolean)
actual = Column(Float, nullable=True)

created_at = Column(DateTime)
updated_at = Column(DateTime)
created_at = Column(DateTime(timezone=True))
updated_at = Column(DateTime(timezone=True))
4 changes: 2 additions & 2 deletions whitebox/entities/Model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ class Model(Base):
target_column = Column(String)
granularity = Column(String)
labels = Column(JSON, nullable=True)
created_at = Column(DateTime)
updated_at = Column(DateTime)
created_at = Column(DateTime(timezone=True))
updated_at = Column(DateTime(timezone=True))

dataset_rows = relationship("DatasetRow")
inference_rows = relationship("InferenceRow")
Expand Down
6 changes: 3 additions & 3 deletions whitebox/entities/ModelIntegrityMetric.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class ModelIntegrityMetric(Base):

id = Column(String, primary_key=True, unique=True, default=generate_uuid)
model_id = Column(String, ForeignKey("models.id", ondelete="CASCADE"))
timestamp = Column(DateTime)
timestamp = Column(DateTime(timezone=True))
feature_metrics = Column(JSON)
created_at = Column(DateTime)
updated_at = Column(DateTime)
created_at = Column(DateTime(timezone=True))
updated_at = Column(DateTime(timezone=True))
4 changes: 2 additions & 2 deletions whitebox/entities/ModelMonitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class ModelMonitor(Base):
lower_threshold = Column(Numeric, nullable=True)
severity = Column("severity", Enum(AlertSeverity))
email = Column(String)
created_at = Column(DateTime)
updated_at = Column(DateTime)
created_at = Column(DateTime(timezone=True))
updated_at = Column(DateTime(timezone=True))

alerts = relationship("Alert")
18 changes: 9 additions & 9 deletions whitebox/entities/PerformanceMetric.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class BinaryClassificationMetrics(Base):

id = Column(String, primary_key=True, unique=True, default=generate_uuid)
model_id = Column(String, ForeignKey("models.id", ondelete="CASCADE"))
timestamp = Column(DateTime)
timestamp = Column(DateTime(timezone=True))
accuracy = Column(Float)
precision = Column(Float)
recall = Column(Float)
Expand All @@ -17,33 +17,33 @@ class BinaryClassificationMetrics(Base):
false_positive = Column(Integer)
false_negative = Column(Integer)
true_positive = Column(Integer)
created_at = Column(DateTime)
updated_at = Column(DateTime)
created_at = Column(DateTime(timezone=True))
updated_at = Column(DateTime(timezone=True))


class MultiClassificationMetrics(Base):
__tablename__ = "multi_classification_metrics"

id = Column(String, primary_key=True, unique=True, default=generate_uuid)
model_id = Column(String, ForeignKey("models.id", ondelete="CASCADE"))
timestamp = Column(DateTime)
timestamp = Column(DateTime(timezone=True))
accuracy = Column(Float)
precision = Column(JSON)
recall = Column(JSON)
f1 = Column(JSON)
confusion_matrix = Column(JSON)
created_at = Column(DateTime)
updated_at = Column(DateTime)
created_at = Column(DateTime(timezone=True))
updated_at = Column(DateTime(timezone=True))


class RegressionMetrics(Base):
__tablename__ = "regression_metrics"

id = Column(String, primary_key=True, unique=True, default=generate_uuid)
model_id = Column(String, ForeignKey("models.id", ondelete="CASCADE"))
timestamp = Column(DateTime)
timestamp = Column(DateTime(timezone=True))
r_square = Column(Float)
mean_squared_error = Column(Float)
mean_absolute_error = Column(Float)
created_at = Column(DateTime)
updated_at = Column(DateTime)
created_at = Column(DateTime(timezone=True))
updated_at = Column(DateTime(timezone=True))
4 changes: 2 additions & 2 deletions whitebox/entities/User.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ class User(Base):
id = Column(String, unique=True, primary_key=True, default=generate_uuid)
username = Column(String)
api_key = Column(String)
created_at = Column(DateTime)
updated_at = Column(DateTime)
created_at = Column(DateTime(timezone=True))
updated_at = Column(DateTime(timezone=True))
1 change: 0 additions & 1 deletion whitebox/schemas/user.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from typing import Optional, Union
from pydantic import BaseModel
from whitebox.schemas.base import ItemBase

Expand Down
18 changes: 11 additions & 7 deletions whitebox/tests/unit_tests/test_unit.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
from whitebox.cron_tasks.shared import change_timestamp
from datetime import datetime
from datetime import datetime, timezone


class TestNodes:
def test_round_timestamp(self):
timestamp = datetime(2023, 3, 7, 15, 34, 23)
start_time = datetime(2023, 3, 6)
timestamp = datetime(2023, 3, 7, 15, 34, 23, tzinfo=timezone.utc)
start_time = datetime(2023, 3, 6, tzinfo=timezone.utc)

assert change_timestamp(timestamp, start_time, 15, "T") == datetime(
2023, 3, 7, 15, 45
2023, 3, 7, 15, 45, tzinfo=timezone.utc
)
assert change_timestamp(timestamp, start_time, 5, "H") == datetime(
2023, 3, 7, 16, 0
2023, 3, 7, 16, 0, tzinfo=timezone.utc
)
assert change_timestamp(timestamp, start_time, 2, "D") == datetime(
2023, 3, 8, tzinfo=timezone.utc
)
assert change_timestamp(timestamp, start_time, 1, "W") == datetime(
2023, 3, 13, tzinfo=timezone.utc
)
assert change_timestamp(timestamp, start_time, 2, "D") == datetime(2023, 3, 8)
assert change_timestamp(timestamp, start_time, 1, "W") == datetime(2023, 3, 13)
6 changes: 3 additions & 3 deletions whitebox/tests/v1/test_drifting_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ def test_drifting_metric_get_model_all(client, api_key):
assert len(response_multi_json) == 1
assert len(response_binary_json) == 1

assert response_multi_json[0]["timestamp"] == "2023-03-06T12:15:00"
assert response_binary_json[0]["timestamp"] == "2023-03-07T00:00:00"
assert response_multi_json[0]["timestamp"] == "2023-03-06T12:15:00+00:00"
assert response_binary_json[0]["timestamp"] == "2023-03-07T00:00:00+00:00"

assert response_multi.status_code == status.HTTP_200_OK
assert response_binary.status_code == status.HTTP_200_OK
Expand All @@ -48,7 +48,7 @@ def test_drifting_metrics_get_binary_model_after_x_time(client, api_key):

assert len(response_binary_json) == 1

assert response_binary_json[0]["timestamp"] == "2023-03-07T00:00:00"
assert response_binary_json[0]["timestamp"] == "2023-03-07T00:00:00+00:00"

assert response_binary.status_code == status.HTTP_200_OK

Expand Down