Admin features extension (#120)

lumburovskalina · web-flow · commit aee43d5cfca4 · 2023-03-09T14:43:14.000+01:00
* Added started_at and finished_at columns to the attribute and embedding tables

* Added started at and finished at in the queries

* Removed print

* Added started_at and finished_at to import and export

* Added full name to task

* Started at and finished at added on attributes

* Added finished at for embeddings

* Added finished at for sample projects

* Finished at date

* Finished at fix

* Adding embeddings one by one when importing project

* Embeddings import fix

* Sleep outside for loop

* Sleep back in the loop with reduced time

* Submodules merge
diff --git a/alembic/versions/546e5cd7feaa_added_started_at_and_finished_at_on_.py b/alembic/versions/546e5cd7feaa_added_started_at_and_finished_at_on_.py
@@ -0,0 +1,34 @@
+"""Added started_at and finished_at on attribute and embedding table
+
+Revision ID: 546e5cd7feaa
+Revises: 3b118e1e02cb
+Create Date: 2023-03-06 12:21:19.095488
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '546e5cd7feaa'
+down_revision = '3b118e1e02cb'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('attribute', sa.Column('started_at', sa.DateTime(), nullable=True))
+    op.add_column('attribute', sa.Column('finished_at', sa.DateTime(), nullable=True))
+    op.add_column('embedding', sa.Column('started_at', sa.DateTime(), nullable=True))
+    op.add_column('embedding', sa.Column('finished_at', sa.DateTime(), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column('embedding', 'finished_at')
+    op.drop_column('embedding', 'started_at')
+    op.drop_column('attribute', 'finished_at')
+    op.drop_column('attribute', 'started_at')
+    # ### end Alembic commands ###
diff --git a/controller/attribute/manager.py b/controller/attribute/manager.py
@@ -11,6 +11,7 @@
 from util import daemon, notification
 
 from . import util
+from sqlalchemy import sql
 
 
 def get_attribute(project_id: str, attribute_id: str) -> Attribute:
@@ -91,7 +92,6 @@ def update_attribute(
     source_code: str,
     visibility: str,
 ) -> None:
-
     attribute_item: Attribute = attribute.update(
         project_id,
         attribute_id,
@@ -142,7 +142,6 @@ def add_running_id(
         project_id, attribute_name, for_retokenization, with_commit=True
     )
     if for_retokenization:
-
         daemon.run(
             request_tokenize_project,
             project_id,
@@ -192,6 +191,7 @@ def calculate_user_attribute_all_records(
         attribute_id=attribute_id,
         state=AttributeState.RUNNING.value,
         with_commit=True,
+        started_at=sql.func.now(),
     )
     notification.send_organization_update(
         project_id=project_id, message=f"calculate_attribute:started:{attribute_id}"
@@ -207,7 +207,6 @@ def calculate_user_attribute_all_records(
 def __calculate_user_attribute_all_records(
     project_id: str, user_id: str, attribute_id: str
 ) -> None:
-
     try:
         calculated_attributes = util.run_attribute_calculation_exec_env(
             attribute_id=attribute_id, project_id=project_id, doc_bin="docbin_full"
@@ -285,6 +284,7 @@ def __calculate_user_attribute_all_records(
         attribute_id=attribute_id,
         state=AttributeState.USABLE.value,
         with_commit=True,
+        finished_at=sql.func.now(),
     )
 
     notification.send_organization_update(
diff --git a/controller/embedding/manager.py b/controller/embedding/manager.py
@@ -16,7 +16,6 @@ def get_recommended_encoders() -> List[Any]:
     else:
         existing_models = []
     for model in existing_models:
-
         if not model["zero_shot_pipeline"]:
             not_yet_known = (
                 len(
@@ -110,6 +109,6 @@ def __embed_one_by_one_helper(
                 user_id=user_id,
                 config_string=splitted[2],
             )
-        time.sleep(10)
+        time.sleep(5)
         while util.has_encoder_running(project_id):
-            time.sleep(10)
+            time.sleep(5)
diff --git a/controller/transfer/project_transfer_manager.py b/controller/transfer/project_transfer_manager.py
@@ -33,6 +33,7 @@
 from util.notification import create_notification
 from submodules.s3 import controller as s3
 import os
+from sqlalchemy import sql
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -149,20 +150,30 @@ def import_file(
     """
     send_progress_update_throttle(project_id, task_id, 0)
     project_item = project.get(project_id)
-    project_item.name = data.get("project_details_data",).get(
+    project_item.name = data.get(
+        "project_details_data",
+    ).get(
         "name",
     )
-    project_item.description = data.get("project_details_data",).get(
+    project_item.description = data.get(
+        "project_details_data",
+    ).get(
         "description",
     )
-    project_item.tokenizer = data.get("project_details_data",).get(
+    project_item.tokenizer = data.get(
+        "project_details_data",
+    ).get(
         "tokenizer",
     )
-    spacy_language = data.get("project_details_data",).get(
+    spacy_language = data.get(
+        "project_details_data",
+    ).get(
         "tokenizer",
     )[:2]
     project_item.tokenizer_blank = spacy_language
-    project_item.status = data.get("project_details_data",).get(
+    project_item.status = data.get(
+        "project_details_data",
+    ).get(
         "status",
     )
     old_project_id = data.get(
@@ -209,6 +220,8 @@ def import_file(
                 "source_code",
             ),
             visibility=attribute_item.get("visibility"),
+            started_at=attribute_item.get("started_at"),
+            finished_at=attribute_item.get("finished_at"),
             project_id=project_id,
         )
         attribute_ids_by_old_id[
@@ -606,7 +619,6 @@ def import_file(
         for embedding_item in data.get(
             "embeddings_data",
         ):
-
             attribute_id = embedding_item.get("attribute_id")
             embedding_name = embedding_item.get("name")
             if attribute_id:
@@ -617,6 +629,10 @@ def import_file(
                 )
                 attribute_id = attribute_ids_by_old_name[attribute_name]
 
+            finished_at_str = "finished_at" in embedding_item
+            if not finished_at_str:
+                embedding_item["finished_at"] = sql.func.now()
+
             embedding_object = embedding.create(
                 project_id=project_id,
                 attribute_id=attribute_id,
@@ -628,6 +644,12 @@ def import_file(
                 type=embedding_item.get(
                     "type",
                 ),
+                started_at=embedding_item.get(
+                    "started_at",
+                ),
+                finished_at=embedding_item.get(
+                    "finished_at",
+                ),
             )
             embedding_ids[
                 embedding_item.get(
@@ -987,6 +1009,8 @@ def get_project_export_dump(
             "state": attribute_item.state,
             "logs": attribute_item.logs,
             "visibility": attribute_item.visibility,
+            "started_at": attribute_item.started_at,
+            "finished_at": attribute_item.finished_at,
         }
         for attribute_item in attributes
     ]
@@ -1080,6 +1104,8 @@ def get_project_export_dump(
             "name": embedding_item.name,
             "custom": embedding_item.custom,
             "type": embedding_item.type,
+            "started_at": embedding_item.started_at,
+            "finished_at": embedding_item.finished_at,
         }
         for embedding_item in embeddings
     ]
@@ -1218,7 +1244,7 @@ def delete_project(project_id: str) -> bool:
 
 def replace_by_mappings(text: str, mappings: List[Dict[str, str]]) -> str:
     for mapping in mappings:
-        for (key, value) in mapping.items():
+        for key, value in mapping.items():
             text = text.replace(str(key), str(value))
     return text
 
diff --git a/graphql_api/types.py b/graphql_api/types.py
@@ -801,3 +801,5 @@ class Task(graphene.ObjectType):
     project_name = graphene.String()
     state = graphene.String()
     task_type = graphene.String()
+    started_at = graphene.DateTime()
+    finished_at = graphene.DateTime()
diff --git a/submodules/model b/submodules/model
@@ -1 +1 @@
-Subproject commit 58726b00ad1fb3f4e61ed614c00346bd5346a676
+Subproject commit 9b9f9c85b56413bf478a7c695e302b4e3a0248d6