Skip to content

Commit 03469d7

Browse files
Release updates (#127)
* Dummy commit * Adds enums support for export manager * Adds running_id though pandas not db update * Checks for IN_DELETEION projects on startup and finishes them up * Submodules merge --------- Co-authored-by: JWittmeyer <[email protected]>
1 parent 3777401 commit 03469d7

File tree

10 files changed

+91
-13
lines changed

10 files changed

+91
-13
lines changed

api/transfer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,3 +457,4 @@ def __create_embeddings(
457457
break
458458
time.sleep(1)
459459
return ctx_token
460+

app.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
from graphql_api import schema
2121
from controller.task_queue.task_queue import init_task_queue
22+
from controller.project.manager import check_in_deletion_projects
2223

2324

2425
logging.basicConfig(level=logging.DEBUG)
@@ -52,3 +53,4 @@
5253
app = Starlette(routes=routes, middleware=middleware)
5354

5455
init_task_queue()
56+
check_in_deletion_projects()

controller/attribute/manager.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,20 @@ def add_running_id(
141141
) -> None:
142142
if attribute.get_by_name(project_id, attribute_name):
143143
raise ValueError(f"attribute with name {attribute_name} already exists")
144+
general.commit()
144145

146+
# added threading for session management because otherwise this can sometimes create a deadlock
147+
thread = daemon.prepare_thread(
148+
__add_running_id, user_id, project_id, attribute_name, for_retokenization
149+
)
150+
thread.start()
151+
thread.join()
152+
153+
154+
def __add_running_id(
155+
user_id: str, project_id: str, attribute_name: str, for_retokenization: bool = True
156+
):
157+
session_token = general.get_ctx_token()
145158
attribute.add_running_id(
146159
project_id, attribute_name, for_retokenization, with_commit=True
147160
)
@@ -156,6 +169,7 @@ def add_running_id(
156169
"only_uploaded_attributes": False,
157170
},
158171
)
172+
general.remove_and_refresh_session(session_token)
159173

160174

161175
def calculate_user_attribute_all_records(

controller/project/manager.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
embedding,
2020
information_source,
2121
general,
22+
organization,
2223
)
2324
from graphql_api.types import HuddleData, ProjectSize, GatesIntegrationData
2425
from util import daemon, notification
@@ -469,3 +470,22 @@ def __create_missing_information_source_pickles(
469470
time.sleep(1)
470471

471472
return session_token
473+
474+
475+
def check_in_deletion_projects() -> None:
476+
# this is only supposed to be called during startup of the application
477+
daemon.run(__check_in_deletion_projects)
478+
479+
480+
def __check_in_deletion_projects() -> None:
481+
# wait for startup to finish
482+
time.sleep(2)
483+
to_be_deleted = []
484+
orgs = organization.get_all()
485+
for org_item in orgs:
486+
projects = project.get_all(str(org_item.id))
487+
for project_item in projects:
488+
if project_item.status == enums.ProjectStatus.IN_DELETION.value:
489+
to_be_deleted.append(str(project_item.id))
490+
for project_id in to_be_deleted:
491+
delete_project(project_id)

controller/transfer/manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def get_upload_credentials_and_id(
6767

6868
def import_records_from_file(project_id: str, task: UploadTask) -> None:
6969
import_file(project_id, task)
70-
check_and_add_running_id(project_id, str(task.user_id))
70+
# adding of running_id now in pandas handling for performance reasons
7171
record_label_association.update_is_valid_manual_label_for_project(project_id)
7272
general.commit()
7373
check_and_update_null_labels(project_id, str(task.user_id))

controller/transfer/record_export_manager.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -229,11 +229,12 @@ def __get_record_data_query(project_id: str, row_options: Dict[str, Any]) -> str
229229

230230

231231
def __record_data_by_type(project_id: str, row_options: Dict[str, Any]) -> str:
232-
if row_options.get("type") == "SLICE":
232+
print(row_options, flush=True)
233+
if row_options.get("type") == enums.RecordExportAmountTypes.SLICE.value:
233234
return ___record_data_by_slice(project_id, row_options.get("id"))
234-
elif row_options.get("type") == "SESSION":
235+
elif row_options.get("type") == enums.RecordExportAmountTypes.SESSION.value:
235236
return __record_data_by_session(project_id, row_options.get("id"))
236-
elif row_options.get("type") == "ALL":
237+
elif row_options.get("type") == enums.RecordExportAmountTypes.ALL.value:
237238
return __record_data_without_reducing(project_id)
238239
else:
239240
message = (

controller/transfer/record_transfer_manager.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def import_file(project_id: str, upload_task: UploadTask) -> None:
123123
)
124124
record_category = category.infer_category(upload_task.file_name)
125125

126-
data = convert_to_record_dict(
126+
data, added_col = convert_to_record_dict(
127127
file_type,
128128
tmp_file_name,
129129
upload_task.user_id,
@@ -134,7 +134,12 @@ def import_file(project_id: str, upload_task: UploadTask) -> None:
134134
import_records_and_rlas(
135135
project_id, upload_task.user_id, data, upload_task, record_category
136136
)
137-
137+
if added_col:
138+
attribute_item = attribute.get_by_name(project_id, added_col)
139+
attribute_item.relative_position = 0
140+
attribute_item.is_primary_key = True
141+
attribute_item.state = enums.AttributeState.AUTOMATICALLY_CREATED.value
142+
general.commit()
138143
upload_task_manager.update_upload_task_to_finished(upload_task)
139144

140145
user = user_manager.get_or_create_user(upload_task.user_id)

controller/transfer/util.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33

44
from submodules.model import enums
55
from .checks import check_argument_allowed, run_checks, run_limit_checks
6-
from submodules.model.models import UploadTask, Attribute
6+
from submodules.model.models import UploadTask
77
import pandas as pd
88
from submodules.model.enums import NotificationType
9+
from submodules.model.business_objects import record
910
import os
1011
import logging
1112
import traceback
12-
from submodules.model.business_objects import export
1313
from util import category
1414
from util import notification
1515

@@ -45,7 +45,7 @@ def convert_to_record_dict(
4545
user_id: str,
4646
file_import_options: str,
4747
project_id: str,
48-
) -> List:
48+
) -> Tuple[List, str]:
4949
if not file_type:
5050
notification.create_notification(
5151
NotificationType.FILE_TYPE_NOT_GIVEN,
@@ -100,7 +100,28 @@ def convert_to_record_dict(
100100
run_limit_checks(df, project_id, user_id)
101101
run_checks(df, project_id, user_id)
102102
check_and_convert_category_for_unknown(df, project_id, user_id)
103-
return df.to_dict("records")
103+
added_col = add_running_id_if_not_present(df, project_id)
104+
return df.to_dict("records"), added_col
105+
106+
107+
def add_running_id_if_not_present(df: pd.DataFrame, project_id: str) -> Optional[str]:
108+
record_item = record.get_one(project_id)
109+
if record_item:
110+
# project already has records => no extensions of existing data
111+
return
112+
has_id_like = False
113+
for key in df.columns:
114+
if category.infer_category_enum(df, key) == enums.DataTypes.INTEGER.value:
115+
has_id_like = True
116+
break
117+
if has_id_like:
118+
return
119+
col_name = "running_id"
120+
while col_name in df.columns:
121+
col_name += "_"
122+
df[col_name] = df.index
123+
124+
return col_name
104125

105126

106127
def check_and_convert_category_for_unknown(
@@ -130,7 +151,7 @@ def string_to_import_option_dict(
130151
if len(tmp) == 2:
131152
parameter = tmp[0].strip()
132153
if not check_argument_allowed(parameter):
133-
create_notification(
154+
notification.create_notification(
134155
NotificationType.UNKNOWN_PARAMETER,
135156
user_id,
136157
project_id,

submodules/model

util/daemon.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,18 @@
22

33

44
def run(target, *args, **kwargs):
5-
threading.Thread(target=target, args=args, kwargs=kwargs, daemon=True,).start()
5+
threading.Thread(
6+
target=target,
7+
args=args,
8+
kwargs=kwargs,
9+
daemon=True,
10+
).start()
11+
12+
13+
def prepare_thread(target, *args, **kwargs) -> threading.Thread:
14+
return threading.Thread(
15+
target=target,
16+
args=args,
17+
kwargs=kwargs,
18+
daemon=True,
19+
)

0 commit comments

Comments
 (0)