Skip to content

Commit 7460ac8

Browse files
Release preparations (#154)
* Adds newer bricks naming scheme for string detection * Adds jsonify of list attributes * Error message for recreating Azure and OpenAI embeddings * Submodules merged --------- Co-authored-by: Lina <[email protected]>
1 parent f1a2f8d commit 7460ac8

File tree

7 files changed

+54
-33
lines changed

7 files changed

+54
-33
lines changed

controller/embedding/manager.py

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import time
22
from typing import Any, Dict, List, Optional
3+
from exceptions.exceptions import ApiTokenImportError
34

45
from submodules.model import enums
56
from submodules.model.models import Embedding
@@ -108,7 +109,7 @@ def get_embedding_name(
108109

109110

110111
def recreate_embeddings(
111-
project_id: str, embedding_ids: Optional[List[str]] = None
112+
project_id: str, embedding_ids: Optional[List[str]] = None, user_id: str = None
112113
) -> None:
113114
if not embedding_ids:
114115
embeddings = embedding.get_all_embeddings_by_project_id(project_id)
@@ -148,29 +149,37 @@ def recreate_embeddings(
148149
break
149150
else:
150151
time.sleep(1)
152+
except ApiTokenImportError as e:
153+
notification.create_notification(enums.NotificationType.RECREATION_OF_EMBEDDINGS_ERROR, user_id, project_id)
154+
__handle_failed_embedding(project_id,embedding_id, new_id,e)
155+
151156
except Exception as e:
152-
print(
153-
f"Error while recreating embedding for {project_id} with id {embedding_id} - {e}",
154-
flush=True,
155-
)
156-
notification.send_organization_update(
157-
project_id,
158-
f"embedding:{embedding_id}:state:{enums.EmbeddingState.FAILED.value}",
159-
)
160-
old_embedding_item = embedding.get(project_id, embedding_id)
161-
if old_embedding_item:
162-
old_embedding_item.state = enums.EmbeddingState.FAILED.value
163-
164-
if new_id:
165-
new_embedding_item = embedding.get(project_id, new_id)
166-
if new_embedding_item:
167-
new_embedding_item.state = enums.EmbeddingState.FAILED.value
168-
general.commit()
157+
__handle_failed_embedding(project_id,embedding_id, new_id,e)
169158

170159
notification.send_organization_update(
171160
project_id=project_id, message="embedding:finished:all"
172161
)
173162

163+
def __handle_failed_embedding(project_id: str, embedding_id: str,new_id: str, e: Exception) -> None:
164+
print(
165+
f"Error while recreating embedding for {project_id} with id {embedding_id} - {e}",
166+
flush=True,
167+
)
168+
169+
notification.send_organization_update(
170+
project_id,
171+
f"embedding:{embedding_id}:state:{enums.EmbeddingState.FAILED.value}",
172+
)
173+
old_embedding_item = embedding.get(project_id, embedding_id)
174+
if old_embedding_item:
175+
old_embedding_item.state = enums.EmbeddingState.FAILED.value
176+
177+
if new_id:
178+
new_embedding_item = embedding.get(project_id, new_id)
179+
if new_embedding_item:
180+
new_embedding_item.state = enums.EmbeddingState.FAILED.value
181+
general.commit()
182+
174183

175184
def __recreate_embedding(project_id: str, embedding_id: str) -> Embedding:
176185
old_embedding_item = embedding.get(project_id, embedding_id)
@@ -204,7 +213,7 @@ def __recreate_embedding(project_id: str, embedding_id: str) -> Embedding:
204213
if not agreement_item:
205214
new_embedding_item.state = enums.EmbeddingState.FAILED.value
206215
general.commit()
207-
raise Exception(
216+
raise ApiTokenImportError(
208217
f"No agreement found for embedding {new_embedding_item.name}"
209218
)
210219
agreement_item.xfkey = new_embedding_item.id

controller/notification/notification_data.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,13 @@
361361
"page": enums.Pages.SETTINGS.value,
362362
"docs": enums.DOCS.CREATING_PROJECTS.value,
363363
},
364+
enums.NotificationType.RECREATION_OF_EMBEDDINGS_ERROR.value: {
365+
"message_template": "Recreation of embeddings failed. Third party embeddings must be recreated manually.",
366+
"title": "Embedding creation",
367+
"level": enums.Notification.ERROR.value,
368+
"page": enums.Pages.SETTINGS.value,
369+
"docs": enums.DOCS.CREATE_EMBEDDINGS.value,
370+
},
364371
}
365372

366373

controller/payload/payload_scheduler.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,6 @@ def prepare_input_data_for_payload(
149149
information_source_item.type
150150
== enums.InformationSourceType.ACTIVE_LEARNING.value
151151
):
152-
153152
# for active learning, we can not evaluate on all records that are used for training
154153
# as otherwise, we would retrieve a false understanding of the accuracy!
155154
add_information_source_statistics_exclusion(
@@ -211,7 +210,6 @@ def execution_pipeline(
211210
add_file_name: str,
212211
input_data: Dict[str, Any],
213212
) -> None:
214-
215213
if (
216214
information_source_item.type
217215
== enums.InformationSourceType.LABELING_FUNCTION.value
@@ -469,7 +467,6 @@ def read_container_logs_thread(
469467
payload_id: str,
470468
docker_container: Any,
471469
):
472-
473470
ctx_token = general.get_ctx_token()
474471
# needs to be refetched since it is not thread safe
475472
information_source_payload = information_source.get_payload(project_id, payload_id)
@@ -782,6 +779,14 @@ def __get_embedding_id_from_function(
782779
re.IGNORECASE,
783780
)
784781
if not embedding_name:
782+
# newer bricks modules
783+
embedding_name = re.search(
784+
r'EMBEDDING: str = "([\w\W]+?)"',
785+
source_item.source_code,
786+
re.IGNORECASE,
787+
)
788+
if not embedding_name:
789+
# older bricks modules
785790
embedding_name = re.search(
786791
r'YOUR_EMBEDDING: str = "([\w\W]+?)"',
787792
source_item.source_code,
@@ -863,7 +868,6 @@ def prepare_sample_records_doc_bin(
863868
def run_labeling_function_exec_env(
864869
project_id: str, information_source_id: str, prefixed_doc_bin: str
865870
) -> Tuple[List[str], List[List[str]], bool]:
866-
867871
information_source_item = information_source.get(project_id, information_source_id)
868872

869873
prefixed_function_name = f"{information_source_id}_fn"

controller/transfer/project_transfer_manager.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,6 @@ def __transform_embedding_by_name(embedding_name: str):
397397
"id",
398398
)
399399
] = embedding_object.id
400-
401400
if data.get(
402401
"embedding_tensors_data",
403402
):
@@ -904,14 +903,14 @@ def __replace_embedding_name(
904903

905904
general.commit()
906905
daemon.run(
907-
__post_processing_import_threaded, project_id, task_id, embedding_ids, data
906+
__post_processing_import_threaded, project_id, task_id, embedding_ids, data, import_user_id
908907
)
909908
send_progress_update(project_id, task_id, 100)
910909
logger.info(f"Finished import of project {project_id}")
911910

912911

913912
def __post_processing_import_threaded(
914-
project_id: str, task_id: str, embedding_ids: List[str], data: Dict[str, Any]
913+
project_id: str, task_id: str, embedding_ids: List[str], data: Dict[str, Any], user_id: str
915914
) -> None:
916915
time.sleep(5)
917916
while True:
@@ -926,7 +925,7 @@ def __post_processing_import_threaded(
926925
if not data.get(
927926
"embedding_tensors_data",
928927
):
929-
embedding_manager.recreate_embeddings(project_id)
928+
embedding_manager.recreate_embeddings(project_id,user_id=user_id)
930929
else:
931930
for old_id in embedding_ids:
932931
embedding_manager.request_tensor_upload(

controller/transfer/util.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def convert_to_record_dict(
103103
run_checks(df, project_id, user_id)
104104
check_and_convert_category_for_unknown(df, project_id, user_id)
105105

106-
df = covert_nested_attributes_to_text(df)
106+
covert_nested_attributes_to_text(df)
107107
added_col = add_running_id_if_not_present(df, project_id)
108108
return df.to_dict("records"), added_col
109109

@@ -148,14 +148,13 @@ def check_and_convert_category_for_unknown(
148148
def covert_nested_attributes_to_text(df: pd.DataFrame) -> pd.DataFrame:
149149
for key in df.columns:
150150
sample = pick_sample(df, key)
151-
if check_sample_has_dict_values(sample):
151+
if check_sample_has_dict_or_list_values(sample):
152152
df[key] = df[key].apply(lambda x: json.dumps(x))
153-
return df
154153

155154

156-
def check_sample_has_dict_values(sample: List[Any]) -> bool:
155+
def check_sample_has_dict_or_list_values(sample: List[Any]) -> bool:
157156
for value in sample:
158-
if isinstance(value, dict):
157+
if isinstance(value, dict) or isinstance(value, list):
159158
return True
160159
return False
161160

exceptions/exceptions.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,6 @@ class NotAllowedInDemoError(Exception):
2323

2424
class BadPasswordError(Exception):
2525
pass
26+
27+
class ApiTokenImportError(Exception):
28+
pass

0 commit comments

Comments
 (0)