Skip to content

Commit 8681e93

Browse files
anmarhindi, JWittmeyer, lumburovskalina
authored
Release updates (#205)
* fix prepare-record-export
* fix create-outlier-slice via create-data-slice
* Fixes wizard wait for tokenization
* Hotfix
* Uses new free api route
* Fix export projects with key
* Fixes pw snapshot upload
* Submodule update
* Remove double org request
* Cache kratos
* slight changes
* cache update
* Fixes gql request for user assignment
* Remove print
* fixes snakecase key prepare_record_export
* Submodules merge

---------

Co-authored-by: JWittmeyer <[email protected]>
Co-authored-by: Lina <[email protected]>
1 parent 08bf394 commit 8681e93

File tree

18 files changed

+167
-80
lines changed

18 files changed

+167
-80
lines changed

api/transfer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ def __calculate_missing_attributes(project_id: str, user_id: str) -> None:
441441
if i >= 60:
442442
i = 0
443443
ctx_token = general.remove_and_refresh_session(ctx_token, True)
444-
if tokenization.is_doc_bin_creation_running(project_id):
444+
if tokenization.is_doc_bin_creation_running_or_queued(project_id):
445445
time.sleep(2)
446446
continue
447447
else:

controller/auth/kratos.py

Lines changed: 108 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,133 @@
1-
from typing import Union, Any, List, Optional, Dict
2-
from requests import Response
1+
from typing import Union, Any, List, Dict
32
import os
43
import requests
54
import logging
5+
from datetime import datetime, timedelta
66

77
logging.basicConfig(level=logging.INFO)
88
logger: logging.Logger = logging.getLogger(__name__)
99
logger.setLevel(logging.DEBUG)
1010

1111
KRATOS_ADMIN_URL = os.getenv("KRATOS_ADMIN_URL")
1212

13+
# user_id -> {"identity" -> full identity, "simple" -> {"id": str, "mail": str, "firstName": str, "lastName": str}}
14+
# "collected" -> timestamp
15+
KRATOS_IDENTITY_CACHE: Dict[str, Any] = {}
16+
KRATOS_IDENTITY_CACHE_TIMEOUT = timedelta(minutes=30)
17+
18+
19+
def __get_cached_values() -> Dict[str, Dict[str, Any]]:
20+
global KRATOS_IDENTITY_CACHE
21+
if not KRATOS_IDENTITY_CACHE or len(KRATOS_IDENTITY_CACHE) == 0:
22+
__refresh_identity_cache()
23+
elif (
24+
KRATOS_IDENTITY_CACHE["collected"] + KRATOS_IDENTITY_CACHE_TIMEOUT
25+
< datetime.now()
26+
):
27+
__refresh_identity_cache()
28+
return KRATOS_IDENTITY_CACHE
29+
30+
31+
def __refresh_identity_cache():
32+
global KRATOS_IDENTITY_CACHE
33+
request = requests.get(f"{KRATOS_ADMIN_URL}/identities")
34+
if request.ok:
35+
collected = datetime.now()
36+
KRATOS_IDENTITY_CACHE = {
37+
identity["id"]: {
38+
"identity": identity,
39+
"simple": __parse_identity_to_simple(identity),
40+
}
41+
for identity in request.json()
42+
}
43+
KRATOS_IDENTITY_CACHE["collected"] = collected
44+
else:
45+
KRATOS_IDENTITY_CACHE = {}
46+
47+
48+
def __get_identity(user_id: str, only_simple: bool = True) -> Dict[str, Any]:
49+
if not isinstance(user_id, str):
50+
user_id = str(user_id)
51+
cache = __get_cached_values()
52+
if user_id in cache:
53+
if only_simple:
54+
return cache[user_id]["simple"]
55+
return cache[user_id]
56+
57+
if len(user_id) == 36:
58+
# cache miss: the identity may have been created after the last cache refresh, so ask Kratos directly
59+
request = requests.get(f"{KRATOS_ADMIN_URL}/identities/{user_id}")
60+
if request.ok:
61+
identity = request.json()
62+
if identity["id"] == user_id:
63+
KRATOS_IDENTITY_CACHE[user_id] = {
64+
"identity": identity,
65+
"simple": __parse_identity_to_simple(identity),
66+
}
67+
if only_simple:
68+
return KRATOS_IDENTITY_CACHE[user_id]["simple"]
69+
return KRATOS_IDENTITY_CACHE[user_id]
70+
# e.g. if the id "GOLD_STAR" is requested => it won't be in the cache, but callers expect a dummy dict
71+
if only_simple:
72+
return __parse_identity_to_simple({"id": user_id})
73+
return {
74+
"identity": {
75+
"id": user_id,
76+
"traits": {"email": None, "name": {"first": None, "last": None}},
77+
}
78+
}
79+
80+
81+
def __parse_identity_to_simple(identity: Dict[str, Any]) -> Dict[str, str]:
82+
r = {
83+
"id": identity["id"],
84+
"mail": None,
85+
"firstName": None,
86+
"lastName": None,
87+
}
88+
if "traits" in identity:
89+
r["mail"] = identity["traits"]["email"]
90+
if "name" in identity["traits"]:
91+
r["firstName"] = identity["traits"]["name"]["first"]
92+
r["lastName"] = identity["traits"]["name"]["last"]
93+
return r
94+
1395

1496
def get_userid_from_mail(user_mail: str) -> str:
15-
for identity in requests.get(f"{KRATOS_ADMIN_URL}/identities").json():
16-
if identity["traits"]["email"] == user_mail:
17-
return identity["id"]
97+
values = __get_cached_values()
98+
for key in values:
99+
if key == "collected":
100+
continue
101+
if values[key]["simple"]["mail"] == user_mail:
102+
return key
18103
return None
19104

20105

21106
def resolve_user_mail_by_id(user_id: str) -> str:
22-
res: Response = requests.get("{}/identities/{}".format(KRATOS_ADMIN_URL, user_id))
23-
data: Any = res.json()
24-
if res.status_code == 200 and data["traits"]:
25-
return data["traits"]["email"]
107+
i = __get_identity(user_id)
108+
if i:
109+
return i["mail"]
26110
return None
27111

28112

29113
def resolve_user_name_by_id(user_id: str) -> str:
30-
res: Response = requests.get("{}/identities/{}".format(KRATOS_ADMIN_URL, user_id))
31-
data: Any = res.json()
32-
if res.status_code == 200 and data["traits"]:
33-
return data["traits"]["name"]
114+
i = __get_identity(user_id, False)
115+
if i:
116+
i = i["identity"]
117+
return i["traits"]["name"]
34118
return None
35119

36120

37121
def resolve_all_user_ids(
38122
relevant_ids: List[str], as_list: bool = True
39-
) -> List[Dict[str, str]]:
123+
) -> Union[Dict[str, Dict[str, str]], List[Dict[str, str]]]:
40124
final = [] if as_list else {}
41125
for id in relevant_ids:
42-
r = requests.get(f"{KRATOS_ADMIN_URL}/identities/{id}").json()
43-
d = {
44-
"id": id,
45-
"mail": None,
46-
"firstName": None,
47-
"lastName": None,
48-
}
49-
if "traits" in r:
50-
traits = r["traits"]
51-
d["mail"] = traits["email"]
52-
d["firstName"] = traits["name"]["first"]
53-
d["lastName"] = traits["name"]["last"]
126+
i = __get_identity(id)
54127
if as_list:
55-
final.append(d)
128+
final.append(i)
56129
else:
57-
final[id] = d
130+
final[id] = i
58131
return final
59132

60133

@@ -63,25 +136,16 @@ def expand_user_mail_name(
63136
) -> List[Dict[str, str]]:
64137
final = []
65138
for user in users:
66-
r = requests.get(f"{KRATOS_ADMIN_URL}/identities/{user[user_id_key]}").json()
67-
d = {
68-
"mail": None,
69-
"firstName": None,
70-
"lastName": None,
71-
}
72-
if "traits" in r:
73-
traits = r["traits"]
74-
d["mail"] = traits["email"]
75-
d["firstName"] = traits["name"]["first"]
76-
d["lastName"] = traits["name"]["last"]
77-
user = {**user, **d}
139+
i = __get_identity(user[user_id_key])
140+
user = {**user, **i}
78141
final.append(user)
79142
return final
80143

81144

82145
def resolve_user_name_and_email_by_id(user_id: str) -> dict:
83-
res: Response = requests.get("{}/identities/{}".format(KRATOS_ADMIN_URL, user_id))
84-
data: Any = res.json()
85-
if res.status_code == 200 and data["traits"]:
86-
return data["traits"]["name"], data["traits"]["email"]
146+
i = __get_identity(user_id, False)
147+
if i:
148+
i = i["identity"]
149+
if i and "traits" in i and i["traits"]:
150+
return i["traits"]["name"], i["traits"]["email"]
87151
return None

controller/auth/manager.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@
1818

1919

2020
def get_organization_id_by_info(info) -> Organization:
21-
organization: Organization = get_user_by_info(info).organization
22-
if not organization:
21+
user = get_user_by_info(info)
22+
if not user or not user.organization_id:
2323
raise GraphQLError("User is not associated to an organization")
24-
return organization
24+
return str(user.organization_id)
2525

2626

2727
def get_user_by_info(info) -> User:
@@ -76,7 +76,7 @@ def check_project_access_dep(request: Request, project_id: str):
7676

7777

7878
def check_project_access(info, project_id: str) -> None:
79-
organization_id: str = get_organization_id_by_info(info).id
79+
organization_id: str = get_organization_id_by_info(info)
8080
project: Project = project_manager.get_project_with_orga_id(
8181
organization_id, project_id
8282
)

controller/data_slice/manager.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import json
21
from typing import Dict, Any, List, Optional
32

43
from submodules.model import DataSlice
@@ -79,7 +78,7 @@ def create_data_slice(
7978
project_id=project_id,
8079
created_by=user_id,
8180
name=name,
82-
filter_raw=json.loads(filter_raw),
81+
filter_raw=filter_raw,
8382
filter_data=filter_data,
8483
static=static,
8584
slice_type=slice_type,

controller/organization/manager.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,18 @@ def get_user_info(user) -> User:
5656
return user_expanded
5757

5858

59-
def get_all_users(organization_id: str, user_role: Optional[str] = None) -> List[User]:
59+
def get_all_users(
60+
organization_id: str, user_role: Optional[str] = None, as_dict: bool = True
61+
) -> List[User]:
6062
parsed = None
6163
if user_role:
6264
try:
6365
parsed = enums.UserRoles[user_role.upper()]
6466
except KeyError:
6567
raise ValueError(f"Invalid UserRoles: {user_role}")
6668
all_users = user.get_all(organization_id, parsed)
69+
if not as_dict:
70+
return all_users
6771
all_users_dict = sql_alchemy_to_dict(
6872
all_users, column_whitelist=USER_INFO_WHITELIST
6973
)

controller/record/manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ def edit_records(
164164
tokenization_service.request_tokenize_project(project_id, user_id)
165165
time.sleep(1)
166166
# wait for tokenization to finish, the endpoint itself handles missing docbins
167-
while tokenization.is_doc_bin_creation_running(project_id):
167+
while tokenization.is_doc_bin_creation_running_or_queued(project_id):
168168
time.sleep(0.5)
169169

170170
except Exception as e:
@@ -255,7 +255,7 @@ def __check_and_prep_edit_records(
255255
useable_embeddings[embedding_item.attribute_id] = []
256256
useable_embeddings[embedding_item.attribute_id].append(embedding_item)
257257

258-
if tokenization.is_doc_bin_creation_running(project_id):
258+
if tokenization.is_doc_bin_creation_running_or_queued(project_id):
259259
errors_found.append(
260260
"tokenization is currently running. Wait for it to finish before editing records."
261261
)

controller/task_queue/handler/tokenization.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
attribute as attribute_db_bo,
66
)
77
from controller.tokenization import tokenization_service
8-
from submodules.model.business_objects.tokenization import is_doc_bin_creation_running
8+
from submodules.model.business_objects.tokenization import (
9+
is_doc_bin_creation_running_or_queued,
10+
)
911
from submodules.model.enums import RecordTokenizationScope
1012
from ..util import if_task_queue_send_websocket
1113

@@ -51,4 +53,4 @@ def __start_task(task: Dict[str, Any]) -> bool:
5153

5254

5355
def __check_finished(task: Dict[str, Any]) -> bool:
54-
return not is_doc_bin_creation_running(task["project_id"])
56+
return not is_doc_bin_creation_running_or_queued(task["project_id"], True)

controller/transfer/cognition/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,4 +242,4 @@ def get_embeddings(self):
242242
"en": "doc2query/msmarco-t5-base-v1",
243243
}
244244

245-
FREE_API_REQUEST_URL = "https://free.api.kern.ai/inference"
245+
FREE_API_REQUEST_URL = "https://k8s.freeapi.kern.ai/inference"

controller/transfer/cognition/import_wizard.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,11 +193,17 @@ def __finalize_setup(
193193
if c > 120:
194194
token_ref.request_new()
195195
c = 0
196-
if tokenization_db_bo.is_doc_bin_creation_running(reference_project_id):
196+
if tokenization_db_bo.is_doc_bin_creation_running_or_queued(
197+
reference_project_id
198+
):
197199
continue
198-
if tokenization_db_bo.is_doc_bin_creation_running(question_project_id):
200+
if tokenization_db_bo.is_doc_bin_creation_running_or_queued(
201+
question_project_id
202+
):
199203
continue
200-
if tokenization_db_bo.is_doc_bin_creation_running(relevance_project_id):
204+
if tokenization_db_bo.is_doc_bin_creation_running_or_queued(
205+
relevance_project_id
206+
):
201207
continue
202208
break
203209

@@ -313,6 +319,7 @@ def __finalize_setup_for(
313319
token_ref: TokenRef,
314320
) -> Token:
315321
target_data = {"TARGET_LANGUAGE": project_language}
322+
316323
# attributes
317324
attributes = project_type.get_attributes()
318325
target_data["target_type"] = "ac"

fast_api/models.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,12 @@ class UpdateProjectStatusBody(BaseModel):
305305

306306

307307
class PrepareProjectExportBody(BaseModel):
308-
export_options: StrictStr
308+
export_options: Any
309+
key: Optional[StrictStr] = None
310+
311+
312+
class PrepareRecordExportBody(BaseModel):
313+
export_options: Any
309314
key: Optional[StrictStr] = None
310315

311316

fast_api/routes/data_browser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ def create_data_slice(
163163
):
164164
name = body.name
165165
static = body.static
166-
filter_raw = body.filterRaw
166+
filter_raw = json.loads(body.filterRaw)
167167
filter_data = [json.loads(item) for item in body.filterData]
168168

169169
user = auth_manager.get_user_by_info(request.state.info)

fast_api/routes/data_slices.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from typing import Optional
22

33
from fastapi import APIRouter, Depends, Request
4-
from fast_api.routes.client_response import pack_json_result, wrap_content_for_frontend
4+
from fast_api.routes.client_response import pack_json_result
55
from submodules.model.util import sql_alchemy_to_dict
66
from typing import List
77
from controller.data_slice import manager
@@ -28,10 +28,12 @@ def get_data_slices(
2828
sql_alchemy_to_dict(ds, for_frontend=False)
2929
for ds in manager.get_all_data_slices(project_id, slice_type)
3030
]
31+
3132
for v in values:
3233
v["filterData"] = json.dumps(v["filter_data"])
3334
v["filterRaw"] = json.dumps(v["filter_raw"])
3435
del v["count_sql"]
36+
3537
return pack_json_result(
3638
{"data": {"dataSlices": values}},
3739
)

0 commit comments

Comments
 (0)