-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathtest_project.py
More file actions
173 lines (144 loc) · 5.3 KB
/
test_project.py
File metadata and controls
173 lines (144 loc) · 5.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
from fastapi.testclient import TestClient
from submodules.model.models import Project as RefineryProject, User
from controller.transfer import record_transfer_manager
from api import transfer as transfer_api
from controller.upload_task import manager as upload_task_manager
from submodules.model.business_objects import (
general,
record as record_bo,
attribute as attribute_bo,
embedding as embedding_bo,
)
from submodules.model import enums
import json
import time
def test_get_project_by_project_id(
    client: TestClient, refinery_project: RefineryProject
):
    """Fetch the project by its id and check the response carries an id."""
    url = f"/api/v1/project/{refinery_project.id}/project-by-project-id"
    response = client.get(url)
    assert response.status_code == 200
    payload = response.json()
    assert payload.get("id")
def test_update_project_name_description(
    client: TestClient, refinery_project: RefineryProject
):
    """Rename the project via the API and verify the change was persisted."""
    body = {"name": "new_name", "description": "new_description"}
    response = client.post(
        f"/api/v1/project/{refinery_project.id}/update-project-name-description",
        json=body,
    )
    assert response.status_code == 200
    # reload the ORM object so the assertions see the committed values
    general.refresh(refinery_project)
    assert refinery_project.name == "new_name"
    assert refinery_project.description == "new_description"
def test_upload_records_to_project(
    client: TestClient, refinery_project: RefineryProject, user: User
):
    """Import two records via the transfer manager and mark running_id as primary key.

    Seeds the project with two records ({running_id, data}) through an upload
    task, then flips the running_id attribute to primary key so later tests
    can update records by running_id.
    """
    upload_task = upload_task_manager.create_upload_task(
        str(user.id),
        str(refinery_project.id),
        "dummy_file_name.csv",
        "records",
        "",
        upload_type=enums.UploadTypes.DEFAULT.value,
        key=None,
    )
    record_transfer_manager.import_file_record_dict(
        refinery_project.id,
        upload_task,
        [
            {"running_id": 1, "data": "hello world"},
            {"running_id": 2, "data": "hello world 2"},
        ],
    )
    assert record_bo.count(refinery_project.id) == 2
    attributes = attribute_bo.get_all(project_id=refinery_project.id)
    # one attribute per key of the imported record dicts
    assert len(attributes) == 2
    # locate the running_id attribute explicitly: the original loop left `att`
    # unbound (UnboundLocalError) when the attribute was missing, instead of a
    # clear assertion failure
    running_id_attribute = next(
        (a for a in attributes if a.name == "running_id"), None
    )
    assert running_id_attribute is not None
    att = attribute_bo.update(
        refinery_project.id,
        running_id_attribute.id,
        is_primary_key=True,
        with_commit=True,
    )
    assert att is not None
    assert att.is_primary_key is True
## kept in the same file to ensure the tests run in the correct order
def test_create_embedding(client: TestClient, refinery_project: RefineryProject):
    """Request an embedding on the "data" attribute and poll until tensors exist.

    Embedding creation is asynchronous, so the test polls the database: first
    for the embedding row itself, then (much longer, since a fresh start has
    to download the model) for its tensors.
    """
    att = attribute_bo.get_by_name(refinery_project.id, "data")
    assert att is not None
    response = client.post(
        f"/api/v1/embedding/{refinery_project.id}/create-embedding",
        json={
            "attribute_id": str(att.id),
            "config": json.dumps(
                {
                    "platform": "huggingface",
                    "termsText": None,
                    "termsAccepted": False,
                    "embeddingType": "ON_ATTRIBUTE",
                    "filterAttributes": [],
                    "model": "distilbert-base-uncased",
                }
            ),
        },
    )
    assert response.status_code == 200
    # poll for the embedding row (renamed from `all`, which shadowed the builtin)
    embeddings = []
    for _ in range(20):
        time.sleep(1)
        embeddings = embedding_bo.get_all_by_attribute_ids(
            refinery_project.id, [str(att.id)]
        )
        if len(embeddings) > 0:
            break
    assert len(embeddings) > 0
    assert embeddings[0].type == enums.EmbeddingType.ON_ATTRIBUTE.value
    # quite long since for a fresh start the model needs to be downloaded!
    count = 0
    for _ in range(60):
        time.sleep(1)
        count = embedding_bo.get_tensor_count(embeddings[0].id)
        if count > 0:
            break
    assert count > 0
def test_update_records_to_project(
    client: TestClient, refinery_project: RefineryProject, user: User
):
    """Re-import an existing record and trigger embedding recalculation."""
    task = upload_task_manager.create_upload_task(
        str(user.id),
        str(refinery_project.id),
        "dummy_file_name.csv",
        "records",
        "",
        upload_type=enums.UploadTypes.DEFAULT.value,
        key=None,
    )
    record_transfer_manager.import_file_record_dict(
        refinery_project.id,
        task,
        [{"running_id": 1, "data": "goodbye world"}],
    )
    # running_id 1 already exists, so the import updates rather than inserts
    assert record_bo.count(refinery_project.id) == 2
    records = record_bo.get_all(refinery_project.id)
    assert len(records) == 2
    updated = [r for r in records if r.data["data"] == "goodbye world"]
    assert updated  # the re-imported record carries the new value
    transfer_api.__recalculate_missing_attributes_and_embeddings(
        project_id=refinery_project.id, user_id=user.id
    )
    time.sleep(5)
    project_embeddings = embedding_bo.get_all_embeddings_by_project_id(
        refinery_project.id
    )
    assert len(project_embeddings) > 0
    assert project_embeddings[0].current_delta_record_count > 0
def test_delete_records_from_project(
    client: TestClient, refinery_project: RefineryProject
):
    """Delete one record through the API and verify exactly it was removed."""
    assert record_bo.count(refinery_project.id) == 2
    all_ids = record_bo.get_all_ids(refinery_project.id)
    ids_to_delete = all_ids[:1]  # delete a single record
    ## note that .delete doesn't seem to support request bodies, so we issue
    ## the DELETE via client.request directly
    response = client.request(
        "DELETE",
        f"/api/v1/record/{refinery_project.id}/delete-records",
        json={"record_ids": ids_to_delete},
    )
    assert response.status_code == 200
    remaining_ids = record_bo.get_all_ids(refinery_project.id)
    assert len(remaining_ids) == 1
    # the surviving record must not be the one we deleted
    assert remaining_ids[0] != ids_to_delete[0]