Skip to content

Commit cb549a1

Browse files
authored
[Cherry-Pick-Main][Server][SDL-5812] Updating project to allow locking/unlocking the schema. Adds support for a timestamp column (#56)
1 parent 67d3733 commit cb549a1

File tree

11 files changed: +229 -40 lines changed

11 files changed: +229 -40 lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 3.2.12 on 2024-11-08 04:55
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
("datamanager", "0086_label_info"),
10+
]
11+
12+
operations = [
13+
migrations.AddField(
14+
model_name="project",
15+
name="lock_schema",
16+
field=models.BooleanField(default=False),
17+
),
18+
]

src/server/datamanager/models.py

+1
Original file line number | Diff line number | Diff line change
@@ -248,6 +248,7 @@ class Project(models.Model):
248248
image_file_name = models.CharField(
249249
max_length=1100, null=True, unique=True, default=None
250250
)
251+
lock_schema = models.BooleanField(default=False)
251252
last_modified = models.DateTimeField(auto_now=True)
252253

253254
def __unicode__(self):

src/server/datamanager/query.py

+6-1
Original file line number | Diff line number | Diff line change
@@ -311,7 +311,12 @@ def get_capture_file(project_uuid: str, capture_file: str, ext: str) -> DataFram
311311
datastore.get("{}".format(os.path.basename(capture_file)), capture_file)
312312

313313
if ext == ".csv":
314-
tmp_df = read_csv(capture_file, index_col="sequence")
314+
cols = list(read_csv(capture_file, nrows=1))
315+
tmp_df = read_csv(
316+
capture_file,
317+
index_col="sequence",
318+
usecols=[col for col in cols if col != "timestamp"],
319+
)
315320
elif ext == ".wav":
316321
with wave.open(capture_file, "rb") as wave_reader:
317322
waveFrames = wave_reader.readframes(wave_reader.getnframes())

src/server/datamanager/serializers/capture.py

+3-6
Original file line number | Diff line number | Diff line change
@@ -79,19 +79,16 @@ def validate_capture_file(capture, tmp_name):
7979
for index, (key, item) in enumerate(
8080
capture.project.capture_sample_schema.items()
8181
):
82-
if item.get("index") is None:
83-
item["index"] = index
82+
if item.get("index"):
83+
item.pop("index")
8484
update_capture_sample_schema = True
8585

8686
for key in reader.schema.keys():
8787
if key not in capture.project.capture_sample_schema:
8888
capture.project.capture_sample_schema[key] = reader.schema[key]
8989
update_capture_sample_schema = True
9090

91-
if (
92-
not settings.ALLOW_UPDATE_PROJECT_SCHEMA
93-
and capture.project.capture_sample_schema
94-
):
91+
if capture.project.lock_schema:
9592
project_columns = sorted(list(capture.project.capture_sample_schema.keys()))
9693
capture_columns = sorted(list(reader.schema.keys()))
9794
if project_columns != capture_columns:

src/server/datamanager/serializers/serializers.py

+1
Original file line number | Diff line number | Diff line change
@@ -300,6 +300,7 @@ class Meta:
300300
"created_at",
301301
"active_pipelines",
302302
"description",
303+
"lock_schema",
303304
"last_modified",
304305
)
305306
read_only_fields = (

src/server/datamanager/tests/utils/test_file_reader.py

+12-12
Original file line number | Diff line number | Diff line change
@@ -114,36 +114,36 @@ def test_csv_reader_data_mixed(filereader_mixed_int_float):
114114
def test_csv_reader_schema_int_float(filereader_mixed_int_float):
115115
schema = filereader_mixed_int_float.schema
116116
assert {
117-
"AccelerometerX": {"type": "int", "index": 0},
118-
"AccelerometerY": {"type": "float", "index": 1},
119-
"GyroscopeZ": {"type": "float", "index": 2},
117+
"AccelerometerX": {"type": "int"},
118+
"AccelerometerY": {"type": "float"},
119+
"GyroscopeZ": {"type": "float"},
120120
} == schema
121121

122122

123123
def test_csv_reader_schema_float(filereader_float):
124124
schema = filereader_float.schema
125125
assert {
126-
"AccelerometerX": {"type": "float", "index": 0},
127-
"AccelerometerY": {"type": "float", "index": 1},
128-
"GyroscopeZ": {"type": "float", "index": 2},
126+
"AccelerometerX": {"type": "float"},
127+
"AccelerometerY": {"type": "float"},
128+
"GyroscopeZ": {"type": "float"},
129129
} == schema
130130

131131

132132
def test_csv_reader_schema_int(filereader_int):
133133
schema = filereader_int.schema
134134
assert {
135-
"AccelerometerX": {"type": "int", "index": 0},
136-
"AccelerometerY": {"type": "int", "index": 1},
137-
"GyroscopeZ": {"type": "int", "index": 2},
135+
"AccelerometerX": {"type": "int"},
136+
"AccelerometerY": {"type": "int"},
137+
"GyroscopeZ": {"type": "int"},
138138
} == schema
139139

140140

141141
def test_csv_reader_schema_int_space_fields(filereader_int_name_space_fields):
142142
schema = filereader_int_name_space_fields.schema
143143
assert {
144-
"Accelerometer_X": {"type": "int", "index": 0},
145-
"AccelerometerY": {"type": "int", "index": 1},
146-
"GyroscopeZ": {"type": "int", "index": 2},
144+
"Accelerometer_X": {"type": "int"},
145+
"AccelerometerY": {"type": "int"},
146+
"GyroscopeZ": {"type": "int"},
147147
} == schema
148148

149149

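src/server/datamanager/tests/views/data/packet_loss_with_timestamp.csv (file name inferred from the test below that opens it; the file header was lost in extraction)

Original file line number | Diff line number | Diff line change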
@@ -0,0 +1,100 @@
1+
timestamp,sequence,AccelerometerX,AccelerometerY,AccelerometerZ,GyroscopeX,GyroscopeY,GyroscopeZ
2+
r,1000,-158,313,4173,0,-2,-226
3+
r,1001,-146,368,4192,15,-5,-332
4+
r,1002,-282,278,4120,-93,56,-408
5+
r,1003,-332,179,4159,-19,14,-333
6+
r,1004,-181,170,4166,29,-13,-153
7+
r,1005,-148,154,4175,12,0,-51
8+
r,1006,-197,234,4166,9,-1,-8
9+
r,1007,-225,263,4160,10,-3,1
10+
r,1008,-218,252,4154,10,-2,-1
11+
r,1009,-215,254,4159,10,-3,-1
12+
r,1010,-221,248,4180,9,-2,-1
13+
r,1011,-217,250,4166,10,-4,-4
14+
r,1012,-215,255,4165,9,-3,-3
15+
r,1013,-217,256,4169,9,-2,-2
16+
r,1014,-215,262,4163,10,-3,-2
17+
r,1015,-213,264,4177,11,-2,-2
18+
r,1016,-214,255,4161,10,-2,-2
19+
r,1017,-213,248,4163,10,-2,-1
20+
r,1018,-218,250,4172,10,-3,-1
21+
r,1019,-211,254,4169,11,-4,-1
22+
r,1020,-213,256,4169,10,-2,-3
23+
r,1021,-220,250,4158,10,-3,-2
24+
r,1022,-212,258,4168,10,-3,-2
25+
r,1023,-202,257,4169,9,-2,-2
26+
r,1024,-210,258,4161,9,-3,-2
27+
r,1025,-215,256,4165,10,-4,-3
28+
r,1026,-213,256,4163,11,-1,-2
29+
r,1027,-210,256,4169,10,-2,-1
30+
r,1028,-216,257,4164,9,-2,-1
31+
r,1029,-217,253,4167,10,-1,-3
32+
r,1030,-215,260,4171,11,-1,-2
33+
r,1031,-213,255,4174,11,-3,0
34+
r,1032,-214,256,4164,9,-3,-1
35+
r,1033,-211,253,4169,8,-3,-2
36+
r,1034,-209,253,4163,9,-2,-2
37+
r,1035,-215,253,4159,9,-2,-2
38+
r,1036,-215,253,4166,10,-4,-2
39+
r,1037,-214,258,4161,11,-4,-2
40+
r,1038,-213,256,4163,10,-3,-1
41+
r,1039,-211,256,4163,10,-3,-2
42+
r,1040,-212,255,4164,9,-2,-1
43+
r,1041,-218,256,4172,9,-3,0
44+
r,1042,-215,253,4171,10,-4,0
45+
r,1043,-211,253,4164,10,-1,-1
46+
r,1044,-215,260,4158,11,-2,-2
47+
r,1045,-212,248,4169,10,-3,-3
48+
r,1046,-213,256,4162,11,-2,-1
49+
r,1047,-214,259,4160,10,-3,-1
50+
r,1048,-217,256,4161,10,-4,-1
51+
r,1049,-217,254,4175,11,-4,-2
52+
r,1050,-210,256,4172,11,-4,-3
53+
r,1051,-211,264,4162,10,-4,-3
54+
r,1052,-210,259,4163,10,-3,-1
55+
r,1053,-205,253,4168,9,-3,0
56+
r,1054,-221,250,4164,10,-2,-2
57+
r,1055,-221,255,4156,10,-3,-3
58+
r,1056,-213,259,4162,10,-2,-1
59+
r,1057,-218,254,4176,10,-2,-1
60+
r,1058,-216,250,4167,9,-2,-1
61+
r,1059,-206,257,4165,9,-2,-3
62+
r,1060,-212,255,4161,10,-1,-1
63+
r,1061,-214,252,4173,10,-2,-2
64+
r,1062,-217,255,4188,10,-2,-2
65+
r,1063,-224,255,4191,10,-2,-1
66+
r,1064,-218,258,4180,9,-2,-3
67+
r,1065,-221,256,4157,9,-3,-1
68+
r,1066,-208,258,4147,10,-4,-1
69+
r,1067,-211,257,4157,10,-2,-1
70+
r,1068,-214,257,4166,10,-2,-2
71+
r,1069,-209,253,4181,10,-2,-2
72+
r,1070,-213,256,4179,12,-1,-2
73+
r,1071,-212,257,4168,11,-3,-2
74+
r,1072,-205,255,4155,9,-3,-1
75+
r,1073,-215,261,4162,11,-2,-1
76+
r,1074,-223,261,4168,10,-2,-2
77+
r,1075,-212,264,4175,10,-2,-1
78+
r,1076,-217,257,4187,9,-2,-2
79+
r,1077,-214,258,4184,10,-4,-2
80+
r,1078,-220,264,4171,9,-3,-2
81+
r,1079,-211,284,4164,10,-2,-11
82+
r,1080,-206,268,4163,11,-2,-58
83+
r,1081,-222,233,4155,10,-4,-66
84+
r,1082,-217,252,4164,11,-3,-17
85+
r,1083,-214,261,4173,10,-3,3
86+
r,1084,-217,258,4181,10,-3,-2
87+
r,1086,-210,258,4171,10,-3,-2
88+
r,1087,-203,253,4158,9,-2,-2
89+
r,1088,-209,249,4167,10,-3,-2
90+
r,1089,-215,252,4173,10,-2,-1
91+
r,1090,-219,263,4172,10,-3,-1
92+
r,1092,-205,258,4184,11,-4,0
93+
r,1093,-211,261,4163,10,-3,-1
94+
r,1094,-219,258,4166,10,-4,-2
95+
r,1095,-217,251,4162,10,-3,-3
96+
r,1096,-213,257,4164,10,-3,-2
97+
r,1097,-210,254,4168,9,-4,-3
98+
r,1098,-224,252,4169,10,-2,-2
99+
r,1099,-219,261,4173,11,-3,-2
100+
r,1120,-212,263,4167,10,-3,-3

src/server/datamanager/tests/views/test_capture.py

+79-13
Original file line number | Diff line number | Diff line change
@@ -190,6 +190,62 @@ def test_create_csv_with_packet_loss(self, client, project):
190190
assert r["name"] == "test_packet.csv"
191191
assert r["file_size"] == 2943
192192

193+
def test_create_csv_with_timestamp(self, client, project):
194+
project.save()
195+
settings.DEBUG = True
196+
197+
capture_list_url = reverse(
198+
"capture-list",
199+
kwargs={"project_uuid": project.uuid},
200+
)
201+
202+
dirname = os.path.dirname(__file__)
203+
204+
template_path = os.path.join(dirname, "data/packet_loss_with_timestamp.csv")
205+
with open(template_path, "rb") as f:
206+
response = client.post(
207+
capture_list_url,
208+
format="multipart",
209+
data={"file": f, "name": "test_packet.csv"},
210+
)
211+
212+
assert response.status_code == status.HTTP_201_CREATED
213+
214+
r = response.json()
215+
216+
assert r["max_sequence"] == 1120
217+
assert r["number_samples"] == 99
218+
assert r["name"] == "test_packet.csv"
219+
assert r["file_size"] == 3151
220+
221+
from datamanager.query import get_capture_file
222+
223+
capture = Capture.objects.get(name="test_packet.csv")
224+
capture_df = get_capture_file(project.uuid, capture.file, ".csv")
225+
assert "timestamp" not in capture_df.columns
226+
227+
from engine.base import pipeline_utils
228+
from datamanager.models import TeamMember
229+
230+
user = TeamMember.objects.get(email="[email protected]").user
231+
capture_df_pipeline_utils, _, _ = pipeline_utils.get_capturefile(
232+
user, project.uuid, capture.name
233+
)
234+
235+
assert "timestamp" not in capture_df_pipeline_utils.columns
236+
237+
capture_list_url = reverse(
238+
"capture-file",
239+
kwargs={"project_uuid": project.uuid, "uuid": capture.uuid},
240+
)
241+
242+
response = client.get(capture_list_url)
243+
244+
assert (
245+
response.data
246+
== b"timestamp,sequence,AccelerometerX,AccelerometerY,AccelerometerZ,GyroscopeX,GyroscopeY,GyroscopeZ\r\nr,1000,-158,313,4173,0,-2,-226\r\nr,1001,-146,368,4192,15,-5,-332\r\nr,1002,-282,278,4120,-93,56,-408\r\nr,1003,-332,179,4159,-19,14,-333\r\nr,1004,-181,170,4166,29,-13,-153\r\nr,1005,-148,154,4175,12,0,-51\r\nr,1006,-197,234,4166,9,-1,-8\r\nr,1007,-225,263,4160,10,-3,1\r\nr,1008,-218,252,4154,10,-2,-1\r\nr,1009,-215,254,4159,10,-3,-1\r\nr,1010,-221,248,4180,9,-2,-1\r\nr,1011,-217,250,4166,10,-4,-4\r\nr,1012,-215,255,4165,9,-3,-3\r\nr,1013,-217,256,4169,9,-2,-2\r\nr,1014,-215,262,4163,10,-3,-2\r\nr,1015,-213,264,4177,11,-2,-2\r\nr,1016,-214,255,4161,10,-2,-2\r\nr,1017,-213,248,4163,10,-2,-1\r\nr,1018,-218,250,4172,10,-3,-1\r\nr,1019,-211,254,4169,11,-4,-1\r\nr,1020,-213,256,4169,10,-2,-3\r\nr,1021,-220,250,4158,10,-3,-2\r\nr,1022,-212,258,4168,10,-3,-2\r\nr,1023,-202,257,4169,9,-2,-2\r\nr,1024,-210,258,4161,9,-3,-2\r\nr,1025,-215,256,4165,10,-4,-3\r\nr,1026,-213,256,4163,11,-1,-2\r\nr,1027,-210,256,4169,10,-2,-1\r\nr,1028,-216,257,4164,9,-2,-1\r\nr,1029,-217,253,4167,10,-1,-3\r\nr,1030,-215,260,4171,11,-1,-2\r\nr,1031,-213,255,4174,11,-3,0\r\nr,1032,-214,256,4164,9,-3,-1\r\nr,1033,-211,253,4169,8,-3,-2\r\nr,1034,-209,253,4163,9,-2,-2\r\nr,1035,-215,253,4159,9,-2,-2\r\nr,1036,-215,253,4166,10,-4,-2\r\nr,1037,-214,258,4161,11,-4,-2\r\nr,1038,-213,256,4163,10,-3,-1\r\nr,1039,-211,256,4163,10,-3,-2\r\nr,1040,-212,255,4164,9,-2,-1\r\nr,1041,-218,256,4172,9,-3,0\r\nr,1042,-215,253,4171,10,-4,0\r\nr,1043,-211,253,4164,10,-1,-1\r\nr,1044,-215,260,4158,11,-2,-2\r\nr,1045,-212,248,4169,10,-3,-3\r\nr,1046,-213,256,4162,11,-2,-1\r\nr,1047,-214,259,4160,10,-3,-1\r\nr,1048,-217,256,4161,10,-4,-1\r\nr,1049,-217,254,4175,11,-4,-2\r\nr,1050,-210,256,4172,11,-4,-3\r\nr,1051,-211,264,4162,10,-4,-3\r\nr,1052,-210,259,4163,10,-3,-1\r\nr,1053,-205,253,4168,9,-3,0\r\nr,1054,-221,250,4164,10,-2,-2\r\nr,1055,-221,255,4156,10,-3,-3\r\nr,1056,-213,259,4162,10,-2,-1\r\nr,1057,-218,254,4176,
10,-2,-1\r\nr,1058,-216,250,4167,9,-2,-1\r\nr,1059,-206,257,4165,9,-2,-3\r\nr,1060,-212,255,4161,10,-1,-1\r\nr,1061,-214,252,4173,10,-2,-2\r\nr,1062,-217,255,4188,10,-2,-2\r\nr,1063,-224,255,4191,10,-2,-1\r\nr,1064,-218,258,4180,9,-2,-3\r\nr,1065,-221,256,4157,9,-3,-1\r\nr,1066,-208,258,4147,10,-4,-1\r\nr,1067,-211,257,4157,10,-2,-1\r\nr,1068,-214,257,4166,10,-2,-2\r\nr,1069,-209,253,4181,10,-2,-2\r\nr,1070,-213,256,4179,12,-1,-2\r\nr,1071,-212,257,4168,11,-3,-2\r\nr,1072,-205,255,4155,9,-3,-1\r\nr,1073,-215,261,4162,11,-2,-1\r\nr,1074,-223,261,4168,10,-2,-2\r\nr,1075,-212,264,4175,10,-2,-1\r\nr,1076,-217,257,4187,9,-2,-2\r\nr,1077,-214,258,4184,10,-4,-2\r\nr,1078,-220,264,4171,9,-3,-2\r\nr,1079,-211,284,4164,10,-2,-11\r\nr,1080,-206,268,4163,11,-2,-58\r\nr,1081,-222,233,4155,10,-4,-66\r\nr,1082,-217,252,4164,11,-3,-17\r\nr,1083,-214,261,4173,10,-3,3\r\nr,1084,-217,258,4181,10,-3,-2\r\nr,1086,-210,258,4171,10,-3,-2\r\nr,1087,-203,253,4158,9,-2,-2\r\nr,1088,-209,249,4167,10,-3,-2\r\nr,1089,-215,252,4173,10,-2,-1\r\nr,1090,-219,263,4172,10,-3,-1\r\nr,1092,-205,258,4184,11,-4,0\r\nr,1093,-211,261,4163,10,-3,-1\r\nr,1094,-219,258,4166,10,-4,-2\r\nr,1095,-217,251,4162,10,-3,-3\r\nr,1096,-213,257,4164,10,-3,-2\r\nr,1097,-210,254,4168,9,-4,-3\r\nr,1098,-224,252,4169,10,-2,-2\r\nr,1099,-219,261,4173,11,-3,-2\r\nr,1120,-212,263,4167,10,-3,-3"
247+
)
248+
193249
def test_create_csv_then_upload_wave(self, client, project):
194250
project.save()
195251
settings.DEBUG = True
@@ -210,8 +266,6 @@ def test_create_csv_then_upload_wave(self, client, project):
210266
)
211267
assert response.status_code == status.HTTP_201_CREATED
212268

213-
settings.ALLOW_UPDATE_PROJECT_SCHEMA = True
214-
215269
template_path = os.path.join(dirname, "data/on_4c77947d_nohash_0.wav")
216270
with open(template_path, "rb") as f:
217271
response = client.post(
@@ -231,14 +285,25 @@ def test_create_csv_then_upload_wave(self, client, project):
231285
},
232286
}
233287

234-
settings.ALLOW_UPDATE_PROJECT_SCHEMA = False
288+
template_path = os.path.join(dirname, "data/on_4c77947d_nohash_0.wav")
289+
with open(template_path, "rb") as f:
290+
response = client.post(
291+
capture_list_url,
292+
format="multipart",
293+
data={"file": f, "name": "window_test2.wav"},
294+
)
295+
296+
assert response.status_code == status.HTTP_201_CREATED
297+
298+
project.lock_schema = True
299+
project.save(update_fields=["lock_schema"])
235300

236301
template_path = os.path.join(dirname, "data/on_4c77947d_nohash_0.wav")
237302
with open(template_path, "rb") as f:
238303
response = client.post(
239304
capture_list_url,
240305
format="multipart",
241-
data={"file": f, "name": "window_test_2.wav"},
306+
data={"file": f, "name": "window_test_3.wav"},
242307
)
243308

244309
assert response.status_code == status.HTTP_400_BAD_REQUEST
@@ -250,14 +315,15 @@ def test_create_csv_then_upload_wave(self, client, project):
250315
],
251316
}
252317

253-
settings.ALLOW_UPDATE_PROJECT_SCHEMA = True
318+
project.lock_schema = False
319+
project.save(update_fields=["lock_schema"])
254320

255321
template_path = os.path.join(dirname, "data/on_4c77947d_nohash_0.wav")
256322
with open(template_path, "rb") as f:
257323
response = client.post(
258324
capture_list_url,
259325
format="multipart",
260-
data={"file": f, "name": "window_test_2.wav"},
326+
data={"file": f, "name": "window_test_4.wav"},
261327
)
262328

263329
assert response.status_code == status.HTTP_201_CREATED
@@ -266,13 +332,13 @@ def test_create_csv_then_upload_wave(self, client, project):
266332

267333
assert response.status_code == status.HTTP_200_OK
268334
assert response.json()["capture_sample_schema"] == {
269-
"channel_0": {"type": "int", "index": 0},
270-
"GyroscopeX": {"type": "int", "index": 3},
271-
"GyroscopeY": {"type": "int", "index": 4},
272-
"GyroscopeZ": {"type": "int", "index": 5},
273-
"AccelerometerX": {"type": "int", "index": 0},
274-
"AccelerometerY": {"type": "int", "index": 1},
275-
"AccelerometerZ": {"type": "int", "index": 2},
335+
"channel_0": {"type": "int"},
336+
"GyroscopeX": {"type": "int"},
337+
"GyroscopeY": {"type": "int"},
338+
"GyroscopeZ": {"type": "int"},
339+
"AccelerometerX": {"type": "int"},
340+
"AccelerometerY": {"type": "int"},
341+
"AccelerometerZ": {"type": "int"},
276342
}
277343

278344
def test_capture_stats_api_base(

src/server/datamanager/utils/file_reader.py

+7-6
Original file line number | Diff line number | Diff line change
@@ -50,18 +50,22 @@ def make_schema(dataframe):
5050
invalid_columns = []
5151
schema = {}
5252
for index, dtype in enumerate(dataframe.dtypes):
53-
if dtype not in ["int64", "float64"]:
53+
if (
54+
dtype not in ["int64", "float64"]
55+
and dataframe.columns[index] != "timestamp"
56+
):
5457
invalid_columns.append(dataframe.columns[index])
5558

5659
column_dtype = None
5760
if dtype in ["int64"]:
5861
column_dtype = "int"
62+
elif dtype in ["string"]:
63+
column_dtype = "string"
5964
elif dtype in ["float64"]:
6065
column_dtype = "float"
6166

6267
schema[dataframe.columns[index].replace(" ", "_")] = {
6368
"type": column_dtype,
64-
"index": index,
6569
}
6670

6771
if invalid_columns:
@@ -154,10 +158,7 @@ def __init__(self, file_path):
154158
index="sequence"
155159
)
156160

157-
self._schema = {
158-
key: {"type": "int16", "index": index}
159-
for index, key in enumerate(columns)
160-
}
161+
self._schema = {key: {"type": "int16"} for index, key in enumerate(columns)}
161162

162163
def to_CSVFileReader(self, tmp_file_path):
163164
self._dataframe.to_csv(tmp_file_path, index=None)

0 commit comments

Comments
 (0)