Skip to content

Commit 1219fe7

Browse files
committed
merged into main
2 parents 5df725c + 1926625 commit 1219fe7

File tree

8 files changed

+534
-55
lines changed

8 files changed

+534
-55
lines changed

arch.md

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,3 +104,85 @@ Taiga is deployed as a containerized application:
104104
1. **Authentication**: Token-based authentication
105105
2. **Authorization**: Group-based access control (This is not really fully implemented. If there is code about group-based access, I am willing to bet it is half-implemented, if implemented at all)
106106
3. **Secure Storage**: Taiga will sign URLs for fetching data from S3 and give them to the client, so that the transfer of large data happens directly between client and S3.
107+
108+
## Deployment Diagram
109+
110+
```mermaid
111+
graph TD
112+
subgraph "Client"
113+
Browser[Web Browser]
114+
end
115+
116+
subgraph "Application Server"
117+
Flask[Flask API]
118+
Celery[Celery Workers]
119+
Redis[Redis Queue]
120+
end
121+
122+
subgraph "Storage"
123+
Postgres[PostgreSQL]
124+
S3[AWS S3]
125+
GCS[Google Cloud Storage]
126+
end
127+
128+
subgraph "External Services"
129+
Figshare[Figshare]
130+
Email[Email Service]
131+
end
132+
133+
Browser -->|HTTP/HTTPS| Flask
134+
Flask -->|Query/Update| Postgres
135+
Flask -->|Enqueue Tasks| Redis
136+
Redis -->|Process Tasks| Celery
137+
Celery -->|File Operations| S3
138+
Celery -->|File Operations| GCS
139+
Celery -->|Publish| Figshare
140+
Celery -->|Send Notifications| Email
141+
Flask -->|Generate Signed URLs| S3
142+
Flask -->|Generate Signed URLs| GCS
143+
```
144+
145+
## Service Architecture
146+
147+
```mermaid
148+
flowchart LR
149+
subgraph "Frontend"
150+
React[React App]
151+
TypeScript[TypeScript Models]
152+
end
153+
154+
subgraph "Backend API"
155+
Endpoints[API Endpoints]
156+
Controllers[Controllers]
157+
Models[Data Models]
158+
Auth[Authentication]
159+
end
160+
161+
subgraph "Background Processing"
162+
TaskQueue[Celery Queue]
163+
Workers[Celery Workers]
164+
Conversion[File Conversion]
165+
Import[Data Import]
166+
Export[Data Export]
167+
end
168+
169+
subgraph "Storage"
170+
MetadataDB[PostgreSQL]
171+
FileStorage[S3/GCS]
172+
end
173+
174+
React -->|API Calls| Endpoints
175+
TypeScript -->|Data Types| React
176+
Endpoints -->|Process Requests| Controllers
177+
Controllers -->|CRUD Operations| Models
178+
Models -->|Persist| MetadataDB
179+
Controllers -->|Enqueue Tasks| TaskQueue
180+
TaskQueue -->|Process| Workers
181+
Workers -->|Execute| Conversion
182+
Workers -->|Execute| Import
183+
Workers -->|Execute| Export
184+
Conversion -->|Read/Write| FileStorage
185+
Import -->|Write| FileStorage
186+
Export -->|Read| FileStorage
187+
Auth -->|Secure| Endpoints
188+
```

docker-base/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ celery==4.1.1
55
certifi==2019.6.16
66
Click==7.0
77
clickclick==1.2.2
8+
openapi-spec-validator==0.2.9
89
connexion[swagger-ui]==2.3.0
910
docutils==0.14
1011
Flask==1.0.3
@@ -16,7 +17,7 @@ google-cloud-storage==1.35.0
1617
h5py==2.6.0
1718
humanize==0.5.1
1819
jsonpointer==2.0
19-
jsonschema==2.6.0
20+
jsonschema==2.7.0
2021
kombu==4.2.1
2122
mailjet_rest==1.3.3
2223
marshmallow==2.15.1
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""empty message
2+
3+
Revision ID: c1be0bfabe2e
4+
Revises: a286182fdf59
5+
Create Date: 2023-03-06 11:16:22.434754
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = "c1be0bfabe2e"
14+
down_revision = "a286182fdf59"
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Apply the schema changes for revision c1be0bfabe2e.

    Drops the ``tmp_datafiles_cleanup`` scratch table (recreated by
    ``downgrade``, so presumably a one-off cleanup artifact — confirm
    against revision a286182fdf59) and adds an optional JSON
    ``custom_metadata`` column to both ``datafiles`` and
    ``upload_session_files``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table("tmp_datafiles_cleanup")
    # nullable=True: existing rows get NULL metadata; the column is optional.
    op.add_column("datafiles", sa.Column("custom_metadata", sa.JSON(), nullable=True))
    op.add_column(
        "upload_session_files", sa.Column("custom_metadata", sa.JSON(), nullable=True)
    )
    # ### end Alembic commands ###
27+
28+
29+
def downgrade():
    """Revert the schema changes for revision c1be0bfabe2e.

    Removes the ``custom_metadata`` columns added by ``upgrade`` and
    recreates the ``tmp_datafiles_cleanup`` table with its prior shape
    (all columns nullable, no primary key — as autogenerated from the
    pre-migration database).
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column("upload_session_files", "custom_metadata")
    op.drop_column("datafiles", "custom_metadata")
    op.create_table(
        "tmp_datafiles_cleanup",
        sa.Column("id", sa.VARCHAR(length=80), autoincrement=False, nullable=True),
        sa.Column(
            "orig_underlying_data_file_id",
            sa.VARCHAR(length=80),
            autoincrement=False,
            nullable=True,
        ),
        sa.Column(
            "new_underlying_data_file_id",
            sa.VARCHAR(length=80),
            autoincrement=False,
            nullable=True,
        ),
        sa.Column("update_count", sa.INTEGER(), autoincrement=False, nullable=True),
    )
    # ### end Alembic commands ###

taiga2/controllers/endpoint.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,11 @@ def de_delete_dataset_version(datasetVersionId):
479479
@validate
480480
def create_upload_session_file(uploadMetadata, sid):
481481
filename = uploadMetadata["filename"]
482+
483+
# Optional per file metadata, stored as a json encoded string.
484+
# Not queryable independently from files.
485+
custom_metadata = uploadMetadata.get("custom_metadata")
486+
482487
if uploadMetadata["filetype"] == "s3":
483488
S3UploadedFileMetadata = uploadMetadata["s3Upload"]
484489
s3_bucket = S3UploadedFileMetadata["bucket"]
@@ -492,6 +497,7 @@ def create_upload_session_file(uploadMetadata, sid):
492497
upload_session_file = models_controller.add_upload_session_s3_file(
493498
session_id=sid,
494499
filename=filename,
500+
custom_metadata=custom_metadata,
495501
initial_file_type=initial_file_type,
496502
initial_s3_key=initial_s3_key,
497503
s3_bucket=s3_bucket,
@@ -514,11 +520,12 @@ def create_upload_session_file(uploadMetadata, sid):
514520
return flask.jsonify(task.id)
515521
elif uploadMetadata["filetype"] == "virtual":
516522
existing_taiga_id = uploadMetadata["existingTaigaId"]
517-
518523
try:
519-
data_file = models_controller.get_datafile_by_taiga_id(
524+
data_file_by_taiga_id = models_controller.get_datafile_by_taiga_id(
520525
existing_taiga_id, one_or_none=True
521526
)
527+
data_file = models_controller.get_underlying_file(data_file_by_taiga_id)
528+
522529
except InvalidTaigaIdFormat as ex:
523530
api_error(
524531
"The following was not formatted like a valid taiga ID: {}".format(
@@ -529,8 +536,20 @@ def create_upload_session_file(uploadMetadata, sid):
529536
if data_file is None:
530537
api_error("Unknown taiga ID: " + existing_taiga_id)
531538

539+
if custom_metadata == None:
540+
custom_metadata = data_file_by_taiga_id.custom_metadata
541+
else:
542+
custom_metadata = (
543+
custom_metadata
544+
if data_file_by_taiga_id.custom_metadata == None
545+
else {**data_file_by_taiga_id.custom_metadata, **custom_metadata}
546+
)
547+
532548
models_controller.add_upload_session_virtual_file(
533-
session_id=sid, filename=filename, data_file_id=data_file.id
549+
session_id=sid,
550+
filename=filename,
551+
custom_metadata=custom_metadata,
552+
data_file_id=data_file.id,
534553
)
535554

536555
return flask.jsonify("done")
@@ -554,6 +573,7 @@ def create_upload_session_file(uploadMetadata, sid):
554573
models_controller.add_upload_session_gcs_file(
555574
session_id=sid,
556575
filename=filename,
576+
custom_metadata=custom_metadata,
557577
gcs_path=gcs_path,
558578
generation_id=str(generation_id),
559579
)
@@ -1078,7 +1098,7 @@ def copy_datafile_to_google_bucket(datafileGCSCopy):
10781098
datafile_id = datafileGCSCopy["datafile_id"]
10791099
gcs_path = datafileGCSCopy["gcs_path"]
10801100

1081-
datafile = models_controller.get_datafile_by_taiga_id(datafile_id)
1101+
datafile = models_controller.get_underlying_datafile_by_taiga_id(datafile_id)
10821102
if datafile is None:
10831103
raise flask.abort(404)
10841104

0 commit comments

Comments
 (0)