Skip to content

Commit 08dcdd6

Browse files
Rasko Leinonen and csc-jm committed
Encrypt and upload Bigpicture XML files to S3inbox upon publishing (merge commit)
Merge branch 'feature/encrypt-and-send-xmls' into 'main' * Addressed comments * Encrypt and send BP XML files to S3inbox Closes #929 See merge request https://gitlab.ci.csc.fi/sds-dev/sd-submit/metadata-submitter/-/merge_requests/1126 Approved-by: Rasko Leinonen <raskolei@csc.fi> Co-authored-by: Joonatan Mäkinen <joonatan.makinen@csc.fi> Merged by Rasko Leinonen <raskolei@csc.fi>
2 parents f0ad001 + a6a7dc1 commit 08dcdd6

21 files changed

Lines changed: 665 additions & 52 deletions

File tree

Makefile

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,6 @@ get_env: ## Get secrets needed for integration tests from vault
5353
$(call write_secret,SDS_AAI_CLIENT_ID,sd-submit/secrets,sds_aai_id) \
5454
$(call write_secret,SDS_AAI_CLIENT_SECRET,sd-submit/secrets,sds_aai_secret) \
5555
$(call write_secret,SDS_AAI_URL,sd-submit/secrets,sds_aai_url) \
56-
$(call write_secret,LS_AAI_CLIENT_ID,sd-submit/secrets,ls_aai_id) \
57-
$(call write_secret,LS_AAI_CLIENT_SECRET,sd-submit/secrets,ls_aai_secret) \
58-
$(call write_secret,LS_AAI_URL,sd-submit/secrets,ls_aai_url) \
5956
$(call write_secret,KEYSTONE_ENDPOINT,sd-submit/secrets,pouta_host) \
6057
$(call write_secret,NBIS_JWT_PUBLIC_KEY,sd-submit/secrets,nbis_jwt_public_key) \
6158
$(call write_integration_test_secret,DATACITE_API,sd-submit/datacite_test,DOI_API) \

docker-compose.yml

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,6 @@ services:
7575
start_period: 30s
7676
environment:
7777
- "DEPLOYMENT=NBIS"
78-
- "OIDC_CLIENT_SECRET=${LS_AAI_CLIENT_SECRET:-${OIDC_CLIENT_SECRET:?OIDC_CLIENT_SECRET must be defined}}"
79-
- "OIDC_CLIENT_ID=${LS_AAI_CLIENT_ID:-${OIDC_CLIENT_ID:?OIDC_CLIENT_ID must be defined}}"
80-
- "OIDC_URL=${LS_AAI_URL:-${OIDC_URL:?OIDC_URL must be defined}}"
8178
- "BASE_URL=http://sd-submit-api-nbis:5431"
8279
- "OIDC_REDIRECT_URL=${OIDC_REDIRECT_URL}"
8380
- "OIDC_SECURE_COOKIE=${OIDC_SECURE_COOKIE}"
@@ -98,9 +95,12 @@ services:
9895
- "DATABASE_URL=${NBIS_DATABASE_URL}"
9996
- "BP_CENTER_ID=${BP_CENTER_ID}"
10097
- "S3_REGION=${S3_REGION}"
101-
- "S3_ENDPOINT=${S3_ENDPOINT}"
98+
- "S3_ENDPOINT=${S3_INBOX_ENDPOINT:?S3_INBOX_ENDPOINT must be defined}"
10299
- "ADMIN_URL=${SDA_API_URL}"
103100
- "ADMIN_TOKEN=${ADMIN_TOKEN}"
101+
- "CRYPT4GH_PUBLIC_KEY=${C4GH_RECIPIENT_PUBLIC_KEY:?C4GH_RECIPIENT_PUBLIC_KEY must be defined}"
102+
- "CRYPT4GH_PRIVATE_KEY=${C4GH_SENDER_SECRET_KEY:?C4GH_SENDER_SECRET_KEY must be defined}"
103+
- "CRYPT4GH_PRIVATE_KEY_PASSPHRASE=${C4GH_SECRET_KEY_PASSPHRASE:?C4GH_SECRET_KEY_PASSPHRASE must be defined}"
104104
- "ALLOW_UNSAFE=${ALLOW_UNSAFE}"
105105

106106
mock-oauth2:
@@ -205,6 +205,12 @@ services:
205205
- "8006:8006"
206206
environment:
207207
- MOTO_PORT=8006
208+
healthcheck:
209+
test: curl --fail http://127.0.0.1:8006/ || exit 1
210+
interval: 10s
211+
timeout: 5s
212+
retries: 5
213+
start_period: 10s
208214

209215
postgres-csc:
210216
image: postgres:18-alpine
@@ -272,3 +278,5 @@ services:
272278
condition: service_healthy
273279
mockldap:
274280
condition: service_started
281+
mockinbox:
282+
condition: service_healthy

metadata_backend/api/handlers/publish.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,12 @@
1414
from ..models.datacite import DataCiteMetadata
1515
from ..models.models import File, Registration, SubmissionId
1616
from ..models.submission import Rems, Submission, SubmissionMetadata, SubmissionWorkflow
17-
from ..processors.xml.bigpicture import BP_POLICY_OBJECT_TYPE, BP_XML_OBJECT_CONFIG
17+
from ..processors.xml.bigpicture import (
18+
BP_POLICY_OBJECT_TYPE,
19+
BP_XML_OBJECT_CONFIG,
20+
)
1821
from ..processors.xml.processors import XmlObjectProcessor
22+
from ..services.bigpicture import upload_bp_metadata_xmls
1923
from ..services.datacite import DataciteService
2024
from ..services.submission.bigpicture import is_clinical_policy
2125
from .restapi import RESTAPIHandler
@@ -139,7 +143,7 @@ async def _publish_rems(self, submission: Submission, rems: Rems, registration:
139143
# Create REMS resource.
140144
if not registration.remsResourceId:
141145
if submission.workflow == SubmissionWorkflow.BP:
142-
# Use BigPicture dataset id as the REMS resource id.
146+
# Use Bigpicture dataset id as the REMS resource id.
143147
resid = submission.submissionId
144148
elif registration.doi is not None:
145149
# Use DOI as the REMS resource id.
@@ -304,6 +308,13 @@ async def publish_submission(
304308
if deployment_config().ALLOW_REGISTRATION:
305309
await self._register_submission(submission, datacite, rems)
306310

311+
if workflow == SubmissionWorkflow.BP:
312+
headers = req.headers
313+
jwt = self._services.auth._get_bearer_token(headers)
314+
if not jwt:
315+
raise UserException("Missing OIDC access token in Authorization bearer header for SDA inbox upload.")
316+
await upload_bp_metadata_xmls(self._services, submission_id, user.user_id, jwt)
317+
307318
# Update submission status to published.
308319
await submission_service.publish(submission_id)
309320

metadata_backend/api/handlers/restapi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class RESTAPIServiceHandlers(BaseModel):
4949
ror: RorServiceHandler | None
5050
rems: RemsServiceHandler
5151
keystone: KeystoneServiceHandler | None
52-
auth: AuthServiceHandler
52+
auth: AuthServiceHandler | None
5353
admin: AdminServiceHandler | None = None
5454
database: HealthHandler
5555

metadata_backend/api/services/auth.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import jwt
1111
from fastapi import HTTPException
1212
from starlette import status
13+
from starlette.datastructures import Headers
1314

1415
from ...conf.jwt import jwt_config
1516
from ...database.postgres.models import ApiKeyEntity
@@ -219,3 +220,20 @@ async def list_api_keys(self, user_id: str) -> list[ApiKey]:
219220
"""
220221

221222
return await self.__repository.get_api_keys(user_id)
223+
224+
@staticmethod
225+
def _get_bearer_token(headers: Headers) -> str | None:
226+
"""Get OIDC access token from Authorization bearer header.
227+
228+
Args:
229+
headers: The HTTP headers containing the Authorization bearer token.
230+
231+
Returns:
232+
The OIDC access token if present, None otherwise.
233+
"""
234+
auth_header = headers.get("Authorization", "")
235+
if auth_header.lower().startswith("bearer "):
236+
token = auth_header[7:].strip()
237+
if token:
238+
return token
239+
return None
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
"""Bigpicture API services."""
2+
3+
from ..exceptions import SystemException
4+
from ..handlers.restapi import RESTAPIServices
5+
from ..processors.xml.bigpicture import (
6+
BP_ANNOTATION_OBJECT_TYPE,
7+
BP_DATASET_OBJECT_TYPE,
8+
BP_IMAGE_OBJECT_TYPE,
9+
BP_LANDING_PAGE_OBJECT_TYPE,
10+
BP_OBSERVATION_OBJECT_TYPE,
11+
BP_OBSERVER_OBJECT_TYPE,
12+
BP_ORGANISATION_OBJECT_TYPE,
13+
BP_POLICY_OBJECT_TYPE,
14+
BP_REMS_OBJECT_TYPE,
15+
BP_SAMPLE_OBJECT_TYPES,
16+
BP_STAINING_OBJECT_TYPE,
17+
)
18+
from .file import S3InboxSDAService
19+
20+
# Each entry pairs an object type (or a tuple of object types) with the file
# name used for the uploaded document of that type.
BP_METADATA_FILES: tuple[tuple[str | tuple[str, ...], str], ...] = (
    (BP_DATASET_OBJECT_TYPE, "dataset.xml.c4gh"),
    (BP_POLICY_OBJECT_TYPE, "policy.xml.c4gh"),
    (BP_IMAGE_OBJECT_TYPE, "image.xml.c4gh"),
    (BP_ANNOTATION_OBJECT_TYPE, "annotation.xml.c4gh"),
    (BP_OBSERVATION_OBJECT_TYPE, "observation.xml.c4gh"),
    (BP_OBSERVER_OBJECT_TYPE, "observer.xml.c4gh"),
    (tuple(BP_SAMPLE_OBJECT_TYPES), "sample.xml.c4gh"),
    (BP_STAINING_OBJECT_TYPE, "staining.xml.c4gh"),
)

BP_LANDING_PAGE: tuple[tuple[str, str], ...] = ((BP_LANDING_PAGE_OBJECT_TYPE, "landing_page.xml.c4gh"),)

BP_PRIVATE_METADATA_FILES: tuple[tuple[str, str], ...] = (
    (BP_ORGANISATION_OBJECT_TYPE, "organisation.xml.c4gh"),
    (BP_REMS_OBJECT_TYPE, "rems.xml.c4gh"),
)

# Backward-compatible alias for the original, misspelled constant name.
BP_PRIVATE_METDATA_FILES = BP_PRIVATE_METADATA_FILES
37+
38+
39+
async def _upload_xml_documents(
40+
services: RESTAPIServices,
41+
file_provider: S3InboxSDAService,
42+
*,
43+
bucket_name: str,
44+
prefix: str,
45+
object_files: tuple[tuple[str | tuple[str, ...], str], ...],
46+
user_id: str,
47+
jwt: str,
48+
submission_id: str,
49+
) -> None:
50+
for object_type, filename in object_files:
51+
xml = None
52+
async for xml_doc in services.object.get_xml_documents(submission_id, object_type):
53+
xml = xml_doc
54+
break
55+
if xml is None:
56+
continue
57+
58+
object_key = f"{prefix}/{filename}"
59+
await file_provider._add_file_to_bucket(
60+
bucket_name=bucket_name,
61+
object_key=object_key,
62+
access_key=user_id,
63+
secret_key=user_id,
64+
session_token=jwt,
65+
body=xml.encode("utf-8"),
66+
)
67+
68+
69+
async def upload_bp_metadata_xmls(services: RESTAPIServices, submission_id: str, user_id: str, jwt: str) -> None:
    """Upload encrypted Bigpicture metadata XML files to SDA inbox.

    Documents are written under ``DATASET_{submission_id}/`` in three folders,
    in this order: ``METADATA``, ``LANDING_PAGE`` and ``PRIVATE``.

    Args:
        services: Service container; its file provider must be an ``S3InboxSDAService``.
        submission_id: Submission whose XML documents are uploaded.
        user_id: User id, used for the bucket name and passed on as the upload credentials.
        jwt: Access token passed on to the uploads.

    Raises:
        SystemException: If the configured file provider is not the SDA inbox service.
    """
    inbox = services.file_provider
    if not isinstance(inbox, S3InboxSDAService):
        raise SystemException("Bigpicture metadata upload requires SDA inbox file provider service.")

    # SDA inbox bucket name is the user id with @ replaced by underscore
    bucket = user_id.replace("@", "_")
    dataset_root = f"DATASET_{submission_id}"

    # TODO(improve): Add datacite.xml to private metadata files once datacite.xml is available
    sections = (
        ("METADATA", BP_METADATA_FILES),  # Metadata XML files
        ("LANDING_PAGE", BP_LANDING_PAGE),  # Landing page XML file
        ("PRIVATE", BP_PRIVATE_METDATA_FILES),  # Private metadata XML files
    )
    for folder, object_files in sections:
        await _upload_xml_documents(
            services,
            inbox,
            bucket_name=bucket,
            prefix=f"{dataset_root}/{folder}",
            object_files=object_files,
            user_id=user_id,
            jwt=jwt,
            submission_id=submission_id,
        )

metadata_backend/api/services/file.py

Lines changed: 78 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
11
"""Service to retrieve file and bucket information from a file provider."""
22

3+
import base64
4+
import binascii
35
from abc import ABC, abstractmethod
6+
from io import BytesIO
47

58
import aioboto3
69
import botocore.exceptions
710
import ujson
11+
from crypt4gh.keys import c4gh
12+
from crypt4gh.lib import encrypt
813
from pydantic import BaseModel, RootModel
914

15+
from ...conf.c4gh import c4gh_config
1016
from ...conf.s3 import s3_config
1117
from ...helpers.logger import LOG
1218
from ...services.admin_service import AdminServiceHandler
@@ -389,7 +395,6 @@ async def _verify_bucket_policy(self, bucket: str) -> bool:
389395
return False
390396

391397

392-
# WIP: to be possibly used for writing XML file to SDA S3 Inbox.
393398
class S3InboxSDAService(FileProviderService):
394399
"""Service to manage S3 buckets in NeIC SDA S3 Inbox."""
395400

@@ -433,30 +438,85 @@ async def check_files_exist(self, user_id: str, files: list[SubmissionFile]) ->
433438
inbox_paths = [inbox_file.get("inboxPath", "") for inbox_file in inbox_files]
434439
return [file.path for file in files if not any(file.path in inbox_path for inbox_path in inbox_paths)]
435440

441+
async def _load_crypt4gh_keys(self) -> tuple[object, object]:
    """Load the Crypt4GH sender secret key and recipient public key from ``c4gh_config()``.

    Both configured values are base64-encoded PEM-style texts. The first and
    last non-empty lines of each text (the BEGIN/END markers) are dropped and
    the lines in between are base64-decoded to obtain the raw key data.

    Returns:
        A tuple of the sender secret key, as returned by
        ``c4gh.parse_private_key``, and the recipient public key bytes.

    Raises:
        SystemException: If a configured value is not valid base64/UTF-8, or if
            the keys cannot be parsed or the public key has the wrong length.
    """
    conf = c4gh_config()
    try:
        sender_key_pem = base64.b64decode(conf.CRYPT4GH_PRIVATE_KEY).decode("utf-8")
        recipient_key_pem = base64.b64decode(conf.CRYPT4GH_PUBLIC_KEY).decode("utf-8")
    except (binascii.Error, UnicodeDecodeError) as ex:
        raise SystemException("Invalid base64 in C4GH key environment variables.") from ex

    def _pem_payload(pem_text: str) -> bytes:
        """Return the decoded data between the first and last non-empty PEM lines."""
        lines = [line.strip().encode("utf-8") for line in pem_text.splitlines() if line.strip()]
        return base64.b64decode(b"".join(lines[1:-1]))

    # Crypt4GH public keys are raw X25519 keys, which are always 32 bytes long.
    public_key_length = 32

    try:
        private_data = _pem_payload(sender_key_pem)
        public_data = _pem_payload(recipient_key_pem)

        # A PEM without a body decodes to b"" without raising; reject it here
        # instead of letting the encryption step fail later.
        if len(public_data) != public_key_length:
            raise ValueError(f"Crypt4GH recipient public key must be {public_key_length} bytes.")

        # Position the stream after the magic word, when present, before parsing.
        private_stream = BytesIO(private_data)
        if private_data.startswith(c4gh.MAGIC_WORD):
            private_stream.seek(len(c4gh.MAGIC_WORD))

        sender_secret_key = c4gh.parse_private_key(private_stream, lambda: conf.CRYPT4GH_PRIVATE_KEY_PASSPHRASE)
        return sender_secret_key, public_data
    except Exception as ex:
        # Any parsing failure is reported to callers as a single system error.
        raise SystemException("Failed to load Crypt4GH keys for Bigpicture metadata encryption.") from ex
466+
467+
async def _encrypt_file(self, file: bytes, sender_secret_key: object, recipient_public_key: object) -> bytes:
    """Encrypt the given bytes with crypt4gh and return the encrypted bytes.

    :param file: unencrypted content
    :param sender_secret_key: secret key of the sender
    :param recipient_public_key: public key of the recipient
    :returns: the bytes written by crypt4gh ``encrypt``
    """
    # Key entry passed to crypt4gh: (0, sender secret key, recipient public key).
    recipient_keys = [(0, sender_secret_key, recipient_public_key)]
    plaintext = BytesIO(file)
    ciphertext = BytesIO()
    encrypt(recipient_keys, plaintext, ciphertext)
    return ciphertext.getvalue()
473+
436474
async def _add_file_to_bucket(
437-
self, bucket_name: str, object_key: str, access_key: str, secret_key: str, session_token: str
475+
self,
476+
bucket_name: str,
477+
object_key: str,
478+
access_key: str,
479+
secret_key: str,
480+
session_token: str,
481+
body: bytes = b"",
438482
) -> None:
439-
"""Add a new object to S3 bucket using provided credentials.
483+
"""Put a C4GH encrypted object to S3 bucket using provided credentials.
440484
441485
:param bucket_name: name of the bucket
442486
:param object_key: key for the object to be added
443487
:param access_key: S3 access key ID
444488
:param secret_key: S3 secret access key
445489
:param session_token: S3 session token
490+
:param body: unencrypted object bytes
446491
"""
447-
# TODO(improve): Add file content as body instead of empty file.
448-
session = aioboto3.Session()
449-
async with session.client(
450-
"s3",
451-
endpoint_url=self.endpoint,
452-
aws_access_key_id=access_key,
453-
aws_secret_access_key=secret_key,
454-
aws_session_token=session_token, # equivalent to s3cmd access_token
455-
region_name=self.region,
456-
) as s3:
457-
await s3.put_object(
458-
Bucket=bucket_name,
459-
Key=object_key,
460-
Body="",
461-
ContentType="application/json",
492+
sender_secret_key, recipient_public_key = await self._load_crypt4gh_keys()
493+
encrypted_file = await self._encrypt_file(body, sender_secret_key, recipient_public_key)
494+
495+
try:
496+
session = aioboto3.Session()
497+
async with session.client(
498+
"s3",
499+
endpoint_url=self.endpoint,
500+
aws_access_key_id=access_key,
501+
aws_secret_access_key=secret_key,
502+
aws_session_token=session_token, # equivalent to s3cmd access_token
503+
region_name=self.region,
504+
) as s3:
505+
await s3.put_object(
506+
Bucket=bucket_name,
507+
Key=object_key,
508+
Body=encrypted_file,
509+
ContentType="application/octet-stream",
510+
)
511+
except botocore.exceptions.ClientError as ex:
512+
err = ex.response.get("Error", {})
513+
code = err.get("Code")
514+
msg = err.get("Message")
515+
LOG.exception(
516+
"Failed to upload encrypted file to SDA inbox bucket '%s' key '%s': %s - %s",
517+
bucket_name,
518+
object_key,
519+
code,
520+
msg,
462521
)
522+
raise SystemException("Failed to upload encrypted file to SDA inbox.") from ex

metadata_backend/api/services/submission/bigpicture.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def is_clinical_policy(policy_processor: XmlObjectProcessor) -> bool:
7878

7979

8080
class BigpictureObjectSubmissionService(ObjectSubmissionService):
81-
"""Service for processing BigPicture submissions."""
81+
"""Service for processing Bigpicture submissions."""
8282

8383
def __init__(
8484
self,
@@ -140,7 +140,7 @@ def _create_processor(objects: list[ObjectSubmission]) -> tuple[XmlStringDocumen
140140
if datacite_object:
141141
datacite = read_datacite_xml(datacite_object.document)
142142

143-
# Create processor for BigPicture XMLs.
143+
# Create processor for Bigpicture XMLs.
144144
processor = XmlStringDocumentsProcessor(BP_XML_OBJECT_CONFIG, [o.document for o in bp_objects])
145145
return processor, datacite
146146

0 commit comments

Comments
 (0)