Skip to content

Commit a282ece

Browse files
committed
First version of the cold storage interface. New actions to request files, see the requests, and subscribe.
1 parent 663bb82 commit a282ece

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

49 files changed

+2721
-115
lines changed

Dockerfile

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ FROM --platform=$BUILDPLATFORM registry.cern.ch/inveniosoftware/almalinux:1
2929
# Use XRootD 5.8.1
3030
ENV XROOTD_VERSION=5.8.1
3131

32+
# Install the CERN CA
33+
COPY docker/carepo.repo /etc/yum.repos.d/
34+
35+
RUN yum install -y ca_CERN-Root-2 && yum clean -y all
36+
3237
# Install CERN Open Data Portal web node pre-requisites
3338
# hadolint ignore=DL3033
3439
RUN yum install -y \
@@ -42,8 +47,9 @@ RUN yum install -y \
4247
yum groupinstall -y "Development Tools" && \
4348
yum clean -y all
4449

50+
# hadolint ignore=DL3033
4551
RUN echo "Will install xrootd version: $XROOTD_VERSION (latest if empty)" && \
46-
yum install -y xrootd-"$XROOTD_VERSION" python3-xrootd-"$XROOTD_VERSION" && \
52+
yum install -y xrootd-"$XROOTD_VERSION" python3-xrootd-"$XROOTD_VERSION" swig python3-gfal2-util gfal2-plugin-http python3-gfal2 && \
4753
yum clean -y all
4854

4955
RUN pip uninstall pipenv -y && pip install --no-cache-dir --upgrade pip==24.3.1 setuptools==70.0.0 wheel==0.45.1 && \
@@ -72,13 +78,16 @@ ENV PATH=$PATH:${INVENIO_INSTANCE_PATH}/python/bin
7278

7379
# Add CERN Open Data Portal sources to `code` and work there
7480
WORKDIR ${CODE_DIR}
81+
COPY . ${CODE_DIR}
82+
USER root
83+
RUN chown -R "${INVENIO_USER_ID}":root "${CODE_DIR}"
84+
USER ${INVENIO_USER_ID}
7585

7686
# Debug off by default
7787
ARG DEBUG=""
7888
ENV DEBUG=${DEBUG:-""}
7989

8090
# Install CERN Open Data Portal sources
81-
COPY . ${CODE_DIR}
8291
# hadolint ignore=DL3013,SC2086
8392
RUN git config --global url.https://github.com/.insteadOf git://github.com/ && if [ "$DEBUG" ]; then FLAGS="-e"; fi && \
8493
pip install --no-cache-dir --user ${FLAGS} ".[all]" && pip check

MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,4 @@ recursive-include sentry *.json
4545
recursive-include sentry *.py
4646
recursive-include sentry *.sh
4747
recursive-include sentry *.yml
48+
recursive-include docker *.repo

cernopendata/api.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def __getitem__(self, key):
4545
"""Get a specific file."""
4646
obj = FileIndexMetadata.get(self.record, key)
4747
if obj:
48-
return self.file_cls(obj, self.filesmap.get(obj.key, {}))
48+
return self.file_cls(obj, self.file_indices.get(obj.key, {}))
4949
raise KeyError(key)
5050

5151
def flush(self):
@@ -96,7 +96,18 @@ def check_availability(self):
9696
if len(self._avl.keys()) == 1:
9797
self["availability"] = list(self._avl.keys())[0]
9898
else:
99-
self["availability"] = "sample files"
99+
self["availability"] = "partially"
100+
101+
def flush_indices(self):
102+
"""Updates the _file_indices information based on what exists on the database."""
103+
print("Updating the record with file indices")
104+
self["_file_indices"] = []
105+
# First, let's get all the file indices that this record has
106+
for elem in BucketTag.query.filter_by(value=str(self.id), key="record").all():
107+
self["_file_indices"].append(
108+
FileIndexMetadata.get(None, str(elem.bucket)).dumps()
109+
)
110+
self.check_availability()
100111

101112

102113
class FileIndexMetadata:
@@ -109,6 +120,7 @@ def __init__(self):
109120
self._size = 0
110121
self._files = []
111122
self._description = ""
123+
self._bucket = ""
112124

113125
def __repr__(self):
114126
"""Representation of the object."""
@@ -131,6 +143,7 @@ def create(cls, record, file_object, description=""):
131143
rb._description = description
132144
BucketTag.create(rb._bucket, "index_name", index_file_name)
133145
BucketTag.create(rb._bucket, "record", record.model.id)
146+
BucketTag.create(rb._bucket, "description", description)
134147
print(f"The file index contains {len(index_content)} entries.")
135148
for entry in index_content:
136149
entry_file = FileInstance.create()
@@ -162,9 +175,13 @@ def get(cls, record_id, bucket_id):
162175
.one()
163176
.value
164177
)
165-
bucket = Bucket.get(bucket_id)
166-
for o in ObjectVersion.get_by_bucket(bucket).all():
167-
f = FileObject(o, {})
178+
tag = BucketTag.query.filter_by(
179+
bucket_id=str(bucket_id), key="description"
180+
).first()
181+
obj._description = tag.value if tag else obj._index_file_name
182+
obj._bucket = Bucket.get(bucket_id)
183+
for o in ObjectVersion.get_by_bucket(obj._bucket).all():
184+
f = MultiURIFileObject(o, {})
168185
# Let's put also the uri
169186
f["uri"] = FileInstance.get(str(o.file_id)).uri
170187
f["filename"] = f["uri"].split("/")[-1]
@@ -196,6 +213,7 @@ def dumps(self):
196213
"size": self._size,
197214
"files": files,
198215
"description": self._description,
216+
"bucket": str(self._bucket),
199217
}
200218

201219
def flush(self):

cernopendata/cold_storage/__init__.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# This file is part of CERN Open Data Portal.
4+
# Copyright (C) 2017-2025 CERN.
5+
#
6+
# CERN Open Data Portal is free software; you can redistribute it
7+
# and/or modify it under the terms of the GNU General Public License as
8+
# published by the Free Software Foundation; either version 2 of the
9+
# License, or (at your option) any later version.
10+
#
11+
# CERN Open Data Portal is distributed in the hope that it will be
12+
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
# General Public License for more details.
15+
#
16+
# You should have received a copy of the GNU General Public License
17+
# along with CERN Open Data Portal; if not, write to the
18+
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
19+
# MA 02111-1307, USA.
20+
#
21+
# In applying this license, CERN does not
22+
# waive the privileges and immunities granted to it by virtue of its status
23+
# as an Intergovernmental Organization or submit itself to any jurisdiction.
24+
"""Cold Storage."""

0 commit comments

Comments
 (0)