def unix_ms_to_utc_iso(unix_ms):
    """Convert a Unix epoch timestamp in milliseconds to an ISO-8601 string.

    The result is expressed in UTC with millisecond precision and a
    trailing ``Z`` designator, e.g. ``1970-01-01T00:00:01.500Z``.

    Args:
        unix_ms: epoch time in milliseconds as an int, float or
            ``decimal.Decimal``, or ``None``.

    Returns:
        The formatted string, or ``None`` when ``unix_ms`` is ``None``.
    """
    if unix_ms is None:
        return None
    # Decimal cannot be divided by a float, so convert it before scaling
    # milliseconds down to seconds.
    if isinstance(unix_ms, decimal.Decimal):
        unix_ms = float(unix_ms)
    moment = datetime.fromtimestamp(unix_ms / 1000.0, tz=timezone.utc)
    # isoformat() renders UTC as "+00:00"; the API exposes the "Z" form.
    return moment.isoformat(timespec='milliseconds').replace('+00:00', 'Z')


def add_utc_metadata(metadata):
    """Add ISO-8601 UTC timestamp fields to a metadata dict.

    Sets ``start_iso`` and ``end_iso`` on ``metadata`` from its ``start``
    and ``end`` epoch-millisecond values, formatted with millisecond
    precision. A ``start`` or ``end`` that is ``None`` or absent produces
    ``None`` for the corresponding ISO field instead of raising
    ``KeyError``.

    Can be expanded to add any api-level metadata.

    Args:
        metadata: dict of file metadata; it is updated in place. A falsy
            value (``None`` or an empty dict) is returned untouched.

    Returns:
        The same ``metadata`` object that was passed in.
    """
    if not metadata:
        return metadata

    # .get() keeps a record that lacks one of the keys from failing the
    # whole request; a missing value is reported the same way as None.
    metadata['start_iso'] = unix_ms_to_utc_iso(metadata.get('start'))
    metadata['end_iso'] = unix_ms_to_utc_iso(metadata.get('end'))
    return metadata
_get_aws_kwargs(): kwargs = dict( region_name=app.config.get('AWS_REGION'), @@ -305,6 +339,14 @@ def files_get(): type: string description: 16-byte blake2 hash of the file content + start_iso: + type: string + description: the start time of the file in ISO + format UTC iso timezone + end_iso: + type: string + description: the end time of the file in ISO + format UTC iso timezone next: type: string @@ -349,7 +391,10 @@ def files_get(): where=params.get('where'), cursor=params.get('cursor')) - [r.update(http_url=_get_canonical_http_url(r)) for r in results] + for r in results: + r.update(http_url=_get_canonical_http_url(r)) + r['metadata'] = add_utc_metadata(r['metadata']) + response = { 'records': results, 'next': _get_next_url(flask.request, results), @@ -476,6 +521,7 @@ def file_get_metadata(file_id): id: DatalakeAPIError ''' f = _get_file(file_id) + f.metadata = add_utc_metadata(f.metadata) return Response(json.dumps(f.metadata), content_type='application/json') @@ -542,6 +588,7 @@ def latest_get(what, where): params = _validate_latest_params(params) f = _get_latest(what, where, params.get('lookback', DEFAULT_LOOKBACK_DAYS)) f.update(http_url=_get_canonical_http_url(f)) + f['metadata'] = add_utc_metadata(f['metadata']) return Response(json.dumps(f), content_type='application/json') diff --git a/api/setup.py b/api/setup.py index fa3e272..d45adde 100644 --- a/api/setup.py +++ b/api/setup.py @@ -30,7 +30,7 @@ def get_version_from_pyver(): if 'sdist' in sys.argv or 'bdist_wheel' in sys.argv: raise ImportError('You must install pyver to create a package') else: - return 'noversion' + return '0.0.0' version, version_info = pyver.get_version(pkg="datalake_api", public=True) return version diff --git a/api/tests/test_metadata.py b/api/tests/test_metadata.py index 22f0047..7b25bec 100644 --- a/api/tests/test_metadata.py +++ b/api/tests/test_metadata.py @@ -11,6 +11,8 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the # License for the specific language governing permissions and limitations under # the License. +from datetime import datetime, timezone +from decimal import Decimal import pytest import simplejson as json @@ -32,7 +34,20 @@ def test_get_metadata(metadata_getter, s3_file_maker, random_metadata): res = metadata_getter('12345') assert res.status_code == 200 assert res.content_type == 'application/json' - assert json.loads(res.data) == random_metadata + res_data = json.loads(res.data) + for k, v in res_data.items(): + if k == 'start_iso' or k == 'end_iso': + k_epoch = k.replace('_iso','') + v_epoch = res_data[k_epoch] + if v is None: + assert v == v_epoch + + expected_v_iso = datetime.fromtimestamp( + v_epoch / 1000.0, tz=timezone.utc + ).isoformat(timespec='milliseconds').replace('+00:00', 'Z') + assert v == expected_v_iso + else: + assert v == random_metadata[k] def test_no_such_metadata(s3_bucket_maker, metadata_getter): diff --git a/ingester/setup.py b/ingester/setup.py index 792d5b4..2aff6f8 100644 --- a/ingester/setup.py +++ b/ingester/setup.py @@ -30,7 +30,7 @@ def get_version_from_pyver(): if 'sdist' in sys.argv or 'bdist_wheel' in sys.argv: raise ImportError('You must install pyver to create a package') else: - return 'noversion' + return '0.0.0' version, version_info = pyver.get_version(pkg="datalake_ingester", public=True) return version