Skip to content

added objid functions #79

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,18 @@ authors = [{ "name" = "The HDF Group", "email" = "[email protected]" }]
keywords = ["json", "hdf5", "multidimensional array", "data", "datacube"]
requires-python = ">=3.8"
dependencies = [
"h5py >=3.10",
"h5py >= 3.10",
"numpy >= 2.0; python_version>='3.9'",
"jsonschema >=4.4.0",
"tomli; python_version<'3.11'",
"numpy >=1.20,<2.0.0; python_version=='3.8'",
]

dynamic = ["version"]

[project.urls]
Homepage = "https://hdf5-json.readthedocs.io"
Documentation = "https://hdf5-json.readthedocs.io"
Homepage = "https://support.hdfgroup.org/documentation/hdf5-json/latest/"
Documentation = "https://support.hdfgroup.org/documentation/hdf5-json/latest/"
Source = "https://github.com/HDFGroup/hdf5-json"
"Bug Reports" = "https://github.com/HDFGroup/hdf5-json/issues"
Social = "https://twitter.com/hdf5"
Expand Down
8 changes: 8 additions & 0 deletions src/h5json/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@
from .hdf5dtype import getTypeResponse
from .hdf5dtype import getItemSize
from .hdf5dtype import createDataType
from .objid import createObjId
from .objid import getCollectionForId
from .objid import isObjId
from .objid import isS3ObjKey
from .objid import getS3Key
from .objid import getObjId
from .objid import isSchema2Id
from .objid import isRootObjId
from .hdf5db import Hdf5db
from . import _version

Expand Down
70 changes: 55 additions & 15 deletions src/h5json/hdf5db.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
import time
import h5py
import numpy as np
import uuid
import os.path as op
import os
import json
import logging
from .hdf5dtype import getTypeItem, createDataType, getItemSize
from .hdf5dtype import getTypeItem, createDataType, getItemSize, Reference, RegionReference
from .objid import createObjId
from .apiversion import _apiver


Expand Down Expand Up @@ -73,6 +73,43 @@
_H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")


def convert_dtype(srcdt):
    """Return a dtype equivalent to *srcdt* with h5json-style special types
    converted to their h5py equivalents.

    Conversions applied (recursively for compound and vlen base types):
      * h5json ``Reference`` / ``RegionReference`` metadata -> the matching
        ``h5py.special_dtype(ref=...)``
      * vlen metadata -> ``h5py.special_dtype(vlen=...)``
      * fixed-width unicode (kind ``U``) -> h5py vlen str
      * anything else is returned unchanged

    Args:
        srcdt: a ``numpy.dtype`` instance.

    Returns:
        A ``numpy.dtype`` suitable for use with h5py.

    Raises:
        TypeError: if the dtype carries an unrecognized ``ref`` metadata value.
    """

    if len(srcdt) > 0:
        # Compound type: rebuild each member.  Iterate .names, not .fields --
        # per the NumPy docs, .fields also contains field *titles* as keys,
        # which would produce duplicate entries here.
        fields = []
        for name in srcdt.names:
            item = srcdt.fields[name]
            # item is a tuple of dtype and integer offset
            field_dt = convert_dtype(item[0])
            fields.append((name, field_dt))
        tgt_dt = np.dtype(fields)
    else:
        # check if this a "special dtype"
        if srcdt.metadata and "ref" in srcdt.metadata:
            if srcdt.metadata['ref'] is Reference:
                tgt_dt = h5py.special_dtype(ref=h5py.Reference)
            elif srcdt.metadata['ref'] is RegionReference:
                tgt_dt = h5py.special_dtype(ref=h5py.RegionReference)
            else:
                raise TypeError(f"Unexpected ref type: {srcdt}")
        elif srcdt.metadata and "vlen" in srcdt.metadata:
            src_vlen = srcdt.metadata["vlen"]
            if isinstance(src_vlen, np.dtype):
                # base type may itself need conversion (e.g. vlen of unicode)
                tgt_base = convert_dtype(src_vlen)
            else:
                tgt_base = src_vlen
            tgt_dt = h5py.special_dtype(vlen=tgt_base)
        elif srcdt.kind == "U":
            # use vlen for unicode strings
            tgt_dt = h5py.special_dtype(vlen=str)
        else:
            tgt_dt = srcdt  # no conversion needed
    return tgt_dt


def visitObj(path, obj):
    """h5py visititems callback: forward (path, obj) to the Hdf5db instance
    registered in the module-level _db map for the object's file."""
    _db[obj.file.filename].visit(path, obj)
Expand Down Expand Up @@ -561,7 +598,7 @@ def initFile(self):

self.log.info("initializing file")
if not self.root_uuid:
self.root_uuid = str(uuid.uuid1())
self.root_uuid = createObjId()
self.dbGrp.attrs["rootUUID"] = self.root_uuid
self.dbGrp.create_group("{groups}")
self.dbGrp.create_group("{datasets}")
Expand Down Expand Up @@ -593,21 +630,21 @@ def visit(self, path, obj):
msg = "Unknown object type: " + __name__ + " found during scan of HDF5 file"
self.log.error(msg)
raise IOError(errno.EIO, msg)
uuid1 = uuid.uuid1() # create uuid
id = str(uuid1)
obj_id = createObjId() # create uuid

addrGrp = self.dbGrp["{addr}"]
if not self.readonly:
# storing db in the file itself, so we can link to the object directly
col[id] = obj.ref # save attribute ref to object
col[obj_id] = obj.ref # save attribute ref to object
else:
# store path to object
col[id] = obj.name
col[obj_id] = obj.name
addr = h5py.h5o.get_info(obj.id).addr
# store reverse map as an attribute
addrGrp.attrs[str(addr)] = id
addrGrp.attrs[str(addr)] = obj_id

#
# Get Datset creation properties
# Get Dataset creation properties
#
def getDatasetCreationProps(self, dset_uuid):
prop_list = {}
Expand Down Expand Up @@ -760,7 +797,7 @@ def getObjByPath(self, path):
def getObjectByUuid(self, col_type, obj_uuid):
# col_type should be either "datasets", "groups", or "datatypes"
if col_type not in ("datasets", "groups", "datatypes"):
msg = "Unexpectd error, invalid col_type: [" + col_type + "]"
msg = "Unexpected error, invalid col_type: [" + col_type + "]"
self.log.error(msg)
raise IOError(errno.EIO, msg)
if col_type == "groups" and obj_uuid == self.dbGrp.attrs["rootUUID"]:
Expand Down Expand Up @@ -1087,7 +1124,7 @@ def createCommittedType(self, datatype, obj_uuid=None):
raise IOError(errno.EPERM, msg)
datatypes = self.dbGrp["{datatypes}"]
if not obj_uuid:
obj_uuid = str(uuid.uuid1())
obj_uuid = createObjId()
dt = self.createTypeFromItem(datatype)

datatypes[obj_uuid] = dt
Expand Down Expand Up @@ -1476,6 +1513,7 @@ def makeAttribute(self, obj, attr_name, shape, attr_type, value):
self.makeNullTermStringAttribute(obj, attr_name, strLength, value)
else:
typeItem = getTypeItem(dt)
dt = convert_dtype(dt)
value = self.toRef(rank, typeItem, value)

# create numpy array
Expand Down Expand Up @@ -1725,6 +1763,7 @@ def toNumPyValue(self, typeItem, src, des):
baseType = typeItem["base"]

dt = self.createTypeFromItem(baseType)
dt = convert_dtype(dt)
des = np.array(src, dtype=dt)

elif typeClass == "H5T_REFERENCE":
Expand Down Expand Up @@ -1901,7 +1940,7 @@ def listToRef(self, data):
# object reference should be in the form: <collection_name>/<uuid>
for prefix in ("datasets", "groups", "datatypes"):
if data.startswith(prefix):
uuid_ref = data[len(prefix) :]
uuid_ref = data[len(prefix):]
if len(uuid_ref) == (UUID_LEN + 1) and uuid_ref.startswith("/"):
obj = self.getObjectByUuid(prefix, uuid_ref[1:])
if obj:
Expand Down Expand Up @@ -2193,7 +2232,8 @@ def getDatasetValuesByUuid(self, obj_uuid, slices=Ellipsis, format="json"):
raise IOError(errno.EIO, msg)

if isinstance(slices, (list, tuple)) and len(slices) != rank:
msg = "Unexpected error: getDatasetValuesByUuid: number of dims in selection not same as rank"
msg = "Unexpected error: getDatasetValuesByUuid: "
msg += "number of dims in selection not same as rank"
self.log.error(msg)
raise IOError(errno.EIO, msg)

Expand Down Expand Up @@ -2715,7 +2755,7 @@ def createDataset(
raise IOError(errno.EPERM, msg)
datasets = self.dbGrp["{datasets}"]
if not obj_uuid:
obj_uuid = str(uuid.uuid1())
obj_uuid = createObjId()
dt = None
item = {}
fillvalue = None
Expand Down Expand Up @@ -3490,7 +3530,7 @@ def createGroup(self, obj_uuid=None):
raise IOError(errno.EPERM, msg)
groups = self.dbGrp["{groups}"]
if not obj_uuid:
obj_uuid = str(uuid.uuid1())
obj_uuid = createObjId()
newGroup = groups.create_group(obj_uuid)
# store reverse map as an attribute
addr = h5py.h5o.get_info(newGroup.id).addr
Expand Down
Loading
Loading