|
| 1 | +"""Typed-artifact contract for Metaflow. |
| 2 | +
|
| 3 | +This module defines the minimal :class:`IOType` abstract base class. OSS |
| 4 | +Metaflow ships the contract; concrete types (scalars, tensors, enums, |
| 5 | +dataclass-backed structs, etc.) live in extensions — they embody |
| 6 | +deployment-specific opinions about encoding, byte order, and dataclass |
| 7 | +inference that do not belong in core. |
| 8 | +
|
| 9 | +:class:`IOType` mirrors the ``format`` argument introduced on |
| 10 | +:class:`metaflow.datastore.artifacts.serializer.ArtifactSerializer` so a |
| 11 | +single subclass can own both representations: |
| 12 | +
|
| 13 | +- ``STORAGE`` — blob-based, persisted through the datastore. |
| 14 | +- ``WIRE`` — string-based, for CLI args, protobuf payloads, and |
| 15 | + cross-process IPC. |
| 16 | +
|
| 17 | +Subclasses implement four hooks (``_wire_serialize``, ``_wire_deserialize``, |
| 18 | +``_storage_serialize``, ``_storage_deserialize``); callers use the public |
| 19 | +``serialize(format=...)`` / ``deserialize(data, format=...)`` methods. |
| 20 | +""" |
| 21 | + |
| 22 | +from abc import ABCMeta, abstractmethod |
| 23 | + |
| 24 | +from metaflow.datastore.artifacts.serializer import STORAGE, WIRE |
| 25 | + |
| 26 | + |
class _UnsetType(object):
    """
    Type of the ``_UNSET`` sentinel, which marks "no value was supplied".

    A bare ``object()`` sentinel loses its identity when it is copied or
    pickled: ``copy.copy``, ``copy.deepcopy`` and ``pickle`` all build a
    brand-new object, so any later ``x is _UNSET`` check on the copy fails.
    Reducing to the module-level name ``"_UNSET"`` makes ``copy`` hand back
    the very same object and makes ``pickle`` store a reference to the module
    global instead of a new instance.
    """

    __slots__ = ()

    def __repr__(self):
        return "_UNSET"

    def __reduce__(self):
        # A string return value means "this object is the module global with
        # that name", which is what keeps the singleton identity intact.
        return "_UNSET"


# Sentinel meaning "no value"; distinct from ``None`` so that ``None`` can
# itself be passed as a real value. Always compare with ``is``.
_UNSET = _UnsetType()
| 28 | + |
| 29 | + |
class IOType(metaclass=ABCMeta):
    """
    Abstract contract shared by every typed Metaflow artifact.

    A single instance can be used in one of two ways:

    - as a **descriptor**, created without a value (``Int64`` in a spec
      stands for an int64 field);
    - as a **wrapper**, created around a value (``Int64(42)``) so that the
      value can go through typed serialization.

    Concrete subclasses provide four private hooks. The public
    :meth:`serialize` and :meth:`deserialize` methods choose between those
    hooks according to their ``format`` argument.
    """

    # Short identifier such as "text", "json" or "int64"; subclasses set it.
    type_name = None

    def __init__(self, value=_UNSET):
        # Leaving ``value`` at its default produces a pure descriptor.
        self._value = value

    @property
    def value(self):
        """Return the wrapped Python value; ``_UNSET`` means descriptor-only."""
        return self._value

    # -- Public API --------------------------------------------------------

    def serialize(self, format=STORAGE):
        """
        Encode the wrapped value in the requested representation.

        Implementations are required to be free of side effects.

        Parameters
        ----------
        format : str
            ``STORAGE`` (default) yields ``(List[SerializedBlob], dict)``;
            ``WIRE`` yields a ``str``.

        Raises
        ------
        ValueError
            If ``format`` is neither ``STORAGE`` nor ``WIRE``.
        """
        if format == WIRE:
            hook = self._wire_serialize
        elif format == STORAGE:
            hook = self._storage_serialize
        else:
            raise ValueError(
                "format must be %r or %r, got %r" % (STORAGE, WIRE, format)
            )
        return hook()

    @classmethod
    def deserialize(cls, data, format=STORAGE, **kwargs):
        """
        Build an :class:`IOType` back from its serialized form.

        Parameters
        ----------
        data : Union[str, List[bytes]]
            A ``str`` for ``format=WIRE``; a ``List[bytes]`` for
            ``format=STORAGE``.
        format : str
            Either ``STORAGE`` (default) or ``WIRE``.
        **kwargs
            Passed through to the ``_storage_deserialize`` hook only
            (e.g. metadata the datastore produced at save time); the wire
            path does not receive them.

        Raises
        ------
        ValueError
            If ``format`` is neither ``STORAGE`` nor ``WIRE``.
        """
        if format == WIRE:
            return cls._wire_deserialize(data)
        elif format == STORAGE:
            return cls._storage_deserialize(data, **kwargs)
        else:
            raise ValueError(
                "format must be %r or %r, got %r" % (STORAGE, WIRE, format)
            )

    # -- Subclass hooks ----------------------------------------------------

    @abstractmethod
    def _wire_serialize(self):
        """Turn the value into a string (CLI args, protobuf, external APIs)."""
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def _wire_deserialize(cls, s):
        """Turn a string back into an :class:`IOType` instance."""
        raise NotImplementedError

    @abstractmethod
    def _storage_serialize(self):
        """
        Turn the value into ``(List[SerializedBlob], metadata_dict)``.

        Must be side-effect-free.
        """
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def _storage_deserialize(cls, blobs, **kwargs):
        """Turn ``(List[bytes], metadata)`` back into an :class:`IOType`."""
        raise NotImplementedError

    # -- Spec generation ---------------------------------------------------

    def to_spec(self):
        """Return the JSON type spec; valid with or without a wrapped value."""
        spec = {"type": self.type_name}
        return spec

    # -- Dunder ------------------------------------------------------------

    def __repr__(self):
        cls_name = self.__class__.__name__
        if self._value is not _UNSET:
            return "%s(%r)" % (cls_name, self._value)
        # Descriptor form: render with empty parentheses.
        return "%s()" % cls_name

    def __eq__(self, other):
        # Only instances of the exact same class are comparable; anything
        # else is handed back to Python's default comparison machinery.
        if type(self) is type(other):
            return self._value == other._value
        return NotImplemented

    def __hash__(self):
        # Hash on the same (class, value) pair that __eq__ compares.
        identity = (type(self), self._value)
        return hash(identity)
0 commit comments