Skip to content

Commit 69fa27c

Browse files
authored
[125, 126] Adding the no-op and in-memory stores (#127)
* adding documentation * adding test to CI/CD * fix in ci/cd * update mypy, disable python 3.6 as it seems to cause issues * typing
1 parent a376a56 commit 69fa27c

File tree

7 files changed

+117
-17
lines changed

7 files changed

+117
-17
lines changed

.github/workflows/python-package.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
runs-on: ubuntu-latest
1616
strategy:
1717
matrix:
18-
python-version: [3.6, 3.7, 3.8, 3.9]
18+
python-version: [3.7, 3.8, 3.9]
1919

2020
steps:
2121
- uses: actions/checkout@v2
@@ -42,9 +42,12 @@ jobs:
4242
run: |
4343
mypy dds
4444
mypy dds_tests/test_typed.py
45-
- name: Test with pytest
45+
- name: Test
4646
run: |
4747
pytest -o log_cli=true -o log_cli_level=debug
48+
- name: Test (in-memory store)
49+
run: |
50+
DDS_STORE_TYPE=memory pytest -o log_cli=true -o log_cli_level=debug
4851
- name: Run the documentation notebooks
4952
run: |
5053
jupyter kernelspec list

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
.PHONY: build pytest dbc
22

33
pytest:
4+
DDS_STORE_TYPE=memory pytest -o log_cli=true -o log_cli_level=debug dds_tests
45
pytest -o log_cli=true -o log_cli_level=debug dds_tests
56

67
build:

dds/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,10 +242,12 @@ def set_store(
242242
243243
Arguments:
244244
245-
store: a type of store. Two values are supported currently:
245+
store: a type of store. Four values are supported by default:
246246
247-
- `local`: local file system
247+
- `local`: local file system (default)
248248
- `dbfs`: the Databricks file system (only valid for the Databricks environment)
249+
- `memory`: in-memory storage (useful for debugging and testing)
250+
- `noop`: no-op storage (nothing gets stored, useful for debugging)
249251
250252
internal_dir: a path in a filesystem for the internal storage. The internal storage contains evaluated blobs
251253
and corresponding metadata. Accepted values are:

dds/_api.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from .introspect import introspect, _accepted_packages
1717
from ._lru_store import LRUCacheStore, default_cache_size
1818

19-
from .store import LocalFileStore, Store
19+
from .store import LocalFileStore, Store, NoOpStore, MemoryStore
2020
from .structures import (
2121
DDSPath,
2222
DDSException,
@@ -122,6 +122,10 @@ def set_store(
122122
_store_var = DBFSStore(
123123
DBFSURI.parse(internal_dir), DBFSURI.parse(data_dir), dbutils, commit_type_
124124
)
125+
elif store == "noop":
126+
_store_var = NoOpStore()
127+
elif store == "memory":
128+
_store_var = MemoryStore() # type: ignore
125129
else:
126130
raise DDSException(f"Unknown store {store}")
127131

dds/store.py

Lines changed: 89 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import os
44
import time
55
from pathlib import PurePath
6-
from typing import Any, Optional, List, Union
6+
from typing import Any, Optional, List, Union, Dict
77
from collections import OrderedDict
88

99
from .codec import codec_registry, CodecRegistry
@@ -55,16 +55,101 @@ def codec_registry(self) -> CodecRegistry:
5555
"""
5656
The registry of codecs associated to this instance of a store.
5757
58-
It is not necessaily unique
58+
It is not necessarily unique
5959
It may not be called for mutable operations during an evaluation. In that case, the behavior is not defined.
6060
"""
6161
pass
6262

6363
# TODO: reset paths to start the store from scratch without losing data
6464

6565

66-
# TODO: add a notion of FileSystemType (Local, DBFS, S3)
67-
# We need to have a matrix between FS types and object types
66+
class NoOpStore(Store):
67+
"""
68+
The store that never stores an object.
69+
70+
This store is in practice of very limited value because it cannot store paths to an object either.
71+
As a result, dds.load() will not work correctly with this store.
72+
73+
It is recommended to use this store only to debug specific issues for which DDS would be disabled
74+
altogether.
75+
"""
76+
77+
def has_blob(self, key: PyHash) -> bool:
78+
return False
79+
80+
def fetch_blob(self, key: PyHash) -> Optional[Any]:
81+
raise DDSException(f"Blob {key} not store (NoOpStore)")
82+
83+
def store_blob(self, key: PyHash, blob: Any, codec: Optional[ProtocolRef]) -> None:
84+
return
85+
86+
def sync_paths(self, paths: "OrderedDict[DDSPath, PyHash]") -> None:
87+
"""
88+
Does nothing: the NoOpStore does not record any path.
89+
"""
90+
return
91+
92+
def fetch_paths(self, paths: List[DDSPath]) -> "OrderedDict[DDSPath, PyHash]":
93+
"""
94+
Always raises a DDSException: the NoOpStore does not store paths, so none can be fetched.
95+
"""
96+
raise DDSException(f"Cannot fetch paths (the NoOpStore does not store paths)")
97+
98+
def codec_registry(self) -> CodecRegistry:
99+
return codec_registry()
100+
101+
102+
class MemoryStore(Store):
103+
"""
104+
The store that stores all objects in memory, without saving them permanently in storage.
105+
It is a good example of how to implement a store that is fully functional.
106+
107+
This store is useful when the following conditions are met:
108+
- there is limited value in storing objects beyond the lifetime of the process
109+
- some complex objects are not serializable
110+
- the objects are not too large in memory
111+
112+
This store is not useful for most users, but it is useful in debugging or testing contexts.
113+
"""
114+
115+
def __init__(self):
116+
self._cache: Dict[PyHash, Any] = {}
117+
self._paths: Dict[DDSPath, PyHash] = {}
118+
119+
def has_blob(self, key: PyHash) -> bool:
120+
return key in self._cache
121+
122+
def fetch_blob(self, key: PyHash) -> Optional[Any]:
123+
return self._cache.get(key)
124+
125+
def store_blob(self, key: PyHash, blob: Any, codec: Optional[ProtocolRef]) -> None:
126+
if key in self._cache:
127+
_logger.warning(f"Overwriting key {key}")
128+
self._cache[key] = blob
129+
130+
def sync_paths(self, paths: "OrderedDict[DDSPath, PyHash]") -> None:
131+
"""
132+
Commits all the paths.
133+
"""
134+
for (p, k) in paths.items():
135+
if p in self._paths:
136+
_logger.debug(f"Overwriting path: {p} -> {k}")
137+
else:
138+
_logger.debug(f"Registering path: {p} -> {k}")
139+
self._paths[p] = k
140+
141+
def fetch_paths(self, paths: List[DDSPath]) -> "OrderedDict[DDSPath, PyHash]":
142+
"""
143+
Fetches a set of paths from the store. It is expected that all the paths are returned.
144+
"""
145+
missing_paths = [p for p in paths if p not in self._paths]
146+
if missing_paths:
147+
raise DDSException(f"Missing paths in store: {missing_paths}")
148+
return OrderedDict([(p, self._paths[p]) for p in paths])
149+
150+
def codec_registry(self) -> CodecRegistry:
151+
# All the default content
152+
return codec_registry()
68153

69154

70155
class LocalFileStore(Store):

dds_tests/utils.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
import shutil
66
from pathlib import Path
77
import pkgutil
8+
from typing import Optional
89
from io import UnsupportedOperation
9-
from typing import Generator
10+
import os
1011

1112

1213
_logger = logging.getLogger(__name__)
@@ -37,12 +38,16 @@ def cleandir():
3738
tdir = Path(tempfile.mkdtemp(prefix="dds"))
3839
internal_dir = tdir.joinpath("internal_dir")
3940
data_dir = tdir.joinpath("data_dir")
40-
dds.set_store(
41-
"local",
42-
internal_dir=str(internal_dir),
43-
data_dir=str(data_dir),
44-
cache_objects=100,
45-
)
41+
env: Optional[str] = os.environ.get("DDS_STORE_TYPE")
42+
if env is not None:
43+
dds.set_store(env)
44+
else:
45+
dds.set_store(
46+
"local",
47+
internal_dir=str(internal_dir),
48+
data_dir=str(data_dir),
49+
cache_objects=100,
50+
)
4651
_logger.debug(f"data dir: {tdir}")
4752
yield
4853
shutil.rmtree(str(tdir), ignore_errors=True)

requirements-dev.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ pandas>=0.23.2,<1.2.0
1212
pyarrow>=0.10
1313

1414
# Linter
15-
mypy
15+
mypy==0.901
1616
flake8
1717
gitpython
1818

0 commit comments

Comments
 (0)