Skip to content

Commit 0ead276

Browse files
authored
Adds guards around version logic (#789)
* Adds guards around version logic HamiltonNode.version was returning None in certain cases. This broke graph.version. Culprits: config, and sometimes materializers. This change: 1. makes the node version for config == name of the node. 2. guards graph.version against computing functions with None as the version value. 3. after investigating more, materializers aren't the issue. We could include the materializer code in the originating function if needed though. So node.version should now hardly ever be None. But if it is for some strange reason, we should now handle it better. Adds tests for it. Also fixed a fixture that was failing for me for the shelve caching adapter. * Fixing hashing for 3.8 So different python versions hash differently. So rather than doing a lot of work to figure out the code and hash of each node... I am hardcoding the hash for versions that are different. It appears that 3.8 is the odd one out -- all the other python versions seem to hash to the same thing. * Reverting adding originating function to materializers We can figure something better out later. Right now what I had worked, but wasn't useful.
1 parent 16efa8f commit 0ead276

File tree

4 files changed

+49
-7
lines changed

4 files changed

+49
-7
lines changed

hamilton/function_modifiers/adapters.py

-1
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,6 @@ def get_input_type_key(key: str) -> str:
564564
key: value for key, value in input_types.items() if key not in resolved_kwargs
565565
}
566566
input_types[node_to_save] = (node_.type, DependencyType.REQUIRED)
567-
568567
save_node = node.Node(
569568
name=artifact_name,
570569
callabl=save_data,

hamilton/graph_types.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,13 @@ def version(self) -> typing.Optional[str]:
154154
The option `strip=True` means docstring and comments are ignored
155155
when hashing the function.
156156
"""
157-
if not self.originating_functions:
158-
return None
157+
if self.originating_functions is None or len(self.originating_functions) == 0:
158+
if self.is_external_input:
159+
# return the name of the config node. (we could add type but skipping for now)
160+
return self.name
161+
return None # this shouldn't happen often.
159162
try:
163+
# return hash of first function. It could be that others are Hamilton framework code.
160164
return hash_source_code(self.originating_functions[0], strip=True)
161165
except (
162166
OSError
@@ -203,5 +207,5 @@ def version(self) -> str:
203207
Node hashes are in a sorted list, then concatenated as a string before hashing.
204208
To find differences between dataflows, you need to inspect the node level.
205209
"""
206-
sorted_node_versions = sorted([n.version for n in self.nodes])
210+
sorted_node_versions = sorted([n.version for n in self.nodes if n.version is not None])
207211
return hashlib.sha256(str(sorted_node_versions).encode()).hexdigest()

tests/lifecycle/test_cache_adapter.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def _callable_to_node(callable) -> node.Node:
1818

1919
@pytest.fixture()
2020
def hook(tmp_path: pathlib.Path):
21-
return CacheAdapter(cache_path=str(tmp_path.resolve()))
21+
return CacheAdapter(cache_path=str((tmp_path / "cache.db").resolve()))
2222

2323

2424
@pytest.fixture()

tests/test_graph_types.py

+41-2
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@
55

66
import pytest
77

8-
from hamilton import graph_types, node
8+
from hamilton import driver, graph_types, node
99
from hamilton.node import Node, NodeType
1010

1111
from tests import nodes as test_nodes
12+
from tests.resources.dynamic_parallelism import no_parallel
1213

1314

1415
@pytest.fixture()
@@ -113,14 +114,52 @@ def node_to_create(required_dep: int, optional_dep: int = 1) -> str:
113114
}
114115

115116

116-
def test_create_hamilton_node_missing_version():
117+
def test_create_hamilton_config_node_version():
118+
"""Config nodes now return the name as the version."""
117119
n = Node("foo", int, node_source=NodeType.EXTERNAL)
118120
hamilton_node = graph_types.HamiltonNode.from_node(n)
119121
# the above will have no specified versions
122+
assert hamilton_node.version == "foo"
123+
assert hamilton_node.as_dict()["version"] == "foo"
124+
125+
126+
def test_create_hamilton_node_missing_version():
127+
"""We contrive a case where originating_functions is None."""
128+
129+
def foo(i: int) -> int:
130+
return i
131+
132+
n = Node("foo", int, "", foo, node_source=NodeType.STANDARD)
133+
hamilton_node = graph_types.HamiltonNode.from_node(n)
134+
# the above will have no specified versions
120135
assert hamilton_node.version is None
121136
assert hamilton_node.as_dict()["version"] is None
122137

123138

139+
def test_hamilton_graph_version_normal():
140+
dr = driver.Builder().with_modules(no_parallel).build()
141+
graph = graph_types.HamiltonGraph.from_graph(dr.graph)
142+
# assumption is for python 3
143+
if sys.version_info.minor == 8:
144+
hash_value = "0a375f3366590453dea8927d4c02c15dc090f8be42e9129d9a1139284eac920c"
145+
else:
146+
hash_value = "3b3487599ccc4fc56995989c6d32b58a90c0b91b8c16b3f453a2793f47436831"
147+
assert graph.version == hash_value
148+
149+
150+
def test_hamilton_graph_version_with_none_originating_functions():
151+
dr = driver.Builder().with_modules(no_parallel).build()
152+
graph = graph_types.HamiltonGraph.from_graph(dr.graph)
153+
# if this gets flakey we should find a specific node to make None
154+
graph.nodes[-1].originating_functions = None
155+
# assumption is for python 3
156+
if sys.version_info.minor == 8:
157+
hash_value = "7d556424cd84b97a395d9b6219f502e8f818f17002ce88f47974266c0cce454a"
158+
else:
159+
hash_value = "781d89517c1744c40a7afcdc49ee8592fbb23955e28d87f1b584d08430a3e837"
160+
assert graph.version == hash_value
161+
162+
124163
def test_json_serializable_dict():
125164
for name, obj in inspect.getmembers(test_nodes):
126165
if inspect.isfunction(obj) and not name.startswith("_"):

0 commit comments

Comments
 (0)