Skip to content

Commit 262d54c

Browse files
authored
Fix cyclic dependencies in operation graph for field.references (#949)
Original [PR](#948). Renamed the branch from "issue#904" to "issue-904" to prevent errors with `pip install`. Also fixed backward-compatibility issues in graph construction for Croissant metadata using versions 0.8 and 1.0, where `field.references` can point to a `FileObject` or `FileSet` (similar to `field.source`).
1 parent 9a95129 commit 262d54c

File tree

1 file changed

+29
-6
lines changed
  • python/mlcroissant/mlcroissant/_src/structure_graph

1 file changed

+29
-6
lines changed

python/mlcroissant/mlcroissant/_src/structure_graph/graph.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from mlcroissant._src.core import constants
2727
from mlcroissant._src.core.types import Json
2828
from mlcroissant._src.structure_graph.base_node import Node
29+
from mlcroissant._src.structure_graph.nodes.field import Field
2930
from mlcroissant._src.structure_graph.nodes.file_object import FileObject
3031
from mlcroissant._src.structure_graph.nodes.file_set import FileSet
3132
from mlcroissant._src.structure_graph.nodes.record_set import RecordSet
@@ -63,13 +64,35 @@ def from_nodes_to_graph(metadata) -> nx.MultiDiGraph:
6364
for record_set in metadata.record_sets:
6465
for field in record_set.fields:
6566
_add_edge(graph, uuid_to_node, record_set.uuid, field)
66-
for origin in [field.source, field.references]:
67-
if origin:
68-
_add_edge(graph, uuid_to_node, origin.uuid, record_set)
67+
if field.source:
68+
_add_edge(graph, uuid_to_node, field.source.uuid, record_set)
69+
if field.references:
70+
referenced_node = uuid_to_node.get(field.references.uuid)
71+
# If the referenced node is a "Field"
72+
if isinstance(referenced_node, Field):
73+
# Dependency on references: Referenced Field -> Referencing Field
74+
_add_edge(graph, uuid_to_node, field.references.uuid, field)
75+
# Backward compatible with earlier versions of the Croissant spec
76+
else:
77+
# If it is referencing other types of nodes such as FileObject
78+
_add_edge(graph, uuid_to_node, field.references.uuid, record_set)
6979
for sub_field in field.sub_fields:
70-
for origin in [sub_field.source, sub_field.references]:
71-
if origin:
72-
_add_edge(graph, uuid_to_node, origin.uuid, record_set)
80+
if sub_field.source:
81+
_add_edge(graph, uuid_to_node, sub_field.source.uuid, record_set)
82+
if sub_field.references:
83+
referenced_node = uuid_to_node.get(field.references.uuid)
84+
# If the referenced node is a "Field"
85+
if isinstance(referenced_node, Field):
86+
# Dependency on references: Referenced Field -> Referencing Field
87+
_add_edge(
88+
graph, uuid_to_node, sub_field.references.uuid, sub_field
89+
)
90+
else:
91+
# If it is referencing other types of nodes such as FileObject
92+
_add_edge(
93+
graph, uuid_to_node, sub_field.references.uuid, record_set
94+
)
95+
7396
# `Metadata` are used as the entry node.
7497
_add_node_as_entry_node(graph, metadata)
7598
return graph

0 commit comments

Comments
 (0)