|
26 | 26 | from mlcroissant._src.core import constants |
27 | 27 | from mlcroissant._src.core.types import Json |
28 | 28 | from mlcroissant._src.structure_graph.base_node import Node |
| 29 | +from mlcroissant._src.structure_graph.nodes.field import Field |
29 | 30 | from mlcroissant._src.structure_graph.nodes.file_object import FileObject |
30 | 31 | from mlcroissant._src.structure_graph.nodes.file_set import FileSet |
31 | 32 | from mlcroissant._src.structure_graph.nodes.record_set import RecordSet |
@@ -63,13 +64,35 @@ def from_nodes_to_graph(metadata) -> nx.MultiDiGraph: |
63 | 64 | for record_set in metadata.record_sets: |
64 | 65 | for field in record_set.fields: |
65 | 66 | _add_edge(graph, uuid_to_node, record_set.uuid, field) |
66 | | - for origin in [field.source, field.references]: |
67 | | - if origin: |
68 | | - _add_edge(graph, uuid_to_node, origin.uuid, record_set) |
| 67 | + if field.source: |
| 68 | + _add_edge(graph, uuid_to_node, field.source.uuid, record_set) |
| 69 | + if field.references: |
| 70 | + referenced_node = uuid_to_node.get(field.references.uuid) |
| 71 | + # If the referenced node is a "Field" |
| 72 | + if isinstance(referenced_node, Field): |
| 73 | + # Dependency on references: Referenced Field -> Referencing Field |
| 74 | + _add_edge(graph, uuid_to_node, field.references.uuid, field) |
| 75 | + # Backward compatible with earlier versions of the Croissant spec |
| 76 | + else: |
| 77 | + # If it is referencing other types of nodes such as FileObject |
| 78 | + _add_edge(graph, uuid_to_node, field.references.uuid, record_set) |
69 | 79 | for sub_field in field.sub_fields: |
70 | | - for origin in [sub_field.source, sub_field.references]: |
71 | | - if origin: |
72 | | - _add_edge(graph, uuid_to_node, origin.uuid, record_set) |
| 80 | + if sub_field.source: |
| 81 | + _add_edge(graph, uuid_to_node, sub_field.source.uuid, record_set) |
| 82 | + if sub_field.references: |
| 83 | + referenced_node = uuid_to_node.get(sub_field.references.uuid) |
| 84 | + # If the referenced node is a "Field" |
| 85 | + if isinstance(referenced_node, Field): |
| 86 | + # Dependency on references: Referenced Field -> Referencing Field |
| 87 | + _add_edge( |
| 88 | + graph, uuid_to_node, sub_field.references.uuid, sub_field |
| 89 | + ) |
| 90 | + else: |
| 91 | + # If it is referencing other types of nodes such as FileObject |
| 92 | + _add_edge( |
| 93 | + graph, uuid_to_node, sub_field.references.uuid, record_set |
| 94 | + ) |
| 95 | + |
73 | 96 | # `Metadata` are used as the entry node. |
74 | 97 | _add_node_as_entry_node(graph, metadata) |
75 | 98 | return graph |
|
0 commit comments