Skip to content

Commit ca10fde

Browse files
authored
[editor] Added support for displaying and editing RecordSet data types and Field equivalentProperty (#819)
Currently those features are not observable or editable in the editor, but they are important to give clearer meaning to the records and fields. - For now the RecordSet dataTypes are shown in 1 text box with each data type separated by a comma. It could be changed to a button and multiple fields, but that would be a lot more code without necessarily being more readable; let us know how you feel. Ideally there should be some kind of autocomplete combobox that takes multiple values, with a predefined list of types to select from, and still the possibility to add any types the user wants. But we could not find such a component for now, so we went for the simpler solution. - The `equivalentProperty` has been added right after the Field dataType in the table on the left side in the Record Sets tab, as well as in the field details on the right side. We can remove it from the left side table if you feel it is too much and should only be shown in the details. Doing so made us realize some confusing things with the editor example, e.g. with the titanic dataset the RecordSets `genders` and `embarkation_ports` have the type https://schema.org/Enumeration but the checkbox "The RecordSet is an enumeration" is not checked. Wouldn't it make sense for it to be automatically checked in this case? We did this during a short hackathon at the https://www.swat4ls.org/ conference (semantic web for life sciences). It is the beginning of an answer to this issue: #739 raised by @benjelloun. A lot of people in our community (semantic web for life science research) are interested in the features requested in this issue. We followed the official specs definition for `RecordSet` and `Field`: https://docs.mlcommons.org/croissant/docs/croissant-spec.html#field We tested the new features using the docker deployment locally and they work as expected.
RecordSet data types and Field equivalentProperty are properly added to the exported JSON-LD file. It was done with @vemonet. Also note that in the code for `Field` you use camelcase `equivalentProperty` instead of the usual snake case that is used elsewhere; you might want to fix this for overall consistency: https://github.com/mlcommons/croissant/blob/main/python/mlcroissant/mlcroissant/_src/structure_graph/nodes/field.py#L71
1 parent 87866ec commit ca10fde

File tree

5 files changed

+46
-1
lines changed

5 files changed

+46
-1
lines changed

editor/core/state.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ class Field(Node):
167167

168168
description: str | None = None
169169
data_types: str | list[str] | None = None
170+
equivalentProperty: str | list[str] | None = None
170171
source: mlc.Source | None = None
171172
references: mlc.Source | None = None
172173

editor/events/fields.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ class FieldEvent(enum.Enum):
6161
ID = "ID"
6262
DESCRIPTION = "DESCRIPTION"
6363
DATA_TYPE = "DATA_TYPE"
64+
EQUIVALENT_PROPERTY = "EQUIVALENT_PROPERTY"
6465
SOURCE = "SOURCE"
6566
SOURCE_EXTRACT = "SOURCE_EXTRACT"
6667
SOURCE_EXTRACT_COLUMN = "SOURCE_EXTRACT_COLUMN"
@@ -95,6 +96,8 @@ def handle_field_change(
9596
metadata.rename_id(old_id=old_id, new_id=new_id)
9697
elif change == FieldEvent.DESCRIPTION:
9798
field.description = value
99+
elif change == FieldEvent.EQUIVALENT_PROPERTY:
100+
field.equivalentProperty = value
98101
elif change == FieldEvent.DATA_TYPE:
99102
field.data_types = [str_to_mlc_data_type(value)]
100103
elif change == FieldEvent.SOURCE:

editor/events/record_sets.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class RecordSetEvent(enum.Enum):
1313
NAME = "NAME"
1414
ID = "ID"
1515
DESCRIPTION = "DESCRIPTION"
16+
DATA_TYPES = "DATA_TYPES"
1617
IS_ENUMERATION = "IS_ENUMERATION"
1718
HAS_DATA = "HAS_DATA"
1819
CHANGE_DATA = "CHANGE_DATA"
@@ -35,6 +36,8 @@ def handle_record_set_change(event: RecordSetEvent, record_set: RecordSet, key:
3536
metadata.rename_id(old_id=old_id, new_id=new_id)
3637
elif event == RecordSetEvent.DESCRIPTION:
3738
record_set.description = value
39+
elif event == RecordSetEvent.DATA_TYPES:
40+
record_set.data_types = [value.strip() for value in value.split(",")]
3841
elif event == RecordSetEvent.IS_ENUMERATION:
3942
record_set.is_enumeration = value
4043
elif event == RecordSetEvent.HAS_DATA:

editor/views/jsonld.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def render_jsonld():
3232
name=field["name"],
3333
description=field["description"],
3434
data_types=field["data_type"],
35+
equivalentProperty=field["equivalentProperty"],
3536
source=mlc.Source(
3637
distribution=file.name,
3738
extract=mlc.Extract(column=field["name"]),

editor/views/record_sets.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,13 +193,16 @@ def _handle_fields_change(record_set_key: int, record_set: RecordSet):
193193
field.description = new_value
194194
elif new_field == FieldDataFrame.DATA_TYPE:
195195
field.data_types = [str_to_mlc_data_type(new_value)]
196+
elif new_field == FieldDataFrame.EQUIVALENT_PROPERTY:
197+
field.equivalentProperty = new_value
196198
for added_row in result["added_rows"]:
197199
data_type = str_to_mlc_data_type(added_row.get(FieldDataFrame.DATA_TYPE))
198200
field = Field(
199201
id=added_row.get(FieldDataFrame.NAME),
200202
name=added_row.get(FieldDataFrame.NAME),
201203
description=added_row.get(FieldDataFrame.DESCRIPTION),
202204
data_types=[data_type],
205+
equivalentProperty=added_row.get(FieldDataFrame.EQUIVALENT_PROPERTY),
203206
source=mlc.Source(),
204207
references=mlc.Source(),
205208
)
@@ -217,6 +220,7 @@ class FieldDataFrame:
217220
NAME = "Field name"
218221
DESCRIPTION = "Field description"
219222
DATA_TYPE = "Data type"
223+
EQUIVALENT_PROPERTY = "Equivalent property"
220224
SOURCE_UID = "Source"
221225
SOURCE_EXTRACT = "Source extract"
222226
SOURCE_TRANSFORM = "Source transform"
@@ -273,6 +277,21 @@ def _render_left_panel():
273277
on_change=handle_record_set_change,
274278
args=(RecordSetEvent.DESCRIPTION, record_set, key),
275279
)
280+
key = f"{prefix}-datatypes"
281+
st.text_input(
282+
"Data types",
283+
placeholder="Provide comma-separated data types for the RecordSet.",
284+
help=(
285+
"Records in this set are instances of the corresponding data types"
286+
" (URL, comma-separated)."
287+
),
288+
key=key,
289+
value=(
290+
", ".join(record_set.data_types) if record_set.data_types else None
291+
),
292+
on_change=handle_record_set_change,
293+
args=(RecordSetEvent.DATA_TYPES, record_set, key),
294+
)
276295
key = f"{prefix}-is-enumeration"
277296
st.checkbox(
278297
"The RecordSet is an enumeration",
@@ -350,6 +369,9 @@ def _render_left_panel():
350369
FieldDataFrame.NAME: names,
351370
FieldDataFrame.DESCRIPTION: descriptions,
352371
FieldDataFrame.DATA_TYPE: data_types,
372+
FieldDataFrame.EQUIVALENT_PROPERTY: [
373+
field.equivalentProperty for field in record_set.fields
374+
],
353375
},
354376
dtype=np.str_,
355377
)
@@ -386,6 +408,11 @@ def _render_left_panel():
386408
options=STR_DATA_TYPES,
387409
required=True,
388410
),
411+
FieldDataFrame.EQUIVALENT_PROPERTY: st.column_config.TextColumn(
412+
FieldDataFrame.EQUIVALENT_PROPERTY,
413+
help="Equivalent property IRI for the field",
414+
required=False,
415+
),
389416
},
390417
on_change=_handle_fields_change,
391418
args=(record_set_key, record_set),
@@ -469,7 +496,7 @@ def _render_right_panel():
469496
else:
470497
for field_key, field in enumerate(record_set.fields):
471498
prefix = f"{record_set_key}-{field.name}-{field_key}"
472-
col1, col2, col3 = st.columns([1, 1, 1])
499+
col1, col2, col3, col4 = st.columns([1, 1, 1, 1])
473500

474501
key = f"{prefix}-name"
475502
if field.ctx.is_v0():
@@ -521,6 +548,16 @@ def _render_right_panel():
521548
on_change=handle_field_change,
522549
args=(FieldEvent.DATA_TYPE, field, key),
523550
)
551+
key = f"{prefix}-properties"
552+
col4.text_input(
553+
"Equivalent Property (URL)",
554+
placeholder="Provide an equivalent property URL for the field.",
555+
help="Equivalent property URL describing the field.",
556+
key=key,
557+
on_change=handle_field_change,
558+
value=field.equivalentProperty,
559+
args=(FieldEvent.EQUIVALENT_PROPERTY, field, key),
560+
)
524561
possible_sources = _get_possible_sources(metadata)
525562
render_source(record_set, field, possible_sources)
526563
render_references(record_set, field, possible_sources)

0 commit comments

Comments
 (0)