mlcommons
diff --git a/datasets/1.1/huggingface-data_provenance_initiative/metadata.json b/datasets/1.1/huggingface-data_provenance_initiative/metadata.json
Lines changed: 121 additions & 126 deletions
diff --git a/datasets/1.1/huggingface-squad_v2/metadata.json b/datasets/1.1/huggingface-squad_v2/metadata.json
Lines changed: 10 additions & 13 deletions
@@ -2,6 +2,7 @@
   "@context": {
     "@language": "en",
     "@vocab": "https://schema.org/",
+    "annotation": "cr:annotation",
     "arrayShape": "cr:arrayShape",
     "citeAs": "cr:citeAs",
     "column": "cr:column",
@@ -81,11 +82,9 @@
           "dataType": "sc:Text"
         }
       ],
-      "data": [
-        {
-          "default_splits/split_name": "train"
-        }
-      ]
+      "data": {
+        "default_splits/split_name": "train"
+      }
     },
     {
       "@type": "cr:RecordSet",
@@ -166,136 +165,132 @@
           }
         }
       ],
-      "annotation": [
-        {
-          "@type": "cr:Field",
-          "@id": "default/metadata",
-          "equivalentProperty": "prov:wasDerivedFrom",
-          "dataType": [
-            "prov:Entity"
-          ],
-          "subField": [
-            {
-              "@type": "cr:Field",
-              "@id": "default/metadata/dataset_id",
-              "equivalentProperty": "id",
-              "dataType": "sc:Text",
-              "source": {
-                "fileSet": {
-                  "@id": "parquet-files-for-config-default"
-                },
-                "extract": {
-                  "column": "metadata"
-                },
-                "transform": {
-                  "jsonPath": "dataset_id"
-                }
+      "annotation": {
+        "@type": "cr:Field",
+        "@id": "default/metadata",
+        "equivalentProperty": "prov:wasDerivedFrom",
+        "dataType": "prov:Entity",
+        "subField": [
+          {
+            "@type": "cr:Field",
+            "@id": "default/metadata/dataset_id",
+            "equivalentProperty": "id",
+            "dataType": "sc:Text",
+            "source": {
+              "fileSet": {
+                "@id": "parquet-files-for-config-default"
+              },
+              "extract": {
+                "column": "metadata"
+              },
+              "transform": {
+                "jsonPath": "dataset_id"
               }
-            },
-            {
-              "@type": "cr:Field",
-              "@id": "default/metadata/language",
-              "dataType": "sc:Text",
-              "source": {
-                "fileSet": {
-                  "@id": "parquet-files-for-config-default"
-                },
-                "extract": {
-                  "column": "metadata"
-                },
-                "transform": {
-                  "jsonPath": "language"
-                }
+            }
+          },
+          {
+            "@type": "cr:Field",
+            "@id": "default/metadata/language",
+            "dataType": "sc:Text",
+            "source": {
+              "fileSet": {
+                "@id": "parquet-files-for-config-default"
               },
-              "isArray": true,
-              "arrayShape": "-1"
-            },
-            {
-              "@type": "cr:Field",
-              "@id": "default/metadata/license",
-              "dataType": "sc:Text",
-              "source": {
-                "fileSet": {
-                  "@id": "parquet-files-for-config-default"
-                },
-                "extract": {
-                  "column": "metadata"
-                },
-                "transform": {
-                  "jsonPath": "license"
-                }
+              "extract": {
+                "column": "metadata"
               },
-              "isArray": true,
-              "arrayShape": "-1"
-            },
-            {
-              "@type": "cr:Field",
-              "@id": "default/metadata/license_url",
-              "dataType": "sc:Text",
-              "source": {
-                "fileSet": {
-                  "@id": "parquet-files-for-config-default"
-                },
-                "extract": {
-                  "column": "metadata"
-                },
-                "transform": {
-                  "jsonPath": "license_url"
-                }
+              "transform": {
+                "jsonPath": "language"
               }
             },
-            {
-              "@type": "cr:Field",
-              "@id": "default/metadata/provenance",
-              "dataType": "sc:Text",
-              "source": {
-                "fileSet": {
-                  "@id": "parquet-files-for-config-default"
-                },
-                "extract": {
-                  "column": "metadata"
-                },
-                "transform": {
-                  "jsonPath": "provenance"
-                }
+            "isArray": true,
+            "arrayShape": "-1"
+          },
+          {
+            "@type": "cr:Field",
+            "@id": "default/metadata/license",
+            "dataType": "sc:Text",
+            "source": {
+              "fileSet": {
+                "@id": "parquet-files-for-config-default"
+              },
+              "extract": {
+                "column": "metadata"
+              },
+              "transform": {
+                "jsonPath": "license"
               }
             },
-            {
-              "@type": "cr:Field",
-              "@id": "default/metadata/response",
-              "dataType": "sc:Text",
-              "source": {
-                "fileSet": {
-                  "@id": "parquet-files-for-config-default"
-                },
-                "extract": {
-                  "column": "metadata"
-                },
-                "transform": {
-                  "jsonPath": "response"
-                }
+            "isArray": true,
+            "arrayShape": "-1"
+          },
+          {
+            "@type": "cr:Field",
+            "@id": "default/metadata/license_url",
+            "dataType": "sc:Text",
+            "source": {
+              "fileSet": {
+                "@id": "parquet-files-for-config-default"
+              },
+              "extract": {
+                "column": "metadata"
+              },
+              "transform": {
+                "jsonPath": "license_url"
               }
-            },
-            {
-              "@type": "cr:Field",
-              "@id": "default/metadata/url",
-              "equivalentProperty": "prov:atLocation",
-              "dataType": "sc:Text",
-              "source": {
-                "fileSet": {
-                  "@id": "parquet-files-for-config-default"
-                },
-                "extract": {
-                  "column": "metadata"
-                },
-                "transform": {
-                  "jsonPath": "url"
-                }
+            }
+          },
+          {
+            "@type": "cr:Field",
+            "@id": "default/metadata/provenance",
+            "dataType": "sc:Text",
+            "source": {
+              "fileSet": {
+                "@id": "parquet-files-for-config-default"
+              },
+              "extract": {
+                "column": "metadata"
+              },
+              "transform": {
+                "jsonPath": "provenance"
               }
             }
-          ]
-        }
-      ]
+          },
+          {
+            "@type": "cr:Field",
+            "@id": "default/metadata/response",
+            "dataType": "sc:Text",
+            "source": {
+              "fileSet": {
+                "@id": "parquet-files-for-config-default"
+              },
+              "extract": {
+                "column": "metadata"
+              },
+              "transform": {
+                "jsonPath": "response"
+              }
+            }
+          },
+          {
+            "@type": "cr:Field",
+            "@id": "default/metadata/url",
+            "equivalentProperty": "prov:atLocation",
+            "dataType": "sc:Text",
+            "source": {
+              "fileSet": {
+                "@id": "parquet-files-for-config-default"
+              },
+              "extract": {
+                "column": "metadata"
+              },
+              "transform": {
+                "jsonPath": "url"
+              }
+            }
+          }
+        ]
+      }
     }
   ],
   "conformsTo": "http://mlcommons.org/croissant/1.1",
@@ -306,7 +301,7 @@
     "Data Provenance Initiative"
   ],
   "creator": {
-    "@type": "Organization",
+    "@type": "sc:Organization",
     "name": "Common Pile",
     "url": "https://huggingface.co/common-pile"
   },
 
@@ -248,36 +248,33 @@
   "license": "https://choosealicense.com/licenses/cc-by-sa-4.0/",
   "sameAs": "https://rajpurkar.github.io/SQuAD-explorer/",
   "url": "https://huggingface.co/datasets/rajpurkar/squad_v2",
-  "prov:wasDerivedFrom": [
+  "prov:wasDerivedFrom":
     {
       "@type": "prov:Entity",
       "@id": "squad1",
       "prov:locatedAt": "https://huggingface.co/datasets/rajpurkar/squad"
     }
-  ],
-  "prov:wasGeneratedBy": [
+  ,
+  "prov:wasGeneratedBy":
     {
       "@type": "prov:Activity",
       "@id": "additionActivity",
-      "type": [
-        "prov:Collection"
-      ],
+      "type": "prov:Collection",
       "description": "Added 50K plausible-sounding but have no correct answer…",
       "usage": {
-        "@id": "squad1"
+        "@id": "squad1",
+        "@type": "prov:Entity",
+        "prov:locatedAt": "https://huggingface.co/datasets/rajpurkar/squad"
       },
-      "prov:isAssociatedWith": [
-        "crowdworkersAgent"
-      ]
+      "prov:isAssociatedWith": "crowdworkersAgent"
     }
-  ],
-  "prov:isAssociatedWith": [
+  ,
+  "prov:isAssociatedWith":
     {
       "@type": "prov:Agent",
       "@id": "crowdworkersAgent",
       "prov:label": "Crowdworkers",
       "description": "Crowdworkers were hired through Daemo crowdsourcing platform",
       "prov:locatedAt": "https://github.com/crowdresearch/daemo"
     }
-  ]
 }