|
2 | 2 | "@context": { |
3 | 3 | "@language": "en", |
4 | 4 | "@vocab": "https://schema.org/", |
| 5 | + "annotation": "cr:annotation", |
5 | 6 | "arrayShape": "cr:arrayShape", |
6 | 7 | "citeAs": "cr:citeAs", |
7 | 8 | "column": "cr:column", |
|
81 | 82 | "dataType": "sc:Text" |
82 | 83 | } |
83 | 84 | ], |
84 | | - "data": [ |
85 | | - { |
86 | | - "default_splits/split_name": "train" |
87 | | - } |
88 | | - ] |
| 85 | + "data": { |
| 86 | + "default_splits/split_name": "train" |
| 87 | + } |
89 | 88 | }, |
90 | 89 | { |
91 | 90 | "@type": "cr:RecordSet", |
|
166 | 165 | } |
167 | 166 | } |
168 | 167 | ], |
169 | | - "annotation": [ |
170 | | - { |
171 | | - "@type": "cr:Field", |
172 | | - "@id": "default/metadata", |
173 | | - "equivalentProperty": "prov:wasDerivedFrom", |
174 | | - "dataType": [ |
175 | | - "prov:Entity" |
176 | | - ], |
177 | | - "subField": [ |
178 | | - { |
179 | | - "@type": "cr:Field", |
180 | | - "@id": "default/metadata/dataset_id", |
181 | | - "equivalentProperty": "id", |
182 | | - "dataType": "sc:Text", |
183 | | - "source": { |
184 | | - "fileSet": { |
185 | | - "@id": "parquet-files-for-config-default" |
186 | | - }, |
187 | | - "extract": { |
188 | | - "column": "metadata" |
189 | | - }, |
190 | | - "transform": { |
191 | | - "jsonPath": "dataset_id" |
192 | | - } |
| 168 | + "annotation": { |
| 169 | + "@type": "cr:Field", |
| 170 | + "@id": "default/metadata", |
| 171 | + "equivalentProperty": "prov:wasDerivedFrom", |
| 172 | + "dataType": "prov:Entity", |
| 173 | + "subField": [ |
| 174 | + { |
| 175 | + "@type": "cr:Field", |
| 176 | + "@id": "default/metadata/dataset_id", |
| 177 | + "equivalentProperty": "id", |
| 178 | + "dataType": "sc:Text", |
| 179 | + "source": { |
| 180 | + "fileSet": { |
| 181 | + "@id": "parquet-files-for-config-default" |
| 182 | + }, |
| 183 | + "extract": { |
| 184 | + "column": "metadata" |
| 185 | + }, |
| 186 | + "transform": { |
| 187 | + "jsonPath": "dataset_id" |
193 | 188 | } |
194 | | - }, |
195 | | - { |
196 | | - "@type": "cr:Field", |
197 | | - "@id": "default/metadata/language", |
198 | | - "dataType": "sc:Text", |
199 | | - "source": { |
200 | | - "fileSet": { |
201 | | - "@id": "parquet-files-for-config-default" |
202 | | - }, |
203 | | - "extract": { |
204 | | - "column": "metadata" |
205 | | - }, |
206 | | - "transform": { |
207 | | - "jsonPath": "language" |
208 | | - } |
| 189 | + } |
| 190 | + }, |
| 191 | + { |
| 192 | + "@type": "cr:Field", |
| 193 | + "@id": "default/metadata/language", |
| 194 | + "dataType": "sc:Text", |
| 195 | + "source": { |
| 196 | + "fileSet": { |
| 197 | + "@id": "parquet-files-for-config-default" |
209 | 198 | }, |
210 | | - "isArray": true, |
211 | | - "arrayShape": "-1" |
212 | | - }, |
213 | | - { |
214 | | - "@type": "cr:Field", |
215 | | - "@id": "default/metadata/license", |
216 | | - "dataType": "sc:Text", |
217 | | - "source": { |
218 | | - "fileSet": { |
219 | | - "@id": "parquet-files-for-config-default" |
220 | | - }, |
221 | | - "extract": { |
222 | | - "column": "metadata" |
223 | | - }, |
224 | | - "transform": { |
225 | | - "jsonPath": "license" |
226 | | - } |
| 199 | + "extract": { |
| 200 | + "column": "metadata" |
227 | 201 | }, |
228 | | - "isArray": true, |
229 | | - "arrayShape": "-1" |
230 | | - }, |
231 | | - { |
232 | | - "@type": "cr:Field", |
233 | | - "@id": "default/metadata/license_url", |
234 | | - "dataType": "sc:Text", |
235 | | - "source": { |
236 | | - "fileSet": { |
237 | | - "@id": "parquet-files-for-config-default" |
238 | | - }, |
239 | | - "extract": { |
240 | | - "column": "metadata" |
241 | | - }, |
242 | | - "transform": { |
243 | | - "jsonPath": "license_url" |
244 | | - } |
| 202 | + "transform": { |
| 203 | + "jsonPath": "language" |
245 | 204 | } |
246 | 205 | }, |
247 | | - { |
248 | | - "@type": "cr:Field", |
249 | | - "@id": "default/metadata/provenance", |
250 | | - "dataType": "sc:Text", |
251 | | - "source": { |
252 | | - "fileSet": { |
253 | | - "@id": "parquet-files-for-config-default" |
254 | | - }, |
255 | | - "extract": { |
256 | | - "column": "metadata" |
257 | | - }, |
258 | | - "transform": { |
259 | | - "jsonPath": "provenance" |
260 | | - } |
| 206 | + "isArray": true, |
| 207 | + "arrayShape": "-1" |
| 208 | + }, |
| 209 | + { |
| 210 | + "@type": "cr:Field", |
| 211 | + "@id": "default/metadata/license", |
| 212 | + "dataType": "sc:Text", |
| 213 | + "source": { |
| 214 | + "fileSet": { |
| 215 | + "@id": "parquet-files-for-config-default" |
| 216 | + }, |
| 217 | + "extract": { |
| 218 | + "column": "metadata" |
| 219 | + }, |
| 220 | + "transform": { |
| 221 | + "jsonPath": "license" |
261 | 222 | } |
262 | 223 | }, |
263 | | - { |
264 | | - "@type": "cr:Field", |
265 | | - "@id": "default/metadata/response", |
266 | | - "dataType": "sc:Text", |
267 | | - "source": { |
268 | | - "fileSet": { |
269 | | - "@id": "parquet-files-for-config-default" |
270 | | - }, |
271 | | - "extract": { |
272 | | - "column": "metadata" |
273 | | - }, |
274 | | - "transform": { |
275 | | - "jsonPath": "response" |
276 | | - } |
| 224 | + "isArray": true, |
| 225 | + "arrayShape": "-1" |
| 226 | + }, |
| 227 | + { |
| 228 | + "@type": "cr:Field", |
| 229 | + "@id": "default/metadata/license_url", |
| 230 | + "dataType": "sc:Text", |
| 231 | + "source": { |
| 232 | + "fileSet": { |
| 233 | + "@id": "parquet-files-for-config-default" |
| 234 | + }, |
| 235 | + "extract": { |
| 236 | + "column": "metadata" |
| 237 | + }, |
| 238 | + "transform": { |
| 239 | + "jsonPath": "license_url" |
277 | 240 | } |
278 | | - }, |
279 | | - { |
280 | | - "@type": "cr:Field", |
281 | | - "@id": "default/metadata/url", |
282 | | - "equivalentProperty": "prov:atLocation", |
283 | | - "dataType": "sc:Text", |
284 | | - "source": { |
285 | | - "fileSet": { |
286 | | - "@id": "parquet-files-for-config-default" |
287 | | - }, |
288 | | - "extract": { |
289 | | - "column": "metadata" |
290 | | - }, |
291 | | - "transform": { |
292 | | - "jsonPath": "url" |
293 | | - } |
| 241 | + } |
| 242 | + }, |
| 243 | + { |
| 244 | + "@type": "cr:Field", |
| 245 | + "@id": "default/metadata/provenance", |
| 246 | + "dataType": "sc:Text", |
| 247 | + "source": { |
| 248 | + "fileSet": { |
| 249 | + "@id": "parquet-files-for-config-default" |
| 250 | + }, |
| 251 | + "extract": { |
| 252 | + "column": "metadata" |
| 253 | + }, |
| 254 | + "transform": { |
| 255 | + "jsonPath": "provenance" |
294 | 256 | } |
295 | 257 | } |
296 | | - ] |
297 | | - } |
298 | | - ] |
| 258 | + }, |
| 259 | + { |
| 260 | + "@type": "cr:Field", |
| 261 | + "@id": "default/metadata/response", |
| 262 | + "dataType": "sc:Text", |
| 263 | + "source": { |
| 264 | + "fileSet": { |
| 265 | + "@id": "parquet-files-for-config-default" |
| 266 | + }, |
| 267 | + "extract": { |
| 268 | + "column": "metadata" |
| 269 | + }, |
| 270 | + "transform": { |
| 271 | + "jsonPath": "response" |
| 272 | + } |
| 273 | + } |
| 274 | + }, |
| 275 | + { |
| 276 | + "@type": "cr:Field", |
| 277 | + "@id": "default/metadata/url", |
| 278 | + "equivalentProperty": "prov:atLocation", |
| 279 | + "dataType": "sc:Text", |
| 280 | + "source": { |
| 281 | + "fileSet": { |
| 282 | + "@id": "parquet-files-for-config-default" |
| 283 | + }, |
| 284 | + "extract": { |
| 285 | + "column": "metadata" |
| 286 | + }, |
| 287 | + "transform": { |
| 288 | + "jsonPath": "url" |
| 289 | + } |
| 290 | + } |
| 291 | + } |
| 292 | + ] |
| 293 | + } |
299 | 294 | } |
300 | 295 | ], |
301 | 296 | "conformsTo": "http://mlcommons.org/croissant/1.1", |
|
306 | 301 | "Data Provenance Initiative" |
307 | 302 | ], |
308 | 303 | "creator": { |
309 | | - "@type": "Organization", |
| 304 | + "@type": "sc:Organization", |
310 | 305 | "name": "Common Pile", |
311 | 306 | "url": "https://huggingface.co/common-pile" |
312 | 307 | }, |
|
0 commit comments