Skip to content

Commit b38000e

Browse files
robot-ci-heartex, triklozoid, MihajloHoma
authored
fix: LEAP-1692: Image export for COCO and YOLO (#6855)
Co-authored-by: Sergei Ivashchenko <[email protected]> Co-authored-by: triklozoid <[email protected]> Co-authored-by: MihajloHoma <[email protected]>
1 parent 40f156a commit b38000e

File tree

7 files changed

+529
-70
lines changed

7 files changed

+529
-70
lines changed

label_studio/data_export/api.py

+6-3
Original file line number | Diff line number | Diff line change
@@ -204,7 +204,7 @@ def get(self, request, *args, **kwargs):
204204
logger.debug('Prepare export files')
205205

206206
export_file, content_type, filename = DataExport.generate_export_file(
207-
project, tasks, export_type, download_resources, request.GET
207+
project, tasks, export_type, download_resources, request.GET, hostname=request.build_absolute_uri('/')
208208
)
209209

210210
r = FileResponse(export_file, as_attachment=True, content_type=content_type, filename=filename)
@@ -569,7 +569,7 @@ def get(self, request, *args, **kwargs):
569569
return response
570570

571571

572-
def async_convert(converted_format_id, export_type, project, **kwargs):
572+
def async_convert(converted_format_id, export_type, project, hostname, download_resources=False, **kwargs):
573573
with transaction.atomic():
574574
try:
575575
converted_format = ConvertedFormat.objects.get(id=converted_format_id)
@@ -583,7 +583,7 @@ def async_convert(converted_format_id, export_type, project, **kwargs):
583583
converted_format.save(update_fields=['status'])
584584

585585
snapshot = converted_format.export
586-
converted_file = snapshot.convert_file(export_type)
586+
converted_file = snapshot.convert_file(export_type, download_resources=download_resources, hostname=hostname)
587587
if converted_file is None:
588588
raise ValidationError('No converted file found, probably there are no annotations in the export snapshot')
589589
md5 = Export.eval_md5(converted_file)
@@ -645,6 +645,7 @@ def post(self, request, *args, **kwargs):
645645
serializer = ExportConvertSerializer(data=request.data, context={'project': snapshot.project})
646646
serializer.is_valid(raise_exception=True)
647647
export_type = serializer.validated_data['export_type']
648+
download_resources = serializer.validated_data.get('download_resources')
648649

649650
with transaction.atomic():
650651
converted_format, created = ConvertedFormat.objects.get_or_create(export=snapshot, export_type=export_type)
@@ -657,6 +658,8 @@ def post(self, request, *args, **kwargs):
657658
converted_format.id,
658659
export_type,
659660
snapshot.project,
661+
request.build_absolute_uri('/'),
662+
download_resources=download_resources,
660663
on_failure=set_convert_background_failure,
661664
)
662665
return Response({'export_type': export_type, 'converted_format': converted_format.id})

label_studio/data_export/mixins.py

+8-2
Original file line number | Diff line number | Diff line change
@@ -137,6 +137,9 @@ def _get_export_serializer_option(serialization_options):
137137
options['context']['interpolate_key_frames'] = serialization_options['interpolate_key_frames']
138138
if serialization_options.get('include_annotation_history') is False:
139139
options['omit'] = ['annotations.history']
140+
# download resources
141+
if serialization_options.get('download_resources') is True:
142+
options['download_resources'] = True
140143
return options
141144

142145
def get_task_queryset(self, ids, annotation_filter_options):
@@ -303,7 +306,7 @@ def run_file_exporting(self, task_filter_options=None, annotation_filter_options
303306
serialization_options=serialization_options,
304307
)
305308

306-
def convert_file(self, to_format):
309+
def convert_file(self, to_format, download_resources=False, hostname=None):
307310
with get_temp_dir() as tmp_dir:
308311
OUT = 'out'
309312
out_dir = pathlib.Path(tmp_dir) / OUT
@@ -313,7 +316,10 @@ def convert_file(self, to_format):
313316
config=self.project.get_parsed_config(),
314317
project_dir=None,
315318
upload_dir=out_dir,
316-
download_resources=False,
319+
download_resources=download_resources,
320+
# for downloading resources we need access to the API
321+
access_token=self.project.organization.created_by.auth_token.key,
322+
hostname=hostname,
317323
)
318324
input_name = pathlib.Path(self.file.name).name
319325
input_file_path = pathlib.Path(tmp_dir) / input_name

label_studio/data_export/models.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -142,7 +142,7 @@ def get_export_formats(project):
142142
return sorted(formats, key=lambda f: f.get('disabled', False))
143143

144144
@staticmethod
145-
def generate_export_file(project, tasks, output_format, download_resources, get_args):
145+
def generate_export_file(project, tasks, output_format, download_resources, get_args, hostname=None):
146146
"""Generate export file and return it as an open file object.
147147
148148
Be sure to close the file after using it, to avoid wasting disk space.
@@ -161,6 +161,8 @@ def generate_export_file(project, tasks, output_format, download_resources, get_
161161
project_dir=None,
162162
upload_dir=os.path.join(settings.MEDIA_ROOT, settings.UPLOAD_DIR),
163163
download_resources=download_resources,
164+
access_token=project.organization.created_by.auth_token.key,
165+
hostname=hostname,
164166
)
165167
with get_temp_dir() as tmp_dir:
166168
converter.convert(input_json, tmp_dir, output_format, is_dir=False)

label_studio/data_export/serializers.py

+1
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,7 @@ class SerializationOption(serializers.Serializer):
166166

167167
class ExportConvertSerializer(serializers.Serializer):
168168
export_type = serializers.CharField(help_text='Export file format.')
169+
download_resources = serializers.BooleanField(help_text='Download resources in converter.', required=False)
169170

170171
def validate_export_type(self, value):
171172
project = self.context.get('project')

label_studio/tests/export.tavern.yml

+105-56
Original file line number | Diff line number | Diff line change
@@ -112,67 +112,116 @@ stages:
112112
url: '{django_live_url}/api/projects/{pk}/export/formats'
113113
response:
114114
json:
115-
- title: 'JSON'
116-
description: !anystr
117-
link: 'https://labelstud.io/guide/export.html#JSON'
118-
name: 'JSON'
119-
- title: 'JSON-MIN'
120-
description: !anystr
121-
link: 'https://labelstud.io/guide/export.html#JSON-MIN'
122-
name: 'JSON_MIN'
123-
- title: 'CSV'
124-
description: !anystr
125-
link: 'https://labelstud.io/guide/export.html#CSV'
126-
name: 'CSV'
127-
- title: 'TSV'
128-
description: !anystr
129-
link: 'https://labelstud.io/guide/export.html#TSV'
130-
name: 'TSV'
131-
- title: 'COCO'
132-
description: 'Popular machine learning format used by the COCO dataset for object detection and image segmentation tasks with polygons and rectangles.'
133-
link: 'https://labelstud.io/guide/export.html#COCO'
134-
tags: ['image segmentation', 'object detection']
135-
name: 'COCO'
136-
- title: 'YOLO'
137-
description: 'Popular TXT format is created for each image file. Each txt file contains annotations for the corresponding image file, that is object class, object coordinates, height & width.'
138-
link: 'https://labelstud.io/guide/export.html#YOLO'
139-
tags: ['image segmentation', 'object detection']
140-
name: 'YOLO'
141-
- title: 'YOLOv8 OBB'
142-
description: 'Popular TXT format is created for each image file. Each txt file contains annotations for the corresponding image file. The YOLO OBB format designates bounding boxes by their four corner points with coordinates normalized between 0 and 1, so it is possible to export rotated objects.'
143-
link: 'https://labelstud.io/guide/export.html#YOLO'
144-
tags: ['image segmentation', 'object detection']
145-
name: 'YOLO_OBB'
146-
- title: 'CONLL2003'
147-
description: 'Popular format used for the CoNLL-2003 named entity recognition challenge.'
148-
link: 'https://labelstud.io/guide/export.html#CONLL2003'
149-
tags: ['sequence labeling', 'text tagging', 'named entity recognition']
150-
name: 'CONLL2003'
115+
- title: JSON
116+
description: List of items in raw JSON format stored in one JSON file. Use to export
117+
both the data and the annotations for a dataset. It's Label Studio Common Format
118+
link: https://labelstud.io/guide/export.html#JSON
119+
name: JSON
120+
- title: JSON-MIN
121+
description: List of items where only "from_name", "to_name" values from the raw
122+
JSON format are exported. Use to export only the annotations for a dataset.
123+
link: https://labelstud.io/guide/export.html#JSON-MIN
124+
name: JSON_MIN
125+
- title: CSV
126+
description: Results are stored as comma-separated values with the column names
127+
specified by the values of the "from_name" and "to_name" fields.
128+
link: https://labelstud.io/guide/export.html#CSV
129+
name: CSV
130+
- title: TSV
131+
description: Results are stored in tab-separated tabular file with column names
132+
specified by "from_name" "to_name" values
133+
link: https://labelstud.io/guide/export.html#TSV
134+
name: TSV
135+
- title: COCO
136+
description: Popular machine learning format used by the COCO dataset for object
137+
detection and image segmentation tasks with polygons and rectangles.
138+
link: https://labelstud.io/guide/export.html#COCO
139+
tags:
140+
- image segmentation
141+
- object detection
142+
name: COCO
143+
- title: COCO with Images
144+
description: COCO format with images downloaded.
145+
link: https://labelstud.io/guide/export.html#COCO
146+
tags:
147+
- image segmentation
148+
- object detection
149+
name: COCO_WITH_IMAGES
150+
- title: YOLO
151+
description: Popular TXT format is created for each image file. Each txt file contains
152+
annotations for the corresponding image file, that is object class, object coordinates,
153+
height & width.
154+
link: https://labelstud.io/guide/export.html#YOLO
155+
tags:
156+
- image segmentation
157+
- object detection
158+
name: YOLO
159+
- title: YOLO with Images
160+
description: YOLO format with images downloaded.
161+
link: https://labelstud.io/guide/export.html#YOLO
162+
tags:
163+
- image segmentation
164+
- object detection
165+
name: YOLO_WITH_IMAGES
166+
- title: YOLOv8 OBB
167+
description: Popular TXT format is created for each image file. Each txt file contains
168+
annotations for the corresponding image file. The YOLO OBB format designates bounding
169+
boxes by their four corner points with coordinates normalized between 0 and 1,
170+
so it is possible to export rotated objects.
171+
link: https://labelstud.io/guide/export.html#YOLO
172+
tags:
173+
- image segmentation
174+
- object detection
175+
name: YOLO_OBB
176+
- title: YOLOv8 OBB with Images
177+
description: YOLOv8 OBB format with images downloaded.
178+
link: https://labelstud.io/guide/export.html#YOLO
179+
tags:
180+
- image segmentation
181+
- object detection
182+
name: YOLO_OBB_WITH_IMAGES
183+
- title: CONLL2003
184+
description: Popular format used for the CoNLL-2003 named entity recognition challenge.
185+
link: https://labelstud.io/guide/export.html#CONLL2003
186+
tags:
187+
- sequence labeling
188+
- text tagging
189+
- named entity recognition
190+
name: CONLL2003
151191
disabled: true
152-
- title: 'Pascal VOC XML'
153-
description: 'Popular XML format used for object detection and polygon image segmentation tasks.'
154-
link: 'https://labelstud.io/guide/export.html#Pascal-VOC-XML'
155-
tags: ['image segmentation', 'object detection']
156-
name: 'VOC'
192+
- title: Pascal VOC XML
193+
description: Popular XML format used for object detection and polygon image segmentation
194+
tasks.
195+
link: https://labelstud.io/guide/export.html#Pascal-VOC-XML
196+
tags:
197+
- image segmentation
198+
- object detection
199+
name: VOC
157200
disabled: true
158-
- title: 'Brush labels to NumPy'
159-
description: 'Export your brush labels as NumPy 2d arrays. Each label outputs as one image.'
160-
link: 'https://labelstud.io/guide/export.html#Brush-labels-to-NumPy-amp-PNG'
161-
tags: ['image segmentation']
162-
name: 'BRUSH_TO_NUMPY'
201+
- title: Brush labels to NumPy
202+
description: Export your brush labels as NumPy 2d arrays. Each label outputs as
203+
one image.
204+
link: https://labelstud.io/guide/export.html#Brush-labels-to-NumPy-amp-PNG
205+
tags:
206+
- image segmentation
207+
name: BRUSH_TO_NUMPY
163208
disabled: true
164-
- title: 'Brush labels to PNG'
165-
description: 'Export your brush labels as PNG images. Each label outputs as one image.'
166-
link: 'https://labelstud.io/guide/export.html#Brush-labels-to-NumPy-amp-PNG'
167-
tags: ['image segmentation']
168-
name: 'BRUSH_TO_PNG'
209+
- title: Brush labels to PNG
210+
description: Export your brush labels as PNG images. Each label outputs as one image.
211+
link: https://labelstud.io/guide/export.html#Brush-labels-to-NumPy-amp-PNG
212+
tags:
213+
- image segmentation
214+
name: BRUSH_TO_PNG
169215
disabled: true
170-
- title: 'ASR Manifest'
171-
description: 'Export audio transcription labels for automatic speech recognition as the JSON manifest format expected by NVIDIA NeMo models.'
172-
link: 'https://labelstud.io/guide/export.html#ASR-MANIFEST'
173-
tags: ['speech recognition']
174-
name: 'ASR_MANIFEST'
216+
- title: ASR Manifest
217+
description: Export audio transcription labels for automatic speech recognition
218+
as the JSON manifest format expected by NVIDIA NeMo models.
219+
link: https://labelstud.io/guide/export.html#ASR-MANIFEST
220+
tags:
221+
- speech recognition
222+
name: ASR_MANIFEST
175223
disabled: true
224+
176225
status_code: 200
177226

178227

0 commit comments

Comments
 (0)