def extract_labels(annotation, control_tag, label_interface_tags=None):
    """Collect label strings from the regions of one annotation/prediction.

    Args:
        annotation: Object exposing ``result``, an iterable of region dicts
            (``None`` or empty is treated as "no regions").
        control_tag: Only regions whose ``from_name`` equals this name are
            scanned; ``None`` scans every region.
        label_interface_tags: Optional mapping of control-tag name to a tag
            object whose ``attr`` mapping may hold ``showFullPath`` and
            ``pathSeparator``. Used only to format taxonomy selections.

    Returns:
        A flat list of label strings, in region order.
    """
    labels = []
    for region in annotation.result or []:
        value = region.get('value')
        if not isinstance(value, dict):
            continue
        from_name = region.get('from_name')
        if control_tag is not None and from_name != control_tag:
            continue

        # Scan the value for the first field that actually carries labels:
        # a list of strings (choices, textareas, ...) or a taxonomy
        # (list of string-lists).
        for key, items in value.items():
            if not items or not isinstance(items, list):
                continue
            if key == 'taxonomy':
                tag = None
                if label_interface_tags is not None:
                    tag = label_interface_tags.get(from_name)
                labels.extend(_taxonomy_labels(items, tag))
                break
            if isinstance(items[0], str):
                labels.extend(items)
                break
            # Any other non-empty list (e.g. polygon ``points``) is geometry,
            # not labels: keep scanning instead of stopping here.
    return labels


def _taxonomy_labels(paths, tag=None):
    """Render taxonomy selections (lists of path segments) as label strings."""
    # Without a known tag (e.g. a custom control tag) fall back to the full
    # '/'-delimited path; with a tag, a missing attribute means leaf-only.
    show_full_path = 'true'
    path_separator = '/'
    if tag is not None:
        show_full_path = tag.attr.get('showFullPath', 'false')
        path_separator = tag.attr.get('pathSeparator', '/')

    # Empty selections have neither a leaf nor a path to report.
    selected = [path for path in paths if path]
    if show_full_path == 'false':
        return [path[-1] for path in selected]
    return [path_separator.join(path) for path in selected]