From 20cb9721b9af2d148f18639d8f452c2e13b0ff55 Mon Sep 17 00:00:00 2001 From: Sidney Batchelder <44208509+sbatchelder@users.noreply.github.com> Date: Wed, 16 Apr 2025 08:07:48 -0400 Subject: [PATCH 1/5] hotfix for Taxonomy Cache Labels #7382 --- label_studio/data_manager/actions/cache_labels.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/label_studio/data_manager/actions/cache_labels.py b/label_studio/data_manager/actions/cache_labels.py index af8aa893f76e..b4d2ae3ac2cb 100644 --- a/label_studio/data_manager/actions/cache_labels.py +++ b/label_studio/data_manager/actions/cache_labels.py @@ -68,9 +68,13 @@ def extract_labels(annotation, control_tag): if ( isinstance(region['value'][key], list) and region['value'][key] - and isinstance(region['value'][key][0], str) + and isinstance(region['value'][key], list) ): - labels.extend(region['value'][key]) + for elem in region['value'][key]: + if isinstance(elem, str): + labels.append(elem) # eg Choices fields + elif isinstance(elem, list): + labels.append(elem[-1]) # Taxonomy fields break return labels From 2d72746855d26a58a4a6672419f9f12262e0bbee Mon Sep 17 00:00:00 2001 From: Sidney Batchelder <44208509+sbatchelder@users.noreply.github.com> Date: Thu, 17 Apr 2025 09:57:16 -0400 Subject: [PATCH 2/5] taxonomy cache_labels respects showFullPath and pathSeparator control_tag attrs #7382 --- .../data_manager/actions/cache_labels.py | 41 ++++++++++++------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/label_studio/data_manager/actions/cache_labels.py b/label_studio/data_manager/actions/cache_labels.py index b4d2ae3ac2cb..80be8162fe4a 100644 --- a/label_studio/data_manager/actions/cache_labels.py +++ b/label_studio/data_manager/actions/cache_labels.py @@ -6,6 +6,7 @@ from core.permissions import AllPermissions from core.redis import start_job_async_or_sync from tasks.models import Annotation, Prediction, Task +from label_studio_sdk.label_interface import LabelInterface logger = logging.getLogger(__name__) all_permissions = AllPermissions() @@ -18,6 +19,7 @@ def cache_labels_job(project, queryset, **kwargs): source_class = Annotation if source == 'annotations' else Prediction control_tag = request_data.get('custom_control_tag') or request_data.get('control_tag') with_counters = request_data.get('with_counters', 'Yes').lower() == 'yes' + label_interface = LabelInterface(project.label_config) if source == 'annotations': column_name = 'cache' @@ -38,7 +40,7 @@ def cache_labels_job(project, queryset, **kwargs): task_labels = [] annotations = source_class.objects.filter(task=task).only('result') for annotation in annotations: - labels = extract_labels(annotation, control_tag) + labels = extract_labels(annotation, control_tag, label_interface) task_labels.extend(labels) # cache labels in separate data column @@ -57,24 +59,35 @@ def cache_labels_job(project, queryset, **kwargs): return {'response_code': 200, 'detail': f'Updated {len(tasks)} tasks'} -def extract_labels(annotation, control_tag): +def extract_labels(annotation, control_tag, label_interface=None): labels = [] for region in annotation.result: # find regions with specific control tag name or just all regions if control tag is None if (control_tag is None or region['from_name'] == control_tag) and 'value' in region: - # scan value for a field with list of strings, - # as bonus it will work with textareas too + # scan value for a field with list of strings (eg choices, textareas) + # or taxonomy (list of string-lists) for key in region['value']: - if ( - isinstance(region['value'][key], list) - and region['value'][key] - and isinstance(region['value'][key], list) - ): - for elem in region['value'][key]: - if isinstance(elem, str): - labels.append(elem) # eg Choices fields - elif isinstance(elem, list): - labels.append(elem[-1]) # Taxonomy fields + if region['value'][key] and isinstance(region['value'][key], list): + + if key == 'taxonomy': + showFullPath = 'true' + pathSeparator = '/' + if label_interface.find_tags('control', match_fn=lambda tag: tag.name==region['from_name']): + # if from_name is not a custom_control tag, then we can try to fetch taxonomy formatting params + showFullPath = label_interface.get_control(region['from_name']).attr.get('showFullPath', 'false') + pathSeparator = label_interface.get_control(region['from_name']).attr.get('pathSeparator', '/') + + if showFullPath == 'false': + for elems in region['value'][key]: + labels.append( elems[-1] ) # just the leaf node of a taxonomy selection + else: + for elems in region['value'][key]: + labels.append( pathSeparator.join(elems) ) # the full deliminated taxonomy path + + # other control tag types like Choices & TextAreas + elif isinstance(region['value'][key][0], str): + labels.extend( region['value'][key] ) + break return labels From 2cbc80d1248b9941f5a413e1e6c399a26929d470 Mon Sep 17 00:00:00 2001 From: Sidney Batchelder <44208509+sbatchelder@users.noreply.github.com> Date: Thu, 17 Apr 2025 10:13:09 -0400 Subject: [PATCH 3/5] providing label_interface to extract_labels now optional --- label_studio/data_manager/actions/cache_labels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/label_studio/data_manager/actions/cache_labels.py b/label_studio/data_manager/actions/cache_labels.py index 80be8162fe4a..cf1addbf6159 100644 --- a/label_studio/data_manager/actions/cache_labels.py +++ b/label_studio/data_manager/actions/cache_labels.py @@ -72,7 +72,7 @@ def extract_labels(annotation, control_tag, label_interface=None): if key == 'taxonomy': showFullPath = 'true' pathSeparator = '/' - if label_interface.find_tags('control', match_fn=lambda tag: tag.name==region['from_name']): + if label_interface is not None and label_interface.find_tags('control', match_fn=lambda tag: tag.name==region['from_name']): # if from_name is not a custom_control tag, then we can try to fetch taxonomy formatting params showFullPath = label_interface.get_control(region['from_name']).attr.get('showFullPath', 'false') pathSeparator = label_interface.get_control(region['from_name']).attr.get('pathSeparator', '/') From 0c3a20f47a5e6479c24a5888f1a3f381f2b29566 Mon Sep 17 00:00:00 2001 From: Sidney Batchelder <44208509+sbatchelder@users.noreply.github.com> Date: Thu, 17 Apr 2025 15:14:55 -0400 Subject: [PATCH 4/5] preload LabelInterface for efficient access for cache_labels #7382 --- .../data_manager/actions/cache_labels.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/label_studio/data_manager/actions/cache_labels.py b/label_studio/data_manager/actions/cache_labels.py index cf1addbf6159..0d5fde310d6a 100644 --- a/label_studio/data_manager/actions/cache_labels.py +++ b/label_studio/data_manager/actions/cache_labels.py @@ -20,7 +20,8 @@ def cache_labels_job(project, queryset, **kwargs): control_tag = request_data.get('custom_control_tag') or request_data.get('control_tag') with_counters = request_data.get('with_counters', 'Yes').lower() == 'yes' label_interface = LabelInterface(project.label_config) - + label_interface_tags = {tag.name:tag for tag in label_interface.find_tags('control')} + if source == 'annotations': column_name = 'cache' else: @@ -40,7 +41,7 @@ def cache_labels_job(project, queryset, **kwargs): task_labels = [] annotations = source_class.objects.filter(task=task).only('result') for annotation in annotations: - labels = extract_labels(annotation, control_tag, label_interface) + labels = extract_labels(annotation, control_tag, label_interface_tags) task_labels.extend(labels) # cache labels in separate data column @@ -59,7 +60,7 @@ def cache_labels_job(project, queryset, **kwargs): return {'response_code': 200, 'detail': f'Updated {len(tasks)} tasks'} -def extract_labels(annotation, control_tag, label_interface=None): +def extract_labels(annotation, control_tag, label_interface_tags=None): labels = [] for region in annotation.result: # find regions with specific control tag name or just all regions if control tag is None @@ -72,17 +73,18 @@ def extract_labels(annotation, control_tag, label_interface=None): if key == 'taxonomy': showFullPath = 'true' pathSeparator = '/' - if label_interface is not None and label_interface.find_tags('control', match_fn=lambda tag: tag.name==region['from_name']): + if label_interface_tags is not None and region['from_name'] in label_interface_tags: # if from_name is not a custom_control tag, then we can try to fetch taxonomy formatting params - showFullPath = label_interface.get_control(region['from_name']).attr.get('showFullPath', 'false') - pathSeparator = label_interface.get_control(region['from_name']).attr.get('pathSeparator', '/') + label_interface_tag = label_interface_tags[region['from_name']] + showFullPath = label_interface_tag.attr.get('showFullPath', 'false') + pathSeparator = label_interface_tag.attr.get('pathSeparator', '/') if showFullPath == 'false': for elems in region['value'][key]: labels.append( elems[-1] ) # just the leaf node of a taxonomy selection else: for elems in region['value'][key]: - labels.append( pathSeparator.join(elems) ) # the full deliminated taxonomy path + labels.append( pathSeparator.join(elems) ) # the full delimited taxonomy path # other control tag types like Choices & TextAreas elif isinstance(region['value'][key][0], str): From 5610b1072236c6edb54a789364c54b51e9686618 Mon Sep 17 00:00:00 2001 From: Max Tkachenko Date: Thu, 17 Apr 2025 21:32:19 +0100 Subject: [PATCH 5/5] Update label_studio/data_manager/actions/cache_labels.py --- label_studio/data_manager/actions/cache_labels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/label_studio/data_manager/actions/cache_labels.py b/label_studio/data_manager/actions/cache_labels.py index 0d5fde310d6a..e4f280cafd63 100644 --- a/label_studio/data_manager/actions/cache_labels.py +++ b/label_studio/data_manager/actions/cache_labels.py @@ -88,7 +88,7 @@ def extract_labels(annotation, control_tag, label_interface_tags=None): # other control tag types like Choices & TextAreas elif isinstance(region['value'][key][0], str): - labels.extend( region['value'][key] ) + labels.extend(region['value'][key]) break return labels