HumanSignal · makseq · May 15, 2025 · Apr 16, 2025 · Apr 17, 2025 · Apr 17, 2025
@@ -6,6 +6,7 @@
 from core.permissions import AllPermissions
 from core.redis import start_job_async_or_sync
 from tasks.models import Annotation, Prediction, Task
+from label_studio_sdk.label_interface import LabelInterface
 
 logger = logging.getLogger(__name__)
 all_permissions = AllPermissions()
@@ -18,7 +19,9 @@ def cache_labels_job(project, queryset, **kwargs):
     source_class = Annotation if source == 'annotations' else Prediction
     control_tag = request_data.get('custom_control_tag') or request_data.get('control_tag')
     with_counters = request_data.get('with_counters', 'Yes').lower() == 'yes'
-
+    label_interface = LabelInterface(project.label_config)
+    label_interface_tags = {tag.name:tag for tag in label_interface.find_tags('control')}
+
     if source == 'annotations':
         column_name = 'cache'
     else:
@@ -38,7 +41,7 @@ def cache_labels_job(project, queryset, **kwargs):
         task_labels = []
         annotations = source_class.objects.filter(task=task).only('result')
         for annotation in annotations:
-            labels = extract_labels(annotation, control_tag)
+            labels = extract_labels(annotation, control_tag, label_interface_tags)
             task_labels.extend(labels)
 
         # cache labels in separate data column
@@ -57,20 +60,36 @@ def cache_labels_job(project, queryset, **kwargs):
     return {'response_code': 200, 'detail': f'Updated {len(tasks)} tasks'}
 
 
def extract_labels(annotation, control_tag, label_interface_tags=None):
    """Collect the label strings stored in the regions of an annotation or prediction.

    For every matching region, the region's ``value`` dict is scanned for the first
    field that carries labels: either a non-empty list of strings (e.g. choices,
    textareas) or a ``taxonomy`` field (a list of paths, each path being a list of
    strings). Only that first field contributes labels for the region. Other
    non-empty lists (for example coordinate lists) are skipped and do not end
    the scan.

    Args:
        annotation: object whose ``result`` attribute is an iterable of region
            dicts; a ``None`` result is treated as empty.
        control_tag: ``from_name`` value to restrict the extraction to, or ``None``
            to take all regions.
        label_interface_tags: optional mapping of control tag name to a tag object
            exposing an ``attr`` dict. When the region's ``from_name`` is found in
            it, the ``showFullPath`` and ``pathSeparator`` attributes control how
            taxonomy paths are rendered (defaults ``'false'`` and ``'/'``). When
            the tag is not found, the full path joined with ``'/'`` is used.

    Returns:
        A flat list of label strings in region order.
    """
    labels = []
    for region in annotation.result or []:
        if 'value' not in region:
            continue
        # find regions with specific control tag name or just all regions if control tag is None
        from_name = region.get('from_name')
        if control_tag is not None and from_name != control_tag:
            continue

        # scan value for a field with list of strings (eg choices, textareas)
        # or taxonomy (list of string-lists)
        for key, items in region['value'].items():
            if not items or not isinstance(items, list):
                continue

            if key == 'taxonomy':
                show_full_path = 'true'
                path_separator = '/'
                if label_interface_tags is not None and from_name in label_interface_tags:
                    # if from_name is not a custom_control tag, then we can try to
                    # fetch taxonomy formatting params
                    tag = label_interface_tags[from_name]
                    show_full_path = tag.attr.get('showFullPath', 'false')
                    path_separator = tag.attr.get('pathSeparator', '/')

                leaf_only = show_full_path == 'false'
                # empty paths carry no label, so they are skipped instead of
                # raising IndexError on path[-1]
                labels.extend(
                    path[-1] if leaf_only else path_separator.join(path)
                    for path in items
                    if path
                )
                break

            # other control tag types like Choices & TextAreas
            if isinstance(items[0], str):
                labels.extend(items)
                break
            # any other non-empty list (e.g. polygon points) is not a label field:
            # keep scanning the remaining keys instead of stopping here
    return labels