Skip to content

feat: Cache Labels for Taxonomy #7383

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 15, 2025
Merged
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 30 additions & 11 deletions label_studio/data_manager/actions/cache_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from core.permissions import AllPermissions
from core.redis import start_job_async_or_sync
from tasks.models import Annotation, Prediction, Task
from label_studio_sdk.label_interface import LabelInterface

logger = logging.getLogger(__name__)
all_permissions = AllPermissions()
Expand All @@ -18,7 +19,9 @@ def cache_labels_job(project, queryset, **kwargs):
source_class = Annotation if source == 'annotations' else Prediction
control_tag = request_data.get('custom_control_tag') or request_data.get('control_tag')
with_counters = request_data.get('with_counters', 'Yes').lower() == 'yes'

label_interface = LabelInterface(project.label_config)
label_interface_tags = {tag.name:tag for tag in label_interface.find_tags('control')}

if source == 'annotations':
column_name = 'cache'
else:
Expand All @@ -38,7 +41,7 @@ def cache_labels_job(project, queryset, **kwargs):
task_labels = []
annotations = source_class.objects.filter(task=task).only('result')
for annotation in annotations:
labels = extract_labels(annotation, control_tag)
labels = extract_labels(annotation, control_tag, label_interface_tags)
task_labels.extend(labels)

# cache labels in separate data column
Expand All @@ -57,20 +60,36 @@ def cache_labels_job(project, queryset, **kwargs):
return {'response_code': 200, 'detail': f'Updated {len(tasks)} tasks'}


def extract_labels(annotation, control_tag, label_interface_tags=None):
    """Collect the label strings stored in the regions of ``annotation.result``.

    For every region that belongs to ``control_tag`` (or for every region when
    ``control_tag`` is None) the region's ``value`` mapping is scanned for the
    first field holding labels:

    * a ``taxonomy`` field (a list of paths, each path a list of strings) is
      rendered either as leaf names or as full delimited paths;
    * any other non-empty list whose first item is a string (e.g. choices,
      textareas) is taken as-is.

    Args:
        annotation: object exposing a ``result`` attribute that is a list of
            region dicts; a ``None``/empty result yields no labels.
        control_tag: ``from_name`` to filter regions by, or None for all regions.
        label_interface_tags: optional mapping of control tag name to a tag
            object whose ``attr`` dict may carry ``showFullPath`` and
            ``pathSeparator``; used only for taxonomy formatting.

    Returns:
        A flat list of label strings, in region order.
    """
    labels = []
    for region in annotation.result or []:
        # .get() so that a region without 'from_name' is filtered out instead of raising
        from_name = region.get('from_name')
        if control_tag is not None and from_name != control_tag:
            continue
        if 'value' not in region:
            continue

        for key, value in region['value'].items():
            if not value or not isinstance(value, list):
                continue

            if key == 'taxonomy':
                labels.extend(_format_taxonomy_paths(value, from_name, label_interface_tags))
                break

            # other control tag types like Choices & TextAreas
            if isinstance(value[0], str):
                labels.extend(value)
                break
            # a list of non-string items is not a label field: keep scanning
            # the remaining keys instead of ending the search here
    return labels


def _format_taxonomy_paths(paths, from_name, label_interface_tags):
    """Render taxonomy selections as leaf names or as full delimited paths."""
    # without tag info (e.g. a custom control tag name) fall back to full '/'-joined paths
    show_full_path = 'true'
    path_separator = '/'
    if label_interface_tags is not None and from_name in label_interface_tags:
        # the tag is known, so take its formatting params;
        # a tag without an explicit showFullPath is rendered as leaf names
        tag_attrs = label_interface_tags[from_name].attr
        show_full_path = tag_attrs.get('showFullPath', 'false')
        path_separator = tag_attrs.get('pathSeparator', '/')

    # empty paths are dropped: they have no leaf node to report
    selected = [path for path in paths if path]
    if show_full_path == 'false':
        return [path[-1] for path in selected]
    return [path_separator.join(path) for path in selected]

Expand Down
Loading