From 531a033048a263bd09cb94ad356bfa7c362c2791 Mon Sep 17 00:00:00 2001
From: Rukaiya Bano <97897944+rukaiya-rk-24@users.noreply.github.com>
Date: Sun, 6 Aug 2023 21:35:00 +0530
Subject: [PATCH 1/8] asr.py: default to the Hindi stt_hi_conformer_ctc_medium
 model

---
 label_studio_ml/examples/nemo/asr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/label_studio_ml/examples/nemo/asr.py b/label_studio_ml/examples/nemo/asr.py
index 71fbb074d..d213645e2 100644
--- a/label_studio_ml/examples/nemo/asr.py
+++ b/label_studio_ml/examples/nemo/asr.py
@@ -12,7 +12,7 @@
 
 class NemoASR(LabelStudioMLBase):
 
-    def __init__(self, model_name='QuartzNet15x5Base-En', **kwargs):
+    def __init__(self, model_name='stt_hi_conformer_ctc_medium', **kwargs):
         super(NemoASR, self).__init__(**kwargs)
 
         # Find TextArea control tag and bind ASR model to it
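Note on PATCH 1/8: the diff only swaps the default checkpoint name. A minimal sketch for verifying that the new checkpoint loads and transcribes, assuming `nemo_toolkit[asr]` is installed (the audio path is a placeholder, not part of the patch):

```python
# Standalone smoke test for the Hindi Conformer-CTC checkpoint adopted above.
# Assumptions: nemo_toolkit[asr] is installed; 'sample_hi.wav' is a placeholder path.
import nemo.collections.asr as nemo_asr

model = nemo_asr.models.ASRModel.from_pretrained(model_name='stt_hi_conformer_ctc_medium')
print(model.transcribe(['sample_hi.wav'])[0])
```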
From 87a8d28e83d57e4fd9fc4eb8effb60ea68815a48 Mon Sep 17 00:00:00 2001
From: Rukaiya Bano <97897944+rukaiya-rk-24@users.noreply.github.com>
Date: Thu, 2 Nov 2023 08:13:44 +0530
Subject: [PATCH 2/8] Update openai_predictor.py: replace the OpenAI
 classifier with a Hugging Face Palmyra-small generator

---
 .../examples/openai/openai_predictor.py      | 106 ++++--------------
 1 file changed, 23 insertions(+), 83 deletions(-)

diff --git a/label_studio_ml/examples/openai/openai_predictor.py b/label_studio_ml/examples/openai/openai_predictor.py
index d6546dd8d..4f880c400 100644
--- a/label_studio_ml/examples/openai/openai_predictor.py
+++ b/label_studio_ml/examples/openai/openai_predictor.py
@@ -1,95 +1,35 @@
-import os
-import openai
-import difflib
 import logging
-
+from typing import List, Dict, Optional
 from label_studio_ml.model import LabelStudioMLBase
+from uuid import uuid4
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
-logger = logging.getLogger(__name__)
-
-openai.api_key = os.environ['OPENAI_API_KEY']
-
+class PalmyraSmallInteractive(LabelStudioMLBase):
 
-class OpenAIPredictor(LabelStudioMLBase):
-    DEFAULT_PROMPT = os.path.join(os.path.dirname(__file__), 'prompt.txt')
+    TEMPERATURE = 0.7
 
     def __init__(self, **kwargs):
-        # don't forget to initialize base class...
-        super(OpenAIPredictor, self).__init__(**kwargs)
-
-        # Parsed label config contains only one output of type <Choices>
-        assert len(self.parsed_label_config) == 1
-        self.from_name, self.info = list(self.parsed_label_config.items())[0]
-        assert self.info['type'] == 'Choices'
-
-        # the model has only one textual input
-        assert len(self.info['to_name']) == 1
-        assert len(self.info['inputs']) == 1
-        assert self.info['inputs'][0]['type'] == 'Text'
-        self.to_name = self.info['to_name'][0]
-        self.value = self.info['inputs'][0]['value']
-        self.labels = self.info['labels']
-
-        self.openai_model = kwargs.get('model', 'gpt-3.5-turbo')
-        self.openai_max_tokens = int(kwargs.get('max_tokens', 40))
-        self.openai_temperature = float(kwargs.get('temperature', 0.5))
-        self.openai_prompt = kwargs.get('prompt', self.DEFAULT_PROMPT)
-        if os.path.isfile(self.openai_prompt):
-            with open(self.openai_prompt) as f:
-                self.openai_prompt = f.read()
+        super(PalmyraSmallInteractive, self).__init__(**kwargs)
+        self.tokenizer = AutoTokenizer.from_pretrained("Writer/palmyra-small")
+        self.model = AutoModelForCausalLM.from_pretrained("Writer/palmyra-small")
 
-        logger.debug(
-            f'Initialize OpenAI API with the following parameters:'
-            f' model={self.openai_model}, max_tokens={self.openai_max_tokens}, temperature={self.openai_temperature},'
-            f' prompt={self.openai_prompt}')
-
-    def _get_prompt(self, task_data):
-        if os.path.isfile(self.openai_prompt):
-            # Read the prompt from the file
-            # that allows changing the prompt without restarting the server
-            # use it only for development
-            with open(self.openai_prompt) as f:
-                prompt = f.read()
-        else:
-            prompt = self.openai_prompt
-        return prompt.format(labels=self.labels, **task_data)
-
-    def _get_predicted_label(self, task_data):
-        # Create a prompt for the OpenAI API
-        prompt = self._get_prompt(task_data)
-        # Call OpenAI's API to create a chat completion using the GPT-3 model
-        response = openai.ChatCompletion.create(
-            model=self.openai_model,
-            messages=[
-                {"role": "user", "content": prompt}  # The 'user' role is assigned to the prompt
-            ],
-            max_tokens=self.openai_max_tokens,  # Maximum number of tokens in the response is set to 40
-            n=1,  # We only want one response
-            stop=None,  # There are no specific stop sequences
-            temperature=self.openai_temperature,  # The temperature parameter affects randomness in the output. Lower values (like 0.5) make the output more deterministic.
-        )
-        logger.debug(f'OpenAI response: {response}')
-        # Extract the response text from the ChatCompletion response
-        response_text = response.choices[0].message['content'].strip()
-
-        # Extract the matched labels from the response text
-        matched_labels = []
-        for pred in response_text.split("\n"):
-            scores = list(map(lambda l: difflib.SequenceMatcher(None, pred, l).ratio(), self.labels))
-            matched_labels.append(self.labels[scores.index(max(scores))])
-
-        # Return the input_text along with the identified sentiment
-        return matched_labels
-
-    def predict(self, tasks, **kwargs):
+    def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> List[Dict]:
         predictions = []
+        model_version = "writer/palmyra-small"
         for task in tasks:
-            predicted_labels = self._get_predicted_label(task['data'])
+            prompt = task['data']['prompt']
+            inputs = self.tokenizer.encode(prompt, return_tensors='pt')
+            outputs = self.model.generate(inputs, max_length=512, temperature=self.TEMPERATURE)
+            generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             result = [{
-                'from_name': self.from_name,
-                'to_name': self.to_name,
-                'type': 'choices',
-                'value': {'choices': predicted_labels}
+                'id': str(uuid4())[:4],
+                'from_name': 'instruction',
+                'to_name': 'prompt',
+                'type': 'textarea',
+                'value': {
+                    'text': generated_text
+                }
             }]
-            predictions.append({'result': result, 'score': 1.0})
+            predictions.append({'result': result, 'model_version': model_version})
         return predictions
+
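Note on PATCH 2/8: the new `predict()` returns `'value': {'text': generated_text}` with a bare string, but Label Studio expects TextArea results to carry `text` as a list of strings (PATCH 6/8 below moves to the list form). The hardcoded `from_name='instruction'` and `to_name='prompt'` must also match the tag names in the project's labeling config. A sketch of the expected result shape, with the same assumed tag names:

```python
# Shape of a TextArea prediction as Label Studio consumes it; 'instruction' and
# 'prompt' mirror the tag names hardcoded in the patch above.
generated_text = "model output goes here"  # stand-in for tokenizer.decode(...)
result = [{
    'from_name': 'instruction',
    'to_name': 'prompt',
    'type': 'textarea',
    'value': {'text': [generated_text]},  # a list of strings, not a bare string
}]
```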
From 559dabdba29df7c3b399c010a47cecd940044f39 Mon Sep 17 00:00:00 2001
From: Rukaiya Bano <97897944+rukaiya-rk-24@users.noreply.github.com>
Date: Thu, 2 Nov 2023 08:14:44 +0530
Subject: [PATCH 3/8] Update requirements.txt: swap openai for transformers

---
 label_studio_ml/examples/openai/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/label_studio_ml/examples/openai/requirements.txt b/label_studio_ml/examples/openai/requirements.txt
index b499e1515..208c745c3 100644
--- a/label_studio_ml/examples/openai/requirements.txt
+++ b/label_studio_ml/examples/openai/requirements.txt
@@ -1,4 +1,4 @@
 gunicorn==20.1.0
 label-studio-ml>=1.0.9
 rq==1.10.1
-openai==0.27.4
\ No newline at end of file
+transformers

From 920f99775d84f40b34179a74628df5ea55d07865 Mon Sep 17 00:00:00 2001
From: Rukaiya Bano <97897944+rukaiya-rk-24@users.noreply.github.com>
Date: Thu, 2 Nov 2023 13:19:37 +0530
Subject: [PATCH 4/8] Update openai_predictor.py: switch to the
 aashay96/indic-gpt model

---
 .../examples/openai/openai_predictor.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/label_studio_ml/examples/openai/openai_predictor.py b/label_studio_ml/examples/openai/openai_predictor.py
index 4f880c400..ca0a99206 100644
--- a/label_studio_ml/examples/openai/openai_predictor.py
+++ b/label_studio_ml/examples/openai/openai_predictor.py
@@ -6,20 +6,18 @@
 
 class PalmyraSmallInteractive(LabelStudioMLBase):
 
-    TEMPERATURE = 0.7
-
-    def __init__(self, **kwargs):
-        super(PalmyraSmallInteractive, self).__init__(**kwargs)
-        self.tokenizer = AutoTokenizer.from_pretrained("Writer/palmyra-small")
-        self.model = AutoModelForCausalLM.from_pretrained("Writer/palmyra-small")
+    def _init_(self, **kwargs):
+        super(PalmyraSmallInteractive, self)._init_(**kwargs)
+        self.tokenizer = AutoTokenizer.from_pretrained("aashay96/indic-gpt")
+        self.model = AutoModelForCausalLM.from_pretrained("aashay96/indic-gpt")
 
     def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> List[Dict]:
         predictions = []
-        model_version = "writer/palmyra-small"
+        model_version = "aashay96/indic-gpt"
         for task in tasks:
             prompt = task['data']['prompt']
             inputs = self.tokenizer.encode(prompt, return_tensors='pt')
-            outputs = self.model.generate(inputs, max_length=512, temperature=self.TEMPERATURE)
+            outputs = self.model.generate(inputs, max_length=200)
             generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             result = [{
                 'id': str(uuid4())[:4],
@@ -31,5 +29,4 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> List[Dict]:
                 }
             }]
             predictions.append({'result': result, 'model_version': model_version})
-        return predictions
-
+        return predictions
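Note on PATCHES 3/8 and 4/8: the requirements change leaves `transformers` unpinned and omits `torch`, which `AutoModelForCausalLM` needs at runtime; and `_init_` in PATCH 4 is missing the double underscores, so the constructor would never be called (PATCH 5 restores `__init__`). Note also that `generate(inputs, max_length=200)` counts the prompt tokens against the limit, while `max_new_tokens` bounds only the continuation. A hedged sketch (the prompt string is a placeholder):

```python
# Assumes requirements include torch and transformers (ideally version-pinned).
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("aashay96/indic-gpt")
model = AutoModelForCausalLM.from_pretrained("aashay96/indic-gpt")

inputs = tokenizer.encode("placeholder prompt", return_tensors="pt")
# max_new_tokens bounds only the generated continuation, unlike max_length,
# which also counts the prompt tokens toward the limit.
outputs = model.generate(inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```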
"aashay96/indic-gpt" for task in tasks: prompt = task['data']['prompt'] inputs = self.tokenizer.encode(prompt, return_tensors='pt') - outputs = self.model.generate(inputs, max_length=512, temperature=self.TEMPERATURE) + outputs = self.model.generate(inputs, max_length=200) generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True) result = [{ 'id': str(uuid4())[:4], @@ -31,5 +29,4 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - } }] predictions.append({'result': result, 'model_version': model_version}) - return predictions - + return predictions From 620cb5d6b2fe5977a7651a9502bda6aa04d0769f Mon Sep 17 00:00:00 2001 From: Rukaiya Bano <97897944+rukaiya-rk-24@users.noreply.github.com> Date: Thu, 2 Nov 2023 13:31:35 +0530 Subject: [PATCH 5/8] Update openai_predictor.py --- label_studio_ml/examples/openai/openai_predictor.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/label_studio_ml/examples/openai/openai_predictor.py b/label_studio_ml/examples/openai/openai_predictor.py index ca0a99206..c54e2a94a 100644 --- a/label_studio_ml/examples/openai/openai_predictor.py +++ b/label_studio_ml/examples/openai/openai_predictor.py @@ -6,8 +6,9 @@ class PalmyraSmallInteractive(LabelStudioMLBase): - def _init_(self, **kwargs): - super(PalmyraSmallInteractive, self)._init_(**kwargs) + + def __init__(self, **kwargs): + super(PalmyraSmallInteractive, self).__init__(**kwargs) self.tokenizer = AutoTokenizer.from_pretrained("aashay96/indic-gpt") self.model = AutoModelForCausalLM.from_pretrained("aashay96/indic-gpt") @@ -17,7 +18,7 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - for task in tasks: prompt = task['data']['prompt'] inputs = self.tokenizer.encode(prompt, return_tensors='pt') - outputs = self.model.generate(inputs, max_length=200) + outputs = self.model.generate(inputs, max_length=512) generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True) result = [{ 'id': str(uuid4())[:4], @@ -29,4 +30,5 @@ def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) - } }] predictions.append({'result': result, 'model_version': model_version}) - return predictions + return predictions + From b0e385ac67a39f050d7c6e520f7372e8c3a3ac96 Mon Sep 17 00:00:00 2001 From: Rukaiya Bano <97897944+rukaiya-rk-24@users.noreply.github.com> Date: Fri, 3 Nov 2023 16:08:43 +0530 Subject: [PATCH 6/8] Update openai_predictor.py --- .../examples/openai/openai_predictor.py | 50 +++++++++++-------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/label_studio_ml/examples/openai/openai_predictor.py b/label_studio_ml/examples/openai/openai_predictor.py index c54e2a94a..334a7943f 100644 --- a/label_studio_ml/examples/openai/openai_predictor.py +++ b/label_studio_ml/examples/openai/openai_predictor.py @@ -1,34 +1,40 @@ +from label_studio_ml.model import LabelStudioMLBase import logging from typing import List, Dict, Optional -from label_studio_ml.model import LabelStudioMLBase -from uuid import uuid4 +import torch from transformers import AutoTokenizer, AutoModelForCausalLM -class PalmyraSmallInteractive(LabelStudioMLBase): - +class GPTIndicBackend(LabelStudioMLBase): def __init__(self, **kwargs): - super(PalmyraSmallInteractive, self).__init__(**kwargs) + # Initialization for the ML backend + super(GPTIndicBackend, self).__init__(**kwargs) + + # Load the pre-trained tokenizer and model from HuggingFace self.tokenizer = AutoTokenizer.from_pretrained("aashay96/indic-gpt") 
-    def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> List[Dict]:
+    def predict(self, tasks, **kwargs):
         predictions = []
-        model_version = "aashay96/indic-gpt"
+
         for task in tasks:
-            prompt = task['data']['prompt']
-            inputs = self.tokenizer.encode(prompt, return_tensors='pt')
-            outputs = self.model.generate(inputs, max_length=512)
-            generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-            result = [{
-                'id': str(uuid4())[:4],
-                'from_name': 'instruction',
-                'to_name': 'prompt',
-                'type': 'textarea',
-                'value': {
-                    'text': generated_text
-                }
-            }]
-            predictions.append({'result': result, 'model_version': model_version})
+            # Extract prompt from the task data
+            prompt_text = task['data']['prompt']
+            inputs = self.tokenizer.encode(prompt_text, return_tensors="pt")
+
+            # Generate the response using the model
+            outputs = self.model.generate(inputs, max_length=100)
+            response_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+            # Structure the prediction result
+            predictions.append({
+                'result': [{
+                    'from_name': 'instruction',
+                    'to_name': 'prompt',
+                    'type': 'textarea',
+                    'value': {'text': [response_text[len(prompt_text):]]},
+                }],
+                'score': 1.0  # Confidence score
+            })
+
         return predictions
-

From ddc421dabba34cecf2b9058aa6046ddee408136f Mon Sep 17 00:00:00 2001
From: Rukaiya Bano <97897944+rukaiya-rk-24@users.noreply.github.com>
Date: Fri, 3 Nov 2023 16:45:32 +0530
Subject: [PATCH 7/8] Update docker-compose.yml: run Label Studio alongside
 the ML backend

---
 .../examples/openai/docker-compose.yml | 23 ++++++++++++++-----
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/label_studio_ml/examples/openai/docker-compose.yml b/label_studio_ml/examples/openai/docker-compose.yml
index c379a3220..728836046 100644
--- a/label_studio_ml/examples/openai/docker-compose.yml
+++ b/label_studio_ml/examples/openai/docker-compose.yml
@@ -1,14 +1,25 @@
 version: "3.8"
 
 services:
-  server:
-    container_name: server
+  label-studio-ml-backend:
     build: .
+    container_name: ls-ml-backend
     environment:
-      - LABEL_STUDIO_ML_BACKEND_V2=true
-      - LOG_LEVEL=DEBUG
-      - OPENAI_API_KEY=
+      - PYTHONUNBUFFERED=1
     ports:
       - "9090:9090"
     volumes:
-      - "./prompt.txt:/app/prompt.txt"
\ No newline at end of file
+      - .:/app
+    command: python openai_predictor.py
+
+  label-studio:
+    image: heartexlabs/label-studio:latest
+    container_name: label-studio
+    environment:
+      - LABEL_STUDIO_ML_BACKENDS=ml_backend:9090
+    ports:
+      - "8080:8080"
+    depends_on:
+      - label-studio-ml-backend
+    volumes:
+      - label-studio-data:/label-studio/data

From fb494757f6f8f8cf2d7a7f9bab35a57e2b61b6c4 Mon Sep 17 00:00:00 2001
From: Rukaiya Bano <97897944+rukaiya-rk-24@users.noreply.github.com>
Date: Mon, 20 Nov 2023 15:44:07 +0530
Subject: [PATCH 8/8] Update Dockerfile: rotate the segment_anything_model
 ACCESS_TOKEN

---
 label_studio_ml/examples/segment_anything_model/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/label_studio_ml/examples/segment_anything_model/Dockerfile b/label_studio_ml/examples/segment_anything_model/Dockerfile
index 07e0b5295..bce222bde 100644
--- a/label_studio_ml/examples/segment_anything_model/Dockerfile
+++ b/label_studio_ml/examples/segment_anything_model/Dockerfile
@@ -25,7 +25,7 @@
 
 COPY * /app/
 
-ENV ACCESS_TOKEN=0c5e516d37ed2bc1d11ff5fc59ebaf5e0f756386
+ENV ACCESS_TOKEN=be24dfbee45f8916fc2fee2d6f71da1dc9d5f109
 
 RUN pip install opencv-python
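Note on PATCHES 7/8 and 8/8: in the new compose file, `LABEL_STUDIO_ML_BACKENDS=ml_backend:9090` points at a hostname no service defines (the backend service is named `label-studio-ml-backend`), and the named volume `label-studio-data` is mounted but never declared at the top level, so `docker compose up` would reject the file. A sketch of the two fixes, using the names from the file above:

```yaml
services:
  label-studio:
    environment:
      # Point at the actual backend service name so it resolves on the compose network.
      - LABEL_STUDIO_ML_BACKENDS=label-studio-ml-backend:9090

# Named volumes must be declared at the top level before services can mount them.
volumes:
  label-studio-data:
```

In PATCH 8, `ENV ACCESS_TOKEN=...` bakes the credential into the image layers; injecting it at runtime instead (for example through a compose `environment:` entry) keeps tokens out of the repository and the registry.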