25 changes: 17 additions & 8 deletions education-ai-suite/smart-classroom/components/asr_component.py
@@ -187,8 +187,6 @@ def process(self, input_generator):
if os.path.exists(chunk_path) and DELETE_CHUNK_AFTER_USE:
os.remove(chunk_path)

-StorageManager.save_async(transcript_path, transcribed_text, append=True)

yield {
**chunk_data,
"text": transcribed_text,
@@ -208,8 +206,9 @@ def process(self, input_generator):
teacher_speaker = max(self.speaker_text_len, key=self.speaker_text_len.get)

if teacher_speaker:
-teacher_lines_with_time = []
+teacher_lines = []
full_updated_lines = []
+full_timestamped_lines = []

for seg in self.all_segments:
spk = seg["speaker"]
@@ -219,8 +218,8 @@ def process(self, input_generator):

if spk == teacher_speaker:
speaker_label = LABEL_TEACHER
-teacher_lines_with_time.append(
-    f"[{start} - {end}] {speaker_label}: {text}"
+teacher_lines.append(
+    f"{text}"
)
else:
if spk.startswith(f"{LABEL_SPEAKER}_"):
@@ -233,7 +232,11 @@ def process(self, input_generator):
speaker_label = spk

full_updated_lines.append(
f"[{start} - {end}] {speaker_label}: {text}"
f"{speaker_label}: {text}"
)

+full_timestamped_lines.append(
+    f"[{start} - {end}]: {text}"
+)

StorageManager.save(
@@ -242,9 +245,15 @@ def process(self, input_generator):
append=False
)

+StorageManager.save(
+    os.path.join(project_path, "content_segmentation_transcription.txt"),
+    "\n".join(full_timestamped_lines) + "\n",
+    append=False
+)

StorageManager.save(
os.path.join(project_path, "teacher_transcription.txt"),
"\n".join(teacher_lines_with_time) + "\n",
"\n".join(teacher_lines) + "\n",
append=False
)

@@ -269,4 +278,4 @@ def process(self, input_generator):
}
)

logger.info(f"Transcription Complete: {self.session_id}")
logger.info(f"Transcription Complete: {self.session_id}")
@@ -1,99 +1,135 @@
from components.llm.base_summarizer import BaseSummarizer
-import logging
+import logging, threading, gc
from transformers import AutoTokenizer, TextIteratorStreamer
from optimum.intel.openvino import OVModelForCausalLM
from utils import ensure_model
from utils.config_loader import config
from utils.locks import audio_pipeline_lock
-import threading

logger = logging.getLogger(__name__)


class Summarizer(BaseSummarizer):
def __init__(self, model_name, device, temperature=0.7, revision=None):
self.model_name = model_name
-self.device = device.upper() # OpenVINO uses "GPU" or "CPU"
+self.device = device.upper()
self.temperature = temperature

-model_path = ensure_model.get_model_path()
-logger.info(f"Loading Model: model name={self.model_name}, model path={model_path}, device={self.device}")

+self.model_path = ensure_model.get_model_path()

+logger.info(
+    f"Summarizer initialized (lazy load). "
+    f"model={self.model_name}, path={self.model_path}, device={self.device}"
+)

self.tokenizer = AutoTokenizer.from_pretrained(
-    model_path,
-    trust_remote_code=True,
-    fix_mistral_regex=True
+    self.model_path,
+    trust_remote_code=True,
+    fix_mistral_regex=True,
)

if self.tokenizer.pad_token is None:
self.tokenizer.pad_token = self.tokenizer.eos_token

-self.model = OVModelForCausalLM.from_pretrained(
-    model_path,
-    device=self.device,
-    use_cache=True

+def _load_model(self):
+    logger.info("Loading OVModelForCausalLM instance...")
+    return OVModelForCausalLM.from_pretrained(
+        self.model_path,
+        device=self.device,
+        use_cache=True,
)

+def _destroy_model(self, model):
+    try:
+        del model
+        gc.collect()
+        logger.info("OV model instance destroyed")
+    except Exception as e:
+        logger.warning(f"Failed to destroy OV model cleanly: {e}")

def generate(self, prompt: str, stream: bool = True):
max_new_tokens = config.models.summarizer.max_new_tokens
inputs = self.tokenizer(prompt, return_tensors="pt")

if stream:
class CountingTextIteratorStreamer(TextIteratorStreamer):
-    def __init__(self, tokenizer, skip_special_tokens=True, skip_prompt=True):
-        super().__init__(tokenizer, skip_special_tokens=skip_special_tokens, skip_prompt=skip_prompt)
-        self.total_tokens = 0
+    def __init__(self, tokenizer, skip_special_tokens=True, skip_prompt=True):
+        super().__init__(
+            tokenizer,
+            skip_special_tokens=skip_special_tokens,
+            skip_prompt=skip_prompt,
+        )
+        self.total_tokens = 0

-    def put(self, value):
-        if value is not None:
-            self.total_tokens += 1
-        super().put(value)
+    def put(self, value):
+        if value is not None:
+            self.total_tokens += 1
+        super().put(value)

streamer = CountingTextIteratorStreamer(
-    self.tokenizer,
-    skip_special_tokens=True,
-    skip_prompt=True
+    self.tokenizer,
+    skip_special_tokens=True,
+    skip_prompt=True,
)

def run_generation():
-    with audio_pipeline_lock:
-        generation_kwargs = {
-            "input_ids": inputs.input_ids,
-            "max_new_tokens": max_new_tokens,
-
-            # 🔑 sampling safety
-            "do_sample": True,
-            "temperature": max(self.temperature, 0.1),
-            "top_p": 0.9,
-            "top_k": 50,
-
-            # tokens
-            "pad_token_id": self.tokenizer.eos_token_id,
-            "eos_token_id": self.tokenizer.eos_token_id,
-
-            # streaming
-            "streamer": streamer,
-        }
-    self.model.generate(**generation_kwargs)

-thread = threading.Thread(target=run_generation, daemon=True)
-thread.start()

+    model = None
+    try:
+        with audio_pipeline_lock:
+            model = self._load_model()
+            model.generate(
+                input_ids=inputs.input_ids,
+                max_new_tokens=max_new_tokens,
+
+                # sampling
+                do_sample=True,
+                temperature=max(self.temperature, 0.1),
+                top_p=0.9,
+                top_k=50,
+
+                # tokens
+                pad_token_id=self.tokenizer.eos_token_id,
+                eos_token_id=self.tokenizer.eos_token_id,
+
+                # streaming
+                streamer=streamer,
+            )
+
+    except Exception:
+        logger.error(
+            "Exception occurred in OV streaming generation",
+            exc_info=True,
+        )
+        if hasattr(streamer, "_queue"):
+            streamer._queue.put(
+                "[ERROR]: Summary generation failed due to resource constraints."
+            )
+
+    finally:
+        if model is not None:
+            self._destroy_model(model)
+        streamer.end()

+threading.Thread(target=run_generation, daemon=True).start()
return streamer

else:
-with audio_pipeline_lock:
-    generation_kwargs = {
-        "input_ids": inputs.input_ids,
-        "max_new_tokens": max_new_tokens,
-
-        # 🔑 sampling safety
-        "do_sample": True,
-        "temperature": max(self.temperature, 0.1),
-        "top_p": 0.9,
-        "top_k": 50,
-
-        # tokens
-        "pad_token_id": self.tokenizer.eos_token_id,
-        "eos_token_id": self.tokenizer.eos_token_id,
-    }
-    return self.model.generate(**generation_kwargs)
+model = None
+try:
+    with audio_pipeline_lock:
+        model = self._load_model()
+        return model.generate(
+            input_ids=inputs.input_ids,
+            max_new_tokens=max_new_tokens,
+
+            do_sample=True,
+            temperature=max(self.temperature, 0.1),
+            top_p=0.9,
+            top_k=50,
+
+            pad_token_id=self.tokenizer.eos_token_id,
+            eos_token_id=self.tokenizer.eos_token_id,
+        )
+finally:
+    if model is not None:
+        self._destroy_model(model)
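
Both summarizer backends in this PR now follow the same load → generate → release lifecycle: take the pipeline lock, instantiate the model lazily, generate, then drop the reference and force a GC pass so memory is reclaimed between requests. A minimal sketch of that pattern with a stand-in model class (the stub and the lock name here are illustrative, not the repo's API):

import gc
import threading

class StubModel:  # stand-in for OVModelForCausalLM / ov_genai.LLMPipeline
    def generate(self, prompt):
        return f"summary of: {prompt}"

pipeline_lock = threading.Lock()  # plays the role of audio_pipeline_lock

def summarize(prompt):
    model = None
    try:
        with pipeline_lock:        # serialize with the audio pipeline
            model = StubModel()    # lazy, per-request load
            return model.generate(prompt)
    finally:
        if model is not None:
            del model              # drop the last reference...
            gc.collect()           # ...so the memory is reclaimed right away

print(summarize("lesson transcript"))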
@@ -1,7 +1,7 @@
from components.llm.base_summarizer import BaseSummarizer
import openvino_genai as ov_genai
from transformers import AutoTokenizer
-import logging, threading
+import logging, threading, gc
from utils import ensure_model
from utils.config_loader import config
from utils.ov_genai_util import YieldingTextStreamer
@@ -15,21 +15,27 @@ def __init__(self, model_name, device, temperature=0.7, revision=None):
self.temperature = temperature
logger.info(f"Loading Model: model name={self.model_name}, model path={ensure_model.get_model_path()}, device={self.device}")
self.tokenizer = AutoTokenizer.from_pretrained(ensure_model.get_model_path())
-self.model = ov_genai.LLMPipeline(ensure_model.get_model_path(), device=device)

def generate(self, prompt, stream: bool = True):
if stream:
streamer = YieldingTextStreamer(self.tokenizer)

def run_generation():
+    model = None
try:
-audio_pipeline_lock.acquire()
-self.model.generate(
-    prompt,
-    streamer=streamer,
-    max_new_tokens=config.models.summarizer.max_new_tokens,
-    temperature=self.temperature,
-)
+with audio_pipeline_lock:
+    model = self._load_model()
+    model.generate(
+        prompt,
+        streamer=streamer,
+        max_new_tokens=config.models.summarizer.max_new_tokens,
+        temperature=self.temperature,
+        do_sample=False,
+    )
+    cfg = model.get_generation_config()
+    for attr in dir(cfg):
+        if not attr.startswith("_"):
+            logger.info(f" {attr}: {getattr(cfg, attr)}")

except Exception as e:
error_msg = "Summary generation failed. Please ensure sufficient free resources are available to run this process."
@@ -38,15 +44,35 @@ def run_generation():
error_msg = "Summary generation failed. Insufficient GPU resources available to run this process."
streamer._queue.put(f"[ERROR]: {error_msg}")
finally:
-audio_pipeline_lock.release()
+if model is not None:
+    self._destroy_model(model)
streamer.end()

threading.Thread(target=run_generation, daemon=True).start()
return streamer
else:
-with audio_pipeline_lock:
-    return self.model.generate(
+model = None
+try:
+    with audio_pipeline_lock:
+        model = self._load_model()
+        return model.generate(
            prompt,
            max_new_tokens=config.models.summarizer.max_new_tokens,
            temperature=self.temperature,
+            do_sample=False,
        )
+finally:
+    if model is not None:
+        self._destroy_model(model)

+def _load_model(self):
+    logger.info("Loading model instance...")
+    return ov_genai.LLMPipeline(ensure_model.get_model_path(), device=self.device)
+
+def _destroy_model(self, model):
+    try:
+        del model
+        gc.collect()
+        logger.info("Model instance destroyed and memory reclaimed")
+    except Exception as e:
+        logger.warning(f"Failed to fully destroy model: {e}")
@@ -23,25 +23,24 @@ def __init__(self, session_id, provider, model_name, device, temperature=0.7, mo
provider = provider.lower()
cfg = (provider, model_name, device)

-# Reload only if config changed
-if SummarizerComponent._model is None or SummarizerComponent._config != cfg:
-    if provider == "openvino":
-        SummarizerComponent._model = OvSummarizer(
-            model_name=model_name,
-            device=device,
-            temperature=temperature,
-            revision=None
-        )
-    elif provider == "ipex":
-        SummarizerComponent._model = IpexSummarizer(
-            model_name=model_name,
-            device=device.lower(),
-            temperature=temperature
-        )
-    else:
-        raise ValueError(f"Unsupported summarizer provider: {provider}")
-
-    SummarizerComponent._config = cfg

+if provider == "openvino":
+    SummarizerComponent._model = OvSummarizer(
+        model_name=model_name,
+        device=device,
+        temperature=temperature,
+        revision=None
+    )
+elif provider == "ipex":
+    SummarizerComponent._model = IpexSummarizer(
+        model_name=model_name,
+        device=device.lower(),
+        temperature=temperature
+    )
+else:
+    raise ValueError(f"Unsupported summarizer provider: {provider}")
+
+SummarizerComponent._config = cfg

self.summarizer = SummarizerComponent._model
self.model_name = model_name