diff --git a/education-ai-suite/smart-classroom/components/asr_component.py b/education-ai-suite/smart-classroom/components/asr_component.py index 09af0adb16..c7fba0b949 100644 --- a/education-ai-suite/smart-classroom/components/asr_component.py +++ b/education-ai-suite/smart-classroom/components/asr_component.py @@ -187,8 +187,6 @@ def process(self, input_generator): if os.path.exists(chunk_path) and DELETE_CHUNK_AFTER_USE: os.remove(chunk_path) - StorageManager.save_async(transcript_path, transcribed_text, append=True) - yield { **chunk_data, "text": transcribed_text, @@ -208,8 +206,9 @@ def process(self, input_generator): teacher_speaker = max(self.speaker_text_len, key=self.speaker_text_len.get) if teacher_speaker: - teacher_lines_with_time = [] + teacher_lines = [] full_updated_lines = [] + full_timestamped_lines = [] for seg in self.all_segments: spk = seg["speaker"] @@ -219,8 +218,8 @@ def process(self, input_generator): if spk == teacher_speaker: speaker_label = LABEL_TEACHER - teacher_lines_with_time.append( - f"[{start} - {end}] {speaker_label}: {text}" + teacher_lines.append( + f"{text}" ) else: if spk.startswith(f"{LABEL_SPEAKER}_"): @@ -233,7 +232,11 @@ def process(self, input_generator): speaker_label = spk full_updated_lines.append( - f"[{start} - {end}] {speaker_label}: {text}" + f"{speaker_label}: {text}" + ) + + full_timestamped_lines.append( + f"[{start} - {end}]: {text}" ) StorageManager.save( @@ -242,9 +245,15 @@ def process(self, input_generator): append=False ) + StorageManager.save( + os.path.join(project_path, "content_segmentation_transcription.txt"), + "\n".join(full_timestamped_lines) + "\n", + append=False + ) + StorageManager.save( os.path.join(project_path, "teacher_transcription.txt"), - "\n".join(teacher_lines_with_time) + "\n", + "\n".join(teacher_lines) + "\n", append=False ) @@ -269,4 +278,4 @@ def process(self, input_generator): } ) - logger.info(f"Transcription Complete: {self.session_id}") \ No newline at end of file + logger.info(f"Transcription Complete: {self.session_id}") diff --git a/education-ai-suite/smart-classroom/components/llm/openvino/summarizer.py b/education-ai-suite/smart-classroom/components/llm/openvino/summarizer.py index 9e61033d5c..84d9f004b9 100644 --- a/education-ai-suite/smart-classroom/components/llm/openvino/summarizer.py +++ b/education-ai-suite/smart-classroom/components/llm/openvino/summarizer.py @@ -1,11 +1,10 @@ from components.llm.base_summarizer import BaseSummarizer -import logging +import logging, threading, gc from transformers import AutoTokenizer, TextIteratorStreamer from optimum.intel.openvino import OVModelForCausalLM from utils import ensure_model from utils.config_loader import config from utils.locks import audio_pipeline_lock -import threading logger = logging.getLogger(__name__) @@ -13,87 +12,124 @@ class Summarizer(BaseSummarizer): def __init__(self, model_name, device, temperature=0.7, revision=None): self.model_name = model_name - self.device = device.upper() # OpenVINO uses "GPU" or "CPU" + self.device = device.upper() self.temperature = temperature - model_path = ensure_model.get_model_path() - logger.info(f"Loading Model: model name={self.model_name}, model path={model_path}, device={self.device}") - + self.model_path = ensure_model.get_model_path() + + logger.info( + f"Summarizer initialized (lazy load). " + f"model={self.model_name}, path={self.model_path}, device={self.device}" + ) + self.tokenizer = AutoTokenizer.from_pretrained( - model_path, - trust_remote_code=True, - fix_mistral_regex=True + self.model_path, + trust_remote_code=True, + fix_mistral_regex=True, ) if self.tokenizer.pad_token is None: self.tokenizer.pad_token = self.tokenizer.eos_token - - self.model = OVModelForCausalLM.from_pretrained( - model_path, - device=self.device, - use_cache=True + + def _load_model(self): + logger.info("Loading OVModelForCausalLM instance...") + return OVModelForCausalLM.from_pretrained( + self.model_path, + device=self.device, + use_cache=True, ) + def _destroy_model(self, model): + try: + del model + gc.collect() + logger.info("OV model instance destroyed") + except Exception as e: + logger.warning(f"Failed to destroy OV model cleanly: {e}") + def generate(self, prompt: str, stream: bool = True): max_new_tokens = config.models.summarizer.max_new_tokens inputs = self.tokenizer(prompt, return_tensors="pt") if stream: class CountingTextIteratorStreamer(TextIteratorStreamer): - def __init__(self, tokenizer, skip_special_tokens=True, skip_prompt=True): - super().__init__(tokenizer, skip_special_tokens=skip_special_tokens, skip_prompt=skip_prompt) - self.total_tokens = 0 + def __init__(self, tokenizer, skip_special_tokens=True, skip_prompt=True): + super().__init__( + tokenizer, + skip_special_tokens=skip_special_tokens, + skip_prompt=skip_prompt, + ) + self.total_tokens = 0 - def put(self, value): - if value is not None: - self.total_tokens += 1 - super().put(value) + def put(self, value): + if value is not None: + self.total_tokens += 1 + super().put(value) streamer = CountingTextIteratorStreamer( - self.tokenizer, - skip_special_tokens=True, - skip_prompt=True + self.tokenizer, + skip_special_tokens=True, + skip_prompt=True, ) - + def run_generation(): - with audio_pipeline_lock: - generation_kwargs = { - "input_ids": inputs.input_ids, - "max_new_tokens": max_new_tokens, - - # 🔑 sampling safety - "do_sample": True, - "temperature": max(self.temperature, 0.1), - "top_p": 0.9, - "top_k": 50, - - # tokens - "pad_token_id": self.tokenizer.eos_token_id, - "eos_token_id": self.tokenizer.eos_token_id, - - # streaming - "streamer": streamer, - } - self.model.generate(**generation_kwargs) - - thread = threading.Thread(target=run_generation, daemon=True) - thread.start() - + model = None + try: + with audio_pipeline_lock: + model = self._load_model() + model.generate( + input_ids=inputs.input_ids, + max_new_tokens=max_new_tokens, + + # sampling + do_sample=True, + temperature=max(self.temperature, 0.1), + top_p=0.9, + top_k=50, + + # tokens + pad_token_id=self.tokenizer.eos_token_id, + eos_token_id=self.tokenizer.eos_token_id, + + # streaming + streamer=streamer, + ) + + except Exception: + logger.error( + "Exception occurred in OV streaming generation", + exc_info=True, + ) + if hasattr(streamer, "_queue"): + streamer._queue.put( + "[ERROR]: Summary generation failed due to resource constraints." + ) + + finally: + if model is not None: + self._destroy_model(model) + streamer.end() + + threading.Thread(target=run_generation, daemon=True).start() return streamer + else: - with audio_pipeline_lock: - generation_kwargs = { - "input_ids": inputs.input_ids, - "max_new_tokens": max_new_tokens, - - # 🔑 sampling safety - "do_sample": True, - "temperature": max(self.temperature, 0.1), - "top_p": 0.9, - "top_k": 50, - - # tokens - "pad_token_id": self.tokenizer.eos_token_id, - "eos_token_id": self.tokenizer.eos_token_id, - } - return self.model.generate(**generation_kwargs) + model = None + try: + with audio_pipeline_lock: + model = self._load_model() + return model.generate( + input_ids=inputs.input_ids, + max_new_tokens=max_new_tokens, + + do_sample=True, + temperature=max(self.temperature, 0.1), + top_p=0.9, + top_k=50, + + pad_token_id=self.tokenizer.eos_token_id, + eos_token_id=self.tokenizer.eos_token_id, + ) + finally: + if model is not None: + self._destroy_model(model) diff --git a/education-ai-suite/smart-classroom/components/llm/openvino_genai/summarizer.py b/education-ai-suite/smart-classroom/components/llm/openvino_genai/summarizer.py index f285a386d6..a95848ac13 100644 --- a/education-ai-suite/smart-classroom/components/llm/openvino_genai/summarizer.py +++ b/education-ai-suite/smart-classroom/components/llm/openvino_genai/summarizer.py @@ -1,7 +1,7 @@ from components.llm.base_summarizer import BaseSummarizer import openvino_genai as ov_genai from transformers import AutoTokenizer -import logging, threading +import logging, threading, gc from utils import ensure_model from utils.config_loader import config from utils.ov_genai_util import YieldingTextStreamer @@ -15,21 +15,27 @@ def __init__(self, model_name, device, temperature=0.7, revision=None): self.temperature = temperature logger.info(f"Loading Model: model name={self.model_name}, model path={ensure_model.get_model_path()}, device={self.device}") self.tokenizer = AutoTokenizer.from_pretrained(ensure_model.get_model_path()) - self.model = ov_genai.LLMPipeline(ensure_model.get_model_path(), device=device) def generate(self, prompt, stream: bool = True): if stream: streamer = YieldingTextStreamer(self.tokenizer) def run_generation(): + model = None try: - audio_pipeline_lock.acquire() - self.model.generate( - prompt, - streamer=streamer, - max_new_tokens=config.models.summarizer.max_new_tokens, - temperature=self.temperature, - ) + with audio_pipeline_lock: + model = self._load_model() + model.generate( + prompt, + streamer=streamer, + max_new_tokens=config.models.summarizer.max_new_tokens, + temperature=self.temperature, + do_sample=False, + ) + cfg = model.get_generation_config() + for attr in dir(cfg): + if not attr.startswith("_"): + logger.info(f" {attr}: {getattr(cfg, attr)}") except Exception as e: error_msg = "Summary generation failed. Please ensure sufficient free resources are available to run this process." @@ -38,15 +44,35 @@ def run_generation(): error_msg = "Summary generation failed. Insufficient GPU resources available to run this process." streamer._queue.put(f"[ERROR]: {error_msg}") finally: - audio_pipeline_lock.release() + if model is not None: + self._destroy_model(model) streamer.end() threading.Thread(target=run_generation, daemon=True).start() return streamer else: - with audio_pipeline_lock: - return self.model.generate( + model = None + try: + with audio_pipeline_lock: + model = self._load_model() + return model.generate( prompt, max_new_tokens=config.models.summarizer.max_new_tokens, temperature=self.temperature, + do_sample=False, ) + finally: + if model is not None: + self._destroy_model(model) + + def _load_model(self): + logger.info("Loading model instance...") + return ov_genai.LLMPipeline(ensure_model.get_model_path(), device=self.device) + + def _destroy_model(self, model): + try: + del model + gc.collect() + logger.info("Model instance destroyed and memory reclaimed") + except Exception as e: + logger.warning(f"Failed to fully destroy model: {e}") diff --git a/education-ai-suite/smart-classroom/components/summarizer_component.py b/education-ai-suite/smart-classroom/components/summarizer_component.py index 407bb07ea9..d0da86deb8 100644 --- a/education-ai-suite/smart-classroom/components/summarizer_component.py +++ b/education-ai-suite/smart-classroom/components/summarizer_component.py @@ -23,25 +23,24 @@ def __init__(self, session_id, provider, model_name, device, temperature=0.7, mo provider = provider.lower() cfg = (provider, model_name, device) - # Reload only if config changed - if SummarizerComponent._model is None or SummarizerComponent._config != cfg: - if provider == "openvino": - SummarizerComponent._model = OvSummarizer( - model_name=model_name, - device=device, - temperature=temperature, - revision=None - ) - elif provider == "ipex": - SummarizerComponent._model = IpexSummarizer( - model_name=model_name, - device=device.lower(), - temperature=temperature - ) - else: - raise ValueError(f"Unsupported summarizer provider: {provider}") - - SummarizerComponent._config = cfg + + if provider == "openvino": + SummarizerComponent._model = OvSummarizer( + model_name=model_name, + device=device, + temperature=temperature, + revision=None + ) + elif provider == "ipex": + SummarizerComponent._model = IpexSummarizer( + model_name=model_name, + device=device.lower(), + temperature=temperature + ) + else: + raise ValueError(f"Unsupported summarizer provider: {provider}") + + SummarizerComponent._config = cfg self.summarizer = SummarizerComponent._model self.model_name = model_name diff --git a/education-ai-suite/smart-classroom/config.yaml b/education-ai-suite/smart-classroom/config.yaml index 2b682f63ef..8ce13b14d0 100644 --- a/education-ai-suite/smart-classroom/config.yaml +++ b/education-ai-suite/smart-classroom/config.yaml @@ -15,7 +15,7 @@ models: name: whisper-small # can be (whisper-base, whisper-small etc) or paraformer-zh device: CPU # CPU Recommended temperature: 0.0 - diarization: True + diarization: False hf_token: None # needed only if diarization=true models_base_path: "models" threads_limit: Null # applied only if > 0 (else defaults are used); value can be tuned based on CPU specifications diff --git a/education-ai-suite/smart-classroom/pipeline.py b/education-ai-suite/smart-classroom/pipeline.py index 88c5de22e4..59bf3bb796 100644 --- a/education-ai-suite/smart-classroom/pipeline.py +++ b/education-ai-suite/smart-classroom/pipeline.py @@ -193,7 +193,7 @@ def run_content_segmentation(self): self.session_id ) - transcription_path = os.path.join(session_dir, "transcription.txt") + transcription_path = os.path.join(session_dir, "content_segmentation_transcription.txt") session_state = SessionState.get_session_state(self.session_id) # VALIDATION: Check media duration match before processing