Skip to content

Commit 246ba1b

Browse files
authored
Make tags optional; free model-conversion memory via subprocess (#2393)
Signed-off-by: Xiaojing Liu <xiaojing.liu@intel.com>
1 parent 3b7ca11 commit 246ba1b

File tree

2 files changed

+66
-50
lines changed
  • education-ai-suite/smart-classroom/content_search/providers

2 files changed

+66
-50
lines changed

education-ai-suite/smart-classroom/content_search/providers/video_preprocess/server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ def _summary_params_for_reuse() -> Dict[str, Any]:
440440
summary_key,
441441
summary_text=summary_text,
442442
meta={
443-
"tags": req.tags,
443+
**({"tags": req.tags} if req.tags else {}),
444444
"chunk_id": chunk_id,
445445
"chunk_index": idx,
446446
"asset_id": asset_id,

education-ai-suite/smart-classroom/content_search/providers/vlm_openvino_serving/utils/utils.py

Lines changed: 65 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# SPDX-License-Identifier: Apache-2.0
33

44
import base64
5+
import multiprocessing
56
import os
67
import random
78
from io import BytesIO
@@ -12,18 +13,57 @@
1213
import openvino as ov
1314
import torch
1415
import yaml
15-
from openvino_tokenizers import convert_tokenizer
16-
from optimum.exporters.openvino.utils import save_preprocessors
17-
from optimum.intel import (
18-
OVModelForCausalLM,
19-
OVModelForFeatureExtraction,
20-
OVModelForSequenceClassification,
21-
OVModelForVisualCausalLM,
22-
)
23-
from optimum.utils.save_utils import maybe_load_preprocessors
2416
from PIL import Image
2517
from providers.vlm_openvino_serving.utils.common import ErrorMessages, logger, settings
26-
from transformers import AutoTokenizer
18+
19+
20+
def _convert_model_worker(
    model_id: str, cache_dir: str, model_type: str, weight_format: str
):
    """
    Worker that runs in a subprocess to perform the actual model conversion.

    The heavy conversion libraries are imported here, inside the worker, so
    the parent process never loads them; when the subprocess exits, all
    memory used during conversion is fully reclaimed by the OS.

    Args:
        model_id: Identifier of the model to convert (passed to
            ``from_pretrained``).
        cache_dir: Directory where the converted tokenizer and model
            artifacts are saved.
        model_type: One of "embedding", "reranker", "llm", or "vlm".
        weight_format: Weight format forwarded to the "llm"/"vlm" exporters
            (not used for "embedding"/"reranker").

    Raises:
        ValueError: If ``model_type`` is not a supported value.
    """
    from openvino_tokenizers import convert_tokenizer
    from optimum.exporters.openvino.utils import save_preprocessors
    from optimum.intel import (
        OVModelForCausalLM,
        OVModelForFeatureExtraction,
        OVModelForSequenceClassification,
        OVModelForVisualCausalLM,
    )
    from optimum.utils.save_utils import maybe_load_preprocessors
    from transformers import AutoTokenizer

    # Fail fast on an unsupported model_type BEFORE the expensive tokenizer
    # download/conversion below, so an invalid request does not leave partial
    # artifacts in cache_dir.
    if model_type not in ("embedding", "reranker", "llm", "vlm"):
        raise ValueError(f"Unsupported model type: {model_type}")

    hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
    hf_tokenizer.save_pretrained(cache_dir)
    ov_tokenizer = convert_tokenizer(hf_tokenizer, add_special_tokens=False)
    ov.save_model(ov_tokenizer, f"{cache_dir}/openvino_tokenizer.xml")

    if model_type == "embedding":
        embedding_model = OVModelForFeatureExtraction.from_pretrained(
            model_id, export=True
        )
        embedding_model.save_pretrained(cache_dir)
    elif model_type == "reranker":
        reranker_model = OVModelForSequenceClassification.from_pretrained(
            model_id, export=True
        )
        reranker_model.save_pretrained(cache_dir)
    elif model_type == "llm":
        llm_model = OVModelForCausalLM.from_pretrained(
            model_id, export=True, weight_format=weight_format
        )
        llm_model.save_pretrained(cache_dir)
    else:  # model_type == "vlm" (validated above)
        vlm_model = OVModelForVisualCausalLM.from_pretrained(
            model_id, export=True, weight_format=weight_format
        )
        vlm_model.save_pretrained(cache_dir)
        # VLM pipelines also need their image/processor configs exported
        # alongside the model weights.
        preprocessors = maybe_load_preprocessors(model_id)
        save_preprocessors(preprocessors, vlm_model.config, cache_dir, True)
2767

2868

2969
def convert_model(
@@ -32,6 +72,9 @@ def convert_model(
3272
"""
3373
Converts a specified model to OpenVINO format and saves it to the cache directory.
3474
75+
The conversion runs in a subprocess so that all memory used during quantization
76+
and export is fully released when the subprocess exits.
77+
3578
Args:
3679
model_id (str): The identifier of the model to be converted.
3780
cache_dir (str): The directory where the converted model will be saved.
@@ -42,52 +85,25 @@ def convert_model(
4285
4386
Raises:
4487
ValueError: If the model_type is not one of "embedding", "reranker", "llm", or "vlm".
45-
46-
Notes:
47-
- If the model has already been converted and exists in the cache directory, the conversion process is skipped.
48-
- The function uses the Hugging Face `AutoTokenizer` to load and save the tokenizer.
49-
- The function uses OpenVINO's `convert_tokenizer` and `save_model` to convert and save the tokenizer.
50-
- Depending on the model_type, the function uses different OpenVINO model classes to convert and save the model:
51-
- "embedding": Uses `OVModelForFeatureExtraction`.
52-
- "reranker": Uses `OVModelForSequenceClassification`.
53-
- "llm": Uses `OVModelForCausalLM`.
54-
- "vlm": Uses `OVModelForVisualCausalLM`.
88+
RuntimeError: If the subprocess fails during conversion.
5589
"""
5690
try:
5791
logger.debug(f"cache_ddir: {cache_dir}")
5892
if is_model_ready(Path(cache_dir)):
5993
logger.info(f"Optimized {model_id} exist in {cache_dir}. Skip process...")
6094
else:
61-
logger.info(f"Converting {model_id} model to OpenVINO format...")
62-
hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
63-
hf_tokenizer.save_pretrained(cache_dir)
64-
ov_tokenizer = convert_tokenizer(hf_tokenizer, add_special_tokens=False)
65-
ov.save_model(ov_tokenizer, f"{cache_dir}/openvino_tokenizer.xml")
66-
67-
if model_type == "embedding":
68-
embedding_model = OVModelForFeatureExtraction.from_pretrained(
69-
model_id, export=True
70-
)
71-
embedding_model.save_pretrained(cache_dir)
72-
elif model_type == "reranker":
73-
reranker_model = OVModelForSequenceClassification.from_pretrained(
74-
model_id, export=True
75-
)
76-
reranker_model.save_pretrained(cache_dir)
77-
elif model_type == "llm":
78-
llm_model = OVModelForCausalLM.from_pretrained(
79-
model_id, export=True, weight_format=weight_format
80-
)
81-
llm_model.save_pretrained(cache_dir)
82-
elif model_type == "vlm":
83-
vlm_model = OVModelForVisualCausalLM.from_pretrained(
84-
model_id, export=True, weight_format=weight_format
95+
logger.info(f"Converting {model_id} model to OpenVINO format in subprocess...")
96+
process = multiprocessing.Process(
97+
target=_convert_model_worker,
98+
args=(model_id, cache_dir, model_type, weight_format),
99+
)
100+
process.start()
101+
process.join()
102+
if process.exitcode != 0:
103+
raise RuntimeError(
104+
f"Model conversion subprocess failed with exit code {process.exitcode}"
85105
)
86-
vlm_model.save_pretrained(cache_dir)
87-
preprocessors = maybe_load_preprocessors(model_id)
88-
save_preprocessors(preprocessors, vlm_model.config, cache_dir, True)
89-
else:
90-
raise ValueError(f"Unsupported model type: {model_type}")
106+
logger.info(f"Model conversion completed. Subprocess memory released.")
91107
except Exception as e:
92108
logger.error(f"Error occurred during model conversion: {e}")
93109
raise RuntimeError(f"Error occurred during model conversion: {e}")

0 commit comments

Comments
 (0)