22# SPDX-License-Identifier: Apache-2.0
33
44import base64
5+ import multiprocessing
56import os
67import random
78from io import BytesIO
1213import openvino as ov
1314import torch
1415import yaml
15- from openvino_tokenizers import convert_tokenizer
16- from optimum .exporters .openvino .utils import save_preprocessors
17- from optimum .intel import (
18- OVModelForCausalLM ,
19- OVModelForFeatureExtraction ,
20- OVModelForSequenceClassification ,
21- OVModelForVisualCausalLM ,
22- )
23- from optimum .utils .save_utils import maybe_load_preprocessors
2416from PIL import Image
2517from providers .vlm_openvino_serving .utils .common import ErrorMessages , logger , settings
26- from transformers import AutoTokenizer
18+
19+
def _convert_model_worker(
    model_id: str, cache_dir: str, model_type: str, weight_format: str
):
    """
    Worker that runs in a subprocess to perform the actual model conversion.

    Running in a child process means all memory used during export and
    quantization is fully reclaimed by the OS when the process exits.

    Args:
        model_id (str): Identifier of the source model to convert.
        cache_dir (str): Directory where the converted artifacts are written.
        model_type (str): One of "embedding", "reranker", "llm", or "vlm".
        weight_format (str): Weight format passed to the LLM/VLM exporters.

    Raises:
        ValueError: If model_type is not a supported value.
    """
    # Heavy third-party imports stay inside the worker so the parent process
    # never pays their (considerable) import-time memory cost.
    from openvino_tokenizers import convert_tokenizer
    from optimum.exporters.openvino.utils import save_preprocessors
    from optimum.intel import (
        OVModelForCausalLM,
        OVModelForFeatureExtraction,
        OVModelForSequenceClassification,
        OVModelForVisualCausalLM,
    )
    from optimum.utils.save_utils import maybe_load_preprocessors
    from transformers import AutoTokenizer

    # Fail fast on an unsupported type BEFORE writing any tokenizer artifacts,
    # so cache_dir is not left in a partially-converted state that a later
    # readiness check could misinterpret.
    if model_type not in ("embedding", "reranker", "llm", "vlm"):
        raise ValueError(f"Unsupported model type: {model_type}")

    hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
    hf_tokenizer.save_pretrained(cache_dir)
    ov_tokenizer = convert_tokenizer(hf_tokenizer, add_special_tokens=False)
    ov.save_model(ov_tokenizer, f"{cache_dir}/openvino_tokenizer.xml")

    if model_type == "embedding":
        embedding_model = OVModelForFeatureExtraction.from_pretrained(
            model_id, export=True
        )
        embedding_model.save_pretrained(cache_dir)
    elif model_type == "reranker":
        reranker_model = OVModelForSequenceClassification.from_pretrained(
            model_id, export=True
        )
        reranker_model.save_pretrained(cache_dir)
    elif model_type == "llm":
        llm_model = OVModelForCausalLM.from_pretrained(
            model_id, export=True, weight_format=weight_format
        )
        llm_model.save_pretrained(cache_dir)
    else:  # "vlm" — the only remaining value after the validation above
        vlm_model = OVModelForVisualCausalLM.from_pretrained(
            model_id, export=True, weight_format=weight_format
        )
        vlm_model.save_pretrained(cache_dir)
        # VLMs additionally need their preprocessors (image processor, etc.)
        # saved alongside the exported model.
        preprocessors = maybe_load_preprocessors(model_id)
        save_preprocessors(preprocessors, vlm_model.config, cache_dir, True)
2767
2868
2969def convert_model (
@@ -32,6 +72,9 @@ def convert_model(
3272 """
3373 Converts a specified model to OpenVINO format and saves it to the cache directory.
3474
75+ The conversion runs in a subprocess so that all memory used during quantization
76+ and export is fully released when the subprocess exits.
77+
3578 Args:
3679 model_id (str): The identifier of the model to be converted.
3780 cache_dir (str): The directory where the converted model will be saved.
@@ -42,52 +85,25 @@ def convert_model(
4285
4386 Raises:
4487 ValueError: If the model_type is not one of "embedding", "reranker", "llm", or "vlm".
45-
46- Notes:
47- - If the model has already been converted and exists in the cache directory, the conversion process is skipped.
48- - The function uses the Hugging Face `AutoTokenizer` to load and save the tokenizer.
49- - The function uses OpenVINO's `convert_tokenizer` and `save_model` to convert and save the tokenizer.
50- - Depending on the model_type, the function uses different OpenVINO model classes to convert and save the model:
51- - "embedding": Uses `OVModelForFeatureExtraction`.
52- - "reranker": Uses `OVModelForSequenceClassification`.
53- - "llm": Uses `OVModelForCausalLM`.
54- - "vlm": Uses `OVModelForVisualCausalLM`.
88+ RuntimeError: If the subprocess fails during conversion.
5589 """
5690 try :
5791 logger .debug (f"cache_ddir: { cache_dir } " )
5892 if is_model_ready (Path (cache_dir )):
5993 logger .info (f"Optimized { model_id } exist in { cache_dir } . Skip process..." )
6094 else :
61- logger .info (f"Converting { model_id } model to OpenVINO format..." )
62- hf_tokenizer = AutoTokenizer .from_pretrained (model_id )
63- hf_tokenizer .save_pretrained (cache_dir )
64- ov_tokenizer = convert_tokenizer (hf_tokenizer , add_special_tokens = False )
65- ov .save_model (ov_tokenizer , f"{ cache_dir } /openvino_tokenizer.xml" )
66-
67- if model_type == "embedding" :
68- embedding_model = OVModelForFeatureExtraction .from_pretrained (
69- model_id , export = True
70- )
71- embedding_model .save_pretrained (cache_dir )
72- elif model_type == "reranker" :
73- reranker_model = OVModelForSequenceClassification .from_pretrained (
74- model_id , export = True
75- )
76- reranker_model .save_pretrained (cache_dir )
77- elif model_type == "llm" :
78- llm_model = OVModelForCausalLM .from_pretrained (
79- model_id , export = True , weight_format = weight_format
80- )
81- llm_model .save_pretrained (cache_dir )
82- elif model_type == "vlm" :
83- vlm_model = OVModelForVisualCausalLM .from_pretrained (
84- model_id , export = True , weight_format = weight_format
95+ logger .info (f"Converting { model_id } model to OpenVINO format in subprocess..." )
96+ process = multiprocessing .Process (
97+ target = _convert_model_worker ,
98+ args = (model_id , cache_dir , model_type , weight_format ),
99+ )
100+ process .start ()
101+ process .join ()
102+ if process .exitcode != 0 :
103+ raise RuntimeError (
104+ f"Model conversion subprocess failed with exit code { process .exitcode } "
85105 )
86- vlm_model .save_pretrained (cache_dir )
87- preprocessors = maybe_load_preprocessors (model_id )
88- save_preprocessors (preprocessors , vlm_model .config , cache_dir , True )
89- else :
90- raise ValueError (f"Unsupported model type: { model_type } " )
106+ logger .info (f"Model conversion completed. Subprocess memory released." )
91107 except Exception as e :
92108 logger .error (f"Error occurred during model conversion: { e } " )
93109 raise RuntimeError (f"Error occurred during model conversion: { e } " )
0 commit comments