From 8677159d0920eb81622c94392b047aac19ee32f5 Mon Sep 17 00:00:00 2001 From: rebel-thkim <157466331+rebel-thkim@users.noreply.github.com> Date: Mon, 3 Nov 2025 14:13:40 +0900 Subject: [PATCH 01/14] model: _reconstruction_if_needed & method renaming (#337) Co-authored-by: kblee --- .../autoencoders/autoencoder_kl_cosmos.py | 6 +-- .../rbln/diffusers/models/controlnet.py | 2 +- .../models/transformers/prior_transformer.py | 2 +- .../models/transformers/transformer_cosmos.py | 2 +- .../models/transformers/transformer_sd3.py | 2 +- .../models/unets/unet_2d_condition.py | 2 +- src/optimum/rbln/modeling.py | 47 +------------------ .../rbln/transformers/modeling_generic.py | 4 +- .../transformers/models/bart/modeling_bart.py | 2 +- .../transformers/models/bert/modeling_bert.py | 2 +- .../models/blip_2/modeling_blip_2.py | 6 +-- .../transformers/models/clip/modeling_clip.py | 4 +- .../models/colpali/modeling_colpali.py | 46 ++---------------- .../decoderonly/modeling_decoderonly.py | 4 +- .../models/gemma3/modeling_gemma3.py | 12 ++--- .../grounding_dino/modeling_grounding_dino.py | 12 ++--- .../models/idefics3/modeling_idefics3.py | 8 ++-- .../models/llava/modeling_llava.py | 6 +-- .../models/llava_next/modeling_llava_next.py | 6 +-- .../transformers/models/opt/modeling_opt.py | 4 +- .../models/pegasus/modeling_pegasus.py | 2 +- .../models/pixtral/modeling_pixtral.py | 2 +- .../models/qwen2_5_vl/modeling_qwen2_5_vl.py | 5 +- .../models/qwen2_vl/modeling_qwen2_vl.py | 5 +- .../models/seq2seq/modeling_seq2seq.py | 2 +- .../models/siglip/modeling_siglip.py | 2 +- .../transformers/models/swin/modeling_swin.py | 2 +- .../transformers/models/t5/modeling_t5.py | 4 +- .../modeling_time_series_transformer.py | 4 +- .../models/wav2vec2/modeling_wav2vec2.py | 2 +- .../models/whisper/modeling_whisper.py | 4 +- 31 files changed, 62 insertions(+), 151 deletions(-) diff --git a/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py b/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py index d765befba..d76f69a37 100644 --- a/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +++ b/src/optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py @@ -68,7 +68,7 @@ def __post_init__(self, **kwargs): self.image_size = self.rbln_config.image_size @classmethod - def wrap_model_if_needed( + def _wrap_model_if_needed( cls, model: torch.nn.Module, rbln_config: RBLNAutoencoderKLCosmosConfig ) -> torch.nn.Module: decoder_model = _VAECosmosDecoder(model) @@ -98,7 +98,7 @@ def replaced_forward(self, hidden_states: torch.Tensor) -> torch.Tensor: compiled_models = {} if rbln_config.uses_encoder: - encoder_model, decoder_model = cls.wrap_model_if_needed(model, rbln_config) + encoder_model, decoder_model = cls._wrap_model_if_needed(model, rbln_config) enc_compiled_model = cls.compile( encoder_model, rbln_compile_config=rbln_config.compile_cfgs[0], @@ -107,7 +107,7 @@ def replaced_forward(self, hidden_states: torch.Tensor) -> torch.Tensor: ) compiled_models["encoder"] = enc_compiled_model else: - decoder_model = cls.wrap_model_if_needed(model, rbln_config) + decoder_model = cls._wrap_model_if_needed(model, rbln_config) dec_compiled_model = cls.compile( decoder_model, rbln_compile_config=rbln_config.compile_cfgs[-1], diff --git a/src/optimum/rbln/diffusers/models/controlnet.py b/src/optimum/rbln/diffusers/models/controlnet.py index b82cb2362..d71ab1da2 100644 --- a/src/optimum/rbln/diffusers/models/controlnet.py +++ b/src/optimum/rbln/diffusers/models/controlnet.py 
@@ -118,7 +118,7 @@ def __post_init__(self, **kwargs): ) @classmethod - def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: + def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: use_encoder_hidden_states = False for down_block in model.down_blocks: if use_encoder_hidden_states := getattr(down_block, "has_cross_attention", False): diff --git a/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py b/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py index 5a8199265..4a33164f8 100644 --- a/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py +++ b/src/optimum/rbln/diffusers/models/transformers/prior_transformer.py @@ -77,7 +77,7 @@ def __post_init__(self, **kwargs): self.clip_std = artifacts["clip_std"] @classmethod - def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: + def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: return _PriorTransformer(model).eval() @classmethod diff --git a/src/optimum/rbln/diffusers/models/transformers/transformer_cosmos.py b/src/optimum/rbln/diffusers/models/transformers/transformer_cosmos.py index 1a0691248..97dcc69a9 100644 --- a/src/optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +++ b/src/optimum/rbln/diffusers/models/transformers/transformer_cosmos.py @@ -185,7 +185,7 @@ def compute_embedding( ) @classmethod - def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: + def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: num_latent_frames = rbln_config.num_latent_frames latent_height = rbln_config.latent_height latent_width = rbln_config.latent_width diff --git a/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py b/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py index 7b6ed6576..5c98fdb0c 100644 --- a/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py +++ b/src/optimum/rbln/diffusers/models/transformers/transformer_sd3.py @@ -77,7 +77,7 @@ def __post_init__(self, **kwargs): super().__post_init__(**kwargs) @classmethod - def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: + def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: return SD3Transformer2DModelWrapper(model).eval() @classmethod diff --git a/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py b/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py index 0a93a500b..e288abd50 100644 --- a/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py +++ b/src/optimum/rbln/diffusers/models/unets/unet_2d_condition.py @@ -171,7 +171,7 @@ class ADDEMBEDDING: self.add_embedding = ADDEMBEDDING(LINEAR1(self.in_features)) @classmethod - def wrap_model_if_needed( + def _wrap_model_if_needed( cls, model: torch.nn.Module, rbln_config: RBLNUNet2DConditionModelConfig ) -> torch.nn.Module: if model.config.addition_embed_type == "text_time": diff --git a/src/optimum/rbln/modeling.py b/src/optimum/rbln/modeling.py index b64d72d20..09e408e4f 100644 --- a/src/optimum/rbln/modeling.py +++ b/src/optimum/rbln/modeling.py @@ -34,49 +34,6 @@ logger = get_logger(__name__) -def _get_dtype( - cls, - dtype: Optional[Union[str, torch.dtype, dict]], - config: PretrainedConfig, -) -> 
tuple[PretrainedConfig, Optional[torch.dtype], Optional[torch.dtype]]: - dtype_orig = None - - if dtype is not None: - if isinstance(dtype, str): - if dtype == "auto": - if hasattr(config, "dtype") and config.dtype is not None: - dtype = config.dtype - else: - dtype = torch.get_default_dtype() - elif hasattr(torch, dtype): - dtype = getattr(torch, dtype) - config.dtype = dtype - elif isinstance(dtype, torch.dtype): - config.dtype = dtype - elif isinstance(dtype, dict): - for key, curr_dtype in dtype.items(): - if hasattr(config, key): - value = getattr(config, key) - curr_dtype = curr_dtype if not isinstance(curr_dtype, str) else getattr(torch, curr_dtype) - value.dtype = curr_dtype - # main torch dtype for modules that aren't part of any sub-config - dtype = dtype.get("") - dtype = dtype if not isinstance(dtype, str) else getattr(torch, dtype) - config.dtype = dtype - if dtype is None: - dtype = torch.float32 - else: - raise ValueError(f"Invalid dtype: {dtype}") - - dtype_orig = cls._set_default_dtype(dtype) - else: - # Use default dtype - default_dtype = torch.get_default_dtype() - config.dtype = default_dtype - - return config, dtype, dtype_orig - - class RBLNModel(RBLNBaseModel): @classmethod def update_kwargs(cls, kwargs): @@ -97,13 +54,13 @@ def save_torch_artifacts( pass @classmethod - def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: + def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: # Wrap the model if needed. return model @classmethod def get_compiled_model(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig): - model = cls.wrap_model_if_needed(model, rbln_config) + model = cls._wrap_model_if_needed(model, rbln_config) rbln_compile_config = rbln_config.compile_cfgs[0] compiled_model = cls.compile( model, diff --git a/src/optimum/rbln/transformers/modeling_generic.py b/src/optimum/rbln/transformers/modeling_generic.py index 1df597c63..f49e89d32 100644 --- a/src/optimum/rbln/transformers/modeling_generic.py +++ b/src/optimum/rbln/transformers/modeling_generic.py @@ -59,7 +59,7 @@ class RBLNTransformerEncoder(RBLNModel): rbln_dtype = "int64" @classmethod - def wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNTransformerEncoderConfig) -> nn.Module: + def _wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNTransformerEncoderConfig) -> nn.Module: class TransformerEncoderWrapper(nn.Module): # Parameters to disable for RBLN compilation DISABLED_PARAMS = {"return_dict", "use_cache"} @@ -268,7 +268,7 @@ class RBLNModelForDepthEstimation(RBLNImageModel): auto_model_class = AutoModelForDepthEstimation @classmethod - def wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNImageModelConfig): + def _wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNImageModelConfig): class ImageModelWrapper(nn.Module): def __init__(self, model: "PreTrainedModel", rbln_config: RBLNImageModelConfig): super().__init__() diff --git a/src/optimum/rbln/transformers/models/bart/modeling_bart.py b/src/optimum/rbln/transformers/models/bart/modeling_bart.py index 60db64f75..43d77dac2 100644 --- a/src/optimum/rbln/transformers/models/bart/modeling_bart.py +++ b/src/optimum/rbln/transformers/models/bart/modeling_bart.py @@ -48,7 +48,7 @@ class RBLNBartForConditionalGeneration(RBLNModelForSeq2SeqLM): support_causal_attn = True @classmethod - def wrap_model_if_needed(self, model: PreTrainedModel, rbln_config: 
RBLNBartForConditionalGenerationConfig): + def _wrap_model_if_needed(self, model: PreTrainedModel, rbln_config: RBLNBartForConditionalGenerationConfig): return BartWrapper( model, enc_max_seq_len=rbln_config.enc_max_seq_len, use_attention_mask=rbln_config.use_attention_mask ) diff --git a/src/optimum/rbln/transformers/models/bert/modeling_bert.py b/src/optimum/rbln/transformers/models/bert/modeling_bert.py index 547f0146a..e3745c2ad 100644 --- a/src/optimum/rbln/transformers/models/bert/modeling_bert.py +++ b/src/optimum/rbln/transformers/models/bert/modeling_bert.py @@ -35,7 +35,7 @@ class RBLNBertModel(RBLNTransformerEncoderForFeatureExtraction): rbln_model_input_names = ["input_ids", "attention_mask"] @classmethod - def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNBertModelConfig) -> torch.nn.Module: + def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNBertModelConfig) -> torch.nn.Module: return BertModelWrapper(model, rbln_config) diff --git a/src/optimum/rbln/transformers/models/blip_2/modeling_blip_2.py b/src/optimum/rbln/transformers/models/blip_2/modeling_blip_2.py index 75f40cdcd..77a856dcb 100644 --- a/src/optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +++ b/src/optimum/rbln/transformers/models/blip_2/modeling_blip_2.py @@ -71,7 +71,7 @@ def get_input_embeddings(self): return self.embeddings @classmethod - def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: + def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: class Blip2VisionModelWrapper(torch.nn.Module): def __init__(self, model: "Blip2VisionModel") -> None: super().__init__() @@ -151,7 +151,7 @@ def get_input_embeddings(self): return self.embeddings.word_embeddings @classmethod - def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: + def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: class Blip2QFormerModelWrapper(torch.nn.Module): def __init__(self, model: "Blip2QFormerModel"): super().__init__() @@ -349,7 +349,7 @@ def get_input_embeddings(self): return self.language_model.get_input_embeddings() @classmethod - def wrap_model_if_needed(cls, model, rbln_config): + def _wrap_model_if_needed(cls, model, rbln_config): return model.language_projection @classmethod diff --git a/src/optimum/rbln/transformers/models/clip/modeling_clip.py b/src/optimum/rbln/transformers/models/clip/modeling_clip.py index 2a38d8232..e05fdae92 100644 --- a/src/optimum/rbln/transformers/models/clip/modeling_clip.py +++ b/src/optimum/rbln/transformers/models/clip/modeling_clip.py @@ -54,7 +54,7 @@ class RBLNCLIPTextModel(RBLNModel): _tp_support = False @classmethod - def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPTextModelConfig) -> torch.nn.Module: + def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPTextModelConfig) -> torch.nn.Module: return _TextEncoder(model).eval() @classmethod @@ -157,7 +157,7 @@ class RBLNCLIPVisionModel(RBLNModel): _tp_support = False @classmethod - def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPVisionModelConfig) -> torch.nn.Module: + def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPVisionModelConfig) -> torch.nn.Module: wrapper_cfg = { "interpolate_pos_encoding": rbln_config.interpolate_pos_encoding, "output_hidden_states": rbln_config.output_hidden_states, 
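A minimal sketch of what the renamed hook looks like from a subclass's point of view, assuming the import path `optimum.rbln.modeling` and using illustrative names (`_MyEncoderWrapper`, `RBLNMyEncoderModel`) that are not part of this patch:

import torch

from optimum.rbln.modeling import RBLNModel  # import path assumed from the file layout shown above


class _MyEncoderWrapper(torch.nn.Module):
    # Hypothetical wrapper; real wrappers in this repo adjust inputs/outputs for compilation.
    def __init__(self, model: torch.nn.Module):
        super().__init__()
        self.model = model

    def forward(self, *args, **kwargs):
        return self.model(*args, **kwargs)


class RBLNMyEncoderModel(RBLNModel):
    @classmethod
    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config) -> torch.nn.Module:
        # Same classmethod signature as before; only the name changed from `wrap_model_if_needed`.
        return _MyEncoderWrapper(model).eval()
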
diff --git a/src/optimum/rbln/transformers/models/colpali/modeling_colpali.py b/src/optimum/rbln/transformers/models/colpali/modeling_colpali.py index cf3c1a8e9..e3a63fd4b 100644 --- a/src/optimum/rbln/transformers/models/colpali/modeling_colpali.py +++ b/src/optimum/rbln/transformers/models/colpali/modeling_colpali.py @@ -182,7 +182,7 @@ def _create_multi_modal_projector(self): return multi_modal_projector @classmethod - def wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig): + def _wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig): return RBLNColPaliForRetrievalWrapper( causal_lm=model.vlm, embedding_proj_layer=model.embedding_proj_layer, @@ -236,49 +236,11 @@ def _update_rbln_config( return rbln_config @classmethod - def from_model( - cls, - model: "PreTrainedModel", - config: Optional[PretrainedConfig] = None, - rbln_config: Optional[Union[RBLNModelConfig, Dict]] = None, - model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, - subfolder: str = "", - **kwargs: Any, - ) -> "RBLNModel": - """ - Converts and compiles a pre-trained HuggingFace library model into a RBLN model. - This method performs the actual model conversion and compilation process. - - Args: - model (PreTrainedModel): The PyTorch model to be compiled. - The object must be an instance of the HuggingFace transformers PreTrainedModel class. - config (Optional[PretrainedConfig]): The configuration object associated with the model. - rbln_config (Optional[Union[RBLNModelConfig, Dict]]): Configuration for RBLN model compilation and runtime. - This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models). - For detailed configuration options, see the specific model's configuration class documentation. - kwargs: Additional keyword arguments. Arguments with the prefix `rbln_` are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library. - - The method performs the following steps: - - 1. Compiles the PyTorch model into an optimized RBLN graph - 2. Configures the model for the specified NPU device - 3. Creates the necessary runtime objects if requested - 4. Saves the compiled model and configurations - - Returns: - (RBLNModel): A RBLN model instance ready for inference on RBLN NPU devices. 
- """ - if not hasattr(model, "vision_tower"): + def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"): + if hasattr(model, "vlm"): model.vision_tower = model.vlm.vision_tower del model.vlm.model.vision_tower - model = super().from_model(model, config, rbln_config, model_save_dir, subfolder, **kwargs) - return model - - @classmethod - def get_pytorch_model(cls, *args, **kwargs): - model = super().get_pytorch_model(*args, **kwargs) - model.vision_tower = model.vlm.vision_tower - del model.vlm.model.vision_tower + return model return model def get_image_features(self, pixel_values: torch.Tensor): diff --git a/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py b/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py index e8cb0801d..21681d5a5 100644 --- a/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +++ b/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py @@ -216,7 +216,7 @@ def get_kvcache_num_blocks(self) -> int: return self.rbln_config.kvcache_num_blocks @classmethod - def wrap_model_if_needed(cls, model: PreTrainedModel, rbln_config: "RBLNDecoderOnlyModelConfig"): + def _wrap_model_if_needed(cls, model: PreTrainedModel, rbln_config: "RBLNDecoderOnlyModelConfig"): return cls._decoder_wrapper_cls(model, rbln_config, cls._use_rotary_emb).eval() @classmethod @@ -272,7 +272,7 @@ def _get_compile_context( @classmethod @torch.inference_mode() def get_compiled_model(cls, model: PreTrainedModel, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig): - wrapped_model = cls.wrap_model_if_needed(model, rbln_config) + wrapped_model = cls._wrap_model_if_needed(model, rbln_config) prefill_compile_config = rbln_config.compile_cfgs[0] # Here we use meta tensor, for the memory efficiency. 
diff --git a/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py b/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py index 4197fce07..63111ed29 100644 --- a/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +++ b/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py @@ -97,11 +97,11 @@ def redirect(func): def can_generate(self): return True - + + @classmethod - def get_pytorch_model(cls, *args, **kwargs): - model = super().get_pytorch_model(*args, **kwargs) - + def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"): + with no_init_weights(): model_cls_name = model.model.language_model.__class__.__name__ causal_model_cls_name = model_cls_name.replace("TextModel", "ForCausalLM") @@ -135,7 +135,7 @@ def get_input_embeddings(self): return self.language_model.get_input_embeddings() @classmethod - def wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig): + def _wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig): return model.multi_modal_projector @classmethod @@ -482,7 +482,7 @@ def _update_rbln_config( @classmethod @torch.inference_mode() def get_compiled_model(cls, model: "PreTrainedModel", rbln_config: RBLNGemma3ForCausalLMConfig): - wrapped_model = cls.wrap_model_if_needed(model, rbln_config) + wrapped_model = cls._wrap_model_if_needed(model, rbln_config) rbln_compile_configs = rbln_config.compile_cfgs prefill_compile_config = rbln_compile_configs[0] diff --git a/src/optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py b/src/optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py index eaca58bf3..b9dddc75f 100644 --- a/src/optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +++ b/src/optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py @@ -204,10 +204,10 @@ def save_torch_artifacts( save_dict["bbox_embed"] = model.bbox_embed.state_dict() torch.save(save_dict, save_dir_path / subfolder / "torch_artifacts.pth") - + + @classmethod - def get_pytorch_model(cls, *args, **kwargs): - model = super().get_pytorch_model(*args, **kwargs) + def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"): model.encoder = model.model.encoder model.decoder = model.model.decoder model.text_backbone = model.model.text_backbone @@ -217,7 +217,7 @@ def get_pytorch_model(cls, *args, **kwargs): return model @classmethod - def wrap_model_if_needed( + def _wrap_model_if_needed( cls, model: torch.nn.Module, rbln_config: RBLNGroundingDinoForObjectDetectionConfig ) -> torch.nn.Module: return model.model.text_projection @@ -663,7 +663,7 @@ def __post_init__(self, **kwargs): self.encoder_runtime = RBLNPytorchRuntime(self.model[0]) @classmethod - def wrap_model_if_needed( + def _wrap_model_if_needed( cls, model: torch.nn.Module, rbln_config: RBLNGroundingDinoForObjectDetectionConfig ) -> torch.nn.Module: model = _GroundingDinoEncoder(model, rbln_config).eval() @@ -861,7 +861,7 @@ def __post_init__(self, **kwargs): self.decoder_runtime = RBLNPytorchRuntime(self.model[0]) @classmethod - def wrap_model_if_needed( + def _wrap_model_if_needed( cls, model: torch.nn.Module, rbln_config: RBLNGroundingDinoForObjectDetectionConfig ) -> torch.nn.Module: return _GroundingDinoDecoder(model, rbln_config).eval() diff --git a/src/optimum/rbln/transformers/models/idefics3/modeling_idefics3.py b/src/optimum/rbln/transformers/models/idefics3/modeling_idefics3.py index e82b4a6b0..1b6ee0b81 100644 --- 
a/src/optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +++ b/src/optimum/rbln/transformers/models/idefics3/modeling_idefics3.py @@ -110,7 +110,7 @@ def get_input_embeddings(self): return self.embeddings @classmethod - def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: + def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module: class Idefics3VisionTransformerWrapper(torch.nn.Module): def __init__(self, model: "Idefics3VisionTransformer"): super().__init__() @@ -240,9 +240,7 @@ def can_generate(self): return True @classmethod - def get_pytorch_model(cls, *args, **kwargs): - model = super().get_pytorch_model(*args, **kwargs) - + def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"): with no_init_weights(): model_cls_name = model.model.text_model.__class__.__name__ causal_model_cls_name = model_cls_name.replace("Model", "ForCausalLM") @@ -271,7 +269,7 @@ def get_input_embeddings(self): return self.text_model.get_input_embeddings() @classmethod - def wrap_model_if_needed(cls, model, rbln_config): + def _wrap_model_if_needed(cls, model, rbln_config): return model.model.connector @classmethod diff --git a/src/optimum/rbln/transformers/models/llava/modeling_llava.py b/src/optimum/rbln/transformers/models/llava/modeling_llava.py index 6d4e8e0cf..0976c86f6 100644 --- a/src/optimum/rbln/transformers/models/llava/modeling_llava.py +++ b/src/optimum/rbln/transformers/models/llava/modeling_llava.py @@ -175,9 +175,7 @@ def can_generate(self): return True @classmethod - def get_pytorch_model(cls, *args, **kwargs): - model = super().get_pytorch_model(*args, **kwargs) - + def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"): with no_init_weights(): model_cls_name = model.model.language_model.__class__.__name__ causal_model_cls_name = model_cls_name.replace("Model", "ForCausalLM") @@ -208,7 +206,7 @@ def get_input_embeddings(self): return self.language_model.get_input_embeddings() @classmethod - def wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig): + def _wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig): return model.multi_modal_projector @classmethod diff --git a/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py b/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py index 01f303cef..c2fcd67fc 100644 --- a/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +++ b/src/optimum/rbln/transformers/models/llava_next/modeling_llava_next.py @@ -139,9 +139,7 @@ def can_generate(self): return True @classmethod - def get_pytorch_model(cls, *args, **kwargs): - model = super().get_pytorch_model(*args, **kwargs) - + def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"): with no_init_weights(): model_cls_name = model.model.language_model.__class__.__name__ causal_model_cls_name = model_cls_name.replace("Model", "ForCausalLM") @@ -192,7 +190,7 @@ def get_input_embeddings(self): return self.language_model.get_input_embeddings() @classmethod - def wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig): + def _wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig): return model.multi_modal_projector @classmethod diff --git a/src/optimum/rbln/transformers/models/opt/modeling_opt.py b/src/optimum/rbln/transformers/models/opt/modeling_opt.py index caa4462a4..843cd15ff 100644 --- 
a/src/optimum/rbln/transformers/models/opt/modeling_opt.py +++ b/src/optimum/rbln/transformers/models/opt/modeling_opt.py @@ -69,7 +69,7 @@ def modify_opt_decoder_layer(layer): return layer @classmethod - def wrap_model_if_needed(cls, model: PreTrainedModel, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig): + def _wrap_model_if_needed(cls, model: PreTrainedModel, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig): for i in range(len(model.model.decoder.layers)): model.model.decoder.layers[i] = cls.modify_opt_decoder_layer(model.model.decoder.layers[i]) @@ -95,7 +95,7 @@ def modify_opt_decoder_layer(layer): return layer @classmethod - def wrap_model_if_needed(cls, model: PreTrainedModel, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig): + def _wrap_model_if_needed(cls, model: PreTrainedModel, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig): for i in range(len(model.decoder.layers)): model.decoder.layers[i] = cls.modify_opt_decoder_layer(model.decoder.layers[i]) diff --git a/src/optimum/rbln/transformers/models/pegasus/modeling_pegasus.py b/src/optimum/rbln/transformers/models/pegasus/modeling_pegasus.py index dcd996155..992da2373 100644 --- a/src/optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +++ b/src/optimum/rbln/transformers/models/pegasus/modeling_pegasus.py @@ -54,7 +54,7 @@ class RBLNPegasusForConditionalGeneration(RBLNModelForSeq2SeqLM): support_causal_attn = True @classmethod - def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNPegasusForConditionalGenerationConfig): + def _wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNPegasusForConditionalGenerationConfig): return PegasusWrapper( model, enc_max_seq_len=rbln_config.enc_max_seq_len, use_attention_mask=rbln_config.use_attention_mask ) diff --git a/src/optimum/rbln/transformers/models/pixtral/modeling_pixtral.py b/src/optimum/rbln/transformers/models/pixtral/modeling_pixtral.py index be3728272..258aa4d0d 100644 --- a/src/optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +++ b/src/optimum/rbln/transformers/models/pixtral/modeling_pixtral.py @@ -229,7 +229,7 @@ def save_torch_artifacts( torch.save(save_dict, save_dir_path / subfolder / "torch_artifacts.pth") @classmethod - def wrap_model_if_needed( + def _wrap_model_if_needed( cls, model: torch.nn.Module, rbln_config: RBLNPixtralVisionModelConfig ) -> torch.nn.Module: wrapper_cfg = { diff --git a/src/optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py b/src/optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py index 9dc52d3ed..8641c63ed 100644 --- a/src/optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +++ b/src/optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py @@ -88,7 +88,7 @@ def save_torch_artifacts( torch.save(save_dict, save_dir_path / subfolder / "torch_artifacts.pth") @classmethod - def wrap_model_if_needed( + def _wrap_model_if_needed( cls, model: "PreTrainedModel", rbln_config: RBLNQwen2_5_VisionTransformerPretrainedModelConfig ): return Qwen2_5_VisionTransformerWrapper(model).eval() @@ -393,8 +393,7 @@ def can_generate(self): return True @classmethod - def get_pytorch_model(cls, *args, **kwargs): - model = super().get_pytorch_model(*args, **kwargs) + def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"): model.model.lm_head = model.lm_head model.lm_head = None del model.lm_head diff --git a/src/optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py b/src/optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py 
index dfa57833b..764f132ca 100644 --- a/src/optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +++ b/src/optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py @@ -89,7 +89,7 @@ def save_torch_artifacts( torch.save(save_dict, save_dir_path / subfolder / "torch_artifacts.pth") @classmethod - def wrap_model_if_needed( + def _wrap_model_if_needed( cls, model: "PreTrainedModel", rbln_config: RBLNQwen2VisionTransformerPretrainedModelConfig ): return Qwen2VisionTransformerWrapper(model).eval() @@ -282,8 +282,7 @@ def can_generate(self): return True @classmethod - def get_pytorch_model(cls, *args, **kwargs): - model = super().get_pytorch_model(*args, **kwargs) + def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"): model.model.lm_head = model.lm_head model.lm_head = None del model.lm_head diff --git a/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py b/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py index 9d516dd0c..a5c5391ad 100644 --- a/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +++ b/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py @@ -140,7 +140,7 @@ def __post_init__(self, **kwargs): @classmethod @torch.inference_mode() def get_compiled_model(cls, model: PreTrainedModel, rbln_config: RBLNModelForSeq2SeqLMConfig): - wrapped_model = cls.wrap_model_if_needed(model, rbln_config) + wrapped_model = cls._wrap_model_if_needed(model, rbln_config) enc_compile_config = rbln_config.compile_cfgs[0] dec_compile_config = rbln_config.compile_cfgs[1] diff --git a/src/optimum/rbln/transformers/models/siglip/modeling_siglip.py b/src/optimum/rbln/transformers/models/siglip/modeling_siglip.py index f1bdca1dc..476840f34 100644 --- a/src/optimum/rbln/transformers/models/siglip/modeling_siglip.py +++ b/src/optimum/rbln/transformers/models/siglip/modeling_siglip.py @@ -66,7 +66,7 @@ class RBLNSiglipVisionModel(RBLNModel): _tp_support = False @classmethod - def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNSiglipVisionModelConfig) -> torch.nn.Module: + def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNSiglipVisionModelConfig) -> torch.nn.Module: wrapper_cfg = { "interpolate_pos_encoding": rbln_config.interpolate_pos_encoding, "output_hidden_states": rbln_config.output_hidden_states, diff --git a/src/optimum/rbln/transformers/models/swin/modeling_swin.py b/src/optimum/rbln/transformers/models/swin/modeling_swin.py index 20d66c404..db793e7b8 100644 --- a/src/optimum/rbln/transformers/models/swin/modeling_swin.py +++ b/src/optimum/rbln/transformers/models/swin/modeling_swin.py @@ -203,7 +203,7 @@ def forward( class RBLNSwinBackbone(RBLNModel): @classmethod - def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNSwinBackboneConfig) -> torch.nn.Module: + def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNSwinBackboneConfig) -> torch.nn.Module: for layer in model.encoder.layers: for block in layer.blocks: block.get_attn_mask = types.MethodType(get_attn_mask, block) diff --git a/src/optimum/rbln/transformers/models/t5/modeling_t5.py b/src/optimum/rbln/transformers/models/t5/modeling_t5.py index dac5a37cd..76baf9474 100644 --- a/src/optimum/rbln/transformers/models/t5/modeling_t5.py +++ b/src/optimum/rbln/transformers/models/t5/modeling_t5.py @@ -68,7 +68,7 @@ class RBLNT5EncoderModel(RBLNTransformerEncoderForFeatureExtraction): output_class = BaseModelOutputWithPastAndCrossAttentions @classmethod - def wrap_model_if_needed(self, model: 
"PreTrainedModel", rbln_config: RBLNT5EncoderModelConfig): + def _wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNT5EncoderModelConfig): return T5EncoderWrapper(model) @classmethod @@ -113,7 +113,7 @@ class RBLNT5ForConditionalGeneration(RBLNModelForSeq2SeqLM): support_causal_attn = False @classmethod - def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNT5ForConditionalGenerationConfig): + def _wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNT5ForConditionalGenerationConfig): return T5Wrapper( model, enc_max_seq_len=rbln_config.enc_max_seq_len, dec_max_seq_len=rbln_config.dec_max_seq_len ) diff --git a/src/optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py b/src/optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py index 63f83786b..9e4a1211e 100644 --- a/src/optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +++ b/src/optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py @@ -153,7 +153,7 @@ def redirect(func): return redirect(val) @classmethod - def wrap_model_if_needed( + def _wrap_model_if_needed( self, model: "PreTrainedModel", rbln_config: RBLNTimeSeriesTransformerForPredictionConfig ): return TimeSeriesTransformersWrapper(model, rbln_config.num_parallel_samples) @@ -161,7 +161,7 @@ def wrap_model_if_needed( @classmethod @torch.inference_mode() def get_compiled_model(cls, model, rbln_config: RBLNTimeSeriesTransformerForPredictionConfig): - wrapped_model = cls.wrap_model_if_needed(model, rbln_config) + wrapped_model = cls._wrap_model_if_needed(model, rbln_config) enc_compile_config = rbln_config.compile_cfgs[0] dec_compile_config = rbln_config.compile_cfgs[1] diff --git a/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py index 509e33982..437852632 100644 --- a/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +++ b/src/optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py @@ -48,5 +48,5 @@ class RBLNWav2Vec2ForCTC(RBLNModelForMaskedLM): rbln_dtype = "float32" @classmethod - def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNWav2Vec2ForCTCConfig) -> torch.nn.Module: + def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNWav2Vec2ForCTCConfig) -> torch.nn.Module: return _Wav2Vec2(model).eval() diff --git a/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py b/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py index a5add32e5..e49feeb25 100644 --- a/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py +++ b/src/optimum/rbln/transformers/models/whisper/modeling_whisper.py @@ -203,7 +203,7 @@ def _reorder_cache(self, past_key_values, beam_idx): raise NotImplementedError @classmethod - def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNWhisperForConditionalGenerationConfig): + def _wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNWhisperForConditionalGenerationConfig): return WhisperWrapper( model, use_attention_mask=rbln_config.use_attention_mask, @@ -213,7 +213,7 @@ def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNWhispe @classmethod @torch.inference_mode() def get_compiled_model(cls, model, rbln_config: RBLNWhisperForConditionalGenerationConfig): - wrapped_model = cls.wrap_model_if_needed(model, rbln_config) + 
wrapped_model = cls._wrap_model_if_needed(model, rbln_config) enc_compile_config = rbln_config.compile_cfgs[0] dec_compile_config = rbln_config.compile_cfgs[1] From eb989b25e3f6b9136b7c01ac3ffa6f55b219c976 Mon Sep 17 00:00:00 2001 From: rebel-jongho Date: Mon, 3 Nov 2025 15:38:25 +0900 Subject: [PATCH 02/14] Introduce dev branch --- .github/pull_request_template.md | 20 +++++++++++--------- .github/scripts/validate_pr_checklist.py | 11 +++++++++++ .github/workflows/pr-title-check.yaml | 1 + .github/workflows/pr_checklist_validator.yml | 2 +- .github/workflows/rbln_trigger_on_pr.yaml | 1 + 5 files changed, 25 insertions(+), 10 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 8d6cdc808..f48b789ca 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,7 +1,14 @@ # Pull Request Description +> **⚠️ Important: Branch Target** +> - **New features, enhancements, and non-critical fixes**: Merge to `dev` branch +> - **Critical hotfixes only**: Merge to `main` branch (must also merge to `dev`) +> +> Please ensure you've selected the correct base branch before submitting! + ## Type of Change +- [ ] Release (dev → main merge for production release) - [ ] New Model Support - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) @@ -18,14 +25,6 @@ ## Motivation and Context -## Checklist - -- [ ] I have performed a self-review of my own code -- [ ] I have added tests that prove my fix is effective or that my feature works (If needed) - -## Additional Information - - ## Related Issues @@ -50,9 +49,12 @@ type(optional scope): description - `refactor`: Re-arrange class architecture, or more. - ex) Refactor Seq2Seq - Documentation - - `doc`: Update docstring only + - `doc`: Update docstring only - Library Dependencies - `dependency`: Update requirements, something like that. + - Release + - `release`: Merging dev to main for production release + - ex) Release v1.2.0 - Other - `other`: None of above. - ex) ci update diff --git a/.github/scripts/validate_pr_checklist.py b/.github/scripts/validate_pr_checklist.py index 2f59b1633..110384cad 100644 --- a/.github/scripts/validate_pr_checklist.py +++ b/.github/scripts/validate_pr_checklist.py @@ -35,6 +35,11 @@ def read_checklist_from_template(): return checklist_items +def is_release_pr(pr): + """Check if this is a release PR (dev -> main)""" + return pr.base.ref == "main" and pr.head.ref == "dev" + + def validate_checklist(body, expected_items): for item in expected_items: if f"- [x] {item}" not in body: @@ -56,6 +61,12 @@ def main(): repo = g.get_repo(repo_name) pr = repo.get_pull(int(pr_number)) + # Skip checklist validation for release PRs (dev -> main) + if is_release_pr(pr): + print("This is a release PR (dev -> main). 
Skipping checklist validation.") + print("Release PRs follow a different approval process.") + sys.exit(0) + expected_items = read_checklist_from_template() if not expected_items: diff --git a/.github/workflows/pr-title-check.yaml b/.github/workflows/pr-title-check.yaml index 3f67d2f6b..ddb00650e 100644 --- a/.github/workflows/pr-title-check.yaml +++ b/.github/workflows/pr-title-check.yaml @@ -26,6 +26,7 @@ jobs: id: lint_pr_title with: types: | + release model performance refactor diff --git a/.github/workflows/pr_checklist_validator.yml b/.github/workflows/pr_checklist_validator.yml index 547c88038..80903f6f5 100644 --- a/.github/workflows/pr_checklist_validator.yml +++ b/.github/workflows/pr_checklist_validator.yml @@ -2,7 +2,7 @@ name: PR Checklist Validator on: pull_request: - branches: [main] + branches: [main, dev] paths: - "src/**/*.py" - "tests/**/*.py" diff --git a/.github/workflows/rbln_trigger_on_pr.yaml b/.github/workflows/rbln_trigger_on_pr.yaml index 46654d088..7592dd594 100644 --- a/.github/workflows/rbln_trigger_on_pr.yaml +++ b/.github/workflows/rbln_trigger_on_pr.yaml @@ -4,6 +4,7 @@ on: pull_request: branches: - main + - dev env: REBEL_PYPI_ENDPOINT: ${{ vars.REBEL_PYPI_INTERNAL_ENDPOINT }} From e435ebdc5888c99ff84460ba3e8f12fd2b592338 Mon Sep 17 00:00:00 2001 From: rebel-jongho <112920593+rebel-jongho@users.noreply.github.com> Date: Mon, 3 Nov 2025 16:04:22 +0900 Subject: [PATCH 03/14] other: apply formatter for #337 (#343) --- .../rbln/transformers/models/colpali/modeling_colpali.py | 3 +-- .../rbln/transformers/models/gemma3/modeling_gemma3.py | 4 +--- .../models/grounding_dino/modeling_grounding_dino.py | 3 +-- .../rbln/transformers/models/siglip/modeling_siglip.py | 4 +++- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/optimum/rbln/transformers/models/colpali/modeling_colpali.py b/src/optimum/rbln/transformers/models/colpali/modeling_colpali.py index e3a63fd4b..44d285e16 100644 --- a/src/optimum/rbln/transformers/models/colpali/modeling_colpali.py +++ b/src/optimum/rbln/transformers/models/colpali/modeling_colpali.py @@ -14,8 +14,7 @@ import bisect from pathlib import Path -from tempfile import TemporaryDirectory -from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union +from typing import TYPE_CHECKING, Optional, Tuple, Union import torch from transformers import PretrainedConfig, PreTrainedModel diff --git a/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py b/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py index 63111ed29..176ab9989 100644 --- a/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +++ b/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py @@ -97,11 +97,9 @@ def redirect(func): def can_generate(self): return True - - + @classmethod def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"): - with no_init_weights(): model_cls_name = model.model.language_model.__class__.__name__ causal_model_cls_name = model_cls_name.replace("TextModel", "ForCausalLM") diff --git a/src/optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py b/src/optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py index b9dddc75f..e4fcd76a8 100644 --- a/src/optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +++ b/src/optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py @@ -204,8 +204,7 @@ def save_torch_artifacts( save_dict["bbox_embed"] = model.bbox_embed.state_dict() torch.save(save_dict, save_dir_path / 
subfolder / "torch_artifacts.pth") - - + @classmethod def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"): model.encoder = model.model.encoder diff --git a/src/optimum/rbln/transformers/models/siglip/modeling_siglip.py b/src/optimum/rbln/transformers/models/siglip/modeling_siglip.py index 476840f34..319ecee44 100644 --- a/src/optimum/rbln/transformers/models/siglip/modeling_siglip.py +++ b/src/optimum/rbln/transformers/models/siglip/modeling_siglip.py @@ -66,7 +66,9 @@ class RBLNSiglipVisionModel(RBLNModel): _tp_support = False @classmethod - def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNSiglipVisionModelConfig) -> torch.nn.Module: + def _wrap_model_if_needed( + cls, model: torch.nn.Module, rbln_config: RBLNSiglipVisionModelConfig + ) -> torch.nn.Module: wrapper_cfg = { "interpolate_pos_encoding": rbln_config.interpolate_pos_encoding, "output_hidden_states": rbln_config.output_hidden_states, From ca085abb2aa972f823f68df42fe40ea6b3fb28e3 Mon Sep 17 00:00:00 2001 From: rebel-thkim <157466331+rebel-thkim@users.noreply.github.com> Date: Tue, 4 Nov 2025 13:22:34 +0900 Subject: [PATCH 04/14] dependency: full test for dependency update pr (#340) Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: rebel-kblee <119555851+rebel-kblee@users.noreply.github.com> --- .github/renovate.json | 5 +- .github/workflows/auto_dependency_bot.yml | 2 +- .github/workflows/rbln_optimum_full_test.yaml | 202 ++++++++++++++++++ .github/workflows/rbln_scheduled_test.yaml | 199 ++++------------- .github/workflows/rbln_trigger_on_pr.yaml | 49 ++++- 5 files changed, 301 insertions(+), 156 deletions(-) create mode 100644 .github/workflows/rbln_optimum_full_test.yaml diff --git a/.github/renovate.json b/.github/renovate.json index 8c874419b..cb082b360 100644 --- a/.github/renovate.json +++ b/.github/renovate.json @@ -2,6 +2,9 @@ "$schema": "https://docs.renovatebot.com/renovate-schema.json", "dependencyDashboard": true, "prHourlyLimit": 0, + "baseBranches": [ + "dev" + ], "extends": [ ":semanticCommitsDisabled" ], @@ -10,7 +13,7 @@ "rebellions-sw/optimum-rbln" ], "lockFileMaintenance": { - "enabled": true + "enabled": false }, "packageRules": [ { diff --git a/.github/workflows/auto_dependency_bot.yml b/.github/workflows/auto_dependency_bot.yml index c0a451e1e..881ff768e 100644 --- a/.github/workflows/auto_dependency_bot.yml +++ b/.github/workflows/auto_dependency_bot.yml @@ -1,7 +1,7 @@ name: Auto Dependency Bot on: schedule: - - cron: '0 0 * * *' # UTC 00:00 = KST 09:00 + - cron: '0 21 * * *' # UTC 19:00 = KST 06:00 workflow_dispatch: jobs: diff --git a/.github/workflows/rbln_optimum_full_test.yaml b/.github/workflows/rbln_optimum_full_test.yaml new file mode 100644 index 000000000..799a2dbc9 --- /dev/null +++ b/.github/workflows/rbln_optimum_full_test.yaml @@ -0,0 +1,202 @@ +name: Optimum-rbln / Full Test +on: + workflow_call: + inputs: + rebel_compiler_version: + description: "rebel_compiler version to run" + required: true + type: string + ref: + description: "ref to checkout" + required: true + type: string + +env: + HF_USER_ID: ${{ secrets.HF_USER_ID }} + HF_AUTH_TOKEN: ${{ secrets.HF_AUTH_TOKEN }} + REBEL_PYPI_ENDPOINT: ${{ vars.REBEL_PYPI_INTERNAL_ENDPOINT }} + REBEL_PYPI_USERNAME: ${{ secrets.REBEL_PYPI_USERNAME }} + REBEL_PYPI_PASSWORD: ${{ secrets.REBEL_PYPI_PASSWORD }} + +jobs: + check-compiler: + uses: ./.github/workflows/rbln_check_compiler.yaml + with: + compiler_version: ${{ inputs.rebel_compiler_version }} + secrets: inherit + + 
optimum-rbln-pytest: + uses: ./.github/workflows/rbln_optimum_pytest.yaml + with: + ref: ${{ inputs.ref }} + rebel_compiler_version: ${{ inputs.rebel_compiler_version }} + test_level: "full" + enable_hf_hub_tests: true + fail_fast: false + secrets: inherit + + get-hf-package-version: + runs-on: rebel-k8s-runner + outputs: + transformers_version: ${{ steps.transformers.outputs.version }} + diffusers_version: ${{ steps.diffusers.outputs.version }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ inputs.ref }} + - name: Get Transformers Version + id: transformers + run: | + VERSION=$(grep -m 1 '"transformers==' pyproject.toml | sed -E 's/.*transformers[<>=]{2}([^",]+).*/\1/' || echo "not_found") + echo "version=$VERSION" >> $GITHUB_OUTPUT + + - name: Get Diffusers Version + id: diffusers + run: | + VERSION=$(grep -m 1 '"diffusers==' pyproject.toml | sed -E 's/.*diffusers[<>=]{2}([^",]+).*/\1/' || echo "not_found") + echo "version=$VERSION" >> $GITHUB_OUTPUT + + + summary_and_report: + if: always() + needs: [optimum-rbln-pytest, check-compiler, get-hf-package-version] + runs-on: rebel-k8s-runner + steps: + + - name: Get failed test details + id: get_failed_tests + if: needs.optimum-rbln-pytest.result == 'failure' + run: | + # Get the workflow run jobs + JOBS=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs") + + # Extract failed pytest job names step by step for readability + FAILED_JOB_NAMES=$(echo "$JOBS" | jq -r '.jobs[] | select(.conclusion == "failure" and (.name | contains("Pytest"))) | .name') + # Remove "Pytest (...)" wrapper to get the test name + FAILED_TEST_NAMES=$(echo "$FAILED_JOB_NAMES" | sed 's/Pytest (\(.*\))/\1/') + # Join names with comma and space + FAILED_TESTS=$(echo "$FAILED_TEST_NAMES" | tr '\n' ', ' | sed 's/,$//') + + if [ -z "$FAILED_TESTS" ]; then + echo "failed_tests=Unknown" >> $GITHUB_OUTPUT + else + echo "failed_tests=$FAILED_TESTS" >> $GITHUB_OUTPUT + fi + - name: Determine test results + id: test_results + run: | + # Determine overall status + if [ "${{ needs.optimum-rbln-pytest.result }}" == "success" ]; then + echo "pytest_status=✅ Success - All tests passed" >> $GITHUB_OUTPUT + echo "pytest_emoji=✅" >> $GITHUB_OUTPUT + elif [ "${{ needs.optimum-rbln-pytest.result }}" == "failure" ]; then + FAILED="${{ steps.get_failed_tests.outputs.failed_tests }}" + if [ -n "$FAILED" ] && [ "$FAILED" != "Unknown" ]; then + echo "pytest_status=❌ Failed - Tests: \`$FAILED\`" >> $GITHUB_OUTPUT + else + echo "pytest_status=❌ Failed" >> $GITHUB_OUTPUT + fi + echo "pytest_emoji=❌" >> $GITHUB_OUTPUT + elif [ "${{ needs.optimum-rbln-pytest.result }}" == "skipped" ]; then + echo "pytest_status=⏭️ Skipped" >> $GITHUB_OUTPUT + echo "pytest_emoji=⏭️" >> $GITHUB_OUTPUT + else + echo "pytest_status=⚠️ Cancelled" >> $GITHUB_OUTPUT + echo "pytest_emoji=⚠️" >> $GITHUB_OUTPUT + fi + + # Determine compiler check status + if [ "${{ needs.check-compiler.result }}" == "success" ]; then + echo "compiler_status=✅ Available" >> $GITHUB_OUTPUT + else + echo "compiler_status=❌ Not Available" >> $GITHUB_OUTPUT + fi + + - name: Notify Slack + if: always() + run: | + # Determine overall workflow status emoji + if [ "${{ needs.optimum-rbln-pytest.result }}" == "success" ]; then + title="✅ Optimum-RBLN Scheduled Pytest Results" + elif [ "${{ needs.check-compiler.result }}" != "success" ]; then + title="⚠️ Optimum-RBLN Scheduled Pytest Results - Compiler Check Failed" 
+ else + title="❌ Optimum-RBLN Scheduled Pytest Results" + fi + + commit="*Commit*\n" + action_link="*CI Report*\n" + compiler_version="${{ inputs.rebel_compiler_version }}" + transformers_version="${{ needs.get-hf-package-version.outputs.transformers_version }}" + diffusers_version="${{ needs.get-hf-package-version.outputs.diffusers_version }}" + + payload=$(jq -n \ + --arg channel "${{ secrets.SLACK_CI_REPORTER_CHANNEL }}" \ + --arg title "$title" \ + --arg commit "$commit" \ + --arg action_link "$action_link" \ + --arg compiler_version "$compiler_version" \ + --arg transformers_version "$transformers_version" \ + --arg diffusers_version "$diffusers_version" \ + --arg compiler_status "${{ steps.test_results.outputs.compiler_status }}" \ + --arg pytest_status "${{ steps.test_results.outputs.pytest_status }}" \ + '{ + channel: $channel, + text: "Optimum-RBLN Scheduled Test Results", + blocks: [ + { + type: "header", + text: { + type: "plain_text", + text: $title + } + }, + { + type: "section", + fields: [ + { type: "mrkdwn", text: $commit }, + { type: "mrkdwn", text: $action_link } + ] + }, + { + type: "section", + fields: [ + { type: "mrkdwn", text: "*Compiler Version*" }, + { type: "mrkdwn", text: ("`" + $compiler_version + "`") } + ] + }, + { + type: "section", + fields: [ + { type: "mrkdwn", text: "*Transformers Version*" }, + { type: "mrkdwn", text: ("`" + $transformers_version + "`") } + ] + }, + { + type: "section", + fields: [ + { type: "mrkdwn", text: "*Diffusers Version*" }, + { type: "mrkdwn", text: ("`" + $diffusers_version + "`") } + ] + }, + { type: "divider" }, + { + type: "section", + fields: [ + { type: "mrkdwn", text: "*Compiler Check*" }, + { type: "mrkdwn", text: $compiler_status } + ] + }, + { + type: "section", + fields: [ + { type: "mrkdwn", text: "*Pytest Results*" }, + { type: "mrkdwn", text: $pytest_status } + ] + } + ] + }' + ) + curl -X POST -H 'Authorization: Bearer ${{ secrets.SLACK_BOT_USER_OAUTH_ACCESS_TOKEN }}' -H 'Content-type: application/json; charset=utf-8' --data "$payload" https://slack.com/api/chat.postMessage \ No newline at end of file diff --git a/.github/workflows/rbln_scheduled_test.yaml b/.github/workflows/rbln_scheduled_test.yaml index 47c9a52f8..6cab73880 100644 --- a/.github/workflows/rbln_scheduled_test.yaml +++ b/.github/workflows/rbln_scheduled_test.yaml @@ -2,12 +2,14 @@ name: Optimum-rbln / Scheduled Test on: schedule: - # Run every day at 2am (17:00 UTC, 2:00am KST) - - cron: '0 17 * * *' + # Run every day at 1 (16:00 UTC, 1:00am KST) + - cron: '0 16 * * *' env: - HF_USER_ID: ${{ secrets.HF_USER_ID }} - HF_AUTH_TOKEN: ${{ secrets.HF_AUTH_TOKEN }} + REBEL_PYPI_ENDPOINT: ${{ vars.REBEL_PYPI_INTERNAL_ENDPOINT }} + REBEL_PYPI_USERNAME: ${{ secrets.REBEL_PYPI_USERNAME }} + REBEL_PYPI_PASSWORD: ${{ secrets.REBEL_PYPI_PASSWORD }} + TOKEN: ${{secrets.RENOVATE_TOKEN}} jobs: check-code-quality: @@ -18,169 +20,62 @@ jobs: with: test_all_files: true - load-version: + fetch-version: runs-on: rebel-k8s-runner outputs: compiler_version: ${{ steps.get_version.outputs.compiler_version }} + need_pr: ${{ steps.check_pr_needed.outputs.need_pr }} steps: - name: Checkout code uses: actions/checkout@v3 - + - name: Get compiler version id: get_version run: | - VERSION=$(grep rebel_compiler_version .github/version.yaml | cut -d ':' -f2 | tr -d ' ') - echo "compiler_version=$VERSION" >> $GITHUB_OUTPUT - - check-compiler: - needs: load-version - uses: ./.github/workflows/rbln_check_compiler.yaml - with: - compiler_version: ${{ 
needs.load-version.outputs.compiler_version }} - secrets: inherit - - optimum-rbln-pytest: - needs: [load-version, check-compiler] - if: ${{ needs.check-compiler.outputs.compiler_version_check == 'true' }} - uses: ./.github/workflows/rbln_optimum_pytest.yaml - with: - ref: main - rebel_compiler_version: ${{ needs.check-compiler.outputs.compiler_version }} - test_level: "full" - enable_hf_hub_tests: true - fail_fast: false - secrets: inherit - - optimum-rbln-inference-test: - needs: check-compiler - if: ${{ needs.check-compiler.outputs.compiler_version_check == 'true' }} - uses: ./.github/workflows/rbln_optimum_inference_test.yaml - with: - ref: main - rebel_compiler_version: ${{ needs.check-compiler.outputs.compiler_version }} - secrets: inherit - - summary_and_report: - needs: [load-version, check-compiler, optimum-rbln-pytest] - if: always() - runs-on: rebel-k8s-runner - steps: - - name: Get failed test details - id: get_failed_tests - if: needs.optimum-rbln-pytest.result == 'failure' - run: | - # Get the workflow run jobs - JOBS=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ - "https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs") - - # Extract failed pytest job names step by step for readability - FAILED_JOB_NAMES=$(echo "$JOBS" | jq -r '.jobs[] | select(.conclusion == "failure" and (.name | contains("Pytest"))) | .name') - # Remove "Pytest (...)" wrapper to get the test name - FAILED_TEST_NAMES=$(echo "$FAILED_JOB_NAMES" | sed 's/Pytest (\(.*\))/\1/') - # Join names with comma and space - FAILED_TESTS=$(echo "$FAILED_TEST_NAMES" | tr '\n' ', ' | sed 's/,$//') - - if [ -z "$FAILED_TESTS" ]; then - echo "failed_tests=Unknown" >> $GITHUB_OUTPUT - else - echo "failed_tests=$FAILED_TESTS" >> $GITHUB_OUTPUT - fi + CUR_VERSION=$(grep rebel_compiler_version .github/version.yaml | cut -d ':' -f2 | tr -d ' ') + LATEST_VERSION=$(curl -s -u "${{ env.REBEL_PYPI_USERNAME }}:${{ env.REBEL_PYPI_PASSWORD }}" ${{ env.REBEL_PYPI_INTERNAL_ENDPOINT }}/rebel-compiler/json | jq -r '.releases | keys | .[]' | grep prod | tail -n 1) + echo "compiler_version=$LATEST_VERSION" >> $GITHUB_OUTPUT + echo "cur_version=$CUR_VERSION" >> $GITHUB_OUTPUT - - name: Determine test results - id: test_results + - name: Check if PR needed + id: check_pr_needed run: | - # Determine overall status - if [ "${{ needs.optimum-rbln-pytest.result }}" == "success" ]; then - echo "pytest_status=✅ Success - All tests passed" >> $GITHUB_OUTPUT - echo "pytest_emoji=✅" >> $GITHUB_OUTPUT - elif [ "${{ needs.optimum-rbln-pytest.result }}" == "failure" ]; then - FAILED="${{ steps.get_failed_tests.outputs.failed_tests }}" - if [ -n "$FAILED" ] && [ "$FAILED" != "Unknown" ]; then - echo "pytest_status=❌ Failed - Tests: \`$FAILED\`" >> $GITHUB_OUTPUT - else - echo "pytest_status=❌ Failed" >> $GITHUB_OUTPUT - fi - echo "pytest_emoji=❌" >> $GITHUB_OUTPUT - elif [ "${{ needs.optimum-rbln-pytest.result }}" == "skipped" ]; then - echo "pytest_status=⏭️ Skipped" >> $GITHUB_OUTPUT - echo "pytest_emoji=⏭️" >> $GITHUB_OUTPUT + if [ "${{ steps.get_version.outputs.cur_version }}" == "${{ steps.get_version.outputs.compiler_version }}" ]; then + NEED_PR=false else - echo "pytest_status=⚠️ Cancelled" >> $GITHUB_OUTPUT - echo "pytest_emoji=⚠️" >> $GITHUB_OUTPUT + NEED_PR=true fi + echo "need_pr=$NEED_PR" >> $GITHUB_OUTPUT - # Determine compiler check status - if [ "${{ needs.check-compiler.result }}" == "success" ]; then - echo "compiler_status=✅ Available" >> $GITHUB_OUTPUT - else - echo 
"compiler_status=❌ Not Available" >> $GITHUB_OUTPUT - fi + create-pr: + runs-on: rebel-k8s-runner + needs: fetch-version + if: ${{ needs.fetch-version.outputs.need_pr == 'true' }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: "dev" - - name: Notify Slack - if: always() + - name: Change version.yaml run: | - # Determine overall workflow status emoji - if [ "${{ needs.optimum-rbln-pytest.result }}" == "success" ]; then - title="✅ Optimum-RBLN Scheduled Pytest Results" - elif [ "${{ needs.check-compiler.result }}" != "success" ]; then - title="⚠️ Optimum-RBLN Scheduled Pytest Results - Compiler Check Failed" - else - title="❌ Optimum-RBLN Scheduled Pytest Results" - fi + sed -i "s/^rebel_compiler_version:.*/rebel_compiler_version: ${{ needs.fetch-version.outputs.compiler_version }}/" .github/version.yaml - commit="*Commit*\n" - action_link="*CI Report*\n" - compiler_version="${{ needs.load-version.outputs.compiler_version }}" + - name: Create PR for updated files + id: create-pr-step + uses: peter-evans/create-pull-request@v7 + with: + token: ${{ env.TOKEN }} + commit-message: "dependency: Update dependency rebel-compiler to ${{ needs.fetch-version.outputs.compiler_version }}" + title: "dependency: Update dependency rebel-compiler to ${{ needs.fetch-version.outputs.compiler_version }}" + base: dev + team-reviewers: rebellions-sw/sw-generalization, rebel-jongho - payload=$(jq -n \ - --arg channel "${{ secrets.SLACK_CI_REPORTER_CHANNEL }}" \ - --arg title "$title" \ - --arg commit "$commit" \ - --arg action_link "$action_link" \ - --arg compiler_version "$compiler_version" \ - --arg compiler_status "${{ steps.test_results.outputs.compiler_status }}" \ - --arg pytest_status "${{ steps.test_results.outputs.pytest_status }}" \ - '{ - channel: $channel, - text: "Optimum-RBLN Scheduled Test Results", - blocks: [ - { - type: "header", - text: { - type: "plain_text", - text: $title - } - }, - { - type: "section", - fields: [ - { type: "mrkdwn", text: $commit }, - { type: "mrkdwn", text: $action_link } - ] - }, - { - type: "section", - fields: [ - { type: "mrkdwn", text: "*Compiler Version*" }, - { type: "mrkdwn", text: ("`" + $compiler_version + "`") } - ] - }, - { type: "divider" }, - { - type: "section", - fields: [ - { type: "mrkdwn", text: "*Compiler Check*" }, - { type: "mrkdwn", text: $compiler_status } - ] - }, - { - type: "section", - fields: [ - { type: "mrkdwn", text: "*Pytest Results*" }, - { type: "mrkdwn", text: $pytest_status } - ] - } - ] - }' - ) - curl -X POST -H 'Authorization: Bearer ${{ secrets.SLACK_BOT_USER_OAUTH_ACCESS_TOKEN }}' -H 'Content-type: application/json; charset=utf-8' --data "$payload" https://slack.com/api/chat.postMessage \ No newline at end of file + rbln-full-test: + needs: fetch-version + if: ${{ needs.fetch-version.outputs.need_pr == 'false' }} + uses: ./.github/workflows/rbln_optimum_full_test.yaml + with: + rebel_compiler_version: ${{ needs.fetch-version.outputs.compiler_version }} + ref: "dev" + secrets: inherit diff --git a/.github/workflows/rbln_trigger_on_pr.yaml b/.github/workflows/rbln_trigger_on_pr.yaml index 7592dd594..f063b12f1 100644 --- a/.github/workflows/rbln_trigger_on_pr.yaml +++ b/.github/workflows/rbln_trigger_on_pr.yaml @@ -59,6 +59,34 @@ jobs: compiler_version: ${{ needs.load-version.outputs.compiler_version }} secrets: inherit + check-if-dependency-pr: + runs-on: rebel-k8s-runner + outputs: + result: ${{ steps.check_title.outputs.result }} + steps: + - name: Check out code + uses: actions/checkout@v4 + with: + ref: 
${{ github.event.pull_request.head.sha }} + + - name: check if pr is dependency check + id: check_title + run: | + PR_TITLE="${{ github.event.pull_request.title }}" + LAST_COMMIT_MSG=$(git log -1 --pretty=format:%s) + DEP_UPDATE_PATTERN="dependency: Update dependency" + + echo "PR Title: $PR_TITLE" + echo "Last Commit: $LAST_COMMIT_MSG" + + if [[ "$PR_TITLE" == *"$DEP_UPDATE_PATTERN"* && "$LAST_COMMIT_MSG" == *"$DEP_UPDATE_PATTERN"* ]]; then + echo "result=true" >> $GITHUB_OUTPUT + else + echo "result=false" >> $GITHUB_OUTPUT + fi + + echo $result + check-team-member: runs-on: rebel-k8s-runner needs: [check-skip-ci, check-code-quality, test-docstrings, check-compiler] @@ -85,12 +113,29 @@ jobs: echo "IS_TEAM_MEMBER=false" >> $GITHUB_OUTPUT fi + # Default PR test if not Dependency update optimum-rbln-pytest: - needs: [check-skip-ci, check-code-quality, test-docstrings, check-compiler, check-team-member] - if: ${{ needs.check-skip-ci.outputs.should_skip != 'true' && needs.check-team-member.outputs.is_team_member == 'true' }} + needs: [check-skip-ci, check-code-quality, test-docstrings, check-compiler, check-team-member, check-if-dependency-pr] + if: | + needs.check-if-dependency-pr.outputs.result == 'false' && + needs.check-skip-ci.outputs.should_skip != 'true' && + needs.check-team-member.outputs.is_team_member == 'true' uses: ./.github/workflows/rbln_optimum_pytest.yaml with: ref: ${{ github.event.pull_request.head.sha }} rebel_compiler_version: ${{ needs.check-compiler.outputs.compiler_version }} test_level: "default" secrets: inherit + + # Full Schedule test if Dependency update + optimum-rbln-dependency-pytest: + needs: [check-compiler, check-team-member, check-if-dependency-pr] + if: | + needs.check-if-dependency-pr.outputs.result == 'true' && + needs.check-team-member.outputs.is_team_member == 'true' + uses: ./.github/workflows/rbln_optimum_full_test.yaml + with: + rebel_compiler_version: ${{ needs.check-compiler.outputs.compiler_version }} + ref: ${{ github.event.pull_request.head.sha }} + secrets: inherit + From ffdce2558c3a97f3e82287ff87eb147eed26789c Mon Sep 17 00:00:00 2001 From: rebel-thkim <157466331+rebel-thkim@users.noreply.github.com> Date: Tue, 4 Nov 2025 13:24:43 +0900 Subject: [PATCH 05/14] dependency: Update dependency diffusers to v0.35.2 (#321) Co-authored-by: Renovate Bot Co-authored-by: rebel-kblee <119555851+rebel-kblee@users.noreply.github.com> --- pyproject.toml | 2 +- .../models/decoderonly/modeling_decoderonly.py | 1 - uv.lock | 8 ++++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6103bbde2..d7da248f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ "torchvision<=0.23.0", "accelerate>=1.0.1", "transformers==4.57.1", - "diffusers==0.35.1", + "diffusers==0.35.2", "packaging>=24.1", ] diff --git a/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py b/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py index 21681d5a5..086878dd2 100644 --- a/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +++ b/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py @@ -511,7 +511,6 @@ def _update_attention_config( f" than the required number of blocks ({num_full_blocks})." "This can cause a failure during model compilation." 
) - logger.info(f"[KVCache] Compiling with num_blocks: {rbln_config.kvcache_num_blocks}") return rbln_config diff --git a/uv.lock b/uv.lock index 9db4f0563..64bb41c0c 100644 --- a/uv.lock +++ b/uv.lock @@ -440,7 +440,7 @@ wheels = [ [[package]] name = "diffusers" -version = "0.35.1" +version = "0.35.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock", marker = "sys_platform == 'linux'" }, @@ -454,9 +454,9 @@ dependencies = [ { name = "requests", marker = "sys_platform == 'linux'" }, { name = "safetensors", marker = "sys_platform == 'linux'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/49/05/c4c8736c14e0efe9a835fb91c6ff5e1abddf9894a2f2a28fffe6429378a6/diffusers-0.35.1.tar.gz", hash = "sha256:6f4dc0c9d309a4c4914a2179646f2bc801b5e395a43295fff3b5f9dbd3e28fd3", size = 3369127, upload-time = "2025-08-20T04:16:10.668Z" } +sdist = { url = "https://files.pythonhosted.org/packages/03/68/288ca23c7c05c73e87ffe5efffc282400ac9b017f7a9bb03883f4310ea15/diffusers-0.35.2.tar.gz", hash = "sha256:30ecd552303edfcfe1724573c3918a8462ee3ab4d529bdbd4c0045f763affded", size = 3366711, upload-time = "2025-10-15T04:05:17.213Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/06/a7/c53f294f34d9e1584388721b3d7aa024ea1ac46e86d0c302fc3db40ed960/diffusers-0.35.1-py3-none-any.whl", hash = "sha256:fe29ff10200970c7c5934c6488c213e2a77a03dad5e6fa00bbd8e1d04234cb0e", size = 4121424, upload-time = "2025-08-20T04:16:08.359Z" }, + { url = "https://files.pythonhosted.org/packages/2a/2e/38d9824f8c6bb048c5ba21c6d4da54c29c162a46b58b3ef907a360a76d3e/diffusers-0.35.2-py3-none-any.whl", hash = "sha256:d50d5e74fdd6dcf55e5c1d304bc52cc7c2659abd1752740d736d7b54078b4db5", size = 4121649, upload-time = "2025-10-15T04:05:14.391Z" }, ] [[package]] @@ -1476,7 +1476,7 @@ tests = [ [package.metadata] requires-dist = [ { name = "accelerate", specifier = ">=1.0.1" }, - { name = "diffusers", specifier = "==0.35.1" }, + { name = "diffusers", specifier = "==0.35.2" }, { name = "packaging", specifier = ">=24.1" }, { name = "torch", marker = "sys_platform != 'darwin'", specifier = "==2.8.0", index = "https://download.pytorch.org/whl/cpu" }, { name = "torch", marker = "sys_platform == 'darwin'", specifier = "==2.8.0" }, From a5e133f36bbdfa733a94d7875201bf35892980a8 Mon Sep 17 00:00:00 2001 From: rebel-thkim <157466331+rebel-thkim@users.noreply.github.com> Date: Tue, 4 Nov 2025 13:27:20 +0900 Subject: [PATCH 06/14] model: DecoderOnly's Error message for batch, seq exceed compiled capacity (#338) --- .../decoderonly/decoderonly_runtime_utils.py | 8 ++++++-- .../decoderonly/modeling_decoderonly.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py b/src/optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py index 328ab595b..9daa923b6 100644 --- a/src/optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +++ b/src/optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py @@ -46,6 +46,12 @@ def update_block(self, batch_idx: int, block_idx: int): """ If the block is empty (empty_block), allocates a block from the free_block_pool. 
""" + if batch_idx >= len(self.block_tables) or block_idx >= len(self.block_tables[batch_idx]): + raise IndexError( + f"Invalid index(batch_idx={batch_idx}, block_idx={block_idx}): \n \ + BlockTable Shape(batch_axis, block_axis): {self.block_tables.shape}, BlockSize: {self.rbln_config.kvcache_block_size}" + ) + if self.block_tables[batch_idx][block_idx] == self.EMPTY_BLOCK: if self.free_block_pool: block = self.free_block_pool.popleft() @@ -96,8 +102,6 @@ def get_global_block_tables(): s, e = cache_position[0][0].item(), cache_position[0][-1].item() for position in range(s, e + 1, self.rbln_config.kvcache_block_size): block_idx = position // self.rbln_config.kvcache_block_size - if batch_idx >= len(self.block_tables) or block_idx >= len(self.block_tables[batch_idx]): - raise IndexError(f"Invalid index: batch_idx={batch_idx}, block_idx={block_idx}") self.update_block(batch_idx, block_idx) return self.replace_empty_block(self.block_tables[batch_idx]) diff --git a/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py b/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py index 086878dd2..f5fa4fce1 100644 --- a/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +++ b/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py @@ -758,6 +758,16 @@ def forward( logits = [] inputs = inputs_embeds if inputs_embeds is not None else input_ids batch_size = inputs.shape[0] + input_len = inputs.shape[1] + if batch_size > self.rbln_config.batch_size: + raise ValueError( + f"Input's batch({batch_size}) exceeds compiled batch_size({self.rbln_config.batch_size})" + ) + if input_len > self.rbln_config.max_seq_len: + raise ValueError( + f"Input's length({input_len}) exceeds compiled max_seq_len({self.rbln_config.max_seq_len})." + ) + for b_idx in range(batch_size): cache_position = torch.arange(0, generate_idx[b_idx].item(), dtype=torch.int32).unsqueeze(0) output = self.prefill_decoder( @@ -782,6 +792,15 @@ def forward( f"Available batch sizes are: {list(self.decoders.keys())}. " f"Please run your model with one of these batch sizes or add support for batch size {batch_size}." ) + if max(cache_position.reshape(-1)) >= self.rbln_config.max_seq_len: + raise ValueError( + f"Cache position exceeds the maximum sequence length.\n" + f" - Current max cache position: {int(torch.max(cache_position).item())}\n" + f" - Allowed max_seq_len: {self.rbln_config.max_seq_len}\n" + f"Solution: Reduce the generation length by adjusting `max_new_tokens` " + f"or `max_length` in the generation config." 
+ ) + logits = self.decoders[batch_size]( input_ids=input_ids, inputs_embeds=inputs_embeds, From 1e999736d068905a2bd0c99a278fcac955b18d3b Mon Sep 17 00:00:00 2001 From: rebel-thkim <157466331+rebel-thkim@users.noreply.github.com> Date: Tue, 4 Nov 2025 15:17:26 +0900 Subject: [PATCH 07/14] other: change Gitpython auth as deprecated api (#346) --- .github/scripts/validate_pr_checklist.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/scripts/validate_pr_checklist.py b/.github/scripts/validate_pr_checklist.py index 110384cad..156082c67 100644 --- a/.github/scripts/validate_pr_checklist.py +++ b/.github/scripts/validate_pr_checklist.py @@ -16,7 +16,7 @@ import re import sys -from github import Github +from github import Auth, Github def read_checklist_from_template(): @@ -57,7 +57,7 @@ def main(): print("Missing required environment variables") sys.exit(1) - g = Github(github_token) + g = Github(auth=Auth.Token(github_token)) repo = g.get_repo(repo_name) pr = repo.get_pull(int(pr_number)) From 0a333eb1b84e009c4c2094f3a66ba7b69c14b50d Mon Sep 17 00:00:00 2001 From: rebel-thkim <157466331+rebel-thkim@users.noreply.github.com> Date: Thu, 13 Nov 2025 15:18:08 +0900 Subject: [PATCH 08/14] other: fix dependency pr title (#353) --- .github/workflows/rbln_scheduled_test.yaml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rbln_scheduled_test.yaml b/.github/workflows/rbln_scheduled_test.yaml index b45cc914c..e04394ba6 100644 --- a/.github/workflows/rbln_scheduled_test.yaml +++ b/.github/workflows/rbln_scheduled_test.yaml @@ -67,12 +67,24 @@ jobs: uses: peter-evans/create-pull-request@v7 with: token: ${{ env.TOKEN }} - commit-message: "dependency: Update dependency rebel-compiler to ${{ needs.fetch-version.outputs.compiler_version }}" - title: "dependency: Update dependency rebel-compiler to ${{ needs.fetch-version.outputs.compiler_version }}" + commit-message: "dependency: Update dependency to sync with rebel-compiler" + title: "dependency: Update dependency to sync with rebel-compiler" base: dev team-reviewers: rebellions-sw/sw-generalization reviewers: rebel-jongho + - name: Add version comment to PR + uses: peter-evans/create-or-update-comment@v4 + if: ${{ steps.create-pr-step.outputs.pull-request-number }} + with: + token: ${{ env.TOKEN }} + repository: ${{ github.repository }} + issue-number: ${{ steps.create-pr-step.outputs.pull-request-number }} + body: | + ### 🔄 rebel-compiler Dependency Update + **Updated Version:** `${{ needs.fetch-version.outputs.compiler_version }}` + This PR syncs the dependency version with the latest build. 
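The capacity checks added in PATCH 06 above point users at `max_new_tokens`/`max_length` when a request exceeds the compiled limits. A minimal sketch of that advice, assuming a pre-compiled RBLN causal LM whose `rbln_config.max_seq_len` holds the compiled limit; the model class, checkpoint, and path below are illustrative, not taken from the patches:

    from transformers import AutoTokenizer

    from optimum.rbln import RBLNLlamaForCausalLM  # any compiled RBLN decoder-only model

    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
    model = RBLNLlamaForCausalLM.from_pretrained("path/to/compiled/llama")  # illustrative path

    inputs = tokenizer("Hello, RBLN!", return_tensors="pt")
    prompt_len = inputs["input_ids"].shape[1]
    # Keep prompt + generated tokens within the compiled max_seq_len so the new
    # cache-position check in the decoder forward pass is never triggered.
    outputs = model.generate(**inputs, max_new_tokens=model.rbln_config.max_seq_len - prompt_len)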
+
   rbln-full-test:
     needs: fetch-version
     if: ${{ needs.fetch-version.outputs.need_pr == 'false' }}

From 862d22da46e564d00d7a5389677958e6ff248d52 Mon Sep 17 00:00:00 2001
From: rebel-thkim <157466331+rebel-thkim@users.noreply.github.com>
Date: Thu, 13 Nov 2025 15:19:59 +0900
Subject: [PATCH 09/14] Revert "other: fix dependency pr title" (#357)

---
 .github/workflows/rbln_scheduled_test.yaml | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/rbln_scheduled_test.yaml b/.github/workflows/rbln_scheduled_test.yaml
index e04394ba6..b45cc914c 100644
--- a/.github/workflows/rbln_scheduled_test.yaml
+++ b/.github/workflows/rbln_scheduled_test.yaml
@@ -67,24 +67,12 @@ jobs:
         uses: peter-evans/create-pull-request@v7
         with:
           token: ${{ env.TOKEN }}
-          commit-message: "dependency: Update dependency to sync with rebel-compiler"
-          title: "dependency: Update dependency to sync with rebel-compiler"
+          commit-message: "dependency: Update dependency rebel-compiler to ${{ needs.fetch-version.outputs.compiler_version }}"
+          title: "dependency: Update dependency rebel-compiler to ${{ needs.fetch-version.outputs.compiler_version }}"
           base: dev
           team-reviewers: rebellions-sw/sw-generalization
           reviewers: rebel-jongho
 
-      - name: Add version comment to PR
-        uses: peter-evans/create-or-update-comment@v4
-        if: ${{ steps.create-pr-step.outputs.pull-request-number }}
-        with:
-          token: ${{ env.TOKEN }}
-          repository: ${{ github.repository }}
-          issue-number: ${{ steps.create-pr-step.outputs.pull-request-number }}
-          body: |
-            ### 🔄 rebel-compiler Dependency Update
-            **Updated Version:** `${{ needs.fetch-version.outputs.compiler_version }}`
-            This PR syncs the dependency version with the latest build.
-
   rbln-full-test:
     needs: fetch-version
     if: ${{ needs.fetch-version.outputs.need_pr == 'false' }}

From 545d307fa65baf671d8568f5852a5b1bf05b3db7 Mon Sep 17 00:00:00 2001
From: rebel-thkim <157466331+rebel-thkim@users.noreply.github.com>
Date: Mon, 17 Nov 2025 11:04:09 +0900
Subject: [PATCH 10/14] other: pr test - fix bug of commit-msg (#362)

---
 .github/workflows/rbln_trigger_on_pr.yaml | 49 ++++++++++++-----------
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/rbln_trigger_on_pr.yaml b/.github/workflows/rbln_trigger_on_pr.yaml
index 875ffefdc..08df7d312 100644
--- a/.github/workflows/rbln_trigger_on_pr.yaml
+++ b/.github/workflows/rbln_trigger_on_pr.yaml
@@ -12,59 +12,63 @@ env:
   REBEL_PYPI_PASSWORD: ${{ secrets.REBEL_PYPI_PASSWORD }}
 
 jobs:
-  check-skip-ci:
+  head-commit-message:
     runs-on: rebel-k8s-runner
     outputs:
-      should_skip: ${{ contains(github.event.pull_request.head.commit.message, '[skip ci]') }}
+      message: ${{ steps.get_commit_message.outputs.message }}
     steps:
-      - name: Check if [skip ci] is in commit message
+      - name: Checkout code
+        uses: actions/checkout@v3
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+          fetch-depth: 0
+
+      - name: Get head commit message
+        id: get_commit_message
         run: |
-          if ${{ contains(github.event.pull_request.head.commit.message, '[skip ci]') }}; then
-            echo "Found [skip ci] in commit message, skipping CI"
-          else
-            echo "No [skip ci] found, continuing with CI"
-          fi
+          COMMIT_MESSAGE=$(git log -1 --pretty=%B)
+          echo "message<<EOF" >> $GITHUB_OUTPUT
+          echo "$COMMIT_MESSAGE" >> $GITHUB_OUTPUT
+          echo "EOF" >> $GITHUB_OUTPUT
+          echo "COMMIT_MESSAGE=$COMMIT_MESSAGE"
 
   check-pytest-full:
+    needs: head-commit-message
     runs-on: rebel-k8s-runner
     outputs:
-      should_full_test: ${{ contains(github.event.pull_request.head.commit.message, 
'[pytest-full]') }} + should_full_test: ${{ contains(needs.head-commit-message.outputs.message, '[pytest-full]') && 'true' || 'false' }} steps: - name: Check if [pytest-full] is in commit message run: | - if ${{ contains(github.event.pull_request.head.commit.message, '[pytest-full]') }}; then + if [[ "${{ contains(needs.head-commit-message.outputs.message, '[pytest-full]') }}" == "true" ]]; then echo "Found [pytest-full] in commit message, running full pytest" else echo "No [pytest-full] found, continuing with default pytest" fi check-slack-report: + needs: head-commit-message runs-on: rebel-k8s-runner outputs: - should_slack_report: ${{ contains(github.event.pull_request.head.commit.message, '[slack-report]') }} + should_slack_report: ${{ contains(needs.head-commit-message.outputs.message, '[slack-report]') && 'true' || 'false' }} steps: - name: Check if [slack-report] is in commit message run: | - if ${{ contains(github.event.pull_request.head.commit.message, '[slack-report]') }}; then + if [[ "${{ contains(needs.head-commit-message.outputs.message, '[slack-report]') }}" == "true" ]]; then echo "Found [slack-report] in commit message, reporting slack" else echo "No [slack-report] found, continuing without reporting slack" fi check-code-quality: - needs: check-skip-ci - if: ${{ needs.check-skip-ci.outputs.should_skip != 'true' }} uses: ./.github/workflows/check_code_quality.yml test-docstrings: - needs: check-skip-ci - if: ${{ needs.check-skip-ci.outputs.should_skip != 'true' }} uses: ./.github/workflows/test-docstrings.yml load-version: runs-on: rebel-k8s-runner - needs: [check-skip-ci, check-code-quality, test-docstrings] - if: ${{ needs.check-skip-ci.outputs.should_skip != 'true' }} + needs: [check-code-quality, test-docstrings] outputs: compiler_version: ${{ steps.get_version.outputs.compiler_version }} steps: @@ -78,8 +82,7 @@ jobs: echo "compiler_version=$VERSION" >> $GITHUB_OUTPUT check-compiler: - needs: [check-skip-ci, check-code-quality, test-docstrings, load-version] - if: ${{ needs.check-skip-ci.outputs.should_skip != 'true' }} + needs: [check-code-quality, test-docstrings, load-version] uses: ./.github/workflows/rbln_check_compiler.yaml with: compiler_version: ${{ needs.load-version.outputs.compiler_version }} @@ -87,8 +90,7 @@ jobs: check-team-member: runs-on: rebel-k8s-runner - needs: [check-skip-ci, check-code-quality, test-docstrings, check-compiler] - if: ${{ needs.check-skip-ci.outputs.should_skip != 'true' && needs.check-compiler.outputs.compiler_version_check == 'true' }} + needs: [check-code-quality, test-docstrings, check-compiler] outputs: is_team_member: ${{ steps.check_member.outputs.IS_TEAM_MEMBER }} steps: @@ -113,10 +115,9 @@ jobs: # Default PR test if not Dependency update optimum-rbln-pytest: - needs: [check-skip-ci, check-code-quality, test-docstrings, check-compiler, check-team-member, check-pytest-full] + needs: [check-code-quality, test-docstrings, check-compiler, check-team-member, check-pytest-full] if: | needs.check-pytest-full.outputs.should_full_test != 'true' && - needs.check-skip-ci.outputs.should_skip != 'true' && needs.check-team-member.outputs.is_team_member == 'true' uses: ./.github/workflows/rbln_optimum_pytest.yaml with: From 48eb3ae106bbe969a8b5c62c435018db2b4c3872 Mon Sep 17 00:00:00 2001 From: rebel-kblee <119555851+rebel-kblee@users.noreply.github.com> Date: Mon, 17 Nov 2025 16:24:54 +0900 Subject: [PATCH 11/14] refactor: support RBLNModel with no compilation (#356) --- src/optimum/rbln/configuration_utils.py | 4 +++- 
src/optimum/rbln/modeling.py | 18 ++++++++++++++++++ src/optimum/rbln/modeling_base.py | 1 - src/optimum/rbln/utils/hub.py | 17 ++++++++++++++--- 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/optimum/rbln/configuration_utils.py b/src/optimum/rbln/configuration_utils.py index 261869fb1..9ca5b282c 100644 --- a/src/optimum/rbln/configuration_utils.py +++ b/src/optimum/rbln/configuration_utils.py @@ -528,6 +528,7 @@ def deploy_model(): ] submodules: List[str] = [] subclass_non_save_attributes = [] + _allow_no_compile_cfgs = False def initialize_submodule_config( self, @@ -808,7 +809,8 @@ def freeze(self): or len(self._compile_cfgs) == 0 or not all(isinstance(cfg, RBLNCompileConfig) for cfg in self._compile_cfgs) ): - raise RuntimeError("`compile_cfgs` must be set before freezing.") + if not self._allow_no_compile_cfgs: + raise RuntimeError("`compile_cfgs` must contain at least one `RBLNCompileConfig` before freezing.") for submodule_name in self.submodules: submodule_config = getattr(self, submodule_name, None) diff --git a/src/optimum/rbln/modeling.py b/src/optimum/rbln/modeling.py index 09e408e4f..1580b6408 100644 --- a/src/optimum/rbln/modeling.py +++ b/src/optimum/rbln/modeling.py @@ -60,6 +60,9 @@ def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelCon @classmethod def get_compiled_model(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig): + if rbln_config._allow_no_compile_cfgs: + return {} + model = cls._wrap_model_if_needed(model, rbln_config) rbln_compile_config = rbln_config.compile_cfgs[0] compiled_model = cls.compile( @@ -70,6 +73,18 @@ def get_compiled_model(cls, model: "PreTrainedModel", rbln_config: RBLNModelConf ) return compiled_model + @classmethod + def _update_rbln_config( + cls, + preprocessors: Optional[Any], + model: Optional["PreTrainedModel"] = None, + model_config: Optional["PretrainedConfig"] = None, + rbln_config: Optional[RBLNModelConfig] = None, + ) -> RBLNModelConfig: + # Default implementation: return config as-is + # Subclasses should override to set compile_cfgs if needed + return rbln_config + @classmethod def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"): return model @@ -234,6 +249,9 @@ def _create_runtimes( compiled_models: List[rebel.RBLNCompiledModel], rbln_config: RBLNModelConfig, ) -> List[rebel.Runtime]: + if len(rbln_config.compile_cfgs) == 0: + return [] + if DEFAULT_COMPILED_MODEL_NAME not in rbln_config.device_map: cls._raise_missing_compiled_file_error([DEFAULT_COMPILED_MODEL_NAME]) diff --git a/src/optimum/rbln/modeling_base.py b/src/optimum/rbln/modeling_base.py index ba9beae46..209b6c6af 100644 --- a/src/optimum/rbln/modeling_base.py +++ b/src/optimum/rbln/modeling_base.py @@ -71,7 +71,6 @@ def __init__( self.rbln_config = rbln_config if not rbln_config.is_frozen(): raise RuntimeError("`rbln_config` must be frozen. Please call `rbln_config.freeze()` first.") - self.compiled_models = rbln_compiled_models # Registers the RBLN classes into the transformers AutoModel classes to avoid warnings when creating diff --git a/src/optimum/rbln/utils/hub.py b/src/optimum/rbln/utils/hub.py index 895375bbe..a04c9dd05 100644 --- a/src/optimum/rbln/utils/hub.py +++ b/src/optimum/rbln/utils/hub.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
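The `_allow_no_compile_cfgs` flag and the empty-compile paths added above in configuration_utils.py and modeling.py let a model be saved and reloaded without any *.rbln artifacts. A minimal sketch of how a config subclass might opt in; the subclass name is illustrative, only the flag itself comes from this patch:

    from optimum.rbln.configuration_utils import RBLNModelConfig


    class RBLNNoCompileExampleConfig(RBLNModelConfig):
        # With the flag enabled, freeze() no longer insists on compile_cfgs,
        # get_compiled_model() returns an empty dict, and _create_runtimes()
        # returns an empty list, so loading succeeds even when no .rbln files exist.
        _allow_no_compile_cfgs = True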
+import json from pathlib import Path from typing import List, Optional, Union @@ -67,15 +68,25 @@ def validate_files( location: str, ): """Validate the presence and count of required files.""" - if len(files) == 0: - raise FileNotFoundError(f"Could not find any rbln model file in {location}") - if len(config_files) == 0: raise FileNotFoundError(f"Could not find `rbln_config.json` file in {location}") if len(config_files) > 1: raise FileExistsError(f"Multiple rbln_config.json files found in {location}. This is not expected.") + try: + with open(config_files[0], "r") as f: + config_data = json.load(f) + compile_cfgs = config_data.get("_compile_cfgs", []) + if len(compile_cfgs) == 0: + # If compile_cfgs is empty, we don't need .rbln files + return + except (json.JSONDecodeError, KeyError, OSError): + pass + + if len(files) == 0: + raise FileNotFoundError(f"Could not find any rbln model file in {location}") + def _get_huggingface_token(token: Union[bool, str]) -> str: if isinstance(token, str): From 6f108c9030429322b820c4e070a11d741e4b7fa2 Mon Sep 17 00:00:00 2001 From: rebel-jongho <112920593+rebel-jongho@users.noreply.github.com> Date: Tue, 18 Nov 2025 16:15:43 +0900 Subject: [PATCH 12/14] model: update the logic for estimating num_blocks (#354) --- .../transformers/modeling_attention_utils.py | 351 ++++++++++++------ .../decoderonly/configuration_decoderonly.py | 6 + .../decoderonly/modeling_decoderonly.py | 9 +- .../transformers/utils/rbln_quantization.py | 9 + src/optimum/rbln/utils/runtime_utils.py | 32 ++ 5 files changed, 291 insertions(+), 116 deletions(-) diff --git a/src/optimum/rbln/transformers/modeling_attention_utils.py b/src/optimum/rbln/transformers/modeling_attention_utils.py index 5dbdc82ec..e53a8f05c 100644 --- a/src/optimum/rbln/transformers/modeling_attention_utils.py +++ b/src/optimum/rbln/transformers/modeling_attention_utils.py @@ -1,18 +1,18 @@ import math -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +from collections import Counter, defaultdict +from typing import TYPE_CHECKING, Dict, Optional, Tuple -from optimum.rbln.transformers.models.decoderonly.configuration_decoderonly import ( - RBLNDecoderOnlyModelForCausalLMConfig, -) +import rebel from ..utils.logging import get_logger +from ..utils.runtime_utils import get_available_dram +from .models.decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausalLMConfig logger = get_logger() if TYPE_CHECKING: - from rebel import RBLNCompiledModel - from transformers import PretrainedConfig + from transformers import PretrainedConfig, PreTrainedModel DEFAULT_FLASH_ATTN_PARTITION_LENGTH = 16_384 @@ -115,128 +115,261 @@ def validate_sliding_window(rbln_config: RBLNDecoderOnlyModelForCausalLMConfig): raise ValueError("`use_attention_mask` must be set to False when `cache_impl` is set to 'sliding_window'.") +def align(x: int, nbytes: int) -> int: + return int(math.ceil(x / nbytes) * nbytes) + + +def align_2MB(x: int) -> int: + return align(x, 2**21) + + +def get_alloc_memory_by_key(compiled_models: Dict[str, "rebel.RBLNCompiledModel"]) -> Dict[str, int]: + alloc_memory_by_key = defaultdict(int) + # Get the actual memory allocation of each node by key + for compiled_model in compiled_models.values(): + alloc_per_node_by_key = compiled_model.get_alloc_per_node_by_key() + for key, memory_per_node in alloc_per_node_by_key.items(): + alloc_memory_by_key[key] += sum(memory_per_node) + + return alloc_memory_by_key + + +def format_byte_size(nbytes: int) -> str: + if nbytes < 1024: + return f"{nbytes} B" 
+ elif nbytes < 1024**2: + return f"{nbytes / 1024:.2f} KB" + elif nbytes < 1024**3: + return f"{nbytes / 1024**2:.2f} MB" + else: + return f"{nbytes / 1024**3:.2f} GB" + + class RBLNDecoderOnlyFlashAttentionMixin: @classmethod - def get_maximum_num_blocks( + def get_maximum_num_blocks_by_model( cls, - config: "PretrainedConfig", - tensor_parallel_size: int, - kvcache_block_size: int, - nbits_per_param: Optional[int] = None, - n_model_params: Optional[int] = None, - kernel_size: Optional[int] = None, - buffer: Optional[int] = None, - num_runtimes: int = 2, + model: "PreTrainedModel", + model_config: "PretrainedConfig", + rbln_config: RBLNDecoderOnlyModelForCausalLMConfig, ) -> int: - # We are finding max_n_blocks(x) that satisfies the following equation: - - # available_dram - kernel_size - buffer - # - num_layers * 2 * tensor_parallel_size - # * align_2MB( - # x - # * block_size - # * align_64(head_dim) - # * math.ceil(num_key_value_heads / tensor_parallel_size) - # * 2 - # ) > 0 - - # This inequality can be rewritten as follows: - - # a - c * align_2MB(b * x) > 0 - # where - # a = available_dram - kernel_size - buffer - # b = block_size * align_64(head_dim) * math.ceil(num_key_value_heads / tensor_parallel_size) * 2 - # c = num_layers * 2 * tensor_parallel_size - - # We can rewrite the inequality as follows: - # k > align_2MB(b*x) - # where - # k = a / c - - # After that, we can derive the following equation: - # x = floor(2**21 / b * floor((k - 1) / 2**21)) - - def align(x: int, nbytes: int) -> int: - return int(math.ceil(x / nbytes) * nbytes) - - def align_2MB(x: int) -> int: - return align(x, 2**21) - - num_attention_heads = getattr(config, "n_head", None) or getattr(config, "num_attention_heads") - num_layers = getattr(config, "n_layer", None) or getattr(config, "num_hidden_layers") - head_dim = getattr(config, "head_dim", None) or config.hidden_size // num_attention_heads - vocab_size = config.vocab_size - hidden_size = getattr(config, "n_embd", None) or getattr(config, "hidden_size") - num_key_value_heads = getattr(config, "num_key_value_heads", None) or num_attention_heads - - # TODO(jongho): Update if target npu is REBEL. - ATOM_DRAM_NBYTES = 16 * 2**30 - ATOM_SYS_DRAM_NBYTES = 288 * 2**20 - available_dram = tensor_parallel_size * (ATOM_DRAM_NBYTES - ATOM_SYS_DRAM_NBYTES) - - if kernel_size is None: - if n_model_params is None: - raise ValueError("`n_model_params` should be specified to estimate the kernel memory.") - # Get estimated kernel size (approximated) - lm_heads_params = align(vocab_size, 64) * hidden_size - lm_heads_nbytes = ( - align_2MB(lm_heads_params * nbits_per_param // 8 / tensor_parallel_size) * tensor_parallel_size + tensor_parallel_size = rbln_config.tensor_parallel_size or 1 + available_dram = get_available_dram(rbln_config.npu) * tensor_parallel_size + + kernel_memory = cls._get_kernel_memory(model, model_config=model_config, rbln_config=rbln_config) + buffer = cls._get_buffer(rbln_config) + + remaining_dram = available_dram - kernel_memory - buffer + if remaining_dram <= 0: + raise ValueError( + "Insufficient available DRAM after accounting for kernel memory and buffer. " + "Cannot allocate any KV cache blocks." 
+ f" (Available DRAM: {format_byte_size(available_dram)}, " + f"Kernel Memory: {format_byte_size(kernel_memory)}, " + f"Buffer: {format_byte_size(buffer)})" ) - params = n_model_params - lm_heads_params - layer_nbytes = ( - align_2MB(params * nbits_per_param // 8 / num_layers / tensor_parallel_size) - * num_layers + estimated_num_blocks = cls._estimate_num_blocks( + remaining_dram, model_config=model_config, rbln_config=rbln_config + ) + + return estimated_num_blocks + + @classmethod + def _get_kernel_memory( + cls, + model: "PreTrainedModel", + model_config: "PretrainedConfig", + rbln_config: RBLNDecoderOnlyModelForCausalLMConfig, + ) -> int: + if model.get_output_embeddings() is None: + lm_head_nbytes = 0 + else: + lm_head_nbytes = cls._get_lm_head_memory(model_config, rbln_config) + + layer_nbytes = cls._get_layer_memory(model, model_config, rbln_config) + return lm_head_nbytes + layer_nbytes + + @classmethod + def _get_lm_head_memory( + cls, model_config: "PretrainedConfig", rbln_config: RBLNDecoderOnlyModelForCausalLMConfig + ) -> int: + tensor_parallel_size = rbln_config.tensor_parallel_size or 1 + vocab_size = model_config.vocab_size + hidden_size = getattr(model_config, "n_embd", None) or getattr(model_config, "hidden_size") + lm_head_params = align(vocab_size, 64) * hidden_size + + nbytes_per_param = 2 # Assuming lm_head is always not quantized + lm_head_memory_in_bytes = ( + align_2MB(lm_head_params * nbytes_per_param / tensor_parallel_size) * tensor_parallel_size + ) + + return lm_head_memory_in_bytes + + @classmethod + def _get_layer_memory( + cls, + model: "PreTrainedModel", + model_config: "PretrainedConfig", + rbln_config: RBLNDecoderOnlyModelForCausalLMConfig, + ) -> int: + # This is an *APPROXIMATE* calculation based on the number of parameters + tensor_parallel_size = rbln_config.tensor_parallel_size or 1 + num_hidden_layers = getattr(model_config, "n_layer", None) or getattr(model_config, "num_hidden_layers") + + n_model_params = sum(p.numel() for p in model.parameters()) + embed_token_params = sum(p.numel() for p in model.get_input_embeddings().parameters()) + + # Check : `embed_token` is same as `lm_head` + if model.get_output_embeddings() is not None: + params = n_model_params - 2 * embed_token_params + else: + params = n_model_params - embed_token_params + + # Assuming all layers have the same number of parameters + # and all linear layers are quantized if quantization is enabled (This is not always true) + # TODO(jongho): More accurate calculation + nbits_per_param = rbln_config.nbits_per_param + layer_nbytes = ( + (align_2MB(params // num_hidden_layers * nbits_per_param // 8 / tensor_parallel_size)) + * num_hidden_layers + * tensor_parallel_size + ) + + return layer_nbytes + + @classmethod + def _get_buffer(cls, rbln_config) -> int: + # TODO(jongho): Accurate buffer estimation + buffer_per_runtime_per_core = 2**28 # 256MB per runtime + num_runtimes = 1 if not rbln_config.can_generate else 1 + len(rbln_config.decoder_batch_sizes) + tensor_parallel_size = rbln_config.tensor_parallel_size or 1 + + buffer_per_core = buffer_per_runtime_per_core * num_runtimes + buffer = buffer_per_core * tensor_parallel_size + return buffer + + @classmethod + def get_maximum_num_blocks_by_compiled_model( + cls, + compiled_models: Dict[str, "rebel.RBLNCompiledModel"], + model_config: "PretrainedConfig", + rbln_config: RBLNDecoderOnlyModelForCausalLMConfig, + ) -> int: + tensor_parallel_size = rbln_config.tensor_parallel_size or 1 + available_dram = get_available_dram(rbln_config.npu) * 
tensor_parallel_size + + alloc_memory_by_key = get_alloc_memory_by_key(compiled_models) + alloc_memory_by_key.pop("PortRecur", None) # Old compiler's kv-cache Key + alloc_memory_by_key.pop("DramTensor", None) # kv-cache + used_memory = sum(alloc_memory_by_key.values()) + + remaining_dram = available_dram - used_memory + + if remaining_dram <= 0: + logger.warning( + "Insufficient available DRAM after accounting for kernel memory and buffer. " + "Model cannot allocate any KV cache blocks." + ) + + estimated_num_blocks = cls._estimate_num_blocks( + remaining_dram, model_config=model_config, rbln_config=rbln_config + ) + + return estimated_num_blocks + + @classmethod + def _estimate_num_blocks( + cls, available_dram: int, model_config: "PretrainedConfig", rbln_config: RBLNDecoderOnlyModelForCausalLMConfig + ) -> int: + """ + Estimate the maximum number of KV cache blocks that can be allocated. + + if all of the layers are full attention, the dram_per_block can be calculated simply as follows: + num_blocks = available_dram // dram_per_block + + However, if the model contains a mix of full attention and sliding window attention layers, + we need to consider the memory occupied by the sliding window attention layers first, + since their memory usage is constant regardless of the number of blocks. + num_blocks = (available_dram - swa_kv_nbytes) // dram_per_block + + """ + + def get_dram_per_block(seq_len: int, num_key_value_heads: int, tensor_parallel_size: int) -> int: + nbytes_per_param = 2 # Assuming kv-cache is always not quantized + dram_per_block = ( + seq_len + * align(head_dim, 64) + * math.ceil(num_key_value_heads / tensor_parallel_size) + * nbytes_per_param * tensor_parallel_size + * 2 + ) # *2 for key and value + + return dram_per_block + + num_attention_heads = getattr(model_config, "n_head", None) or getattr(model_config, "num_attention_heads") + head_dim = getattr(model_config, "head_dim", None) or model_config.hidden_size // num_attention_heads + num_hidden_layers = getattr(model_config, "n_layer", None) or getattr(model_config, "num_hidden_layers") + num_key_value_heads = getattr(model_config, "num_key_value_heads", None) or num_attention_heads + tensor_parallel_size = rbln_config.tensor_parallel_size or 1 + + # Consider layer types if available + # If layer types are not found, assume all layers are full attention + layer_types = getattr(model_config, "layer_types", None) + if layer_types: + layer_types_dict = Counter(layer_types) + num_full_attention = layer_types_dict.pop("full_attention", 0) + num_sliding_window_attention = layer_types_dict.pop("sliding_attention", 0) + if len(layer_types_dict) > 0: + raise ValueError(f"Unknown layer types found in the config: {layer_types_dict.keys()}") + + else: + num_full_attention = num_hidden_layers + num_sliding_window_attention = 0 + + # Reduce available DRAM by sliding window attention kv-cache + # Since memory occupation of swa layer is constant regardless of num_blocks + swa_kv_nbytes = 0 + if num_sliding_window_attention > 0: + sliding_window = getattr(model_config, "sliding_window", None) + if sliding_window is None: + logger.warning( + "`sliding_window` is not found in the config while `sliding_attention` layers are present. " + "Assuming maximum sliding window size for estimation." 
+ ) + sliding_window = rbln_config.kvcache_block_size + + swa_kv_nbytes = num_sliding_window_attention * get_dram_per_block( + seq_len=sliding_window, + num_key_value_heads=num_key_value_heads, + tensor_parallel_size=tensor_parallel_size, ) - kernel_size = layer_nbytes + lm_heads_nbytes - elif n_model_params is not None: - raise ValueError("Both `n_model_params` and `kernel_size` cannot be specified.") - available_dram -= kernel_size + available_dram -= swa_kv_nbytes - if buffer is None: - # TODO: Accurate buffer estimation - buffer_per_runtime_per_core = 2**28 # 256MB per runtime - buffer_per_core = buffer_per_runtime_per_core * num_runtimes # 1 for prefill, 1 for decoder - buffer = buffer_per_core * tensor_parallel_size - available_dram -= buffer + dram_per_block = num_full_attention * get_dram_per_block( + seq_len=rbln_config.kvcache_block_size, + num_key_value_heads=num_key_value_heads, + tensor_parallel_size=tensor_parallel_size, + ) - b = kvcache_block_size * align(head_dim, 64) * math.ceil(num_key_value_heads / tensor_parallel_size) * 2 - c = num_layers * 2 * tensor_parallel_size - k = available_dram / c - max_n_blocks = math.floor(2**21 / b * math.floor((k - 1) / 2**21)) + if dram_per_block == 0: + raise ValueError("DRAM per block is calculated as zero, cannot estimate maximum number of blocks.") + max_n_blocks = available_dram // dram_per_block return max_n_blocks @classmethod def maybe_suggest_kvcache_num_blocks( cls, - compiled_models: Dict[str, "RBLNCompiledModel"], + compiled_models: Dict[str, "rebel.RBLNCompiledModel"], model_config: "PretrainedConfig", rbln_config: RBLNDecoderOnlyModelForCausalLMConfig, ) -> None: - # Get the actual memory allocation of each node by key - alloc_memory_per_node_by_key: Dict[str, List[int]] = compiled_models["prefill"].get_alloc_per_node_by_key() - alloc_memory_by_key: Dict[str, int] = { - key: sum(memory_per_node) for key, memory_per_node in alloc_memory_per_node_by_key.items() - } - for batch_size in rbln_config.decoder_batch_sizes: - for key, memory_per_node in ( - compiled_models[f"decoder_batch_{batch_size}"].get_alloc_per_node_by_key().items() - ): - alloc_memory_by_key[key] += sum(memory_per_node) - alloc_memory_by_key.pop("PortRecur", None) # Old compiler's kv-cache Key - alloc_memory_by_key.pop("DramTensor", None) # kv-cache - kernel_size = alloc_memory_by_key.pop("Kernel") # model weight - - # Get the maximum number of blocks that can be allocated - buffer = sum(alloc_memory_by_key.values()) - max_num_blocks = cls.get_maximum_num_blocks( - config=model_config, - tensor_parallel_size=rbln_config.tensor_parallel_size, - kvcache_block_size=rbln_config.kvcache_block_size, - kernel_size=kernel_size, - buffer=buffer, + max_num_blocks = cls.get_maximum_num_blocks_by_compiled_model( + compiled_models=compiled_models, + model_config=model_config, + rbln_config=rbln_config, ) # Since our estimation logic is not always accurate, diff --git a/src/optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py b/src/optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py index 0e31ff189..1bf81d0c2 100644 --- a/src/optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +++ b/src/optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py @@ -281,6 +281,12 @@ def use_lora(self): def can_generate(self) -> bool: return "decode" in self.phases + @property + def nbits_per_param(self) -> int: + if self.quantization: + return self.quantization.nbits_per_param + return 16 + class 
RBLNDecoderOnlyModelForCausalLMConfig(RBLNDecoderOnlyModelConfig): """ diff --git a/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py b/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py index f5fa4fce1..676214c4a 100644 --- a/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +++ b/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py @@ -466,13 +466,8 @@ def _update_attention_config( # Update kvcache_num_blocks based on the attention implementation. if rbln_config.attn_impl == "flash_attn": - estimated_max_num_blocks = cls.get_maximum_num_blocks( - config=model_config, - tensor_parallel_size=rbln_config.tensor_parallel_size or 1, - kvcache_block_size=rbln_config.kvcache_block_size, - nbits_per_param=16 if not rbln_config.quantization else 4, # TODO(jongho): FIX Ad-hoc - n_model_params=sum(p.numel() for p in model.parameters()), - num_runtimes=1 if not rbln_config.can_generate else 1 + len(rbln_config.decoder_batch_sizes), + estimated_max_num_blocks = cls.get_maximum_num_blocks_by_model( + model=model, model_config=model_config, rbln_config=rbln_config ) if rbln_config.kvcache_num_blocks is None: diff --git a/src/optimum/rbln/transformers/utils/rbln_quantization.py b/src/optimum/rbln/transformers/utils/rbln_quantization.py index 76a241d22..1cb7fab31 100644 --- a/src/optimum/rbln/transformers/utils/rbln_quantization.py +++ b/src/optimum/rbln/transformers/utils/rbln_quantization.py @@ -123,6 +123,15 @@ def maybe_reset_quantization_env(self): if self.RBLN_QUANT_BITS_ENV in os.environ: os.environ.pop(self.RBLN_QUANT_BITS_ENV) + @property + def nbits_per_param(self) -> int: + if self.weights in ["int4", "fp4"]: + return 4 + elif self.weights in ["int8", "fp8"]: + return 8 + else: + raise ValueError(f"Invalid weights: {self.weights}") + class QuantizedLayerFactory: def __init__(self, quantization_config: RBLNQuantizationConfig): diff --git a/src/optimum/rbln/utils/runtime_utils.py b/src/optimum/rbln/utils/runtime_utils.py index a09a8d279..5c4d12bfe 100644 --- a/src/optimum/rbln/utils/runtime_utils.py +++ b/src/optimum/rbln/utils/runtime_utils.py @@ -20,6 +20,38 @@ import torch +def get_available_dram(npu: Optional[str] = None) -> int: + """ + Get the available DRAM size of the specified NPU. + + Args: + npu : Optional[str], default=None + The NPU to get the available DRAM size. + If None, the function will attempt to retrieve through `ensure_valid_npu()` + + Returns: + int + The available DRAM size in bytes. + """ + if npu is None: + if not rebel.npu_is_available(0): + raise RuntimeError("No NPU is available to get available DRAM size.") + + npu = rebel.get_npu_name(0) + + if npu.startswith("RBLN-CR"): + # TODO(jongho): Assuming 4 chiplets. 
+ DRAM_NBYTES = 144 * 2**30 + SYS_DRAM_NBYTES = 4 * 2**30 + elif npu.startswith("RBLN-CA"): + DRAM_NBYTES = 16 * 2**30 + SYS_DRAM_NBYTES = 288 * 2**20 + else: + raise ValueError(f"Unknown npu name: {npu}") + + return DRAM_NBYTES - SYS_DRAM_NBYTES + + def normalize_npu(npu: str) -> str: """Normalize the NPU string by removing the form factor.""" match = re.match(r"(RBLN-CA|RBLN-CR)(\d+)", npu) From 60ae60e76f831a9552d496076aa6bf3d194ef897 Mon Sep 17 00:00:00 2001 From: rebel-thkim <157466331+rebel-thkim@users.noreply.github.com> Date: Tue, 18 Nov 2025 19:07:37 +0900 Subject: [PATCH 13/14] refactor: remove unncecessary rbln-config attrs & deprecate_kwargs decorator (#345) Co-authored-by: rebel-jongho Co-authored-by: rebel-kblee <119555851+rebel-kblee@users.noreply.github.com> --- .github/scripts/validate_docstrings.py | 4 +- src/optimum/rbln/configuration_utils.py | 2 +- .../transformers/configuration_generic.py | 27 --- .../rbln/transformers/modeling_generic.py | 59 ----- ...iguration_audio_spectrogram_transformer.py | 30 ++- .../modeling_audio_spectrogram_transformer.py | 52 ++++- .../models/seq2seq/configuration_seq2seq.py | 6 +- .../models/seq2seq/modeling_seq2seq.py | 8 +- src/optimum/rbln/utils/depreacate_utils.py | 16 -- src/optimum/rbln/utils/deprecation.py | 213 ++++++++++++++++++ 10 files changed, 297 insertions(+), 120 deletions(-) delete mode 100644 src/optimum/rbln/utils/depreacate_utils.py create mode 100644 src/optimum/rbln/utils/deprecation.py diff --git a/.github/scripts/validate_docstrings.py b/.github/scripts/validate_docstrings.py index a27571efd..05a335a2b 100644 --- a/.github/scripts/validate_docstrings.py +++ b/.github/scripts/validate_docstrings.py @@ -139,8 +139,8 @@ def main(): module_name = sys.argv[2] if len(sys.argv) > 2 else None if not file_path.exists(): - print(f"❌ File not found: {file_path}") - sys.exit(1) + print(f"⚠️ File not found: {file_path}") + sys.exit(0) print(f"🔍 Testing mkdocstrings parsing: {file_path}") if module_name: diff --git a/src/optimum/rbln/configuration_utils.py b/src/optimum/rbln/configuration_utils.py index 9ca5b282c..665d2f4a5 100644 --- a/src/optimum/rbln/configuration_utils.py +++ b/src/optimum/rbln/configuration_utils.py @@ -24,7 +24,7 @@ from packaging.version import Version from .__version__ import __version__ -from .utils.depreacate_utils import warn_deprecated_npu +from .utils.deprecation import warn_deprecated_npu from .utils.logging import get_logger from .utils.runtime_utils import ContextRblnConfig diff --git a/src/optimum/rbln/transformers/configuration_generic.py b/src/optimum/rbln/transformers/configuration_generic.py index c3a00b067..0859008d6 100644 --- a/src/optimum/rbln/transformers/configuration_generic.py +++ b/src/optimum/rbln/transformers/configuration_generic.py @@ -118,30 +118,3 @@ class RBLNModelForImageClassificationConfig(RBLNImageModelConfig): class RBLNModelForDepthEstimationConfig(RBLNImageModelConfig): pass - - -class RBLNModelForAudioClassificationConfig(RBLNModelConfig): - def __init__( - self, - batch_size: Optional[int] = None, - max_length: Optional[int] = None, - num_mel_bins: Optional[int] = None, - **kwargs: Any, - ): - """ - Args: - batch_size (Optional[int]): The batch size for inference. Defaults to 1. - max_length (Optional[int]): Maximum length of the audio input in time dimension. - num_mel_bins (Optional[int]): Number of Mel frequency bins for audio processing. - kwargs: Additional arguments passed to the parent RBLNModelConfig. 
- - Raises: - ValueError: If batch_size is not a positive integer. - """ - super().__init__(**kwargs) - self.batch_size = batch_size or 1 - if not isinstance(self.batch_size, int) or self.batch_size < 0: - raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}") - - self.max_length = max_length - self.num_mel_bins = num_mel_bins diff --git a/src/optimum/rbln/transformers/modeling_generic.py b/src/optimum/rbln/transformers/modeling_generic.py index f49e89d32..b0e0d726c 100644 --- a/src/optimum/rbln/transformers/modeling_generic.py +++ b/src/optimum/rbln/transformers/modeling_generic.py @@ -26,7 +26,6 @@ from torch import nn from transformers import ( AutoModel, - AutoModelForAudioClassification, AutoModelForDepthEstimation, AutoModelForImageClassification, AutoModelForMaskedLM, @@ -42,7 +41,6 @@ from ..utils.logging import get_logger from .configuration_generic import ( RBLNImageModelConfig, - RBLNModelForAudioClassificationConfig, RBLNTransformerEncoderConfig, ) @@ -280,60 +278,3 @@ def forward(self, *args, **kwargs): return output.predicted_depth return ImageModelWrapper(model, rbln_config).eval() - - -class RBLNModelForAudioClassification(RBLNModel): - """ - This is a generic model class that will be instantiated as one of the model classes of the library (with a audio classification head) when created with the from_pretrained() class method - This model inherits from [`RBLNModel`]. Check the superclass documentation for the generic methods the library implements for all its models. - - A class to convert and run pre-trained transformers based AudioClassification models on RBLN devices. - It implements the methods to convert a pre-trained transformers AudioClassification model into a RBLN transformer model by: - - - transferring the checkpoint weights of the original into an optimized RBLN graph, - - compiling the resulting graph using the RBLN compiler. - - Currently, this model class only supports the 'AST' model from the transformers library. Future updates may include support for additional model types. 
- """ - - auto_model_class = AutoModelForAudioClassification - - @classmethod - def _update_rbln_config( - cls, - preprocessors: "AutoFeatureExtractor" = None, - model: Optional["PreTrainedModel"] = None, - model_config: "PretrainedConfig" = None, - rbln_config: Optional[RBLNModelForAudioClassificationConfig] = None, - ) -> RBLNModelForAudioClassificationConfig: - if rbln_config.num_mel_bins is None: - rbln_config.num_mel_bins = getattr(model_config, "num_mel_bins", None) - if rbln_config.num_mel_bins is None: - for feature_extractor in preprocessors: - if hasattr(feature_extractor, "num_mel_bins"): - rbln_config.num_mel_bins = feature_extractor.num_mel_bins - break - - if rbln_config.num_mel_bins is None: - raise ValueError("`num_mel_bins` should be specified!") - - if rbln_config.max_length is None: - rbln_config.max_length = getattr(model_config, "max_length", None) - for feature_extractor in preprocessors: - if hasattr(feature_extractor, "max_length"): - rbln_config.max_length = feature_extractor.max_length - break - - if rbln_config.max_length is None: - raise ValueError("`max_length` should be specified!") - - input_info = [ - ( - "input_values", - [rbln_config.batch_size, rbln_config.max_length, rbln_config.num_mel_bins], - "float32", - ), - ] - - rbln_config.set_compile_cfgs([RBLNCompileConfig(input_info=input_info)]) - return rbln_config diff --git a/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py b/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py index 0a4d8dec2..5372578cb 100644 --- a/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +++ b/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py @@ -12,10 +12,36 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...configuration_generic import RBLNModelForAudioClassificationConfig +from typing import Any, Optional +from ....configuration_utils import RBLNModelConfig +from ....utils.deprecation import deprecate_kwarg -class RBLNASTForAudioClassificationConfig(RBLNModelForAudioClassificationConfig): + +class RBLNASTForAudioClassificationConfig(RBLNModelConfig): """ Configuration class for RBLNASTForAudioClassification. """ + + @deprecate_kwarg(old_name="num_mel_bins", version="0.10.0") + def __init__( + self, + batch_size: Optional[int] = None, + max_length: Optional[int] = None, + **kwargs: Any, + ): + """ + Args: + batch_size (Optional[int]): The batch size for inference. Defaults to 1. + max_length (Optional[int]): Maximum length of the audio input in time dimension. + kwargs: Additional arguments passed to the parent RBLNModelConfig. + + Raises: + ValueError: If batch_size is not a positive integer. 
+ """ + super().__init__(**kwargs) + self.batch_size = batch_size or 1 + if not isinstance(self.batch_size, int) or self.batch_size < 0: + raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}") + + self.max_length = max_length diff --git a/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py b/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py index f6b7db854..3cd24e9ff 100644 --- a/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +++ b/src/optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py @@ -12,10 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...modeling_generic import RBLNModelForAudioClassification +from typing import TYPE_CHECKING, Optional +from transformers import AutoModelForAudioClassification +from transformers.modeling_outputs import SequenceClassifierOutput -class RBLNASTForAudioClassification(RBLNModelForAudioClassification): +from ....configuration_utils import RBLNCompileConfig +from ....modeling import RBLNModel +from .configuration_audio_spectrogram_transformer import RBLNASTForAudioClassificationConfig + + +if TYPE_CHECKING: + from transformers import AutoFeatureExtractor, PretrainedConfig, PreTrainedModel + + +class RBLNASTForAudioClassification(RBLNModel): """ Audio Spectrogram Transformer model with an audio classification head on top (a linear layer on top of the pooled output) e.g. for datasets like AudioSet, Speech Commands v2. This model inherits from [`RBLNModelForAudioClassification`]. Check the superclass documentation for the generic methods the library implements for all its models. @@ -26,3 +37,40 @@ class RBLNASTForAudioClassification(RBLNModelForAudioClassification): - transferring the checkpoint weights of the original into an optimized RBLN graph, - compiling the resulting graph using the RBLN Compiler. 
""" + + auto_model_class = AutoModelForAudioClassification + + @classmethod + def _update_rbln_config( + cls, + preprocessors: "AutoFeatureExtractor" = None, + model: Optional["PreTrainedModel"] = None, + model_config: "PretrainedConfig" = None, + rbln_config: Optional[RBLNASTForAudioClassificationConfig] = None, + ) -> RBLNASTForAudioClassificationConfig: + num_mel_bins = getattr(model_config, "num_mel_bins", None) + + if rbln_config.max_length is None: + rbln_config.max_length = getattr(model_config, "max_length", None) + for feature_extractor in preprocessors: + if hasattr(feature_extractor, "max_length"): + rbln_config.max_length = feature_extractor.max_length + break + + if rbln_config.max_length is None: + raise ValueError("`max_length` should be specified!") + + input_info = [ + ( + "input_values", + [rbln_config.batch_size, rbln_config.max_length, num_mel_bins], + "float32", + ), + ] + + rbln_config.set_compile_cfgs([RBLNCompileConfig(input_info=input_info)]) + return rbln_config + + def _prepare_output(self, output, return_dict): + # ignore return_dict as transformers doesn't use it for this model + return SequenceClassifierOutput(logits=output) diff --git a/src/optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py b/src/optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py index 57a3e7f7b..eee325bd3 100644 --- a/src/optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +++ b/src/optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py @@ -15,6 +15,7 @@ from typing import Any, Optional from ....configuration_utils import RBLNModelConfig +from ....utils.deprecation import deprecate_kwarg from ....utils.logging import get_logger @@ -24,13 +25,13 @@ class RBLNModelForSeq2SeqLMConfig(RBLNModelConfig): support_paged_attention = None + @deprecate_kwarg(old_name="pad_token_id", version="0.10.0") def __init__( self, batch_size: Optional[int] = None, enc_max_seq_len: Optional[int] = None, dec_max_seq_len: Optional[int] = None, use_attention_mask: Optional[bool] = None, - pad_token_id: Optional[int] = None, kvcache_num_blocks: Optional[int] = None, kvcache_block_size: Optional[int] = None, **kwargs: Any, @@ -41,7 +42,6 @@ def __init__( enc_max_seq_len (Optional[int]): Maximum sequence length for the encoder. dec_max_seq_len (Optional[int]): Maximum sequence length for the decoder. use_attention_mask (Optional[bool]): Whether to use attention masks during inference. - pad_token_id (Optional[int]): The ID of the padding token in the vocabulary. kvcache_num_blocks (Optional[int]): The total number of blocks to allocate for the PagedAttention KV cache for the SelfAttention. Defaults to batch_size. 
kvcache_block_size (Optional[int]): Sets the size (in number of tokens) of each block @@ -61,8 +61,6 @@ def __init__( self.use_attention_mask = use_attention_mask - self.pad_token_id = pad_token_id - if self.support_paged_attention: self.kvcache_num_blocks = kvcache_num_blocks self.kvcache_block_size = kvcache_block_size diff --git a/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py b/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py index a5c5391ad..8423f1c60 100644 --- a/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +++ b/src/optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py @@ -221,12 +221,6 @@ def _update_rbln_config( model_config, "max_position_embeddings", None ) - pad_token_id = getattr(model_config, "pad_token_id", None) - pad_token_id = pad_token_id or getattr(model_config, "bos_token_id", None) - pad_token_id = pad_token_id or getattr(model_config, "eos_token_id", None) - pad_token_id = pad_token_id or -1 - rbln_config.pad_token_id = pad_token_id - if rbln_config.enc_max_seq_len is None: enc_max_seq_len = max_position_embeddings for tokenizer in preprocessors: @@ -432,7 +426,7 @@ def _prepare_encoder_decoder_kwargs_for_generation( inputs_tensor = torch.nn.functional.pad( inputs_tensor, (0, self.rbln_config.enc_max_seq_len - input_len), - value=self.rbln_config.pad_token_id, + value=self.config.pad_token_id, ) model_kwargs["attention_mask"] = torch.nn.functional.pad( model_kwargs["attention_mask"], (0, self.rbln_config.enc_max_seq_len - input_len) diff --git a/src/optimum/rbln/utils/depreacate_utils.py b/src/optimum/rbln/utils/depreacate_utils.py deleted file mode 100644 index 053648f28..000000000 --- a/src/optimum/rbln/utils/depreacate_utils.py +++ /dev/null @@ -1,16 +0,0 @@ -from typing import Optional - -import rebel - -from .logging import get_logger - - -logger = get_logger(__name__) - - -def warn_deprecated_npu(npu: Optional[str] = None): - npu = npu or rebel.get_npu_name() - if npu == "RBLN-CA02": - logger.warning_once( - "Support for the RBLN-CA02 device is provided only up to optimum-rbln v0.8.0 and has reached end of life.", - ) diff --git a/src/optimum/rbln/utils/deprecation.py b/src/optimum/rbln/utils/deprecation.py new file mode 100644 index 000000000..f0a958c2b --- /dev/null +++ b/src/optimum/rbln/utils/deprecation.py @@ -0,0 +1,213 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# Copyright 2025 Rebellions Inc. All rights reserved. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ********************************************************************************** +# * NOTE: This file has been modified from its original version in * +# * the Hugging Face transformers library. 
* +# * Original source: * +# * https://github.com/huggingface/transformers/blob/v4.57.1/src/transformers/utils/deprecation.py +# ********************************************************************************** + +import inspect +from enum import Enum +from functools import wraps +from typing import Callable, Optional + +import packaging.version + +from ..__version__ import __version__ +from .logging import get_logger + + +logger = get_logger(__name__) + + +def warn_deprecated_npu(npu: Optional[str] = None): + import rebel + + npu = npu or rebel.get_npu_name() + if npu == "RBLN-CA02": + logger.warning_once( + "Support for the RBLN-CA02 device is provided only up to optimum-rbln v0.8.0 and has reached end of life.", + ) + + +class Action(Enum): + NONE = "none" + NOTIFY = "notify" + RAISE = "raise" + + +# Scenario Table for Deprecation Strategy Example +# Assume that current version is v0.9.6 and the deprecated version is v0.10.0 +# |--------------------|----------------|----------------|---------------------------------------------|--------------------------------------------------------------------------------------|----------------------------------------------------------------------| +# | Type | v0.9.6 (as_is) | v0.9.6 (to_be) | v0.9.6 Patch | v0.9.7 Action | v0.10.0+ Action | +# |--------------------|----------------|----------------|---------------------------------------------|--------------------------------------------------------------------------------------|----------------------------------------------------------------------| +# | Modify (Key Name) | a: bool | a': bool | Add a', Keep a | 1. Only 'a' provided: replace a -> a' & future warning | In v0.10.0, raise error once, then remove decorator. | +# | | | | | 2. Both 'a' & 'a'' provided: ignore 'a' value & future warning | | +# |--------------------|----------------|----------------|---------------------------------------------|--------------------------------------------------------------------------------------|----------------------------------------------------------------------| +# | Modify (Value Type)| b: bool | b: str | b: Union[str, bool] | 'bool' value provided for 'b': replace with corresponding 'str' & future warning | In v0.10.0, raise error once, then remove decorator. | +# | | | | | | | +# |--------------------|----------------|----------------|---------------------------------------------|--------------------------------------------------------------------------------------|----------------------------------------------------------------------| +# | Deletion | c | - | Delete c or Keep c (flexible) | ignore c & future warning | In v0.10.0, raise error once, then remove decorator. 
|
+# |                    |                |                |                                             |                                                                                        |                                                                      |
+# |--------------------|----------------|----------------|---------------------------------------------|--------------------------------------------------------------------------------------|----------------------------------------------------------------------|
+# | Addition           | -              | d              | Add d, set default_value for d              | No action needed as default value is set                                              | Keep default value                                                   |
+# |--------------------|----------------|----------------|---------------------------------------------|--------------------------------------------------------------------------------------|----------------------------------------------------------------------|
+
+
+def deprecate_kwarg(
+    old_name: str,
+    version: str,
+    new_name: Optional[str] = None,
+    deprecated_type: Optional[type] = None,
+    value_replacer: Optional[Callable] = None,
+    raise_if_greater_or_equal_version: bool = True,
+    raise_if_both_names: bool = False,
+    additional_message: Optional[str] = None,
+):
+    """
+    Function or method decorator to notify users about deprecated keyword arguments, replacing them with a new name if specified,
+    or handling deprecated value types.
+
+    This decorator allows you to:
+    - Notify users when a keyword argument name is deprecated (Scenario 'a', 'c').
+    - Notify users when a specific value type for an argument is deprecated (Scenario 'b').
+    - Automatically replace deprecated keyword arguments with new ones.
+    - Automatically replace deprecated values with new ones using a replacer function.
+    - Raise an error if deprecated arguments are used, depending on the specified conditions.
+
+    By default, the decorator notifies the user about the deprecated argument while `optimum.rbln.__version__` is lower than the
+    specified `version`. Once the installed version reaches `version`, using the deprecated argument raises a `ValueError` instead,
+    unless `raise_if_greater_or_equal_version=False` is passed.
+
+    Parameters:
+        old_name (`str`):
+            Name of the deprecated keyword argument, or the argument with a deprecated value type.
+        version (`str`):
+            The version in which the keyword argument or value type was (or will be) deprecated.
+        new_name (`Optional[str]`, *optional*):
+            The new name for the deprecated keyword argument. If specified, the deprecated keyword argument will be replaced with this new name (Scenario 'a').
+        deprecated_type (`type`, *optional*):
+            The deprecated type for the keyword argument specified by `old_name` (Scenario 'b').
+            If this is set, `new_name` should typically be `None`.
+        value_replacer (`Callable`, *optional*):
+            A function that takes the old (deprecated type) value and returns a new value (Scenario 'b').
+            Used in conjunction with `deprecated_type`. If provided, the value will be automatically converted.
+        raise_if_greater_or_equal_version (`bool`, *optional*, defaults to `True`):
+            Whether to raise `ValueError` if the current `optimum-rbln` version is greater than or equal to the deprecated version.
+        raise_if_both_names (`bool`, *optional*, defaults to `False`):
+            Whether to raise `ValueError` if both deprecated and new keyword arguments are set (only for Scenario 'a').
+        additional_message (`Optional[str]`, *optional*):
+            An additional message to append to the default deprecation message.
+
+    Raises:
+        ValueError:
+            If raise_if_greater_or_equal_version is True and the current version is greater than or equal to the deprecated version, or if raise_if_both_names is True and both old and new keyword arguments are provided.
+
+    Returns:
+        Callable:
+            A wrapped function that handles the deprecated keyword arguments according to the specified parameters.
+    """
+
+    deprecated_version = packaging.version.parse(version)
+    current_version = packaging.version.parse(__version__)
+    is_greater_or_equal_version = current_version >= deprecated_version
+
+    if is_greater_or_equal_version:
+        version_message = f"and removed starting from version {version}"
+    else:
+        version_message = f"and will be removed in version {version}"
+
+    def wrapper(func):
+        # Required for better warning message
+        sig = inspect.signature(func)
+        function_named_args = set(sig.parameters.keys())
+        is_instance_method = "self" in function_named_args
+        is_class_method = "cls" in function_named_args
+
+        @wraps(func)
+        def wrapped_func(*args, **kwargs):
+            # Get class + function name (just for better warning message)
+            func_name = func.__name__
+            if is_instance_method:
+                func_name = f"{args[0].__class__.__name__}.{func_name}"
+            elif is_class_method:
+                func_name = f"{args[0].__name__}.{func_name}"
+
+            minimum_action = Action.NONE
+            message = None
+
+            # Scenario A: Rename (e.g., a -> a')
+            if new_name is not None:
+                if old_name in kwargs and new_name in kwargs:
+                    minimum_action = Action.RAISE if raise_if_both_names else Action.NOTIFY
+                    message = f"Both `{old_name}` and `{new_name}` are set for `{func_name}`. Using `{new_name}={kwargs[new_name]}` and ignoring deprecated `{old_name}={kwargs[old_name]}`."
+                    kwargs.pop(old_name)
+
+                elif old_name in kwargs and new_name not in kwargs:
+                    minimum_action = Action.NOTIFY
+                    message = (
+                        f"`{old_name}` is deprecated {version_message} for `{func_name}`. Use `{new_name}` instead."
+                    )
+                    kwargs[new_name] = kwargs.pop(old_name)
+
+            # Scenario B: Value Type Change (e.g., b: bool -> str)
+            if deprecated_type is not None:
+                key_to_check = old_name if new_name is None else new_name  # For Scenario A + B Mixed
+                if key_to_check in kwargs and isinstance(kwargs[key_to_check], deprecated_type):
+                    minimum_action = Action.NOTIFY
+                    old_value = kwargs[key_to_check]
+                    message = f"Using type `{deprecated_type.__name__}` for argument `{key_to_check}` in `{func_name}` is deprecated {version_message}."
+
+                    if value_replacer:
+                        try:
+                            new_value = value_replacer(old_value)
+                            kwargs[key_to_check] = new_value
+                            message += f" Value `{old_value}` has been automatically replaced with `{new_value}`."
+                        except Exception as e:
+                            logger.error(f"Error during deprecated value replacement for {key_to_check}: {e}")
+                            message += f" Automatic replacement failed: {e}. Passing original value."
+                    else:
+                        raise ValueError(
+                            f"value_replacer should be provided when deprecated_type is set for {key_to_check} in {func_name}"
+                        )
+
+            # Scenario C: Deletion (e.g., c)
+            if old_name in kwargs and new_name is None and deprecated_type is None:
+                minimum_action = Action.NOTIFY
+                message = f"`{old_name}` is deprecated {version_message} for `{func_name}`."
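+                # Scenario C has no replacement target: drop the deprecated kwarg so the wrapped function never receives it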
+ kwargs.pop(old_name) + + if message is not None and additional_message is not None: + message = f"{message} {additional_message}" + + # update minimum_action if argument is ALREADY deprecated (current version >= deprecated version) + if is_greater_or_equal_version: + # change to NOTIFY -> RAISE in case we want to raise error for already deprecated arguments + if raise_if_greater_or_equal_version: + minimum_action = Action.RAISE + + # raise error or notify user + if minimum_action == Action.RAISE: + raise ValueError(message) + elif minimum_action == Action.NOTIFY: + # DeprecationWarning is ignored by default, so we use FutureWarning instead + logger.warning(message, stacklevel=2) + + return func(*args, **kwargs) + + return wrapped_func + + return wrapper From f08e00da70aa8da6cf19796469c78cb4ffba0e75 Mon Sep 17 00:00:00 2001 From: rebel-jongho <112920593+rebel-jongho@users.noreply.github.com> Date: Wed, 19 Nov 2025 18:22:48 +0900 Subject: [PATCH 14/14] other(ci): remove old workflows (#365) --- .github/scripts/auto_code_review.py | 129 ------------------- .github/scripts/validate_pr_checklist.py | 86 ------------- .github/workflows/auto_code_review.yml | 72 ----------- .github/workflows/pr_checklist_validator.yml | 33 ----- 4 files changed, 320 deletions(-) delete mode 100644 .github/scripts/auto_code_review.py delete mode 100644 .github/scripts/validate_pr_checklist.py delete mode 100644 .github/workflows/auto_code_review.yml delete mode 100644 .github/workflows/pr_checklist_validator.yml diff --git a/.github/scripts/auto_code_review.py b/.github/scripts/auto_code_review.py deleted file mode 100644 index 3af956c54..000000000 --- a/.github/scripts/auto_code_review.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright 2025 Rebellions Inc. All rights reserved. - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys - -import google.generativeai as genai -import requests -from github import Github - - -model_name = os.environ["GOOGLE_MODEL_ID"] -genai.configure(api_key=os.environ["GOOGLE_API_KEY"]) -max_context_token = 500000 - - -def get_pr_diff(): - api_url = f"https://api.github.com/repos/{os.getenv('GITHUB_REPOSITORY')}/pulls/{os.getenv('PR_NUMBER')}" - headers = { - "Authorization": f"token {os.getenv('GITHUB_TOKEN')}", - "Accept": "application/vnd.github.v3.diff", - } - response = requests.get(api_url, headers=headers) - return response.text if response.status_code == 200 else "" - - -def get_prompt(diff, pr): - system_prompt = """You are an experienced software engineer specializing in code reviews for deep learning libraries. Your task is to review code changes and related pull request (PR) information for `optimum-rbln`, a Python library that optimizes HuggingFace models for execution on RBLN NPUs. - -Focus on providing actionable and constructive feedback. 
Don't make generalized suggestions.""" - - prompt = f""" -Review the following code changes(GIT DIFF) along with the pull request (PR) details and provide feedback: - - - title : {pr.title} - body : -{pr.body[: pr.body.find("## Related Issues")] if pr.body is not None else ""} - - - - -{diff} - -""" - return system_prompt, prompt - - -def review_code(system_prompt, prompt): - model = genai.GenerativeModel(model_name, system_instruction=system_prompt) - response = model.generate_content(prompt) - print(prompt) - return response.text - - -def remove_file_from_diff(diff_content, file_to_remove): - lines = diff_content.splitlines() - result = [] - skip = False - file_header = f"diff --git a/{file_to_remove} b/{file_to_remove}" - - for line in lines: - if line.startswith("diff --git"): - if line == file_header: - skip = True - else: - skip = False - - if not skip: - result.append(line) - - return "\n".join(result) - - -def main(): - github_token = os.getenv("GITHUB_TOKEN") - pr_number = os.getenv("PR_NUMBER") - if not pr_number: - pr_number = os.getenv("INPUT_PR_NUMBER") - - if not all([github_token, pr_number]): - print("Missing required environment variables") - sys.exit(1) - - g = Github(github_token) - repo = g.get_repo(os.getenv("GITHUB_REPOSITORY")) - pr = repo.get_pull(int(pr_number)) - - # Get PR diff - diff = get_pr_diff() - diff = remove_file_from_diff(diff, "uv.lock") - - # Check diff is available - if len(diff) == 0: - print("Failed to get the contents of PR Diff. Skipping review.") - pr.create_issue_comment("Auto Code Review skipped: Failed to get the diff.") - sys.exit(0) - - # check token count - system_prompt, prompt = get_prompt(diff, pr) - model = genai.GenerativeModel(model_name=model_name, system_instruction=system_prompt) - num_tokens = model.count_tokens(prompt).total_tokens - if num_tokens > max_context_token: - msg = f"Diff ({len(diff)}) exceeds maximum allowed tokens ({max_context_token}) > ({num_tokens}). Skipping review." - print(msg) - pr.create_issue_comment(msg) - sys.exit(0) - - # Get Auto review - review = review_code(system_prompt, prompt) - - # Post comment on PR - pr.create_issue_comment(f"""# Auto Code Review by {model_name} -\n\n{review}""") - - -if __name__ == "__main__": - main() diff --git a/.github/scripts/validate_pr_checklist.py b/.github/scripts/validate_pr_checklist.py deleted file mode 100644 index 156082c67..000000000 --- a/.github/scripts/validate_pr_checklist.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2025 Rebellions Inc. All rights reserved. - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import re -import sys - -from github import Auth, Github - - -def read_checklist_from_template(): - template_path = ".github/pull_request_template.md" - checklist_items = [] - - with open(template_path, "r") as file: - content = file.read() - # Find the checklist section - checklist_section = re.search(r"## Checklist\n(.*?)\n\n", content, re.DOTALL) - if checklist_section: - checklist = checklist_section.group(1) - # Extract individual checklist items - checklist_items = re.findall(r"- \[ \] (.*)", checklist) - - return checklist_items - - -def is_release_pr(pr): - """Check if this is a release PR (dev -> main)""" - return pr.base.ref == "main" and pr.head.ref == "dev" - - -def validate_checklist(body, expected_items): - for item in expected_items: - if f"- [x] {item}" not in body: - print(f"item : {item}") - return False - return True - - -def main(): - github_token = os.getenv("GITHUB_TOKEN") - pr_number = os.getenv("PR_NUMBER") - repo_name = os.getenv("GITHUB_REPOSITORY") - - if not all([github_token, pr_number, repo_name]): - print("Missing required environment variables") - sys.exit(1) - - g = Github(auth=Auth.Token(github_token)) - repo = g.get_repo(repo_name) - pr = repo.get_pull(int(pr_number)) - - # Skip checklist validation for release PRs (dev -> main) - if is_release_pr(pr): - print("This is a release PR (dev -> main). Skipping checklist validation.") - print("Release PRs follow a different approval process.") - sys.exit(0) - - expected_items = read_checklist_from_template() - - if not expected_items: - print("No checklist items found in the PR template.") - sys.exit(1) - - if validate_checklist(pr.body, expected_items): - print("All checklist items are marked. PR is valid.") - sys.exit(0) - else: - print(f"expected items : {expected_items}") - print("Not all checklist items are marked. 
PR is invalid.") - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/.github/workflows/auto_code_review.yml b/.github/workflows/auto_code_review.yml deleted file mode 100644 index 94b580688..000000000 --- a/.github/workflows/auto_code_review.yml +++ /dev/null @@ -1,72 +0,0 @@ -name: Auto Code Review - -on: - pull_request: - issue_comment: - types: [created] - push: - branches: - - '**' - -env: - GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} - GOOGLE_MODEL_ID: ${{ vars.GOOGLE_MODEL_ID }} - -jobs: - auto-review: - runs-on: ubuntu-latest-rbln - steps: - - name: Checkout repository - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - - name: Check if review should run - id: check - run: | - PR_NUMBER="" - SHOULD_RUN="false" - - # For push events, check commit message - if [[ "${{ github.event_name }}" == "push" ]]; then - if [[ "${{ contains(github.event.head_commit.message, '[autoreview]') }}" == "true" ]]; then - SHOULD_RUN="true" - # Use GitHub CLI to find PR associated with this commit - PR_NUMBER=$(gh pr list --head ${{ github.ref_name }} --json number --jq '.[0].number') - fi - - # For PR events - elif [[ "${{ github.event_name }}" == "pull_request" ]]; then - PR_NUMBER="${{ github.event.pull_request.number }}" - - # For comment events, check if it's "/autoreview" - elif [[ "${{ github.event_name }}" == "issue_comment" ]]; then - if [[ "${{ github.event.issue.pull_request != null }}" == "true" && "${{ contains(github.event.comment.body, '/autoreview') }}" == "true" ]]; then - SHOULD_RUN="true" - PR_NUMBER="${{ github.event.issue.number }}" - fi - fi - - echo "should_run=$SHOULD_RUN" >> $GITHUB_OUTPUT - echo "pr_number=$PR_NUMBER" >> $GITHUB_OUTPUT - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Set up Python - if: steps.check.outputs.should_run == 'true' && steps.check.outputs.pr_number != '' - uses: actions/setup-python@v4 - with: - python-version: '3.x' - - - name: Install dependencies - if: steps.check.outputs.should_run == 'true' && steps.check.outputs.pr_number != '' - run: | - python -m pip install --upgrade pip - pip install google-generativeai PyGithub - - - name: Run Auto Code Review - if: steps.check.outputs.should_run == 'true' && steps.check.outputs.pr_number != '' - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - PR_NUMBER: ${{ steps.check.outputs.pr_number }} - run: python .github/scripts/auto_code_review.py \ No newline at end of file diff --git a/.github/workflows/pr_checklist_validator.yml b/.github/workflows/pr_checklist_validator.yml deleted file mode 100644 index 80903f6f5..000000000 --- a/.github/workflows/pr_checklist_validator.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: PR Checklist Validator - -on: - pull_request: - branches: [main, dev] - paths: - - "src/**/*.py" - - "tests/**/*.py" - - "examples/**/*.py" - -jobs: - validate-pr-checklist: - runs-on: ubuntu-latest-rbln - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.x' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install PyGithub - - - name: Validate PR checklist - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - PR_NUMBER: ${{ github.event.pull_request.number }} - run: | - python .github/scripts/validate_pr_checklist.py \ No newline at end of file
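
For context on how the `deprecate_kwarg` decorator introduced in the first patch behaves at call time, the following is a minimal usage sketch. `MyConfig` and its arguments are hypothetical illustrations rather than optimum-rbln classes, and the sketch assumes an installed optimum-rbln version below 0.10.0, so the decorator only warns instead of raising.

    from optimum.rbln.utils.deprecation import deprecate_kwarg


    class MyConfig:
        # Scenario A: "old_arg" has been renamed to "new_arg"; removal is planned for v0.10.0.
        @deprecate_kwarg(old_name="old_arg", new_name="new_arg", version="0.10.0")
        def __init__(self, new_arg=None, **kwargs):
            self.new_arg = new_arg

        # Scenario C: "legacy_flag" is dropped with no replacement, mirroring how
        # "pad_token_id" is retired from RBLNModelForSeq2SeqLMConfig in this series.
        @deprecate_kwarg(old_name="legacy_flag", version="0.10.0")
        def configure(self, **kwargs):
            return kwargs


    cfg = MyConfig(old_arg=8)        # logs a warning and forwards the value as new_arg
    assert cfg.new_arg == 8
    cfg.configure(legacy_flag=True)  # logs a warning and silently drops legacy_flag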