Merge pull request #2 from EduardDurech/v0.8.2

AllenHaoHuang · web-flow · commit 88fc1a5778f7 · 2025-04-07T06:31:40.000+12:00
v0.8.2 vLLM + SwissLM
diff --git a/tests/models/decoder_only/language/test_models.py b/tests/models/decoder_only/language/test_models.py
@@ -83,6 +83,7 @@
             "ehristoforu/Falcon3-MoE-2x7B-Insruct",  # mixtral
             marks=[pytest.mark.cpu_model],
         )
+        pytest.param("Saesara/swissai"),  # swissai
     ])
 @pytest.mark.parametrize("dtype", ["half"])
 @pytest.mark.parametrize("max_tokens", [32])
diff --git a/tests/models/registry.py b/tests/models/registry.py
@@ -209,6 +209,7 @@ def check_available_online(
     "StableLmForCausalLM": _HfExamplesInfo("stabilityai/stablelm-3b-4e1t"),
     "Starcoder2ForCausalLM": _HfExamplesInfo("bigcode/starcoder2-3b"),
     "SolarForCausalLM": _HfExamplesInfo("upstage/solar-pro-preview-instruct"),
+    "SwissAIForCausalLM": _HfExamplesInfo("Saesara/swissai"), # TODO test 1.5B model
     "TeleChat2ForCausalLM": _HfExamplesInfo("Tele-AI/TeleChat2-3B",
                                             trust_remote_code=True),
     "TeleFLMForCausalLM": _HfExamplesInfo("CofeAI/FLM-2-52B-Instruct-2407",
diff --git a/vllm/model_executor/layers/activation.py b/vllm/model_executor/layers/activation.py
@@ -15,6 +15,76 @@
 from vllm.utils import LazyDict
 
 
+@CustomOp.register("xielu")
+class XIELU(CustomOp):
+    """Applies the xIELU activation function element-wise.
+
+    With ``alpha_p = softplus(self.alpha_p)`` and
+    ``alpha_n = beta + softplus(self.alpha_n)``, the output is::
+
+        x > 0:   alpha_p * x * x + beta * x
+        x <= 0:  alpha_n * expm1(min(x, eps)) - alpha_n * x + beta * x
+
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    def __init__(self,
+                 alpha_p_init: float = 0.8,
+                 alpha_n_init: float = 0.8,
+                 beta: float = 0.5,
+                 eps: float = -1e-6) -> None:
+        """Create the learnable coefficients and the clamp constant.
+
+        Args:
+            alpha_p_init: Initial value of ``softplus(self.alpha_p)``.
+            alpha_n_init: Initial value of
+                ``beta + softplus(self.alpha_n)``; must exceed ``beta``.
+            beta: Fixed linear coefficient used by both branches.
+            eps: Upper bound applied to ``x`` before ``expm1`` on the
+                non-positive branch.
+        """
+        super().__init__()
+        # Stored in inverse-softplus space so that the softplus in
+        # forward_native() recovers the requested initial values.
+        self.alpha_p = nn.Parameter(
+            torch.log(torch.exp(torch.tensor(alpha_p_init)) - 1.0).unsqueeze(0))
+        self.alpha_n = nn.Parameter(
+            torch.log(torch.exp(torch.tensor(alpha_n_init - beta)) -
+                      1.0).unsqueeze(0))
+        self.beta = beta
+        # Registered as a non-persistent buffer rather than a tensor pinned
+        # to 'cuda': construction no longer fails on hosts without CUDA
+        # (the CPU path below), the value follows Module.to(), and it stays
+        # out of the state dict.
+        self.register_buffer("eps",
+                             torch.tensor(eps, dtype=torch.bfloat16),
+                             persistent=False)
+
+        if current_platform.is_cuda_alike() or current_platform.is_cpu():
+            # TODO CUDA implementation under development, using
+            # forward_native for now
+            self._forward_method = self.forward_native
+
+    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
+        """Compute xIELU with plain PyTorch ops."""
+        # TODO optimize to precompute
+        alpha_p = F.softplus(self.alpha_p)
+        alpha_n = self.beta + F.softplus(self.alpha_n)
+        return torch.where(
+            x > 0,
+            alpha_p * x * x + self.beta * x,
+            alpha_n * torch.expm1(torch.min(x, self.eps)) - alpha_n * x +
+            self.beta * x,
+        )
+
+    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
+        """Delegate to forward_native() until a CUDA kernel exists."""
+        # Previously a bare `return`, which silently produced None.
+        return self.forward_native(x)
+
+
 @CustomOp.register("fatrelu_and_mul")
 class FatreluAndMul(CustomOp):
     """An activation function for FATReLU.
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
@@ -105,6 +105,7 @@
     "StableLmForCausalLM": ("stablelm", "StablelmForCausalLM"),
     "Starcoder2ForCausalLM": ("starcoder2", "Starcoder2ForCausalLM"),
     "SolarForCausalLM": ("solar", "SolarForCausalLM"),
+    "SwissAIForCausalLM": ("swissai", "SwissAIForCausalLM"),
     "TeleChat2ForCausalLM": ("telechat2", "TeleChat2ForCausalLM"),
     "TeleFLMForCausalLM": ("teleflm", "TeleFLMForCausalLM"),
     "XverseForCausalLM": ("llama", "LlamaForCausalLM"),
diff --git a/vllm/model_executor/models/swissai.py b/vllm/model_executor/models/swissai.py

Original file line number	Diff line number	Diff line change
`@@ -83,6 +83,7 @@`
`83`	`83`	`"ehristoforu/Falcon3-MoE-2x7B-Insruct", # mixtral`
`84`	`84`	`marks=[pytest.mark.cpu_model],`
`85`	`85`	`)`
	`86`	`+ pytest.param("Saesara/swissai"), # swissai`
`86`	`87`	`])`
`87`	`88`	`@pytest.mark.parametrize("dtype", ["half"])`
`88`	`89`	`@pytest.mark.parametrize("max_tokens", [32])`