diff --git a/.pylintrc b/.pylintrc
index 41f7e4e73..5e9f356b9 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -475,7 +475,7 @@ notes-rgx=
 [REFACTORING]
 
 # Maximum number of nested blocks for function / method body
-max-nested-blocks=5
+max-nested-blocks=6
 
 # Complete name of functions that never returns. When checking for
 # inconsistent-return-statements if a never returning function is called then
diff --git a/README.md b/README.md
index e503a7d63..51876ef6b 100644
--- a/README.md
+++ b/README.md
@@ -902,7 +902,13 @@ Notes:
 - When a boolean is passed, the expert parallel degree defaults to 1 and further the behaviour would be as follows:
     - if True, it is Scatter MoE Kernels with experts sharded based on the top level sharding protocol (e.g. FSDP).
     - if False, Scatter MoE Kernels with complete replication of experts across ranks.
-    - `world_size` must be divisible by the `ep_degree`
+    - LoRA tuning with ScatterMoE is supported, but because of inference restrictions in vLLM/vanilla PEFT, experts should not be trained as `target_modules`. Users have control over which `target_modules` they wish to train:
+        - Passing `all-linear` will include the router, which is a linear layer, and all attention layers. This **will not** train the expert layers.
+        - To train only the attention layers, list the target modules explicitly (i.e. `target_modules: ["q_proj", "v_proj", "o_proj", "k_proj"]`).
+        - To train the expert layers, specify `input_linear` and `output_linear` in the target modules along with `router` (i.e. `target_modules: ["q_proj", "v_proj", "o_proj", "k_proj", "router", "input_linear", "output_linear"]`). If you specify these layers, inference with vLLM/vanilla HF PEFT **is not possible**.
+    - When LoRA tuning with ScatterMoE, the values `--fast_moe 1` and `--fast_moe True` are not expected to work, as FSDP must be enabled when LoRA tuning. Run either `--fast_moe False` or `--fast_moe x` with `x > 1`.
+    - When LoRA tuning with ScatterMoE, `--r` must be set to 16 or greater.
+    - `world_size` must be divisible by the `--ep_degree`
 - `number of experts` in the MoE module must be divisible by the `ep_degree`
 - Running fast moe modifies the state dict of the model, and must be post-processed which happens automatically and the converted checkpoint can be found at `hf_converted_checkpoint` folder within every saved checkpoint directory. Alternatively, we can perform similar option manually through [checkpoint utils](https://github.com/foundation-model-stack/fms-acceleration/blob/main/plugins/accelerated-moe/src/fms_acceleration_moe/utils/checkpoint_utils.py) script.
 - The typical usecase for this script is to run:
diff --git a/build/accelerate_launch.py b/build/accelerate_launch.py
index 6cbc7d252..bea6d032b 100644
--- a/build/accelerate_launch.py
+++ b/build/accelerate_launch.py
@@ -146,6 +146,17 @@ def main():
                 save_model_dir, save_model_dir, num_added_tokens
             )
 
+        # In case of ScatterMoE LoRA
+        hf_converted_checkpoint = os.path.join(
+            save_model_dir, "hf_converted_checkpoint"
+        )
+        if os.path.exists(
+            os.path.join(hf_converted_checkpoint, "adapter_model.safetensors")
+        ):
+            post_process_vLLM_adapters_new_tokens(
+                hf_converted_checkpoint, hf_converted_checkpoint, num_added_tokens
+            )
+
     if (
         os.path.exists(os.path.join(output_dir, "added_tokens_info.json"))
         and job_config.get("save_strategy") != "no"
@@ -159,11 +170,30 @@ def main():
         for _, dirs, _ in os.walk(output_dir, topdown=False):
             for name in dirs:
                 if "checkpoint-" in name.lower():
-                    post_process_vLLM_adapters_new_tokens(
-                        os.path.join(output_dir, name),
-                        os.path.join(output_dir, name),
-                        num_added_tokens,
+                    checkpoint_dir = os.path.join(output_dir, name)
+                    if os.path.exists(
+                        os.path.join(checkpoint_dir, "adapter_model.safetensors")
+                    ):
+                        post_process_vLLM_adapters_new_tokens(
+                            checkpoint_dir,
+                            checkpoint_dir,
+                            num_added_tokens,
+                        )
+
+                    # In case of ScatterMoE LoRA
+                    hf_converted_checkpoint = os.path.join(
+                        checkpoint_dir, "hf_converted_checkpoint"
                     )
+                    if os.path.exists(
+                        os.path.join(
+                            hf_converted_checkpoint, "adapter_model.safetensors"
+                        )
+                    ):
+                        post_process_vLLM_adapters_new_tokens(
+                            hf_converted_checkpoint,
+                            hf_converted_checkpoint,
+                            num_added_tokens,
+                        )
     else:
         logging.warning(
             "Failed to post-process: file added_tokens_info.json not in path %s",
diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py
index 664c67ad7..e97e51383 100644
--- a/tests/test_sft_trainer.py
+++ b/tests/test_sft_trainer.py
@@ -1447,6 +1447,49 @@ def test_run_moe_ft_and_inference_ep1_kernels(dataset_path, ep_degree):
     )
 
 
+@pytest.mark.skipif(
+    not is_fms_accelerate_available(plugins="moe"),
+    reason="Only runs if fms-accelerate is installed along with accelerated-moe plugin",
+)
+@pytest.mark.parametrize(
+    "dataset_path",
+    [
+        TWITTER_COMPLAINTS_DATA_JSONL,
+    ],
+)
+def test_run_moe_lora_and_inference(dataset_path):
+    """Check that we can LoRA tune a MoE model and that the HF converted checkpoint is created"""
+    with tempfile.TemporaryDirectory() as tempdir:
+        data_args = copy.deepcopy(DATA_ARGS)
+        data_args.training_data_path = dataset_path
+        model_args = copy.deepcopy(MODEL_ARGS)
+        model_args.model_name_or_path = "ibm-granite/granite-3.1-1b-a400m-base"
+        train_args = copy.deepcopy(TRAIN_ARGS)
+        train_args.output_dir = tempdir
+        lora_args = copy.deepcopy(PEFT_LORA_ARGS)
+        lora_args.r = 16
+        lora_args.target_modules = [
+            "q_proj",
+            "v_proj",
+            "o_proj",
+            "k_proj",
+        ]  # Router doesn't work with LoRA test inference
+        fast_moe_config = FastMoeConfig(fast_moe=FastMoe(ep_degree=False))
+        sft_trainer.train(
+            model_args,
+            data_args,
+            train_args,
+            lora_args,
+            fast_moe_config=fast_moe_config,
+        )
+        _test_run_inference(
+            checkpoint_path=os.path.join(
+                _get_checkpoint_path(tempdir), "hf_converted_checkpoint"
+            ),
+            base_model_name_or_path="ibm-granite/granite-3.1-1b-a400m-base",
+        )
+
+
 @pytest.mark.skipif(
     not is_fms_accelerate_available(plugins="moe"),
     reason="Only runs if fms-accelerate is installed along with accelerated-moe plugin",
@@ -1485,9 +1528,9 @@ def _test_run_causallm_ft(training_args, model_args, data_args, tempdir):
     _validate_training(tempdir)
 
 
-def _test_run_inference(checkpoint_path):
+def _test_run_inference(checkpoint_path, base_model_name_or_path=None):
     # Load the model
-    loaded_model = TunedCausalLM.load(checkpoint_path)
+    loaded_model = TunedCausalLM.load(checkpoint_path, base_model_name_or_path)
 
     # Run inference on the text
     output_inference = loaded_model.run(
diff --git a/tuning/config/acceleration_configs/fast_moe.py b/tuning/config/acceleration_configs/fast_moe.py
index 1ace18dfa..37602daf1 100644
--- a/tuning/config/acceleration_configs/fast_moe.py
+++ b/tuning/config/acceleration_configs/fast_moe.py
@@ -16,6 +16,7 @@
 from dataclasses import dataclass, field
 from typing import Union
 import argparse
+import json
 import os
 
 # Third Party
@@ -121,10 +122,29 @@ def checkpoint(checkpoint_dir, save_dir):
                         args,
                         os.path.join(hf_converted_output_dir, TRAINING_ARGS_NAME),
                     )
-                    # Save model config files
-                    self.trainer.model.config.save_pretrained(
-                        hf_converted_output_dir
-                    )
+
+                    # Unwrap FSDP module
+                    model = self.trainer.model
+                    if hasattr(model, "module"):
+                        model = model.module
+
+                    if hasattr(model, "peft_config"):
+                        lora_config = model.peft_config["default"]
+                        config_dict = lora_config.to_dict()
+                        config_dict["target_modules"] = sorted(
+                            list(config_dict["target_modules"])
+                        )
+                        with open(
+                            os.path.join(
+                                hf_converted_output_dir, "adapter_config.json"
+                            ),
+                            "w",
+                            encoding="utf-8",
+                        ) as f:
+                            json.dump(config_dict, f, indent=2)
+
+                    else:
+                        model.config.save_pretrained(hf_converted_output_dir)
                 except Exception as e:
                     raise ValueError(
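A minimal sketch of how the `hf_converted_checkpoint` produced by the post-processing above can be loaded for inference with vanilla HF PEFT, assuming experts were not included in `target_modules` (per the README notes in this patch); the adapter path and prompt below are illustrative, not taken from the repository:

```python
# Minimal sketch: load the post-processed ScatterMoE LoRA adapter with vanilla
# HF PEFT. Assumes experts were NOT trained (no "input_linear"/"output_linear"
# in target_modules); the adapter path below is hypothetical.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_name = "ibm-granite/granite-3.1-1b-a400m-base"  # base model used in the new test
adapter_dir = "output/checkpoint-100/hf_converted_checkpoint"  # illustrative path

base_model = AutoModelForCausalLM.from_pretrained(base_name)
# PeftModel reads the adapter_config.json written out by the fast_moe.py hook above
model = PeftModel.from_pretrained(base_model, adapter_dir)
tokenizer = AutoTokenizer.from_pretrained(base_name)

inputs = tokenizer("Tweet text: my package never arrived. Label:", return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=10)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```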