diff --git a/src/llmcompressor/entrypoints/model_free/lifecycle.py b/src/llmcompressor/entrypoints/model_free/lifecycle.py index b1bbf1ac9b..71d2d593e1 100644 --- a/src/llmcompressor/entrypoints/model_free/lifecycle.py +++ b/src/llmcompressor/entrypoints/model_free/lifecycle.py @@ -1,6 +1,4 @@ import torch -from compressed_tensors.compressors import BaseCompressor -from compressed_tensors.config.format import _get_quant_compression_format from compressed_tensors.quantization import ( QuantizationScheme, initialize_module_for_quantization, @@ -20,7 +18,6 @@ "validate_weight_for_quantization", "calibrate_global_scale", "calibrate_scale_zp", - "compress_module", ] @@ -64,29 +61,3 @@ def calibrate_scale_zp(module: torch.nn.Linear): apply_calibration_status(module) update_weight_zp_scale(module) freeze_module_quantization(module) - - -def compress_module(module: torch.nn.Linear): - scheme: QuantizationScheme = getattr(module, "quantization_scheme") - - format = _get_quant_compression_format(scheme.input_activations, scheme.weights) - scheme.format = format.value - - compressor = BaseCompressor.load_from_registry(format.value) - data = compressor.compress_weight( - module.weight, - quantization_args=scheme.weights, - scale=getattr(module, "weight_scale"), - zero_point=getattr(module, "weight_zero_point", None), - global_scale=getattr(module, "weight_global_scale", None), - ) - - # `compress_weight` is a messy api - delattr(module, "weight") - for key, value in data.items(): - if hasattr(module, key): - getattr(module, key).data = value - else: - module.register_parameter( - key, torch.nn.Parameter(value, requires_grad=False) - ) diff --git a/src/llmcompressor/entrypoints/model_free/process.py b/src/llmcompressor/entrypoints/model_free/process.py index 44835c7f81..66dcf32469 100644 --- a/src/llmcompressor/entrypoints/model_free/process.py +++ b/src/llmcompressor/entrypoints/model_free/process.py @@ -3,6 +3,7 @@ from typing import Iterable import torch +from 
compressed_tensors.compressors import compress_module from compressed_tensors.entrypoints.convert import Converter from compressed_tensors.quantization import QuantizationScheme from compressed_tensors.utils import match_quantizable_tensors @@ -12,7 +13,6 @@ from llmcompressor.entrypoints.model_free.lifecycle import ( calibrate_global_scale, calibrate_scale_zp, - compress_module, initialize_quantized_linear, validate_weight_for_quantization, ) diff --git a/src/llmcompressor/transformers/compression/compressed_tensors_utils.py b/src/llmcompressor/transformers/compression/compressed_tensors_utils.py index 188b757c33..c65a5cbd5a 100644 --- a/src/llmcompressor/transformers/compression/compressed_tensors_utils.py +++ b/src/llmcompressor/transformers/compression/compressed_tensors_utils.py @@ -3,7 +3,6 @@ from functools import wraps import torch -from accelerate.accelerator import get_state_dict_offloaded_model from compressed_tensors import ( ModelCompressor, SparsityCompressionConfig, @@ -15,9 +14,6 @@ from llmcompressor.core import active_session from llmcompressor.pytorch.model_load.helpers import copy_python_files_from_model_cache -from llmcompressor.transformers.compression.sparsity_metadata_config import ( - SparsityConfigMetadata, -) from llmcompressor.transformers.utils import RECIPE_FILE_NAME from llmcompressor.transformers.utils.helpers import infer_recipe_from_model_path @@ -143,59 +139,14 @@ def get_model_compressor( :param disable_sparse_compression: bool to skip sparse compression """ - if sparsity_config is None: - """ - Case 1: No sparsity config is provided - 1. Will either skip sparsity compression - 2. 
Or we will infer sparsity from the model directly - - Check recipe for applied sparsity: - - Set skip_sparsity_compression_stats to False if don't find a - sparsity structure from the recipe - - If we identify sparsity based on the recipe or the user - set skip_sparsity_compression_stats to False, generate config - """ - sparsity_structure = SparsityConfigMetadata.infer_sparsity_structure( - model, check_only_modifiers=True + if ( + sparsity_config is not None + or not skip_sparsity_compression_stats + or disable_sparse_compression + ): + logger.warning( + "Sparse compression is no longer supported by compressed-tensors" ) - if sparsity_structure is not None: - skip_sparsity_compression_stats = False - - if skip_sparsity_compression_stats: - logger.info( - "skip_sparsity_compression_stats set to True. Skipping sparsity " - "compression statistic calculations. No sparsity compressor will " - "be applied." - ) - sparsity_config = None - else: - state_dict = get_state_dict_offloaded_model(model) - - sparsity_config = SparsityConfigMetadata.from_pretrained( - model, - state_dict=state_dict, - compress=save_compressed, - quantization_format=quantization_format, - disable_sparse_compression=disable_sparse_compression, - sparsity_structure=sparsity_structure, - ) - else: - """ - # Case 2: User provides a Sparsity Config - - This is the case when there is existing sparsity in the - model that we'd like to account for while compressing - - Users should provide a SparsityConfig, conveying the model's - sparsity structure when saving the model - """ - if sparsity_config.sparsity_structure is None: - logger.info( - "SparsityConfigMetadata provided without indicating ", - "the sparsity structure. Sparisty will be inferred from the model. 
" - "Consider providing the structure to skip this step ", - ) - sparsity_config.sparsity_structure = ( - SparsityConfigMetadata.infer_sparsity_structure(model) - ) if not save_compressed: if quantization_format not in (None, CompressionFormat.dense.value): @@ -209,7 +160,6 @@ def get_model_compressor( return ModelCompressor.from_pretrained_model( model, - sparsity_config_or_format=sparsity_config, quantization_format=quantization_format, ) diff --git a/tests/llmcompressor/transformers/compression/decompression_configs/fp8_dynamic.yaml b/tests/llmcompressor/transformers/compression/decompression_configs/fp8_dynamic.yaml index 6685efb1ea..070d8718e9 100644 --- a/tests/llmcompressor/transformers/compression/decompression_configs/fp8_dynamic.yaml +++ b/tests/llmcompressor/transformers/compression/decompression_configs/fp8_dynamic.yaml @@ -1,4 +1,3 @@ cadence: "commit" test_type: "regression" -compressed_model_stub: "nm-testing/tinyllama-fp8-dynamic-compressed" -skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T" \ No newline at end of file +compressed_model_stub: "nm-testing/tinyllama-fp8-dynamic-compressed" \ No newline at end of file diff --git a/tests/llmcompressor/transformers/compression/decompression_configs/w4a16.yaml b/tests/llmcompressor/transformers/compression/decompression_configs/w4a16.yaml index 330023a801..1246d535e9 100644 --- a/tests/llmcompressor/transformers/compression/decompression_configs/w4a16.yaml +++ b/tests/llmcompressor/transformers/compression/decompression_configs/w4a16.yaml @@ -1,4 +1,3 @@ cadence: "nightly" test_type: "regression" -compressed_model_stub: "nm-testing/tinyllama-w4a16-compressed" -skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T" \ No newline at end of file +compressed_model_stub: "nm-testing/tinyllama-w4a16-compressed" \ No newline at end of file diff --git a/tests/llmcompressor/transformers/compression/decompression_configs/w8a16_dense.yaml 
b/tests/llmcompressor/transformers/compression/decompression_configs/w8a16_dense.yaml index 337e6c19e5..86225c2618 100644 --- a/tests/llmcompressor/transformers/compression/decompression_configs/w8a16_dense.yaml +++ b/tests/llmcompressor/transformers/compression/decompression_configs/w8a16_dense.yaml @@ -1,4 +1,3 @@ cadence: "nightly" test_type: "regression" -compressed_model_stub: "nm-testing/tinyllama-w8a16-dense" -skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T" \ No newline at end of file +compressed_model_stub: "nm-testing/tinyllama-w8a16-dense" \ No newline at end of file diff --git a/tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml b/tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml index b5a846cbc7..16827150fc 100644 --- a/tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml +++ b/tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml @@ -1,4 +1,3 @@ cadence: "commit" test_type: "regression" -compressed_model_stub: "nm-testing/tinyllama-w8a8-compressed" -skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T" \ No newline at end of file +compressed_model_stub: "nm-testing/tinyllama-w8a8-compressed" \ No newline at end of file diff --git a/tests/llmcompressor/transformers/compression/test_compress_tensor_utils.py b/tests/llmcompressor/transformers/compression/test_compress_tensor_utils.py index a47cb1440d..13dca54e22 100644 --- a/tests/llmcompressor/transformers/compression/test_compress_tensor_utils.py +++ b/tests/llmcompressor/transformers/compression/test_compress_tensor_utils.py @@ -1,4 +1,3 @@ -import math import os import shutil @@ -6,9 +5,8 @@ import torch from accelerate import dispatch_model from accelerate.accelerator import get_state_dict_offloaded_model -from compressed_tensors import QUANTIZATION_CONFIG_NAME, CompressionFormat -from compressed_tensors.compressors import ModelCompressor -from 
compressed_tensors.config import BitmaskConfig, DenseSparsityConfig +from compressed_tensors import QUANTIZATION_CONFIG_NAME +from compressed_tensors.compressors.format import infer_model_format from compressed_tensors.quantization import ( QuantizationConfig, QuantizationStatus, @@ -19,140 +17,20 @@ from transformers.utils.quantization_config import CompressedTensorsConfig from llmcompressor import oneshot -from llmcompressor.core import reset_session -from llmcompressor.pytorch.utils.helpers import tensor_sparsity from llmcompressor.transformers.compression.compressed_tensors_utils import ( - get_model_compressor, modify_save_pretrained, ) -from llmcompressor.transformers.compression.sparsity_metadata_config import ( - SparsityConfigMetadata, -) from llmcompressor.utils import untie_word_embeddings from tests.testing_utils import requires_gpu -@pytest.mark.parametrize( - "compressed,config,dtype", - [ - [True, None, torch.float32], - [False, DenseSparsityConfig(), torch.float16], - [True, BitmaskConfig(), torch.bfloat16], - [False, BitmaskConfig(), torch.float32], - [False, None, torch.float16], - ], -) -def test_sparse_model_reload(compressed, config, dtype, tmp_path): - recipe_str = "tests/llmcompressor/transformers/sparsegpt/recipes/test_tiny2.yaml" - expected_sparsity = 0.5 - model_path = "nm-testing/tinysmokellama-3.2" - dataset = "open_platypus" - concatenate_data = False - num_calibration_samples = 64 - output_dir = tmp_path / "oneshot_out" - splits = {"calibration": "train[:10%]"} - one_of_sparse_weights = "model.layers.1.mlp.up_proj.weight" - - # create a sparse model - oneshot( - model=model_path, - dataset=dataset, - output_dir=output_dir, - num_calibration_samples=num_calibration_samples, - recipe=recipe_str, - concatenate_data=concatenate_data, - splits=splits, - precision=dtype, - clear_sparse_session=False, - tie_word_embeddings=False, - ) - - model = AutoModelForCausalLM.from_pretrained(tmp_path / "oneshot_out", dtype=dtype) - - # assert that 
sample layer has the intended sparsity - assert math.isclose( - tensor_sparsity(model.state_dict()[one_of_sparse_weights]), - expected_sparsity, - rel_tol=1e-3, - ) - - inferred_structure = SparsityConfigMetadata.infer_sparsity_structure() - assert inferred_structure == "0:0" - - model.save_pretrained( - tmp_path / "compress_out", - sparsity_config=config, - save_compressed=compressed, - ) - - config = AutoConfig.from_pretrained(tmp_path / "compress_out") - compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None) - sparsity_config = ModelCompressor.parse_sparsity_config(compression_config) - assert ( - sparsity_config["format"] == "dense" - if (not compressed and config is None) - else "sparse_bitmask" - ) - assert sparsity_config[ - "global_sparsity" - ] == SparsityConfigMetadata.infer_global_sparsity(model) - assert sparsity_config["sparsity_structure"] == inferred_structure - - dense_model = AutoModelForCausalLM.from_pretrained( - tmp_path / "compress_out", dtype="auto" - ) - - og_state_dict = model.state_dict() - reconstructed_state_dict = dense_model.state_dict() - assert len(og_state_dict) == len(reconstructed_state_dict) - for key in og_state_dict.keys(): - dense_tensor = og_state_dict[key] - reconstructed_tensor = reconstructed_state_dict[key] - assert dense_tensor.dtype == reconstructed_tensor.dtype == dtype - assert torch.equal(dense_tensor, reconstructed_tensor) - - if os.path.isdir(tmp_path): - shutil.rmtree(tmp_path) - - -@pytest.mark.parametrize( - "skip_compression_stats,save_compressed", - [[True, True], [True, False], [False, True], [False, False]], -) -def test_dense_model_save(tmp_path, skip_compression_stats, save_compressed): - reset_session() - - model_path = "nm-testing/tinysmokellama-3.2" - model = AutoModelForCausalLM.from_pretrained(model_path) - - inferred_global_sparsity = SparsityConfigMetadata.infer_global_sparsity(model) - assert math.isclose(inferred_global_sparsity, 0.0, rel_tol=1e-3) - inferred_structure = 
SparsityConfigMetadata.infer_sparsity_structure() - assert inferred_structure == "unstructured" - - model.save_pretrained( - tmp_path / "dense_out", - skip_compression_stats=skip_compression_stats, - save_compressed=save_compressed, - ) - - # for models with 0% sparsity no sparsity config is saved regardless - config = AutoConfig.from_pretrained(tmp_path / "dense_out") - compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None) - sparsity_config = ModelCompressor.parse_sparsity_config(compression_config) - assert sparsity_config is None - - if os.path.isdir(tmp_path): - shutil.rmtree(tmp_path) - - @pytest.mark.parametrize( "format,dtype", [ - ["dense", torch.float32], - ["dense", torch.float16], - # TODO: Int8 Decompression fails for transformers>4.49 - # ["int_quantized", torch.float32], + ["dense", torch.bfloat16], + # NOTE: Int8 Decompression fails for transformers>4.49 due to bug in loading + # parameters with integers (hf attempts to attach gradients to int params) + # ["int-quantized", torch.bfloat16], ], ) def test_quant_model_reload(format, dtype, tmp_path): @@ -164,7 +42,7 @@ def test_quant_model_reload(format, dtype, tmp_path): dataset = "open_platypus" concatenate_data = False num_calibration_samples = 16 - splits = {"calibration": "train[:10%]"} + splits = {"calibration": f"train[:{num_calibration_samples}]"} # create a quantized model model = oneshot( @@ -191,7 +69,7 @@ def test_quant_model_reload(format, dtype, tmp_path): module.quantization_status == QuantizationStatus.FROZEN ), f"Module {name} has incorrect quantization status" - # Save to disk + # Save to disk, override format model.save_pretrained( save_path_compressed, quantization_format=format, @@ -200,8 +78,7 @@ def test_quant_model_reload(format, dtype, tmp_path): # Verify config on disk config = AutoConfig.from_pretrained(save_path_compressed) - compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None) - quant_config = 
ModelCompressor.parse_quantization_config(compression_config) +    quant_config = getattr(config, QUANTIZATION_CONFIG_NAME)     assert quant_config["format"] == format      decompressed_model = AutoModelForCausalLM.from_pretrained( @@ -210,6 +87,7 @@         quantization_config=CompressedTensorsConfig(run_compressed=False),     )  +    og_state_dict = _remove_zp(og_state_dict)  # HACK: remove extra zero points added during quant init     reconstructed_state_dict = decompressed_model.state_dict()     assert len(og_state_dict) == len(reconstructed_state_dict)     for key in og_state_dict.keys(): @@ -280,188 +158,12 @@ def test_model_reload_gpu(offload, dtype, tie_word_embeddings, device, tmp_path)     test_model_reload(offload, dtype, tie_word_embeddings, device, tmp_path)   -@requires_gpu -@pytest.mark.parametrize( -    "model_stub, recipe, sparse_format, quant_format", -    [ -        ( -            "nm-testing/tinysmokellama-3.2", -            "tests/llmcompressor/transformers/compression/recipes/sparse_24_fp8.yaml", -            CompressionFormat.sparse_24_bitmask.value, -            CompressionFormat.float_quantized.value, -        ), -    ], -) -def test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tmp_path): -    from llmcompressor.pytorch.model_load.helpers import get_session_model - -    device = "cuda:0" if not torch.cuda.is_available() else "cpu" -    dataset = "open_platypus" -    concatenate_data = False -    num_calibration_samples = 64 -    splits = {"calibration": "train[:10%]"} - -    oneshot( -        model=model_stub, -        dataset=dataset, -        num_calibration_samples=num_calibration_samples, -        recipe=recipe, -        concatenate_data=concatenate_data, -        splits=splits, -    ) - -    # Fetch the oneshot model -    model = get_session_model() -    og_state_dict = model.state_dict() -    path = tmp_path / "compressed" - -    # As HFQuantizer doesn't decompress the model, use the compressor to decompress -    # the model instead -    compressor = ModelCompressor.from_pretrained_model( -        model, sparsity_config_or_format=sparse_format, quantization_format=quant_format 
- ) - - assert ( - compressor.sparsity_compressor is not None - ), "Sparse compressor not initialized" - assert compressor.sparsity_config.format == sparse_format - - assert ( - compressor.quantization_compressor is not None - ), "Quantization compressor not initialized" - - compressor.compress_model(model) - compressor.decompress_model(model) - compressor.quantization_config.quantization_status = QuantizationStatus.FROZEN - - # Verify the abs difference between the decompressed model - # and the original model - reconstructed_state_dict = model.state_dict() - for key in reconstructed_state_dict.keys(): - dense_tensor = og_state_dict[key].to(device) - reconstructed_tensor = reconstructed_state_dict[key].to(device) - assert dense_tensor.dtype == reconstructed_tensor.dtype - if key.endswith("weight") and quant_format != "dense": - # we don't expect an exact match for compressed - diff = torch.abs(dense_tensor - reconstructed_tensor) - # maximum quantization error as a result of compression is ~0.025 - assert not torch.any(diff > 0.025), f"Max diff: {torch.max(diff)}" - else: - assert torch.equal(dense_tensor, reconstructed_tensor) - - # Recompress and save; validate correct formats used - model.save_pretrained(path) - config = AutoConfig.from_pretrained(path) - compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None) - quant_config = ModelCompressor.parse_quantization_config(compression_config) - sparsity_config = ModelCompressor.parse_sparsity_config(compression_config) - assert quant_config["format"] == quant_format - assert sparsity_config["format"] == sparse_format - - if os.path.isdir(tmp_path): - shutil.rmtree(tmp_path) - - -@pytest.mark.parametrize( - "model_stub, recipe, sparse_format", - [ - ( - "nm-testing/tinysmokellama-3.2", - "tests/llmcompressor/transformers/compression/recipes/sparse_24.yaml", - CompressionFormat.sparse_24_bitmask.value, - ), - ], -) -def test_sparse_24_compressor_is_lossless(model_stub, recipe, sparse_format, tmp_path): - 
device = "cuda:0" if not torch.cuda.is_available() else "cpu" - dataset = "open_platypus" - concatenate_data = False - num_calibration_samples = 64 - splits = {"calibration": "train[:10%]"} - empty_model = AutoModelForCausalLM.from_pretrained(model_stub, dtype="auto") - - model = oneshot( - model=model_stub, - dataset=dataset, - num_calibration_samples=num_calibration_samples, - recipe=recipe, - concatenate_data=concatenate_data, - splits=splits, - clear_sparse_session=False, - ) - - og_state_dict = model.state_dict() - path = tmp_path / "compressed" - - # Compress and save - model.save_pretrained( - path, - save_compressed=True, - ) - - # Verify config on disk - config = AutoConfig.from_pretrained(path) - compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None) - - # As HFQuantizer doesn't decompress the model, use the compressor to decompress - # the model instead - compressor = ModelCompressor.from_compression_config(compression_config) - - assert ( - compressor.sparsity_compressor is not None - ), "Sparse compressor not initialized" - assert compressor.sparsity_config.format == sparse_format - - compressor.decompress(model_path=path, model=empty_model) - - # Verify the abs difference between the decompressed model - # and the original model - reconstructed_state_dict = empty_model.state_dict() - assert len(og_state_dict) == len(reconstructed_state_dict) - for key in og_state_dict.keys(): - dense_tensor = og_state_dict[key].to(device) - reconstructed_tensor = reconstructed_state_dict[key].to(device) - assert dense_tensor.dtype == reconstructed_tensor.dtype - if key.endswith("weight"): - assert torch.equal(dense_tensor, reconstructed_tensor) - if os.path.isdir(tmp_path): - shutil.rmtree(tmp_path) - - -def test_disable_sparse_compression_flag(tmp_path): - two_four_sparse_model_id = "nm-testing/llama2.c-stories42M-pruned2.4" - two_four_sparse_model = AutoModelForCausalLM.from_pretrained( - two_four_sparse_model_id, dtype="auto" - ) - 
modify_save_pretrained(two_four_sparse_model) - - save_path = tmp_path / "no_sparse_compression_model" - sparsity_config = SparsityConfigMetadata.from_pretrained( - two_four_sparse_model, - sparsity_structure="2:4", - ) - two_four_sparse_model.save_pretrained( - save_path, disable_sparse_compression=True, sparsity_config=sparsity_config - ) - - config = AutoConfig.from_pretrained(save_path) - quantization_config = getattr(config, QUANTIZATION_CONFIG_NAME, None) - - assert quantization_config - sparsity_config = quantization_config.get("sparsity_config") - - assert sparsity_config - assert sparsity_config["format"] == "dense" - if os.path.isdir(tmp_path): - shutil.rmtree(tmp_path) - - class DummyLinearModel(nn.Module): """ A dummy linear model for testing purposes, simulating a quantized linear layer. """ - def __init__(self, weights, weight_scale=None, weight_zero_point=None): + def __init__(self, weights, weight_scale=None, zero_point=None): super().__init__() out_features, in_features = weights.shape @@ -470,14 +172,8 @@ def __init__(self, weights, weight_scale=None, weight_zero_point=None): self.linear.weight = nn.Parameter(weights, requires_grad=True) # Attach scale and zero-point if provided - if weight_scale is not None: - self.linear.weight_scale = nn.Parameter( - torch.tensor(weight_scale), requires_grad=False - ) - if weight_zero_point is not None: - self.linear.weight_zero_point = nn.Parameter( - torch.tensor(weight_zero_point), requires_grad=False - ) + self.linear.weight_scale = nn.Parameter(weight_scale, requires_grad=False) + self.linear.weight_zero_point = nn.Parameter(zero_point, requires_grad=False) def forward(self, x): return self.linear(x) @@ -541,53 +237,23 @@ def _quantization_config_from_string(config_str, q_type): ) -def _make_24_sparse(tensor): - """ - Apply 2:4 sparsity pattern to the given tensor. 
- """ - reshaped_tensor = tensor.view(tensor.size(0), -1, 4) - mask = torch.zeros_like(reshaped_tensor, dtype=torch.bool) - mask[..., :2] = True - sparsified_tensor = torch.where( - mask, reshaped_tensor, torch.tensor(0.0, dtype=tensor.dtype) - ) - return sparsified_tensor.view_as(tensor) - - @pytest.mark.parametrize( - "quant_style, quant_type, is_24, expected_quant_compressor, " - "expected_sparsity_compressor", + "quant_style,quant_type,expected_format", [ - ("W8A8", "int", False, "int-quantized", "dense"), - ("W4A16", "int", False, "pack-quantized", "dense"), - ("W8A16", "int", False, "pack-quantized", "dense"), - ("W8A8", "int", True, "int-quantized", "sparse-24-bitmask"), - ("W4A16", "int", True, "marlin-24", "dense"), - ("W8A16", "int", True, "marlin-24", "dense"), - ("W8A8", "float", False, "float-quantized", "dense"), - ("W8A16", "float", False, "naive-quantized", "dense"), - ("W8A8", "float", True, "float-quantized", "sparse-24-bitmask"), - ("W8A16", "float", True, "naive-quantized", "dense"), + ("W8A8", "int", "int-quantized"), + ("W4A16", "int", "pack-quantized"), + ("W8A16", "int", "pack-quantized"), + ("W8A8", "float", "float-quantized"), + ("W8A16", "float", "naive-quantized"), ], ) def test_correct_compressor_inferred( quant_style, quant_type, - is_24, - expected_quant_compressor, - expected_sparsity_compressor, + expected_format, ): - """ - Test if the correct compressor is inferred based on - quantization and sparsity configurations. 
- """ + """Test if the correct compressor is inferred based on quantization""" weights = torch.rand(10, 4) - if is_24: - weights = _make_24_sparse(weights) - else: - weights[0, :] = torch.ones( - 4, - ) # guarantee not 24 sparse quantization_config = _quantization_config_from_string(quant_style, quant_type) quantization_args = quantization_config.config_groups["group_0"].weights @@ -607,20 +273,12 @@ def test_correct_compressor_inferred( model.linear.quantization_scheme = quantization_config.config_groups["group_0"] model.linear.quantization_status = QuantizationStatus.FROZEN - if is_24: - sparsity_config = SparsityConfigMetadata.from_pretrained( - model, sparsity_structure="2:4", compress=True - ) - else: - sparsity_config = None - compressor = get_model_compressor(model, sparsity_config=sparsity_config) + assert infer_model_format(model) == expected_format - assert compressor.quantization_config.format == expected_quant_compressor - if expected_sparsity_compressor == "dense": - assert ( - compressor.sparsity_config is None - or compressor.sparsity_config.format == expected_sparsity_compressor - ) - else: - assert compressor.sparsity_config.format == expected_sparsity_compressor +def _remove_zp(state_dict: dict) -> dict: + return { + key: value + for key, value in state_dict.items() + if not key.endswith("zero_point") + } diff --git a/tests/llmcompressor/transformers/compression/test_decompress.py b/tests/llmcompressor/transformers/compression/test_decompress.py index 3b8cc6cbad..0148a1afc9 100644 --- a/tests/llmcompressor/transformers/compression/test_decompress.py +++ b/tests/llmcompressor/transformers/compression/test_decompress.py @@ -1,11 +1,7 @@ -import copy - import pytest import torch -from compressed_tensors import QUANTIZATION_CONFIG_NAME from compressed_tensors.compressors import ModelCompressor -from compressed_tensors.quantization import QuantizationStatus -from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer +from transformers 
import AutoModelForCausalLM, AutoTokenizer from transformers.utils.quantization_config import CompressedTensorsConfig from tests.testing_utils import parse_params, requires_gpu @@ -19,87 +15,37 @@ def test_hf_quantizer_decompress_match_manual_decompress(config): """ Check that HFQuantizer decompression is working as expected. Manually decompress a compressed model and compare the generations - - Decompression: - Given a skeleton model and path to the optimized model, - write the optimized model's safetensors to the skeleton model and decompress - Ex. write weight_scale to the skeleton model and then convert from fp4 to fp16 - """ compressed_model_stub = config["compressed_model_stub"] - skeleton_model_stub = config["skeleton_model_stub"] sample_inputs = [ "I love 4-bit quantization because", "What is the capital of France?", "def fibonacci(n):", ] - tokenizer = AutoTokenizer.from_pretrained(compressed_model_stub) # Decompress using HFQuantizer from AutoModelForCausalLM - decompressed_model_hf_quantizer = AutoModelForCausalLM.from_pretrained( + hf_quantizer_model = AutoModelForCausalLM.from_pretrained( compressed_model_stub, dtype="auto", device_map="auto", quantization_config=CompressedTensorsConfig(run_compressed=False), ) - # Manually decompress this model - dense_model = AutoModelForCausalLM.from_pretrained( - skeleton_model_stub, - dtype=decompressed_model_hf_quantizer.dtype, - device_map=decompressed_model_hf_quantizer.device, - ) - - # decompression from HFQuantizer should populate weight_scale - assert hasattr( - decompressed_model_hf_quantizer.model.layers[0].self_attn.q_proj, - "weight_scale", - ) - - # dense model should not have weight_scale populated - assert not hasattr(dense_model.model.layers[0].self_attn.q_proj, "weight_scale") - - config = AutoConfig.from_pretrained(compressed_model_stub) - - compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None) - compressor = ModelCompressor.from_compression_config(compression_config) - 
compressor.quantization_config.quantization_status = QuantizationStatus.FROZEN - - # use the model_path to load the decompressed weights into dense_model - orig_dense_model = copy.deepcopy(dense_model) - - # overwrite the weights of the dense model - compressor.decompress( - model_path=compressed_model_stub, - model=dense_model, - ) - - # self.dense_model should be decompressed - assert dense_model is not orig_dense_model - - decompressed_model_manual = dense_model - - assert hasattr( - decompressed_model_manual.model.layers[0].self_attn.q_proj, - "weight_scale", + # Manually decompress from compressed model + manual_model = AutoModelForCausalLM.from_pretrained( + compressed_model_stub, + dtype=hf_quantizer_model.dtype, + device_map=hf_quantizer_model.device, ) + ModelCompressor().decompress_model(manual_model) - device = decompressed_model_manual.device - + # Check generations + device = manual_model.device for input in sample_inputs: inputs = tokenizer(input, return_tensors="pt", padding=True).to(device) - - decompressed_model_manual_output = decompressed_model_manual.generate( - **inputs, max_length=50 - ) - - decompressed_model_hf_quantizer_out = decompressed_model_hf_quantizer.generate( - **inputs, max_length=50 - ) - - assert torch.equal( - decompressed_model_hf_quantizer_out, decompressed_model_manual_output - ) + manual_output = manual_model.generate(**inputs, max_length=15) + hf_quantizer_output = hf_quantizer_model.generate(**inputs, max_length=15) + assert torch.equal(manual_output, hf_quantizer_output) diff --git a/tests/llmcompressor/transformers/compression/test_quantization.py b/tests/llmcompressor/transformers/compression/test_quantization.py index de0f503402..dd62c01426 100644 --- a/tests/llmcompressor/transformers/compression/test_quantization.py +++ b/tests/llmcompressor/transformers/compression/test_quantization.py @@ -39,20 +39,16 @@ def _get_quant_info(model): for name, module in model.named_modules(): with align_module_device(module): if 
is_module_quantized(module): + # skip zero points, as these are removed between + # compression/decompression for symmetric models + if module.quantization_scheme.weights is not None: - quant_info_weights[name] = ( - module.weight_scale, - module.weight_zero_point, - module.weight, - ) + quant_info_weights[name] = (module.weight_scale, module.weight) if module.quantization_scheme.input_activations is not None: is_dynamic = module.quantization_scheme.input_activations.dynamic if not is_dynamic: - quant_info_inputs[name] = ( - module.input_scale, - module.input_zero_point, - ) + quant_info_inputs[name] = (module.input_scale,) return quant_info_weights, quant_info_inputs @@ -110,23 +106,19 @@ def test_quantization_reload(setup_model_and_config): # TODO: can remove `to` calls after # https://github.com/neuralmagic/compressed-tensors/pull/427 - for name, (o_scale, o_zp, o_weight) in og_weights.items(): - n_scale, n_zp, n_weight = reloaded_weights[name] + for name, (o_scale, o_weight) in og_weights.items(): + n_scale, n_weight = reloaded_weights[name] assert o_scale.dtype == n_scale.dtype == config["weight_dtype"] assert torch.equal(o_scale, n_scale.to(o_scale.device)) - assert o_zp.dtype == n_zp.dtype - assert torch.equal(o_zp, n_zp.to(o_zp.device)) # we don't expect an exact match here because o_weight still has the # original weight and n_weight has been fake_quantized assert n_weight.dtype == o_weight.dtype == config["weight_dtype"] - for name, (o_scale, o_zp) in og_inputs.items(): - n_scale, n_zp = reloaded_inputs[name] + for name, (o_scale,) in og_inputs.items(): + (n_scale,) = reloaded_inputs[name] assert o_scale.dtype == n_scale.dtype == config["weight_dtype"] assert torch.equal(o_scale, n_scale.to(o_scale.device)) - assert o_zp.dtype == n_zp.dtype - assert torch.equal(o_zp, n_zp.to(o_zp.device)) @requires_gpu