Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 0 additions & 29 deletions src/llmcompressor/entrypoints/model_free/lifecycle.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import torch
from compressed_tensors.compressors import BaseCompressor
from compressed_tensors.config.format import _get_quant_compression_format
from compressed_tensors.quantization import (
QuantizationScheme,
initialize_module_for_quantization,
Expand All @@ -20,7 +18,6 @@
"validate_weight_for_quantization",
"calibrate_global_scale",
"calibrate_scale_zp",
"compress_module",
]


Expand Down Expand Up @@ -64,29 +61,3 @@ def calibrate_scale_zp(module: torch.nn.Linear):
apply_calibration_status(module)
update_weight_zp_scale(module)
freeze_module_quantization(module)


def compress_module(module: torch.nn.Linear):
scheme: QuantizationScheme = getattr(module, "quantization_scheme")

format = _get_quant_compression_format(scheme.input_activations, scheme.weights)
scheme.format = format.value

compressor = BaseCompressor.load_from_registry(format.value)
data = compressor.compress_weight(
module.weight,
quantization_args=scheme.weights,
scale=getattr(module, "weight_scale"),
zero_point=getattr(module, "weight_zero_point", None),
global_scale=getattr(module, "weight_global_scale", None),
)

# `compress_weight` is a messy api
delattr(module, "weight")
for key, value in data.items():
if hasattr(module, key):
getattr(module, key).data = value
else:
module.register_parameter(
key, torch.nn.Parameter(value, requires_grad=False)
)
2 changes: 1 addition & 1 deletion src/llmcompressor/entrypoints/model_free/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Iterable

import torch
from compressed_tensors.compressors import compress_module
from compressed_tensors.quantization import QuantizationScheme
from compressed_tensors.utils.match import match_name
from safetensors.torch import load_file, save_file
Expand All @@ -12,7 +13,6 @@
from llmcompressor.entrypoints.model_free.lifecycle import (
calibrate_global_scale,
calibrate_scale_zp,
compress_module,
initialize_quantized_linear,
validate_weight_for_quantization,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from functools import wraps

import torch
from accelerate.accelerator import get_state_dict_offloaded_model
from compressed_tensors import (
ModelCompressor,
SparsityCompressionConfig,
Expand All @@ -15,9 +14,6 @@

from llmcompressor.core import active_session
from llmcompressor.pytorch.model_load.helpers import copy_python_files_from_model_cache
from llmcompressor.transformers.compression.sparsity_metadata_config import (
SparsityConfigMetadata,
)
from llmcompressor.transformers.utils import RECIPE_FILE_NAME
from llmcompressor.transformers.utils.helpers import infer_recipe_from_model_path

Expand Down Expand Up @@ -143,59 +139,14 @@ def get_model_compressor(
:param disable_sparse_compression: bool to skip sparse compression
"""

if sparsity_config is None:
"""
Case 1: No sparsity config is provided
1. Will either skip sparsity compression
2. Or we will infer sparsity from the model directly

Check recipe for applied sparsity:
- Set skip_sparsity_compression_stats to False if don't find a
sparsity structure from the recipe
- If we identify sparsity based on the recipe or the user
set skip_sparsity_compression_stats to False, generate config
"""
sparsity_structure = SparsityConfigMetadata.infer_sparsity_structure(
model, check_only_modifiers=True
if (
sparsity_config is not None
or not skip_sparsity_compression_stats
or disable_sparse_compression
):
logger.warning(
"Sparse compression is no longer supported by compressed-tensors"
)
if sparsity_structure is not None:
skip_sparsity_compression_stats = False

if skip_sparsity_compression_stats:
logger.info(
"skip_sparsity_compression_stats set to True. Skipping sparsity "
"compression statistic calculations. No sparsity compressor will "
"be applied."
)
sparsity_config = None
else:
state_dict = get_state_dict_offloaded_model(model)

sparsity_config = SparsityConfigMetadata.from_pretrained(
model,
state_dict=state_dict,
compress=save_compressed,
quantization_format=quantization_format,
disable_sparse_compression=disable_sparse_compression,
sparsity_structure=sparsity_structure,
)
else:
"""
# Case 2: User provides a Sparsity Config
- This is the case when there is existing sparsity in the
model that we'd like to account for while compressing
- Users should provide a SparsityConfig, conveying the model's
sparsity structure when saving the model
"""
if sparsity_config.sparsity_structure is None:
logger.info(
"SparsityConfigMetadata provided without indicating ",
"the sparsity structure. Sparisty will be inferred from the model. "
"Consider providing the structure to skip this step ",
)
sparsity_config.sparsity_structure = (
SparsityConfigMetadata.infer_sparsity_structure(model)
)

if not save_compressed:
if quantization_format not in (None, CompressionFormat.dense.value):
Expand All @@ -209,7 +160,6 @@ def get_model_compressor(

return ModelCompressor.from_pretrained_model(
model,
sparsity_config_or_format=sparsity_config,
quantization_format=quantization_format,
)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
cadence: "commit"
test_type: "regression"
compressed_model_stub: "nm-testing/tinyllama-fp8-dynamic-compressed"
skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
compressed_model_stub: "nm-testing/tinyllama-fp8-dynamic-compressed"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are these all showing up as line changes? Did you remove the trailing newline at the end of the file?

Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
cadence: "nightly"
test_type: "regression"
compressed_model_stub: "nm-testing/tinyllama-w4a16-compressed"
skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
compressed_model_stub: "nm-testing/tinyllama-w4a16-compressed"
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
cadence: "nightly"
test_type: "regression"
compressed_model_stub: "nm-testing/tinyllama-w8a16-dense"
skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
compressed_model_stub: "nm-testing/tinyllama-w8a16-dense"
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
cadence: "commit"
test_type: "regression"
compressed_model_stub: "nm-testing/tinyllama-w8a8-compressed"
skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
compressed_model_stub: "nm-testing/tinyllama-w8a8-compressed"
Loading
Loading