```python
import torch


def get_torch_compiled_model(model, logger):
    from functools import partial

    from torchao.quantization import (
        float8_dynamic_activation_float8_weight,
        float8_weight_only,
        quantize_,
    )
    from torchao.quantization.granularity import (
        PerRow,
        PerTensor,
    )

    # Map quantization modes to torchao config factories; only the
    # "dynamic" float8 mode with per-tensor scales is exercised here.
    mode_map = {}
    mode = "dynamic"
    granularity = PerTensor()
    mode_map[mode] = partial(
        float8_dynamic_activation_float8_weight, granularity=granularity
    )

    # Quantize the model in place with the selected float8 config.
    factory = mode_map[mode]()
    quantize_(model, factory)
    print(f"Quantized model: {model}")
    # input 1: ('Explain the history of AI',)
    # output 1.1: ('Explain the history of AI and its evolution over time.\n\nArtificial intelligence (AI) has a rich and varied history that spans several decades. The term "Artificial Intelligence" was coined in 1956 by John McCarthy, a computer scientist who organized the first AI conference at Dartmouth College. Here is a brief overview of the history of AI and its evolution over time:\n\n1. Early Years (1950s-1960s): The first AI program was developed in 1951 by Alan Turing, a British mathematic',)
    # gen_text(

    # Compile only the transformer core when a known attribute exists.
    # for gpt_bigcode, mpt, bloom, gpt2 model types
    if hasattr(model, "transformer"):
        model.transformer = torch.compile(
            model.transformer,
            backend="hpu_backend",
            options={"keep_input_mutations": True},
        )
    # for gpt_neox
    elif hasattr(model, "gpt_neox"):
        model.gpt_neox = torch.compile(
            model.gpt_neox,
            backend="hpu_backend",
            options={"keep_input_mutations": True},
        )
    # for llama, mistral, mixtral, qwen2
    elif hasattr(model, "model"):
        model.model = torch.compile(
            model.model,
            backend="hpu_backend",
            options={"keep_input_mutations": True},
        )
    else:
        # Fall back to compiling the whole model.
        logger.warning(
            "If performance is low, explicitly specify the module to wrap "
            "with `torch.compile`."
        )
        model = torch.compile(
            model, backend="hpu_backend", options={"keep_input_mutations": True}
        )
    return model
```
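For reference, a minimal sketch of how this helper might be driven end to end, assuming a Gaudi/HPU environment where `habana_frameworks.torch` is installed (importing it registers the `hpu` device and the `hpu_backend` compile backend) and a Hugging Face causal LM; the checkpoint name below is only a placeholder:

```python
import logging

import torch
import habana_frameworks.torch.core  # assumed: registers "hpu" / hpu_backend
from transformers import AutoModelForCausalLM, AutoTokenizer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Placeholder checkpoint; any causal LM whose core lives under
# `model.model` / `model.transformer` / `model.gpt_neox` works.
name = "meta-llama/Llama-2-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(name, torch_dtype=torch.bfloat16)
model = model.eval().to("hpu")

# Float8-quantize and compile the transformer core for HPU.
model = get_torch_compiled_model(model, logger)

inputs = tokenizer("Explain the history of AI", return_tensors="pt").to("hpu")
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```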
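The function also imports `float8_weight_only` and `PerRow` without using them, which suggests the original script supports additional modes. A hedged sketch of how `mode_map` could be extended under that assumption (the `"weight_only"` key and the per-row toggle are illustrative, not taken from the source):

```python
from functools import partial

from torchao.quantization import (
    float8_dynamic_activation_float8_weight,
    float8_weight_only,
)
from torchao.quantization.granularity import PerRow, PerTensor

# Illustrative: choose per-row scales for the dynamic mode and add a
# weight-only float8 mode; the keys and the toggle are assumptions.
use_per_row = True
granularity = PerRow() if use_per_row else PerTensor()

mode_map = {
    "dynamic": partial(
        float8_dynamic_activation_float8_weight, granularity=granularity
    ),
    "weight_only": float8_weight_only,  # float8 weights, activations untouched
}

factory = mode_map["weight_only"]()
# quantize_(model, factory)  # as in get_torch_compiled_model above
```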