Skip to content

compile OH #23

@yiliu30

Description

@yiliu30
def get_torch_compiled_model(model, logger):
    """Quantize ``model`` in place with torchao float8, then wrap it with ``torch.compile``.

    The model is first quantized via ``quantize_`` using the config returned by
    ``float8_dynamic_activation_float8_weight`` with ``PerTensor`` granularity.
    Afterwards the first of the attributes ``transformer``, ``gpt_neox`` or
    ``model`` that exists on ``model`` is replaced by its compiled counterpart.
    If none of them exists, the whole model is compiled and a warning is logged.

    Args:
        model: The model to quantize and compile. It is modified in place.
        logger: Logger-like object providing ``info`` and ``warning``.

    Returns:
        ``model`` itself when one of its sub-modules was compiled, otherwise
        the object returned by ``torch.compile(model, ...)``.
    """
    # Imported lazily so that torchao is only required when this path is used.
    from torchao.quantization import (
        float8_dynamic_activation_float8_weight,
        quantize_,
    )
    from torchao.quantization.granularity import PerTensor

    quantize_(model, float8_dynamic_activation_float8_weight(granularity=PerTensor()))
    logger.info(f"Quantized model: {model}")

    # Every compile call below shares the same backend and options.
    compile_kwargs = {"backend": "hpu_backend", "options": {"keep_input_mutations": True}}

    # Checked in priority order; only the first matching attribute is compiled.
    #   transformer: gpt_bigcode, mpt, bloom, gpt2 model types
    #   gpt_neox:    gpt_neox
    #   model:       llama, mistral, mixtral, qwen2
    for attr_name in ("transformer", "gpt_neox", "model"):
        if hasattr(model, attr_name):
            setattr(model, attr_name, torch.compile(getattr(model, attr_name), **compile_kwargs))
            return model

    logger.warning(
        "In low performance case, please explicitly specify a module you want to wrap with `torch.compile`"
    )
    return torch.compile(model, **compile_kwargs)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions