llm-ghostwriter/core/model/inference/test.py at 33c67a3e6f5939588d1f417b727c333c4e46a407 · ahmedshahriar/llm-ghostwriter · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from loguru import logger

from core.model.inference.inference import LLMInferenceTransformers
from core.model.inference.run import InferenceExecutor
from core.settings import settings

if __name__ == "__main__":
    # Manual example run: local text generation via Hugging Face Transformers.
    #
    # Reference model: https://huggingface.co/ahmedshahriar/GhostWriterLlama-3.2-1B-DPO
    # It was DPO-tuned on the Alpaca prompt format, which is why the prompt
    # below uses the preamble / "### Instruction:" / "### Response:" layout.
    #
    # Earlier, simpler prompt kept for reference:
    # "#Instruction: Write me a post about Huggingface Inference Client with 800 words"
    prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Write an ~800-word technical blog post about the Hugging Face Inference Client.
Cover: overview, install, minimal code sample, serverless vs endpoint, streaming, auth, costs/latency, common pitfalls. Use headings and short code blocks.

### Response:
"""
    logger.info(f"Running inference for text: '{prompt}'")

    # The model to load is taken from project settings rather than hard-coded here.
    model = LLMInferenceTransformers(model_id=settings.HUGGINGFACE_INFERENCE_MODEL_ID)

    # Hand the model and prompt to the executor, then run it to get the output.
    executor = InferenceExecutor(model, prompt)
    generated = executor.execute()

    logger.info(f"Answer: '{generated}'")