
Different results from the plain transformers implementation #3584

@bjtuxck

Description


With the same input, the two implementations below produce different embeddings, and I can't tell which detail in my transformers reimplementation is wrong. I have already checked that the pooling method matches.

sentence-transformers 5.1.2
transformers 4.56.1
torch 2.7.1
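
For reference, the module pipeline that SentenceTransformer assembles can be printed to confirm which pooling mode and Dense configuration it actually applies (a minimal sketch, reusing the same local model path as in the script below):

from sentence_transformers import SentenceTransformer

model = SentenceTransformer(
    "/home/jovyan/mnt-xck-notebook-zzzc/pretrained_models/Kingsoft-LLM/QZhou-Embedding-Zh",
    trust_remote_code=True,
)
# Prints the Transformer, Pooling and Dense modules with their settings
# (pooling mode, in/out features, activation function).
print(model)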

from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import normalize
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
from transformers import AutoModel, AutoTokenizer
from safetensors.torch import load_file

def get_prompteol_input(text: str) -> str:
    return f"This sentence: <|im_start|>“{text}” means in one word: “"

def get_detailed_instruct(task_description: str, query: str) -> str:
    return f'Instruct: {task_description}\nQuery:{query}'

task = "Given a web search query, retrieve relevant passages that answer the query"
queries = [ 
    get_prompteol_input(get_detailed_instruct(task, "光合作用是什么?")),
    get_prompteol_input(get_detailed_instruct(task, "光合作用是什么?"))
]

def main_v1():
    # Reference pipeline: let SentenceTransformer apply its own pooling and Dense modules.
    model = SentenceTransformer(
        "/home/jovyan/mnt-xck-notebook-zzzc/pretrained_models/Kingsoft-LLM/QZhou-Embedding-Zh",
        model_kwargs={"device_map": "cuda", "trust_remote_code": True, "torch_dtype": torch.bfloat16},
        tokenizer_kwargs={"padding_side": "left", "trust_remote_code": True},
        trust_remote_code=True,
    )
    embeddings = model.encode(queries, normalize_embeddings=False)
    print(embeddings)

def main():
    # Manual reimplementation with transformers: backbone + last-token pooling + Dense projection.
    llm = AutoModel.from_pretrained(
        '/home/jovyan/mnt-xck-notebook-zzzc/pretrained_models/Kingsoft-LLM/QZhou-Embedding-Zh',
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        '/home/jovyan/mnt-xck-notebook-zzzc/pretrained_models/Kingsoft-LLM/QZhou-Embedding-Zh',
        padding_side='left',
        trust_remote_code=True,
        max_length=40960,
    )

    # Dense projection weights from the 2_Dense folder, loaded into a plain nn.Linear.
    mlp = nn.Linear(4096, 1792, dtype=torch.bfloat16)
    state_dict = load_file('/home/jovyan/mnt-xck-notebook-zzzc/pretrained_models/Kingsoft-LLM/QZhou-Embedding-Zh/2_Dense/model_bak.safetensors')
    mlp.load_state_dict(state_dict)

    llm = llm.to("cuda")
    mlp = mlp.to("cuda")
    query_inputs = tokenizer(queries, padding=True, truncation=True, return_tensors="pt").to("cuda")

    llm.eval()
    mlp.eval()
    with torch.no_grad():
        last_hidden_state = llm(**query_inputs).last_hidden_state
        # Last-token pooling: with left padding the final position holds the last real token.
        embedding = last_hidden_state[:, -1]
        embedding = mlp(embedding).cpu().float().numpy()
    print(embedding)

if __name__ == "__main__":
    main_v1()
    main()
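
To quantify how far apart the two pipelines are (rather than eyeballing the printed arrays), a small comparison helper can be used once both functions return their embeddings instead of printing them (a sketch, assuming both are float numpy arrays of the same shape):

import numpy as np

def compare(a: np.ndarray, b: np.ndarray) -> None:
    # L2-normalize both sets of embeddings, then report the per-query cosine
    # similarity and the largest element-wise difference. Values close to 1.0
    # would point at a small numerical (dtype/padding) difference, while low
    # values would suggest a different pooling or Dense projection.
    a = a / np.linalg.norm(a, axis=1, keepdims=True)
    b = b / np.linalg.norm(b, axis=1, keepdims=True)
    print("cosine similarity per query:", (a * b).sum(axis=1))
    print("max abs diff after normalization:", np.abs(a - b).max())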
