The same input produces different results in the two code paths below, and I can't tell which detail is wrong. I have already checked the pooling method.
Environment:

- sentence-transformers 5.1.2
- transformers 4.56.1
- torch 2.7.1
```python
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import normalize
import torch
from torch import nn
import numpy as np
from transformers import AutoModel, AutoTokenizer
from safetensors.torch import load_file
import torch.nn.functional as F


def get_prompteol_input(text: str) -> str:
    # PromptEOL-style template: the embedding is read off at the final token.
    return f"This sentence: <|im_start|>“{text}” means in one word: “"


def get_detailed_instruct(task_description: str, query: str) -> str:
    return f'Instruct: {task_description}\nQuery:{query}'


task = "Given a web search query, retrieve relevant passages that answer the query"
queries = [
    get_prompteol_input(get_detailed_instruct(task, "光合作用是什么?")),  # "What is photosynthesis?"
    get_prompteol_input(get_detailed_instruct(task, "光合作用是什么?")),
]
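# For reference, each entry in `queries` expands to the single string:
#   This sentence: <|im_start|>“Instruct: Given a web search query, retrieve
#   relevant passages that answer the query\nQuery:光合作用是什么?” means in one word: “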


def main_v1():
    # Path A: let SentenceTransformer run its full pipeline (pooling, Dense, ...).
    model = SentenceTransformer(
        "/home/jovyan/mnt-xck-notebook-zzzc/pretrained_models/Kingsoft-LLM/QZhou-Embedding-Zh",
        model_kwargs={"device_map": "cuda", "trust_remote_code": True, "torch_dtype": torch.bfloat16},
        tokenizer_kwargs={"padding_side": "left", "trust_remote_code": True},
        trust_remote_code=True,
    )
    embeddings = model.encode(queries, normalize_embeddings=False)
    print(embeddings)


def main():
    # Path B: rebuild the pipeline manually with transformers.
    llm = AutoModel.from_pretrained(
        '/home/jovyan/mnt-xck-notebook-zzzc/pretrained_models/Kingsoft-LLM/QZhou-Embedding-Zh',
        torch_dtype=torch.bfloat16, trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        '/home/jovyan/mnt-xck-notebook-zzzc/pretrained_models/Kingsoft-LLM/QZhou-Embedding-Zh',
        padding_side='left', trust_remote_code=True, max_length=40960,
    )
    # The 2_Dense projection from the SentenceTransformer pipeline, loaded as a plain Linear.
    mlp = nn.Linear(4096, 1792, dtype=torch.bfloat16)
    state_dict = load_file('/home/jovyan/mnt-xck-notebook-zzzc/pretrained_models/Kingsoft-LLM/QZhou-Embedding-Zh/2_Dense/model_bak.safetensors')
    mlp.load_state_dict(state_dict)
    llm = llm.to("cuda")
    mlp = mlp.to("cuda")
    query_inputs = tokenizer(queries, padding=True, truncation=True, return_tensors="pt").to("cuda")
    llm.eval()
    mlp.eval()
    with torch.no_grad():
        last_hidden_state = llm(**query_inputs).last_hidden_state
        # Last-token pooling (valid here because padding_side='left').
        embedding = last_hidden_state[:, -1]
        embedding = mlp(embedding).cpu().float().numpy()
    print(embedding)


if __name__ == "__main__":
    main_v1()
    main()
```
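
A diagnostic that might narrow this down (a sketch, not a fix): dump what the SentenceTransformer pipeline actually contains, since `encode()` may apply a prompt template, a different `max_seq_length`, or a trailing Normalize module that the manual path does not reproduce, and then compare the two outputs numerically instead of eyeballing printed arrays. The snippet assumes the `model` and `queries` objects from the script above; `emb_manual` is a placeholder for the array printed by `main()`:

```python
import numpy as np

# Assumes `model` (the SentenceTransformer from main_v1) and `queries` from the script above.

# 1. Print the module pipeline: a Normalize step, a prompt template, or a different
#    max_seq_length here would explain a mismatch with the manual transformers path.
for idx, module in enumerate(model):
    print(idx, module)
print("prompts:", model.prompts)                # templates encode() may prepend
print("max_seq_length:", model.max_seq_length)  # truncation limit used by encode()

# 2. Quantify the difference instead of comparing printouts by eye.
def cosine(a: np.ndarray, b: np.ndarray) -> float:
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

emb_st = model.encode(queries, normalize_embeddings=False)
# emb_manual = ...  # placeholder: the `embedding` array computed in main()
# print("cosine(ST, manual):", cosine(emb_st[0], emb_manual[0]))
```

If the printout shows a Normalize module or a non-empty prompts dict, `encode()` is doing more than last-token pooling plus the Dense projection, which would already produce different numbers for identical inputs. bfloat16 kernels can also introduce small numerical differences between code paths, but those should leave the cosine similarity near 1.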