Skip to content

v1.8.0 - Mistral welcomes 📢

Choose a tag to compare

@patrickvonplaten patrickvonplaten released this 15 Jul 09:39
· 28 commits to main since this release
d201d12

What's Changed

Full Changelog: v1.7.0...v1.8.0

Audio chat example:

from mistral_common.protocol.instruct.messages import TextChunk, AudioChunk, UserMessage, AssistantMessage, RawAudio
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.audio import Audio
from huggingface_hub import hf_hub_download

repo_id = "mistralai/voxtral-mini"
tokenizer = MistralTokenizer.from_hf_hub(repo_id)

obama_file = hf_hub_download("patrickvonplaten/audio_samples", "obama.mp3", repo_type="dataset")
bcn_file = hf_hub_download("patrickvonplaten/audio_samples", "bcn_weather.mp3", repo_type="dataset")

def file_to_chunk(file: str) -> AudioChunk:
    audio = Audio.from_file(file, strict=False)
    return AudioChunk.from_audio(audio)

text_chunk = TextChunk(text="Which speaker do you prefer between the two? Why? How are they different from each other?")
user_msg = UserMessage(content=[file_to_chunk(obama_file), file_to_chunk(bcn_file), text_chunk]).to_openai()


request = ChatCompletionRequest(messages=[user_msg])
tokenized = tokenizer.encode_chat_completion(request)

# pass tokenized.tokens to your favorite audio model
print(tokenized.tokens)
print(tokenized.audios)

# print text to visually see tokens
print(tokenized.text)

Audio transcription example:

from mistral_common.protocol.transcription.request import TranscriptionRequest
from mistral_common.protocol.instruct.messages import RawAudio
from mistral_common.audio import Audio
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

from huggingface_hub import hf_hub_download

repo_id = "mistralai/voxtral-mini"
tokenizer = MistralTokenizer.from_hf_hub(repo_id)

obama_file = hf_hub_download("patrickvonplaten/audio_samples", "obama.mp3", repo_type="dataset")
audio = Audio.from_file(obama_file, strict=False)

audio = RawAudio.from_audio(audio)
request = TranscriptionRequest(model=repo_id, audio=audio, language="en")

tokenized = tokenizer.encode_transcription(request)

# pass tokenized.tokens to your favorite audio model
print(tokenized.tokens)
print(tokenized.audios)

# print text to visually see tokens
print(tokenized.text)