diff --git a/gallery/index.yaml b/gallery/index.yaml
index d8e62c9b03d7..f621099ed12a 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,4 +1,70 @@
 ---
+# Gallery entry: Qwopus3.5-4B-Coder-MTP as a Q4_K_M GGUF, run with the llama-cpp backend.
+- name: "qwopus3.5-4b-coder-mtp"
+  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
+  urls:
+    - https://huggingface.co/Jackrong/Qwopus3.5-4B-Coder-MTP-GGUF
+  # NOTE(review): this text reads like the Qwen3.5-4B base model card (it mentions
+  # Transformers-format weights), while this entry installs GGUF files; confirm it is intended.
+  description: |
+    # Qwen3.5-4B
+
+    [Qwen Chat](https://chat.qwen.ai)
+
+    > [!Note]
+    > This repository contains model weights and configuration files for the post-trained model in the Hugging Face Transformers format.
+    >
+    > These artifacts are compatible with Hugging Face Transformers, vLLM, SGLang, KTransformers, etc.
+
+    Over recent months, we have intensified our focus on developing foundation models that deliver exceptional utility and performance. Qwen3.5 represents a significant leap forward, integrating breakthroughs in multimodal learning, architectural efficiency, reinforcement learning scale, and global accessibility to empower developers and enterprises with unprecedented capability and efficiency.
+
+    ## Qwen3.5 Highlights
+
+    Qwen3.5 features the following enhancement:
+
+    - **Unified Vision-Language Foundation**: Early fusion training on multimodal tokens achieves cross-generational parity with Qwen3 and outperforms Qwen3-VL models across reasoning, coding, agents, and visual understanding benchmarks.
+
+    - **Efficient Hybrid Architecture**: Gated Delta Networks combined with sparse Mixture-of-Experts deliver high-throughput inference with minimal latency and cost overhead.
+
+    ...
+  license: "apache-2.0"
+  tags:
+    - llm
+    - gguf
+    - vision
+    - multimodal
+    - reasoning
+  icon: https://qianwen-res.oss-accelerate-overseas.aliyuncs.com/Qwen3.5/Figures/qwen3.5_small_size_score.png
+  overrides:
+    backend: llama-cpp
+    function:
+      automatic_tool_parsing_fallback: true
+      grammar:
+        disable: true
+    known_usecases:
+      - chat
+    # Same path as the second download listed under files.
+    mmproj: llama-cpp/mmproj/Qwopus3.5-4B-Coder-MTP-GGUF/mmproj-F32.gguf
+    # key:value option strings; the spec_* entries presumably tune speculative decoding
+    # (spec_type is draft-mtp); confirm against the llama-cpp backend documentation.
+    options:
+      - use_jinja:true
+      - spec_type:draft-mtp
+      - spec_n_max:6
+      - spec_p_min:0.75
+    parameters:
+      # Same path as the first download listed under files.
+      model: llama-cpp/models/Qwopus3.5-4B-Coder-MTP-GGUF/Qwopus3.5-4B-Coder-MTP-Q4_K_M.gguf
+    template:
+      use_tokenizer_template: true
+  # Files to download; each filename is a path referenced from overrides above.
+  files:
+    - filename: llama-cpp/models/Qwopus3.5-4B-Coder-MTP-GGUF/Qwopus3.5-4B-Coder-MTP-Q4_K_M.gguf
+      sha256: 50b426c393fc07aba6438d8f7d66bad156e06a6afb6bb80f15def9c313efa133
+      uri: https://huggingface.co/Jackrong/Qwopus3.5-4B-Coder-MTP-GGUF/resolve/main/Qwopus3.5-4B-Coder-MTP-Q4_K_M.gguf
+    - filename: llama-cpp/mmproj/Qwopus3.5-4B-Coder-MTP-GGUF/mmproj-F32.gguf
+      sha256: 3a6175c0ae9faddd54926b8660cfed881a13d534a01e571ea11fe9a3e4d36543
+      uri: https://huggingface.co/Jackrong/Qwopus3.5-4B-Coder-MTP-GGUF/resolve/main/mmproj-F32.gguf
 - name: "gemma-4-12b-it-qat-q4_0"
   url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
   urls: