From f6c259f3381c7c252905e1f25654b9adf4a5ef92 Mon Sep 17 00:00:00 2001
From: mudler <2420543+mudler@users.noreply.github.com>
Date: Mon, 3 Nov 2025 22:57:02 +0000
Subject: [PATCH] chore(model gallery): :robot: add new models via gallery
 agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 gallery/index.yaml | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 514f53d19ff9..ceabe0a41cf7 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -23023,3 +23023,33 @@
     - filename: Evilmind-24B-v1.i1-Q4_K_M.gguf
       sha256: 22e56c86b4f4a8f7eb3269f72a6bb0f06a7257ff733e21063fdec6691a52177d
       uri: huggingface://mradermacher/Evilmind-24B-v1-i1-GGUF/Evilmind-24B-v1.i1-Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "lmunit-llama3.1-70b"
+  urls:
+    - https://huggingface.co/mradermacher/LMUnit-llama3.1-70b-GGUF
+  description: |
+    **Model Name:** LMUnit-llama3.1-70b
+    **Base Model:** Meta's Llama-3.1-70B-Instruct
+    **Developed By:** Contextual AI
+    **Model Type:** Fine-tuned language model for fine-grained, natural language-based evaluation of AI responses
+    **Primary Use Case:** Evaluating the quality of model outputs against natural language unit tests (criteria covering, e.g., accuracy, relevance, safety, structure) via human-like judgment
+
+    **Key Features:**
+    - Trained on multi-objective signals (pairwise comparisons, direct ratings, criterion-specific feedback)
+    - Generates continuous scores (1–5) indicating how well a response satisfies a given unit test
+    - Achieves state-of-the-art performance on evaluation benchmarks: **FLASK (72.03)**, **BiGGen-Bench (67.69)**, and **RewardBench (93.5% accuracy)**
+    - Highly aligned with human preferences, ranking in the top 5 on RewardBench and the top 2 on RewardBench2
+    - Designed to support nuanced, scenario-specific evaluations of long-form and complex outputs
+
+    **Ideal For:** Researchers and developers building systems that require precise, interpretable, and human-aligned evaluation of LLM outputs — especially in testing, benchmarking, and alignment pipelines.
+
+    **Paper:** [LMUnit: Fine-grained Evaluation with Natural Language Unit Tests](https://arxiv.org/abs/2412.13091)
+    **GitHub:** [ContextualAI/LMUnit](https://github.com/ContextualAI/LMUnit)
+    **Hugging Face:** [ContextualAI/LMUnit-llama3.1-70b](https://huggingface.co/ContextualAI/LMUnit-llama3.1-70b)
+  overrides:
+    parameters:
+      model: LMUnit-llama3.1-70b.Q4_K_S.gguf
+  files:
+    - filename: LMUnit-llama3.1-70b.Q4_K_S.gguf
+      sha256: 59b192396784ed498d00ef96091b0e128ce6ed42f28d1669aa3d3e21720f6a2e
+      uri: huggingface://mradermacher/LMUnit-llama3.1-70b-GGUF/LMUnit-llama3.1-70b.Q4_K_S.gguf