EPFLiGHT
diff --git a/‎.github/workflows/push-to-registry.yml‎
Lines changed: 45 additions & 0 deletions b/‎.github/workflows/push-to-registry.yml‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎.github/workflows/tests.yml‎
Lines changed: 6 additions & 0 deletions b/‎.github/workflows/tests.yml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 1 deletion b/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md‎
Lines changed: 6 additions & 6 deletions b/‎README.md‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎docs/index_api.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/index_api.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/index/config.yaml‎
Lines changed: 1 addition & 1 deletion b/‎examples/index/config.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/postprocessor/config.yaml‎
Lines changed: 19 additions & 3 deletions b/‎examples/postprocessor/config.yaml‎
Lines changed: 19 additions & 3 deletions
diff --git a/‎examples/rag/config.yaml‎
Lines changed: 1 addition & 1 deletion b/‎examples/rag/config.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/rag/config_api.yaml‎
Lines changed: 2 additions & 2 deletions b/‎examples/rag/config_api.yaml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎examples/retriever_api/config.yaml‎
Lines changed: 6 additions & 0 deletions b/‎examples/retriever_api/config.yaml‎
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,45 @@
+# Build the Docker image and push it to Docker Hub on every push or manual run.
+name: ci
+
+on:
+  push:
+  workflow_dispatch:
+
+jobs:
+  docker:
+    runs-on: ubuntu-latest
+    steps:
+      # Compute the image tags and labels consumed by the build step.
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          # list of Docker images to use as base name for tags
+          images: |
+            androz2091/swiss-ai-mmore
+          # generate Docker tags based on the following events/attributes
+          tags: |
+            type=schedule
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=semver,pattern={{major}}
+            type=sha
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      # Credentials are read from the workflow's secrets context.
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      # Tags and labels come from the `meta` step above.
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
@@ -29,6 +29,12 @@ jobs:
           pip install -e '.[rag,dev]'  # or custom setup
           pip install pytest  # if not in requirements.txt
 
+      - name: Show installed cohere and langchain-cohere versions
+        run: |
+          pip show cohere || echo "Cohere not installed"
+          pip show langchain-cohere || echo "Langchain-cohere not installed"
+
+
       - name: Run tests
         run: |
           pytest
@@ -10,4 +10,4 @@ repos:
         args: [
           --fix,                # auto-fix lint + style issues
           --unsafe-fixes,       # allows formatting & import sorting
-        ]
+        ]
@@ -43,7 +43,7 @@ To install the package simply run:
 pip install mmore
 ```
 
-> :warning: This is a big package with a lot of dependencies, so we recommend to use `uv` to handle `pip` installations. [Check our tutorial on uv](./docs/uv.md).
+> :warning: This is a big package with a lot of dependencies, so we recommend using `uv` to handle `pip` installations. [Check our tutorial on uv](https://github.com/swiss-ai/mmore/blob/master/docs/uv.md).
 
 ### Minimal Example
 
@@ -90,22 +90,22 @@ To launch the MMORE pipeline, follow the specialised instructions in the docs.
 1. **:page_facing_up: Input Documents**
    Upload your multimodal documents (PDFs, videos, spreadsheets, and m(m)ore) into the pipeline.
 
-2. [**:mag: Process**](./docs/process.md)
+2. [**:mag: Process**](https://github.com/swiss-ai/mmore/blob/master/docs/process.md)
    Extracts and standardizes text, metadata, and multimedia content from diverse file formats. Easily extensible! You can add your own processors to handle new file types.
    *Supports fast processing for specific types.*
 
-3. [**:file_folder: Index**](./docs/index.md)
-   Organizes extracted data into a **hybrid retrieval-ready Vector Store DB**, combining dense and sparse indexing through [Milvus](https://milvus.io/). Your vector DB can also be remotely hosted and then you only have to provide a standard API. There is also an [HTTP Index API](./docs/index_api.md) for adding new files on the fly with HTTP requests.
+3. [**:file_folder: Index**](https://github.com/swiss-ai/mmore/blob/master/docs/index.md)
+   Organizes extracted data into a **hybrid retrieval-ready Vector Store DB**, combining dense and sparse indexing through [Milvus](https://milvus.io/). Your vector DB can also be remotely hosted and then you only have to provide a standard API. There is also an [HTTP Index API](https://github.com/swiss-ai/mmore/blob/master/docs/index_api.md) for adding new files on the fly with HTTP requests.
 
-4. [**:robot: RAG**](./docs/rag.md)
+4. [**:robot: RAG**](https://github.com/swiss-ai/mmore/blob/master/docs/rag.md)
    Use the indexed documents inside a **Retrieval-Augmented Generation (RAG) system**  that provides a [LangChain](https://www.langchain.com/) interface. Plug in any LLM with a compatible interface or add new ones through an easy-to-use interface.
    *Supports API hosting or local inference.*
 
 5. **:tada: Evaluation**
    *Coming soon*
    An easy way to evaluate the performance of your RAG system using Ragas.
 
-See [the `/docs` directory](./docs) for additional details on each modules and hands-on tutorials on parts of the pipeline.
+See [the `/docs` directory](https://github.com/swiss-ai/mmore/blob/master/docs) for additional details on each module and hands-on tutorials on parts of the pipeline.
 
 
 #### :construction: Supported File Types
 
@@ -173,7 +173,7 @@ Returns the file with binary content.
 - File types supported:
 
     ```
-    .pdf, .docx, .pptx, .md, .txt, .xlsx, .xls, .csv, .mp4, .avi, .mov, .mkv, .mp3, .wav, .aac, .eml, .html
+    .pdf, .docx, .pptx, .md, .txt, .xlsx, .xls, .csv, .mp4, .avi, .mov, .mkv, .mp3, .wav, .aac, .eml, .html, .htm
     ```
     
 
 
@@ -9,4 +9,4 @@ indexer:
     uri: ./proc_demo.db
     name: my_db
 collection_name: my_docs
-documents_path: 'examples/process/outputs/merged/final_pp.jsonl'
+documents_path: 'examples/postprocessor/outputs/merged/final_pp.jsonl'
@@ -1,7 +1,23 @@
 pp_modules:
-  - type: chunker 
+  - type: file_namer
+  - type: chunker
     args:
       chunking_strategy: sentence
+  - type: translator
+    args:
+      target_language: en
+      attachment_tag: <attachment>
+      confidence_threshold: 0.7
+      constrained_languages: 
+        - fr
+        - en
+  - type: metafuse
+    args:
+      metadata_keys:
+        - file_name
+      content_template: Content from {file_name}
+      position: beginning
+
 output:
-  output_path: examples/process/outputs/merged/
-  save_each_step: True
+  output_path: examples/postprocessor/outputs/merged/
+  save_each_step: True
@@ -1,7 +1,7 @@
 rag:
   llm: 
     llm_name: OpenMeditron/meditron3-8b
-    max_new_tokens: 250
+    max_new_tokens: 1200
   retriever:
     db:
       uri: ./proc_demo.db
 
@@ -2,8 +2,8 @@
 rag: 
   # LLM Config
   llm: 
-    llm_name: "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # "epfl-llm/meditron-70b" # "gpt-4o-mini" # Anything supported
-    max_new_tokens: 100
+    llm_name: Qwen/Qwen3-8B # "epfl-llm/meditron-70b" # "gpt-4o-mini" # Anything supported
+    max_new_tokens: 1200
     temperature: 0.8
   # Retriever Config
   retriever:
 
@@ -0,0 +1,6 @@
+db:
+  uri: ./proc_demo.db  # same database uri as the other example configs in this diff
+  name: my_db
+hybrid_search_weight: 0.5  # NOTE(review): presumably the dense/sparse balance; confirm
+k: 5  # NOTE(review): presumably the number of results to retrieve; confirm
+collection_name: my_docs  # same collection name as the index example config