SciPhi-AI
diff --git a/‎.github/workflows/build-mainv2.yml
Lines changed: 89 additions & 0 deletions b/‎.github/workflows/build-mainv2.yml
Lines changed: 89 additions & 0 deletions
diff --git a/‎.github/workflows/ci.yml
Lines changed: 46 additions & 37 deletions b/‎.github/workflows/ci.yml
Lines changed: 46 additions & 37 deletions
diff --git a/‎.github/workflows/integration-test-workflow-debian.yml
Lines changed: 14 additions & 1 deletion b/‎.github/workflows/integration-test-workflow-debian.yml
Lines changed: 14 additions & 1 deletion
diff --git a/‎Dockerfile
Lines changed: 6 additions & 3 deletions b/‎Dockerfile
Lines changed: 6 additions & 3 deletions
diff --git a/‎Dockerfile.unstructured
Lines changed: 57 additions & 0 deletions b/‎Dockerfile.unstructured
Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,89 @@
+# Manually triggered workflow: builds ./Dockerfile and ./Dockerfile.unstructured
+# for linux/amd64 and linux/arm64 and pushes the results to ragtoriches/prod.
+name: Build and Publish ragtoriches/prod
+
+on:
+  workflow_dispatch:
+    inputs:
+      # Optional tag for the published image. When left empty, the
+      # "Determine version to use" step falls back to the tag `main`.
+      version:
+        description: 'Version to publish (leave empty to use default versioning)'
+        required: false
+        type: string
+
+jobs:
+  build-and-publish:
+    runs-on: ubuntu-latest
+    # NOTE(review): the steps below authenticate with username/password
+    # secrets rather than GITHUB_TOKEN; confirm that the `packages` and
+    # `actions` write scopes are actually required by this job.
+    permissions:
+      packages: write
+      contents: read
+      actions: write
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      # No `registry` is given, so login-action uses its default registry
+      # (Docker Hub per the action's documentation).
+      - name: Docker Auth
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.RAGTORICHES_DOCKER_UNAME }}
+          password: ${{ secrets.RAGTORICHES_DOCKER_TOKEN }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      # QEMU provides the emulation used by the multi-platform builds below
+      # (the build steps request linux/amd64 and linux/arm64).
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      # Resolves the tag to publish and exposes it as the step output
+      # RELEASE_VERSION: the `version` input when provided, otherwise `main`.
+      # The input is handed to the script through the environment rather than
+      # being expanded into the script text, so a crafted value cannot inject
+      # shell commands.
+      - name: Determine version to use
+        id: version
+        env:
+          REQUESTED_VERSION: ${{ github.event.inputs.version }}
+        run: |
+          echo "RELEASE_VERSION=${REQUESTED_VERSION:-main}" >> "$GITHUB_OUTPUT"
+
+      # Writes the current Unix time (seconds) to the step output CACHE_BUSTER.
+      # Both image builds pass it as the CACHE_BUST build argument, so the
+      # argument value differs on every run.
+      # NOTE(review): this only affects layer caching if the Dockerfiles
+      # declare `ARG CACHE_BUST` - confirm.
+      - name: Generate Cache Buster
+        id: cache-buster
+        run: echo "CACHE_BUSTER=$(date +%s)" >> $GITHUB_OUTPUT
+
+      # Produces the tag list and labels for ragtoriches/prod: one tag for the
+      # resolved release version and one for `latest`.
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ragtoriches/prod
+          tags: |
+            type=raw,value=${{ steps.version.outputs.RELEASE_VERSION }}
+            type=raw,value=latest
+
+      # Builds ./Dockerfile for both platforms and pushes it to the registry
+      # under the tags computed by the metadata step.
+      - name: Build and Push Docker Image (Standard)
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: ./Dockerfile
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          platforms: linux/amd64,linux/arm64
+          build-args: |
+            CACHE_BUST=${{ steps.cache-buster.outputs.CACHE_BUSTER }}
+
+      # Builds ./Dockerfile.unstructured and publishes it as
+      # "<version>-unstructured" and "latest-unstructured".
+      # The suffixed tags are applied by the build itself so this image never
+      # overwrites the standard image tags pushed by the previous step. A
+      # multi-platform image pushed by buildx is not loaded into the local
+      # Docker daemon, so it cannot be re-tagged afterwards with
+      # `docker tag` / `docker push`.
+      - name: Build and Push Docker Image (Unstructured)
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: ./Dockerfile.unstructured
+          push: true
+          tags: |
+            ragtoriches/prod:${{ steps.version.outputs.RELEASE_VERSION }}-unstructured
+            ragtoriches/prod:latest-unstructured
+          labels: ${{ steps.meta.outputs.labels }}
+          platforms: linux/amd64,linux/arm64
+          build-args: |
+            CACHE_BUST=${{ steps.cache-buster.outputs.CACHE_BUSTER }}
@@ -11,32 +11,36 @@ on:
 
 jobs:
   pre-commit:
-    runs-on: ubuntu-latest
+    runs-on: [ self-hosted, Linux ]
 
     steps:
       - name: Checkout code
         uses: actions/checkout@v3
 
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.12.4'  # Specify your Python version here
-
-      - name: Install dependencies
+      - name: Run pre-commit
         run: |
-          python -m pip install --upgrade pip
-          pip install poetry
+          python3 -m venv venv
+          source venv/bin/activate
           poetry install
-
-      - name: Install pre-commit
-        run: poetry run pre-commit install
-
-      - name: Run pre-commit
-        run: poetry run pre-commit run --all-files
+          pip install pre-commit
+          pre-commit install
+          pre-commit run --all-files
+          deactivate
+          rm -rf venv
+
+      # NOTE(review): this step runs after pre-commit has already finished, and
+      # the job targets a self-hosted runner. Confirm that applying these
+      # cleanup options to that machine is intended, and consider pinning the
+      # action to a release tag or commit SHA instead of the mutable `main`
+      # branch.
+      - name: Clean up disk space
+        uses: jlumbroso/free-disk-space@main
+        with:
+          tool-cache: true
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
 
   pytest:
-    runs-on: ubuntu-latest
-    timeout-minutes: 15  # Increased timeout to accommodate Ollama setup
+    runs-on: [ self-hosted, Linux ]
+    timeout-minutes: 15
 
     env:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -51,35 +55,40 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@v3
 
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.12.4'
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install poetry
-          poetry install
-
-      - name: Install Ollama
-        run: |
-          curl -fsSL https://ollama.com/install.sh | sudo -E sh
-
       - name: Start Ollama server
         run: |
           ollama serve &
           sleep 5
           curl -i http://localhost:11434
 
-      - name: Pull Ollama model
+      # Installs the project into a throwaway virtualenv, runs the test suite
+      # (skipping tests whose names match redis or sentence_transformer), then
+      # removes the virtualenv again.
+      - name: Run pytest
         run: |
-          ollama pull llama2
-
-      - name: Run tests
-        run: poetry run pytest tests/ -k "not redis and not sentence_transformer"
+          python3 -m venv venv
+          source venv/bin/activate
+          poetry install
+          pip install pytest
+          poetry run pytest tests/ -k "not redis and not sentence_transformer"
+          deactivate
+          rm -rf venv
 
       - name: Upload coverage reports to Codecov
         uses: codecov/codecov-action@v4.0.1
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
+
+      # NOTE(review): this step runs after the tests and the coverage upload,
+      # and the job targets a self-hosted runner. Confirm that applying these
+      # cleanup options to that machine is intended, and consider pinning the
+      # action to a release tag or commit SHA instead of the mutable `main`
+      # branch.
+      - name: Clean up disk space
+        uses: jlumbroso/free-disk-space@main
+        with:
+          tool-cache: true
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
+      - name: Cleanup
+        if: always()
+        run: |
+          pkill -f ollama || true
+          echo "Cleaned up Ollama server"
@@ -16,10 +16,21 @@ jobs:
       actions: write
     env:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      TELEMETRY_ENABLED: false
 
     steps:
     - uses: actions/checkout@v4
 
+    - name: Clean up disk space
+      uses: jlumbroso/free-disk-space@main
+      with:
+        tool-cache: true
+        android: true
+        dotnet: true
+        haskell: true
+        large-packages: true
+        swap-storage: true
+
     - name: Docker Auth
       uses: docker/login-action@v3
       with:
@@ -58,7 +69,7 @@ jobs:
         r2r version
 
         echo "R2R Serve --docker"
-        r2r serve --docker --exclude-neo4j --exclude-ollama --image=ragtoriches/dev:latest
+        r2r serve --docker --exclude-neo4j=true --exclude-ollama=true  --image=ragtoriches/dev:latest
 
         echo "Waiting for services to start..."
         sleep 30
@@ -105,5 +116,7 @@ jobs:
         echo "Clean up Virtual Environment"
         deactivate
         rm -rf venv
+        # Stop whatever is still running before pruning. `docker stop` with an
+        # empty argument list exits non-zero, so feed the IDs through
+        # `xargs -r`, which skips the call when there is nothing to stop.
+        docker ps -q | xargs -r docker stop
         docker system prune -af --volumes
+        docker network prune --force
         docker volume rm $(docker volume ls -qf dangling=true)
@@ -33,11 +33,14 @@ COPY --from=builder /usr/local/bin /usr/local/bin
 
 # Copy the application and config
 COPY r2r /app/r2r
-COPY r2r.json /app/r2r.json
+COPY r2r.toml /app/r2r.toml
 COPY pyproject.toml /app/pyproject.toml
 
 # Expose the port
-EXPOSE 8000
+ARG PORT=8000
+ARG HOST=0.0.0.0
+ENV PORT=$PORT HOST=$HOST
+EXPOSE $PORT
 
 # Run the application
-CMD ["uvicorn", "r2r.main.app_entry:app", "--host", "0.0.0.0", "--port", "8000"]
+# A shell is needed to expand $HOST and $PORT at container start; `exec`
+# then replaces that shell with uvicorn so the server receives stop signals
+# directly instead of relying on the wrapper shell.
+CMD ["sh", "-c", "exec uvicorn r2r.main.app_entry:app --host \"$HOST\" --port \"$PORT\""]
@@ -0,0 +1,57 @@
+# Builder stage: installs the build toolchain and all Python dependencies.
+# Only the installed packages and console scripts are copied into the final
+# image.
+FROM python:3.10-slim AS builder
+
+# Install system dependencies (including those needed for Unstructured and OpenCV)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    gcc g++ musl-dev curl libffi-dev gfortran libopenblas-dev \
+    tesseract-ocr libtesseract-dev libleptonica-dev pkg-config \
+    poppler-utils libmagic1 \
+    libgl1-mesa-glx libglib2.0-0 \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+RUN pip install --no-cache-dir poetry
+
+# Copy the dependency manifests (the lock file is optional: note the glob)
+COPY pyproject.toml poetry.lock* ./
+
+# Install the dependencies, including gunicorn, uvicorn, and unstructured,
+# straight into the system interpreter (no Poetry-managed virtualenv).
+# `--without dev` replaces the deprecated `--no-dev` flag: Poetry is installed
+# unpinned above, and current Poetry releases no longer accept `--no-dev`.
+RUN poetry config virtualenvs.create false \
+    && poetry add "unstructured[all-docs]" \
+    && poetry install --without dev --no-root \
+    && pip install --no-cache-dir gunicorn uvicorn
+
+# Create the final image. It starts again from the plain base image, so the
+# compilers and -dev packages installed in the builder stage are left behind.
+FROM python:3.10-slim
+
+# Install runtime dependencies (the runtime subset of the builder's package
+# list; no compilers or -dev packages)
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+    curl tesseract-ocr poppler-utils libmagic1 \
+    libgl1-mesa-glx libglib2.0-0 \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Copy the installed packages from the builder.
+# The python3.10 path is hard-coded and must stay in step with the Python
+# version of the base images used by both stages.
+COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
+# Console scripts installed by pip/poetry in the builder (e.g. uvicorn)
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+# Copy the application and config
+COPY r2r /app/r2r
+COPY r2r.toml /app/r2r.toml
+COPY pyproject.toml /app/pyproject.toml
+
+# Download NLTK packages and initialize models at build time, so the data is
+# baked into an image layer.
+RUN python3 -m nltk.downloader punkt averaged_perceptron_tagger \
+    && python3 -c "from unstructured.partition.model_init import initialize; initialize()"
+
+# Point Tesseract at its language data. The apt package used here installs
+# Tesseract 5, whose data lives under .../5/tessdata; the former 4.00 path
+# only exists on older Debian releases.
+# NOTE(review): re-check this path whenever the base image's Debian release
+# changes.
+ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata
+
+# Expose the port.
+# PORT and HOST default to the previously hard-coded values, can be overridden
+# with --build-arg at build time or -e at run time, and are exported so the
+# start command can read them.
+ARG PORT=8000
+ARG HOST=0.0.0.0
+ENV PORT=$PORT HOST=$HOST
+EXPOSE $PORT
+
+# Run the application.
+# A shell is needed to expand $HOST and $PORT at container start; `exec`
+# then replaces that shell with uvicorn so the server receives stop signals
+# directly.
+CMD ["sh", "-c", "exec uvicorn r2r.main.app_entry:app --host \"$HOST\" --port \"$PORT\""]