Skip to content

Commit

Permalink
Merge pull request #822 from SciPhi-AI/dev
Browse files Browse the repository at this point in the history
Release v0.2.76
  • Loading branch information
NolanTrem authored Aug 2, 2024
2 parents 51a0f6a + 7e181ca commit 4e1ed6a
Show file tree
Hide file tree
Showing 114 changed files with 3,455 additions and 2,535 deletions.
89 changes: 89 additions & 0 deletions .github/workflows/build-mainv2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Manually triggered workflow that builds the multi-arch (amd64 + arm64)
# ragtoriches/prod images and pushes them to the registry:
#   * standard image      -> ragtoriches/prod:<version>, ragtoriches/prod:latest
#   * unstructured image  -> ragtoriches/prod:<version>-unstructured,
#                            ragtoriches/prod:latest-unstructured
# <version> is the `version` input when provided, otherwise `main`.
name: Build and Publish ragtoriches/prod

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to publish (leave empty to use default versioning)'
        required: false
        type: string

jobs:
  build-and-publish:
    runs-on: ubuntu-latest
    permissions:
      packages: write
      contents: read
      actions: write
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Docker Auth
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.RAGTORICHES_DOCKER_UNAME }}
          password: ${{ secrets.RAGTORICHES_DOCKER_TOKEN }}

      # QEMU provides the emulation needed for the non-native platform; it is
      # set up before Buildx, matching the order used in Docker's documentation.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Determine version to use
        id: version
        env:
          # Pass the user-supplied input through the environment rather than
          # expanding it inside the script, so its content is never parsed as
          # shell code.
          REQUESTED_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ -n "$REQUESTED_VERSION" ]; then
            echo "RELEASE_VERSION=$REQUESTED_VERSION" >> "$GITHUB_OUTPUT"
          else
            echo "RELEASE_VERSION=main" >> "$GITHUB_OUTPUT"
          fi

      # A per-run timestamp passed as a build arg to both image builds.
      - name: Generate Cache Buster
        id: cache-buster
        run: echo "CACHE_BUSTER=$(date +%s)" >> "$GITHUB_OUTPUT"

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ragtoriches/prod
          tags: |
            type=raw,value=${{ steps.version.outputs.RELEASE_VERSION }}
            type=raw,value=latest

      # The unstructured variant gets its own tag set. Reusing the standard
      # tags would overwrite the standard image in the registry, and a
      # multi-platform `push: true` build is not loaded into the local Docker
      # daemon, so it cannot be re-tagged afterwards with `docker tag`.
      - name: Extract metadata (tags, labels) for Docker (Unstructured)
        id: meta-unstructured
        uses: docker/metadata-action@v5
        with:
          images: ragtoriches/prod
          tags: |
            type=raw,value=${{ steps.version.outputs.RELEASE_VERSION }}-unstructured
            type=raw,value=latest-unstructured

      - name: Build and Push Docker Image (Standard)
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          build-args: |
            CACHE_BUST=${{ steps.cache-buster.outputs.CACHE_BUSTER }}

      - name: Build and Push Docker Image (Unstructured)
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile.unstructured
          push: true
          tags: ${{ steps.meta-unstructured.outputs.tags }}
          labels: ${{ steps.meta-unstructured.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          build-args: |
            CACHE_BUST=${{ steps.cache-buster.outputs.CACHE_BUSTER }}
83 changes: 46 additions & 37 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,32 +11,36 @@ on:

jobs:
pre-commit:
runs-on: ubuntu-latest
runs-on: [ self-hosted, Linux ]

steps:
- name: Checkout code
uses: actions/checkout@v3

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12.4' # Specify your Python version here

- name: Install dependencies
- name: Run pre-commit
run: |
python -m pip install --upgrade pip
pip install poetry
python3 -m venv venv
source venv/bin/activate
poetry install
- name: Install pre-commit
run: poetry run pre-commit install

- name: Run pre-commit
run: poetry run pre-commit run --all-files
pip install pre-commit
pre-commit install
pre-commit run --all-files
deactivate
rm -rf venv
- name: Clean up disk space
uses: jlumbroso/free-disk-space@main
with:
tool-cache: true
android: true
dotnet: true
haskell: true
large-packages: true
swap-storage: true

pytest:
runs-on: ubuntu-latest
timeout-minutes: 15 # Increased timeout to accommodate Ollama setup
runs-on: [ self-hosted, Linux ]
timeout-minutes: 15

env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
Expand All @@ -51,35 +55,40 @@ jobs:
- name: Checkout code
uses: actions/checkout@v3

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12.4'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install poetry
poetry install
- name: Install Ollama
run: |
curl -fsSL https://ollama.com/install.sh | sudo -E sh
- name: Start Ollama server
run: |
ollama serve &
sleep 5
curl -i http://localhost:11434
- name: Pull Ollama model
- name: Run pytest
run: |
ollama pull llama2
- name: Run tests
run: poetry run pytest tests/ -k "not redis and not sentence_transformer"
python3 -m venv venv
source venv/bin/activate
poetry install
pip install pytest
pip install pytest
poetry run pytest tests/ -k "not redis and not sentence_transformer"
deactivate
rm -rf venv
- name: Upload coverage reports to Codecov
uses: codecov/[email protected]
with:
token: ${{ secrets.CODECOV_TOKEN }}

- name: Clean up disk space
uses: jlumbroso/free-disk-space@main
with:
tool-cache: true
android: true
dotnet: true
haskell: true
large-packages: true
swap-storage: true

- name: Cleanup
if: always()
run: |
pkill -f ollama || true
echo "Cleaned up Ollama server"
15 changes: 14 additions & 1 deletion .github/workflows/integration-test-workflow-debian.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,21 @@ jobs:
actions: write
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
TELEMETRY_ENABLED: false

steps:
- uses: actions/checkout@v4

- name: Clean up disk space
uses: jlumbroso/free-disk-space@main
with:
tool-cache: true
android: true
dotnet: true
haskell: true
large-packages: true
swap-storage: true

- name: Docker Auth
uses: docker/login-action@v3
with:
Expand Down Expand Up @@ -58,7 +69,7 @@ jobs:
r2r version
echo "R2R Serve --docker"
r2r serve --docker --exclude-neo4j --exclude-ollama --image=ragtoriches/dev:latest
r2r serve --docker --exclude-neo4j=true --exclude-ollama=true --image=ragtoriches/dev:latest
echo "Waiting for services to start..."
sleep 30
Expand Down Expand Up @@ -105,5 +116,7 @@ jobs:
echo "Clean up Virtual Environment"
deactivate
rm -rf venv
docker stop $(docker ps -a -q)
docker system prune -af --volumes
docker network prune --force
docker volume rm $(docker volume ls -qf dangling=true)
9 changes: 6 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,14 @@ COPY --from=builder /usr/local/bin /usr/local/bin

# Copy the application and config
COPY r2r /app/r2r
COPY r2r.json /app/r2r.json
COPY r2r.toml /app/r2r.toml
COPY pyproject.toml /app/pyproject.toml

# Expose the port
EXPOSE 8000
ARG PORT=8000
ARG HOST=0.0.0.0
ENV PORT=$PORT HOST=$HOST
EXPOSE $PORT

# Run the application
CMD ["uvicorn", "r2r.main.app_entry:app", "--host", "0.0.0.0", "--port", "8000"]
CMD ["sh", "-c", "uvicorn r2r.main.app_entry:app --host $HOST --port $PORT"]
57 changes: 57 additions & 0 deletions Dockerfile.unstructured
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# R2R image with the Unstructured document-parsing stack preinstalled.
# Two stages: `builder` installs the Python dependencies with the compilers
# and headers they need; the final stage copies the installed packages onto a
# slim base that only carries the runtime libraries.
FROM python:3.10-slim AS builder

# Install system dependencies (including those needed for Unstructured and OpenCV).
# libgl1 is used instead of libgl1-mesa-glx: the latter is only a transitional
# package and is not available on newer Debian releases.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc g++ musl-dev curl libffi-dev gfortran libopenblas-dev \
    tesseract-ocr libtesseract-dev libleptonica-dev pkg-config \
    poppler-utils libmagic1 \
    libgl1 libglib2.0-0 \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

WORKDIR /app

RUN pip install --no-cache-dir poetry

# Copy the dependencies files
COPY pyproject.toml poetry.lock* ./

# Install the dependencies, including gunicorn, uvicorn, and unstructured.
# `--only main` is the current spelling of the legacy `--no-dev` flag, which
# recent Poetry releases no longer accept (Poetry is installed unpinned above).
RUN poetry config virtualenvs.create false \
    && poetry add "unstructured[all-docs]" \
    && poetry install --only main --no-root \
    && pip install --no-cache-dir gunicorn uvicorn

# Create the final image
FROM python:3.10-slim

# Install runtime dependencies
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
    curl tesseract-ocr poppler-utils libmagic1 \
    libgl1 libglib2.0-0 \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy the installed packages from the builder
COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy the application and config
COPY r2r /app/r2r
COPY r2r.toml /app/r2r.toml
COPY pyproject.toml /app/pyproject.toml

# Download NLTK packages and initialize models
RUN python3 -m nltk.downloader punkt averaged_perceptron_tagger \
    && python3 -c "from unstructured.partition.model_init import initialize; initialize()"

# Point TESSDATA_PREFIX at a stable path. The directory that holds the
# traineddata files is versioned (e.g. .../4.00/tessdata vs .../5/tessdata)
# and depends on the Tesseract release shipped by the base image, so locate it
# at build time and fail the build if it cannot be found.
RUN traineddata="$(find /usr/share/tesseract-ocr -name 'eng.traineddata' -print -quit)" \
    && test -n "$traineddata" \
    && ln -s "$(dirname "$traineddata")" /usr/share/tessdata
ENV TESSDATA_PREFIX=/usr/share/tessdata

# Expose the port. HOST/PORT default to the previously hard-coded values and
# can be overridden at build time (--build-arg) or at run time (-e).
ARG PORT=8000
ARG HOST=0.0.0.0
ENV PORT=$PORT HOST=$HOST
EXPOSE $PORT

# Run the application. `exec` replaces the shell so uvicorn receives
# container stop signals directly.
CMD ["sh", "-c", "exec uvicorn r2r.main.app_entry:app --host \"$HOST\" --port \"$PORT\""]
Loading

0 comments on commit 4e1ed6a

Please sign in to comment.