Skip to content

Commit 4e1ed6a

Browse files
authored
Merge pull request #822 from SciPhi-AI/dev
Release v0.2.76
2 parents 51a0f6a + 7e181ca commit 4e1ed6a

File tree

114 files changed

+3455
-2535
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

114 files changed

+3455
-2535
lines changed

.github/workflows/build-mainv2.yml

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
# Manually triggered release workflow.
# Builds two multi-arch images from this repository and pushes them to the
# ragtoriches/prod repository:
#   * ./Dockerfile              -> ragtoriches/prod:<version>, :latest
#   * ./Dockerfile.unstructured -> ragtoriches/prod:<version>-unstructured,
#                                  :latest-unstructured
name: Build and Publish ragtoriches/prod

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to publish (leave empty to use default versioning)'
        required: false
        type: string

jobs:
  build-and-publish:
    runs-on: ubuntu-latest
    permissions:
      packages: write
      contents: read
      actions: write
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Docker Auth
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.RAGTORICHES_DOCKER_UNAME }}
          password: ${{ secrets.RAGTORICHES_DOCKER_TOKEN }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      # The user-supplied input is handed to the script through an environment
      # variable instead of being expanded into the script text, so a crafted
      # version string cannot inject shell commands.
      - name: Determine version to use
        id: version
        env:
          REQUESTED_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ -n "$REQUESTED_VERSION" ]; then
            echo "RELEASE_VERSION=$REQUESTED_VERSION" >> "$GITHUB_OUTPUT"
          else
            echo "RELEASE_VERSION=main" >> "$GITHUB_OUTPUT"
          fi

      # Epoch seconds, passed to both builds as the CACHE_BUST build argument.
      - name: Generate Cache Buster
        id: cache-buster
        run: echo "CACHE_BUSTER=$(date +%s)" >> "$GITHUB_OUTPUT"

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ragtoriches/prod
          tags: |
            type=raw,value=${{ steps.version.outputs.RELEASE_VERSION }}
            type=raw,value=latest

      # Separate tag set for the Unstructured variant. Previously both builds
      # pushed to the same tags (the second build overwrote the first) and a
      # follow-up step tried to `docker tag` images that a multi-platform
      # buildx push never loads into the local Docker image store.
      - name: Extract metadata (tags, labels) for Docker (Unstructured)
        id: meta-unstructured
        uses: docker/metadata-action@v5
        with:
          images: ragtoriches/prod
          tags: |
            type=raw,value=${{ steps.version.outputs.RELEASE_VERSION }}-unstructured
            type=raw,value=latest-unstructured

      - name: Build and Push Docker Image (Standard)
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          build-args: |
            CACHE_BUST=${{ steps.cache-buster.outputs.CACHE_BUSTER }}

      - name: Build and Push Docker Image (Unstructured)
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile.unstructured
          push: true
          tags: ${{ steps.meta-unstructured.outputs.tags }}
          labels: ${{ steps.meta-unstructured.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          build-args: |
            CACHE_BUST=${{ steps.cache-buster.outputs.CACHE_BUSTER }}

.github/workflows/ci.yml

Lines changed: 46 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -11,32 +11,36 @@ on:
1111

1212
jobs:
1313
pre-commit:
14-
runs-on: ubuntu-latest
14+
runs-on: [ self-hosted, Linux ]
1515

1616
steps:
1717
- name: Checkout code
1818
uses: actions/checkout@v3
1919

20-
- name: Set up Python
21-
uses: actions/setup-python@v4
22-
with:
23-
python-version: '3.12.4' # Specify your Python version here
24-
25-
- name: Install dependencies
20+
- name: Run pre-commit
2621
run: |
27-
python -m pip install --upgrade pip
28-
pip install poetry
22+
python3 -m venv venv
23+
source venv/bin/activate
2924
poetry install
30-
31-
- name: Install pre-commit
32-
run: poetry run pre-commit install
33-
34-
- name: Run pre-commit
35-
run: poetry run pre-commit run --all-files
25+
pip install pre-commit
26+
pre-commit install
27+
pre-commit run --all-files
28+
deactivate
29+
rm -rf venv
30+
31+
- name: Clean up disk space
32+
uses: jlumbroso/free-disk-space@main
33+
with:
34+
tool-cache: true
35+
android: true
36+
dotnet: true
37+
haskell: true
38+
large-packages: true
39+
swap-storage: true
3640

3741
pytest:
38-
runs-on: ubuntu-latest
39-
timeout-minutes: 15 # Increased timeout to accommodate Ollama setup
42+
runs-on: [ self-hosted, Linux ]
43+
timeout-minutes: 15
4044

4145
env:
4246
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -51,35 +55,40 @@ jobs:
5155
- name: Checkout code
5256
uses: actions/checkout@v3
5357

54-
- name: Set up Python
55-
uses: actions/setup-python@v4
56-
with:
57-
python-version: '3.12.4'
58-
59-
- name: Install dependencies
60-
run: |
61-
python -m pip install --upgrade pip
62-
pip install poetry
63-
poetry install
64-
65-
- name: Install Ollama
66-
run: |
67-
curl -fsSL https://ollama.com/install.sh | sudo -E sh
68-
6958
- name: Start Ollama server
7059
run: |
7160
ollama serve &
7261
sleep 5
7362
curl -i http://localhost:11434
7463
75-
- name: Pull Ollama model
64+
- name: Run pytest
7665
run: |
77-
ollama pull llama2
78-
79-
- name: Run tests
80-
run: poetry run pytest tests/ -k "not redis and not sentence_transformer"
# Create and activate a throwaway virtualenv for this run.
python3 -m venv venv
source venv/bin/activate
poetry install
# pytest only needs to be installed once (the duplicate install was removed).
pip install pytest
poetry run pytest tests/ -k "not redis and not sentence_transformer"
# Tear the virtualenv down again.
deactivate
rm -rf venv
8174
8275
- name: Upload coverage reports to Codecov
8376
uses: codecov/[email protected]
8477
with:
8578
token: ${{ secrets.CODECOV_TOKEN }}
79+
80+
- name: Clean up disk space
81+
uses: jlumbroso/free-disk-space@main
82+
with:
83+
tool-cache: true
84+
android: true
85+
dotnet: true
86+
haskell: true
87+
large-packages: true
88+
swap-storage: true
89+
90+
- name: Cleanup
91+
if: always()
92+
run: |
93+
pkill -f ollama || true
94+
echo "Cleaned up Ollama server"

.github/workflows/integration-test-workflow-debian.yml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,21 @@ jobs:
1616
actions: write
1717
env:
1818
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
19+
TELEMETRY_ENABLED: false
1920

2021
steps:
2122
- uses: actions/checkout@v4
2223

24+
- name: Clean up disk space
25+
uses: jlumbroso/free-disk-space@main
26+
with:
27+
tool-cache: true
28+
android: true
29+
dotnet: true
30+
haskell: true
31+
large-packages: true
32+
swap-storage: true
33+
2334
- name: Docker Auth
2435
uses: docker/login-action@v3
2536
with:
@@ -58,7 +69,7 @@ jobs:
5869
r2r version
5970
6071
echo "R2R Serve --docker"
61-
r2r serve --docker --exclude-neo4j --exclude-ollama --image=ragtoriches/dev:latest
72+
r2r serve --docker --exclude-neo4j=true --exclude-ollama=true --image=ragtoriches/dev:latest
6273
6374
echo "Waiting for services to start..."
6475
sleep 30
@@ -105,5 +116,7 @@ jobs:
105116
echo "Clean up Virtual Environment"
106117
deactivate
107118
rm -rf venv
119+
docker stop $(docker ps -a -q)
108120
docker system prune -af --volumes
121+
docker network prune --force
109122
docker volume rm $(docker volume ls -qf dangling=true)

Dockerfile

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,14 @@ COPY --from=builder /usr/local/bin /usr/local/bin
3333

3434
# Copy the application and config
3535
COPY r2r /app/r2r
36-
COPY r2r.json /app/r2r.json
36+
COPY r2r.toml /app/r2r.toml
3737
COPY pyproject.toml /app/pyproject.toml
3838

3939
# Expose the port
40-
EXPOSE 8000
40+
ARG PORT=8000
41+
ARG HOST=0.0.0.0
42+
ENV PORT=$PORT HOST=$HOST
43+
EXPOSE $PORT
4144

4245
# Run the application
43-
CMD ["uvicorn", "r2r.main.app_entry:app", "--host", "0.0.0.0", "--port", "8000"]
46+
CMD ["sh", "-c", "uvicorn r2r.main.app_entry:app --host $HOST --port $PORT"]

Dockerfile.unstructured

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
# R2R image with the Unstructured document-parsing stack included.
# Stage 1 ("builder") installs the Python dependencies with Poetry; stage 2
# copies the installed packages into a fresh python:3.10-slim runtime image.
FROM python:3.10-slim AS builder

# Build-time system dependencies (including those needed for Unstructured and
# OpenCV). libgl1 is used instead of the transitional libgl1-mesa-glx package,
# which newer Debian releases no longer ship.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc g++ musl-dev curl libffi-dev gfortran libopenblas-dev \
    tesseract-ocr libtesseract-dev libleptonica-dev pkg-config \
    poppler-utils libmagic1 \
    libgl1 libglib2.0-0 \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

WORKDIR /app

RUN pip install --no-cache-dir poetry

# Copy the dependency manifests (poetry.lock is optional thanks to the glob)
COPY pyproject.toml poetry.lock* ./

# Install the dependencies, including gunicorn, uvicorn, and unstructured.
# `--only main` replaces the deprecated `--no-dev` flag.
# NOTE(review): `poetry add` resolves unstructured at build time, so the
# installed version is not pinned by the committed lock file - confirm this
# is intended.
RUN poetry config virtualenvs.create false \
    && poetry add "unstructured[all-docs]" \
    && poetry install --only main --no-root \
    && pip install --no-cache-dir gunicorn uvicorn

# Create the final image
FROM python:3.10-slim

# Install runtime dependencies
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
    curl tesseract-ocr poppler-utils libmagic1 \
    libgl1 libglib2.0-0 \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy the installed packages and console scripts from the builder
COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy the application and config
COPY r2r /app/r2r
COPY r2r.toml /app/r2r.toml
COPY pyproject.toml /app/pyproject.toml

# Download NLTK packages and initialize the Unstructured models at build time
RUN python3 -m nltk.downloader punkt averaged_perceptron_tagger \
    && python3 -c "from unstructured.partition.model_init import initialize; initialize()"

# Set environment variables
# NOTE(review): this path is specific to the Tesseract 4.00 package layout;
# verify it matches the tesseract-ocr version installed by the base image.
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00/tessdata

# Expose the port. PORT and HOST are configurable at build time (and can be
# overridden at run time via the environment), matching the standard
# Dockerfile; the defaults keep the previous 0.0.0.0:8000 behaviour.
ARG PORT=8000
ARG HOST=0.0.0.0
ENV PORT=$PORT HOST=$HOST
EXPOSE $PORT

# Run the application. A shell is needed to expand $HOST/$PORT; `exec`
# replaces the shell with uvicorn so it receives container signals directly.
CMD ["sh", "-c", "exec uvicorn r2r.main.app_entry:app --host $HOST --port $PORT"]

0 commit comments

Comments
 (0)