Skip to content

Commit 677c8d4

Browse files
Add PyPI publishing support (#48)
1 parent b5d1de8 commit 677c8d4

File tree

11 files changed

+136
-50
lines changed

11 files changed

+136
-50
lines changed

.dockerignore

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,10 @@ Thumbs.db
8484
.github/
8585

8686
# Documentation
87-
README.md
88-
LICENSE
87+
# README.md and LICENSE are needed for Poetry package metadata
8988
*.md
89+
!README.md
90+
!LICENSE
9091
docs/
9192

9293
# Temporary files

.github/workflows/docker-build-pr.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,22 +93,22 @@ jobs:
9393
9494
# Test help command
9595
echo "Testing --help command..."
96-
docker run --rm ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} run.py --help
96+
docker run --rm ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} vector-db-benchmark --help
9797
9898
# Test Python environment
9999
echo "Testing Python environment..."
100-
docker run --rm ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} -c "import sys; print(f'Python {sys.version}'); import redis; print('Redis module available')"
100+
docker run --rm --entrypoint python ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} -c "import sys; print(f'Python {sys.version}'); import redis; print('Redis module available')"
101101
102102
# Test Redis connectivity
103103
echo "Testing Redis connectivity..."
104-
docker run --rm --network host ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} \
104+
docker run --rm --network host --entrypoint python ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} \
105105
-c "import redis; r = redis.Redis(host='localhost', port=6379); r.ping(); print('Redis connection successful')"
106106
107107
# Test benchmark execution with specific configuration
108108
echo "Testing benchmark execution with redis-m-16-ef-64 configuration..."
109109
mkdir -p ./test-results
110-
docker run --rm --network host -v "$(pwd)/test-results:/app/results" ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} \
111-
run.py --host localhost --engines redis --dataset random-100 --experiment redis-m-16-ef-64 --skip-upload --skip-search || echo "Benchmark test completed (expected to fail without proper dataset setup)"
110+
docker run --rm --network host -v "$(pwd)/test-results:/code/results" ${{ env.IMAGE_NAME }}:pr-${{ github.event.number }} \
111+
vector-db-benchmark --host localhost --engines redis --dataset random-100 --experiment redis-m-16-ef-64 --skip-upload --skip-search || echo "Benchmark test completed (expected to fail without proper dataset setup)"
112112
113113
echo "✅ Docker image tests passed!"
114114

.github/workflows/publish-pypi.yml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
name: Publish to PyPI
2+
on:
3+
release:
4+
types: [published]
5+
workflow_dispatch: # Allow manual triggering
6+
7+
jobs:
8+
pypi:
9+
name: Publish to PyPI
10+
runs-on: ubuntu-latest
11+
environment:
12+
name: pypi
13+
url: https://pypi.org/p/vector-benchmark
14+
permissions:
15+
contents: read
16+
id-token: write
17+
steps:
18+
- uses: actions/checkout@v4
19+
20+
- name: Set up Python
21+
uses: actions/setup-python@v5
22+
with:
23+
python-version: '3.10'
24+
25+
- name: Install Poetry
26+
uses: snok/install-poetry@v1
27+
with:
28+
version: latest
29+
virtualenvs-create: true
30+
virtualenvs-in-project: true
31+
32+
- name: Build package
33+
run: poetry build
34+
35+
- name: Check package
36+
run: |
37+
pip install twine
38+
twine check dist/*
39+
40+
- name: Publish to PyPI
41+
env:
42+
POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN }}
43+
run: poetry publish

Dockerfile

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,15 @@ WORKDIR /code
3030

3131
# Copy dependency files first for better caching
3232
COPY poetry.lock pyproject.toml /code/
33+
COPY README.md /code/
34+
35+
# Copy package directories needed by Poetry
36+
COPY benchmark /code/benchmark
37+
COPY dataset_reader /code/dataset_reader
38+
COPY engine /code/engine
39+
COPY datasets /code/datasets
40+
COPY experiments /code/experiments
41+
COPY run.py /code/run.py
3342

3443
# Configure Poetry and install dependencies
3544
RUN poetry config virtualenvs.create false \
@@ -38,7 +47,7 @@ RUN poetry config virtualenvs.create false \
3847
# Install additional dependencies
3948
RUN pip install "boto3"
4049

41-
# Copy source code
50+
# Copy remaining source code
4251
COPY . /code
4352

4453
# Store Git information
@@ -65,31 +74,28 @@ RUN apt-get update && apt-get install -y \
6574

6675

6776
# Set working directory
68-
WORKDIR /app
77+
WORKDIR /code
6978

7079
# Copy Python environment from builder
7180
COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
7281
COPY --from=builder /usr/local/bin /usr/local/bin
7382

7483
# Copy application code
75-
COPY --from=builder /code /app
84+
COPY --from=builder /code /code
7685

7786
# Create directories with proper permissions
78-
RUN mkdir -p /app/results /app/datasets && \
79-
80-
chmod -R 777 /app/results /app/datasets && \
81-
chmod -R 755 /app
87+
RUN mkdir -p /code/results /code/datasets && \
88+
chmod -R 777 /code/results /code/datasets && \
89+
chmod -R 755 /code
8290

8391
# Create entrypoint script to handle user permissions
8492
RUN echo '#!/bin/bash\n\
8593
# Handle user permissions for volume mounts\n\
86-
if [ "$1" = "run.py" ]; then\n\
87-
# Ensure results directory is writable\n\
88-
mkdir -p /app/results\n\
89-
chmod 777 /app/results\n\
90-
fi\n\
91-
exec python "$@"' > /app/entrypoint.sh && \
92-
chmod +x /app/entrypoint.sh
94+
# Ensure results directory is writable\n\
95+
mkdir -p /code/results\n\
96+
chmod 777 /code/results\n\
97+
exec "$@"' > /code/entrypoint.sh && \
98+
chmod +x /code/entrypoint.sh
9399

94100

95101
# Health check
@@ -100,10 +106,8 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
100106
EXPOSE 6379 6380
101107

102108
# Set entrypoint
103-
104-
ENTRYPOINT ["/app/entrypoint.sh"]
105-
109+
ENTRYPOINT ["/code/entrypoint.sh"]
106110

107111
# Default command (show help)
108-
CMD ["run.py", "--help"]
112+
CMD ["vector-db-benchmark", "--help"]
109113

benchmark/dataset.py

100644 → 100755
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def download(self):
179179
except botocore.exceptions.NoCredentialsError:
180180
print("Credentials not found, downloading without boto3")
181181
if not downloaded_withboto:
182-
print(f"Downloading from URL {self.config.link}...")
182+
print(f"Downloading from URL {self.config.link} to {target_path}...")
183183
tmp_path, _ = download_with_headers(
184184
self.config.link, None, show_progress
185185
)
@@ -191,7 +191,7 @@ def _download_file(self, relative_path: str, url: str):
191191
print(f"{target_path} already exists")
192192
return
193193

194-
print(f"Downloading from {url} to {target_path}")
194+
print(f"Downloading from {url} to {target_path}...")
195195
tmp_path, _ = download_with_headers(url, None, show_progress)
196196
self._extract_or_move_file(tmp_path, target_path)
197197

@@ -210,7 +210,7 @@ def _extract_or_move_file(self, tmp_path, target_path):
210210
final_target_path = str(target_path)[:-4] # Remove .bz2
211211
else:
212212
final_target_path = target_path
213-
213+
214214
with bz2.BZ2File(tmp_path, 'rb') as f_in:
215215
with open(final_target_path, 'wb') as f_out:
216216
shutil.copyfileobj(f_in, f_out)
@@ -227,7 +227,7 @@ def _download_from_s3(self, link, target_path):
227227
tmp_path = f"/tmp/{os.path.basename(s3_key)}"
228228

229229
print(
230-
f"Downloading from S3: {link}... bucket_name={bucket_name}, s3_key={s3_key}"
230+
f"Downloading from S3: {link} to {target_path}... (bucket={bucket_name}, key={s3_key})"
231231
)
232232
object_info = s3.head_object(Bucket=bucket_name, Key=s3_key)
233233
total_size = object_info["ContentLength"]

datasets/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# This file makes the datasets directory a Python package
2+

docker-test.sh

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ print_step "Testing basic functionality..."
6161

6262
# Test help command
6363
print_info "Testing --help command..."
64-
if docker run --rm "$FULL_IMAGE_NAME" run.py --help > /dev/null; then
64+
if docker run --rm "$FULL_IMAGE_NAME" vector-db-benchmark --help > /dev/null; then
6565
print_info "✅ Help command works"
6666
else
6767
print_error "❌ Help command failed"
@@ -70,7 +70,7 @@ fi
7070

7171
# Test Python environment
7272
print_info "Testing Python environment..."
73-
if docker run --rm "$FULL_IMAGE_NAME" -c "import sys; print(f'Python {sys.version}'); import redis; print('Redis module available')" > /dev/null; then
73+
if docker run --rm --entrypoint python "$FULL_IMAGE_NAME" -c "import sys; print(f'Python {sys.version}'); import redis; print('Redis module available')" > /dev/null; then
7474
print_info "✅ Python environment works"
7575
else
7676
print_error "❌ Python environment test failed"
@@ -91,14 +91,14 @@ if docker run -d --name "$REDIS_CONTAINER_NAME" -p 6379:6379 redis:8.2-rc1-bookw
9191
sleep 5
9292

9393
# Test basic connection
94-
if timeout 10 docker run --rm --network=host "$FULL_IMAGE_NAME" \
94+
if timeout 10 docker run --rm --network=host --entrypoint python "$FULL_IMAGE_NAME" \
9595
-c "import redis; r = redis.Redis(host='localhost', port=6379); r.ping(); print('Redis connection successful')" > /dev/null 2>&1; then
9696
print_info "✅ Redis connectivity test passed"
9797

9898
# Test benchmark execution with specific configuration
9999
print_info "Testing benchmark execution with redis-default-simple configuration..."
100-
if timeout 120 docker run --rm --network=host -v "$(pwd)/results:/app/results" "$FULL_IMAGE_NAME" \
101-
run.py --host localhost --engines redis --dataset random-100 --experiment redis-default-simple > /dev/null 2>&1; then
100+
if timeout 120 docker run --rm --network=host -v "$(pwd)/results:/code/results" "$FULL_IMAGE_NAME" \
101+
vector-db-benchmark --host localhost --engines redis --dataset random-100 --experiment redis-default-simple > /dev/null 2>&1; then
102102
print_info "✅ Benchmark execution test passed"
103103
else
104104
print_warning "⚠️ Benchmark execution test failed (this may be expected without proper dataset setup)"
@@ -118,8 +118,8 @@ fi
118118
# Step 4: Test file output permissions
119119
print_step "Testing file output permissions..."
120120
TEMP_DIR=$(mktemp -d)
121-
if docker run --rm -v "$TEMP_DIR:/app/results" "$FULL_IMAGE_NAME" \
122-
-c "import os; os.makedirs('/app/results', exist_ok=True); open('/app/results/test.txt', 'w').write('test'); print('File write successful')" > /dev/null 2>&1; then
121+
if docker run --rm -v "$TEMP_DIR:/code/results" --entrypoint python "$FULL_IMAGE_NAME" \
122+
-c "import os; os.makedirs('/code/results', exist_ok=True); open('/code/results/test.txt', 'w').write('test'); print('File write successful')" > /dev/null 2>&1; then
123123
if [ -f "$TEMP_DIR/test.txt" ]; then
124124
print_info "✅ File output test passed"
125125
else
@@ -137,8 +137,8 @@ print_info "Image size: $IMAGE_SIZE"
137137

138138
# Step 6: Test benchmark configuration loading
139139
print_step "Testing benchmark configuration loading..."
140-
if docker run --rm "$FULL_IMAGE_NAME" \
141-
-c "import json; import os; print('Configuration loading test'); print(os.listdir('/app'))" > /dev/null 2>&1; then
140+
if docker run --rm --entrypoint python "$FULL_IMAGE_NAME" \
141+
-c "import json; import os; print('Configuration loading test'); print(os.listdir('/code'))" > /dev/null 2>&1; then
142142
print_info "✅ Configuration loading test passed"
143143
else
144144
print_warning "⚠️ Configuration loading test completed with warnings"

experiments/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# This file makes the experiments directory a Python package
2+

experiments/configurations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# This file makes the configurations directory a Python package
2+

pyproject.toml

100644 → 100755
Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,38 @@
11
[tool.poetry]
2-
name = "vector-db-benchmark"
2+
name = "vector-benchmark"
33
version = "0.1.0"
4-
description = ""
5-
authors = ["Kacper Łukawski <kacper.lukawski@qdrant.com>"]
4+
description = "Benchmark suite for vector databases with Redis support. Forked from the original vector-db-benchmark project."
5+
authors = ["Redis Performance Team <performance@redis.com>"]
6+
readme = "README.md"
7+
license = "LICENSE"
8+
homepage = "https://github.com/redislabs/vector-db-benchmark"
9+
repository = "https://github.com/redislabs/vector-db-benchmark"
10+
keywords = ["vector", "database", "benchmark", "redis", "similarity-search"]
11+
classifiers = [
12+
"Development Status :: 3 - Alpha",
13+
"Intended Audience :: Developers",
14+
"Topic :: Software Development :: Testing",
15+
"Topic :: Database",
16+
"Programming Language :: Python :: 3",
17+
"Programming Language :: Python :: 3.9",
18+
"Programming Language :: Python :: 3.10",
19+
"Programming Language :: Python :: 3.11",
20+
"Programming Language :: Python :: 3.12",
21+
"Programming Language :: Python :: 3.13",
22+
]
23+
packages = [
24+
{ include = "benchmark" },
25+
{ include = "dataset_reader" },
26+
{ include = "engine" },
27+
]
28+
include = [
29+
"run.py",
30+
"datasets/__init__.py",
31+
"datasets/datasets.json",
32+
"datasets/random-100/**/*",
33+
"experiments/__init__.py",
34+
"experiments/configurations/**/*",
35+
]
636

737
[tool.poetry.dependencies]
838
python = ">=3.9,<3.14"
@@ -29,6 +59,9 @@ boto3 = "^1.39.4"
2959
pre-commit = "^2.20.0"
3060
pytest = "^7.1"
3161

62+
[tool.poetry.scripts]
63+
vector-db-benchmark = "run:app"
64+
3265
[build-system]
3366
requires = ["poetry-core>=1.0.0"]
3467
build-backend = "poetry.core.masonry.api"

0 commit comments

Comments
 (0)