Skip to content

[OpenVINO] [CI] Support OpenVINO backend through Optimum-intel instead #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ jobs:
working-directory: libs/infinity_emb
secrets: inherit

lint-embed_package:
uses:
./.github/workflows/linting.yaml
with:
working-directory: libs/embed_package
extra_poetry: "--with test,lint,codespell"
secrets: inherit
# lint-embed_package:
# uses:
# ./.github/workflows/linting.yaml
# with:
# working-directory: libs/embed_package
# extra_poetry: "--with test,lint,codespell"
# secrets: inherit

test-infinity_emb:
uses:
Expand All @@ -54,11 +54,11 @@ jobs:
upload_coverage: true
secrets: inherit

test-embed_package:
uses:
./.github/workflows/test.yaml
with:
working-directory: libs/embed_package
upload_coverage: false
extra_poetry: "--with test"
secrets: inherit
# test-embed_package:
# uses:
# ./.github/workflows/test.yaml
# with:
# working-directory: libs/embed_package
# upload_coverage: false
# extra_poetry: "--with test"
# secrets: inherit
26 changes: 13 additions & 13 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,22 @@ jobs:
matrix:
os: [ubuntu-latest, windows-latest] # macos-latest
python-version:
- "3.9"
- "3.10"
- "3.11"
# - "3.9"
# - "3.10"
# - "3.11"
- "3.12"
coverage_tests: ["unit_test", "end_to_end"]
coverage_tests: ["unit_test"] #, "end_to_end"]
exclude:
# Exclude unit tests on macOS due to compatibility issues
- python-version: "3.9"
os: macos-latest
coverage_tests: "unit_test"
- python-version: "3.10"
os: macos-latest
coverage_tests: "unit_test"
- python-version: "3.11"
os: macos-latest
coverage_tests: "unit_test"
# - python-version: "3.9"
# os: macos-latest
# coverage_tests: "unit_test"
# - python-version: "3.10"
# os: macos-latest
# coverage_tests: "unit_test"
# - python-version: "3.11"
# os: macos-latest
# coverage_tests: "unit_test"
- python-version: "3.12"
os: macos-latest
coverage_tests: "unit_test"
Expand Down
12 changes: 11 additions & 1 deletion libs/infinity_emb/Docker.template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,18 @@ cpu:
main_install: |
# "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
RUN apt update -y && apt install git -y
RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/cpu"
RUN poetry run $PYTHON -m pip install --no-cache-dir onnxruntime-openvino

RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" poetry run python -m pip install -U --pre optimum-intel@git+https://github.com/huggingface/optimum-intel.git \
openvino-tokenizers[transformers]==2024.5.* \
openvino==2024.5.* \
nncf>=2.11.0 \
sentence_transformers==3.1.1 \
openai \
"transformers>4.45" \
einops
# RUN poetry run $PYTHON -m pip install --no-cache-dir onnxruntime-openvino
extra_env_variables: |
# Sets default to onnx
ENV INFINITY_ENGINE="optimum"
Expand Down
36 changes: 33 additions & 3 deletions libs/infinity_emb/Dockerfile.cpu_auto
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,35 @@ COPY poetry.lock poetry.toml pyproject.toml README.md /app/
#
# "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
RUN apt update -y && apt install git -y
RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/cpu"
RUN poetry run $PYTHON -m pip install --no-cache-dir onnxruntime-openvino

RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" poetry run python -m pip install -U --pre optimum-intel@git+https://github.com/huggingface/optimum-intel.git \
openvino-tokenizers[transformers]==2024.5.* \
openvino==2024.5.* \
nncf>=2.11.0 \
sentence_transformers==3.1.1 \
openai \
"transformers>4.45" \
einops
# RUN poetry run $PYTHON -m pip install --no-cache-dir onnxruntime-openvino

COPY infinity_emb infinity_emb
# Install dependency with infinity_emb package
# "RUN poetry install --no-interaction --no-ansi --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
RUN apt update -y && apt install git -y
RUN ./requirements_install_from_poetry.sh --without lint,test "https://download.pytorch.org/whl/cpu"
RUN poetry run $PYTHON -m pip install --no-cache-dir onnxruntime-openvino

RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" poetry run python -m pip install -U --pre optimum-intel@git+https://github.com/huggingface/optimum-intel.git \
openvino-tokenizers[transformers]==2024.5.* \
openvino==2024.5.* \
nncf>=2.11.0 \
sentence_transformers==3.1.1 \
openai \
"transformers>4.45" \
einops
# RUN poetry run $PYTHON -m pip install --no-cache-dir onnxruntime-openvino

#

Expand All @@ -59,8 +79,18 @@ FROM builder as testing
# install lint and test dependencies
# "RUN poetry install --no-interaction --no-ansi --extras \"${EXTRAS}\" --with lint,test && poetry cache clear pypi --all"
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
RUN apt update -y && apt install git -y
RUN ./requirements_install_from_poetry.sh --with lint,test "https://download.pytorch.org/whl/cpu"
RUN poetry run $PYTHON -m pip install --no-cache-dir onnxruntime-openvino

RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" poetry run python -m pip install -U --pre optimum-intel@git+https://github.com/huggingface/optimum-intel.git \
openvino-tokenizers[transformers]==2024.5.* \
openvino==2024.5.* \
nncf>=2.11.0 \
sentence_transformers==3.1.1 \
openai \
"transformers>4.45" \
einops
# RUN poetry run $PYTHON -m pip install --no-cache-dir onnxruntime-openvino

# lint
RUN poetry run ruff check .
Expand Down
132 changes: 132 additions & 0 deletions libs/infinity_emb/Dockerfile.intel_auto
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# Autogenerated warning:
# This file is generated from Dockerfile.jinja2. Do not edit the Dockerfile.cuda|cpu|amd file directly.
# Only contribute to the Dockerfile.jinja2 and dockerfile_template.yaml and regenerate the Dockerfile.cuda|cpu|amd

FROM ubuntu:22.04 AS base

ENV PYTHONUNBUFFERED=1 \
\
# pip
PIP_NO_CACHE_DIR=off \
PIP_DISABLE_PIP_VERSION_CHECK=on \
PIP_DEFAULT_TIMEOUT=100 \
\
# make poetry create the virtual environment in the project's root
# it gets named `.venv`
POETRY_VIRTUALENVS_CREATE="true" \
POETRY_VIRTUALENVS_IN_PROJECT="true" \
# do not ask any interactive question
POETRY_NO_INTERACTION=1 \
EXTRAS="all" \
PYTHON="python3.11"
RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
WORKDIR /app

FROM base as builder
# Set the working directory for the app
# Define the version of Poetry to install (default is 1.7.1)
# Define the directory to install Poetry to (default is /opt/poetry)
ARG POETRY_VERSION=1.8.4
ARG POETRY_HOME=/opt/poetry
# Create a Python virtual environment for Poetry and install it
RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=$POETRY_HOME POETRY_VERSION=$POETRY_VERSION $PYTHON -
ENV PATH=$POETRY_HOME/bin:$PATH
# Test if Poetry is installed in the expected path
RUN echo "Poetry version:" && poetry --version
# Copy the rest of the app source code (this layer will be invalidated and rebuilt whenever the source code changes)
COPY poetry.lock poetry.toml pyproject.toml README.md /app/
# Install dependencies only
#
# "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/cpu"

RUN poetry run python -m pip install --upgrade --upgrade-strategy eager "optimum[openvino]"

COPY infinity_emb infinity_emb
# Install dependency with infinity_emb package
# "RUN poetry install --no-interaction --no-ansi --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
RUN ./requirements_install_from_poetry.sh --without lint,test "https://download.pytorch.org/whl/cpu"

#


FROM builder as testing
# install lint and test dependencies
# "RUN poetry install --no-interaction --no-ansi --extras \"${EXTRAS}\" --with lint,test && poetry cache clear pypi --all"
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
RUN ./requirements_install_from_poetry.sh --with lint,test "https://download.pytorch.org/whl/cpu"

# # lint
# # RUN poetry run ruff check .
# # RUN poetry run mypy .
# # pytest
# COPY tests tests
# # run end to end tests because of duration of build in github ci.
# # Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu
# # poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
# RUN if [ -z "$TARGETPLATFORM" ]; then \
# ARCH=$(uname -m); \
# if [ "$ARCH" = "x86_64" ]; then \
# TARGETPLATFORM="linux/amd64"; \
# elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \
# TARGETPLATFORM="linux/arm64"; \
# else \
# echo "Unsupported architecture: $ARCH"; exit 1; \
# fi; \
# fi; \
# echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
# if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \
# poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
# else \
# poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py tests/end_to_end/test_sentence_transformers.py -m "not performance" -x ; \
# fi
# RUN echo "all tests passed" > "test_results.txt"


# # Use a multi-stage build -> production version, with download
# FROM base AS tested-builder
# COPY --from=builder /app /app
# # force testing stage to run
# COPY --from=testing /app/test_results.txt /app/test_results.txt
# ENV HF_HOME=/app/.cache/huggingface
# ENV PATH=/app/.venv/bin:$PATH
# # do nothing
# RUN echo "copied all files"


# Export with tensorrt, not recommended.
# docker buildx build --target=production-tensorrt -f Dockerfile .
# FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS production-tensorrt
# ENV PYTHONUNBUFFERED=1 \
# PIP_NO_CACHE_DIR=off \
# PYTHON="python3.11"
# RUN apt-get update && apt-get install python3-dev python3-pip $PYTHON build-essential curl -y
# COPY --from=builder /app /app
# # force testing stage to run
# COPY --from=testing /app/test_results.txt /app/test_results.txt
# ENV HF_HOME=/app/.cache/torch
# ENV PATH=/app/.venv/bin:$PATH
# RUN pip install --no-cache-dir "onnxruntime-gpu==1.17.0" "tensorrt==8.6.*"
# ENV LD_LIBRARY_PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/$(PYTHON)/site-packages/tensorrt_libs:${LD_LIBRARY_PATH}
# ENV PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt/bin:${PATH}
# ENTRYPOINT ["infinity_emb"]


# # Use a multi-stage build -> production version, with download
# # docker buildx build --target=production-with-download \
# # --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
# FROM tested-builder AS production-with-download
# # collect model name and engine from build args
# ARG MODEL_NAME
# RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
# ARG ENGINE
# RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
# # will exit with 3 if model is downloaded # TODO: better exit code
# RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
# ENTRYPOINT ["infinity_emb"]

# # Use a multi-stage build -> production version
# FROM tested-builder AS production
# ENTRYPOINT ["infinity_emb"]
1 change: 1 addition & 0 deletions libs/infinity_emb/infinity_emb/_optional_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def _raise_error(self) -> None:
"optimum.neuron",
"<neuronx not available as extra, only runs on AMI image, no pip install possible.>",
)
CHECK_OPTIMUM_INTEL = OptionalImports("optimum.intel", "optimum")
CHECK_PIL = OptionalImports("PIL", "vision")
CHECK_POSTHOG = OptionalImports("posthog", "server")
CHECK_PYDANTIC = OptionalImports("pydantic", "server")
Expand Down
1 change: 1 addition & 0 deletions libs/infinity_emb/infinity_emb/primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def default_value():

class Device(EnumType):
cpu = "cpu"
openvino = "openvino"
cuda = "cuda"
mps = "mps"
tensorrt = "tensorrt"
Expand Down
Loading
Loading