diff --git a/.github/workflows/test_cli_misc.yaml b/.github/workflows/test_cli_misc.yaml
index 2a6dd666..bd7a2912 100644
--- a/.github/workflows/test_cli_misc.yaml
+++ b/.github/workflows/test_cli_misc.yaml
@@ -8,7 +8,6 @@ on:
     paths:
       - .github/workflows/test_cli_misc.yaml
       - "optimum_benchmark/**"
-      - "docker/**"
       - "tests/**"
       - "setup.py"
   pull_request:
@@ -17,7 +16,6 @@ on:
     paths:
       - .github/workflows/test_cli_misc.yaml
       - "optimum_benchmark/**"
-      - "docker/**"
       - "tests/**"
       - "setup.py"
@@ -31,7 +29,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest]
-        python: ["3.8", "3.10"]
+        python: ["3.8", "3.9", "3.10"]
 
     runs-on: ${{ matrix.os }}
diff --git a/.github/workflows/update_llm_perf_cuda_pytorch.yaml b/.github/workflows/update_llm_perf_cuda_pytorch.yaml
index dd023fb2..915ef08d 100644
--- a/.github/workflows/update_llm_perf_cuda_pytorch.yaml
+++ b/.github/workflows/update_llm_perf_cuda_pytorch.yaml
@@ -29,7 +29,6 @@ jobs:
       - name: Run benchmarks
        uses: addnab/docker-run-action@v3
        env:
-          IMAGE: ${{ env.IMAGE }}
          SUBSET: ${{ matrix.subset }}
          MACHINE: ${{ matrix.machine.name }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -49,5 +48,5 @@ jobs:
        run: |
          pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq codecarbon
          pip install -U transformers huggingface_hub[hf_transfer]
-          pip install -e .
+          pip install optimum-benchmark
          python llm_perf/update_llm_perf_cuda_pytorch.py
diff --git a/README.md b/README.md
index b1face7b..b83cbde6 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,22 @@
-<p align="center"><img src="…" alt="Optimum-Benchmark Logo" …></p>
+<p align="center"><img src="…" alt="Optimum-Benchmark Logo" …></p>
 <p align="center"><q>All benchmarks are wrong, some will cost you less than others.</q></p>
 <h1 align="center">Optimum-Benchmark 🏋️</h1>
 
+[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/optimum-benchmark)](https://pypi.org/project/optimum-benchmark/)
+[![PyPI - Version](https://img.shields.io/pypi/v/optimum-benchmark)](https://pypi.org/project/optimum-benchmark/)
+[![PyPI - Downloads](https://img.shields.io/pypi/dm/optimum-benchmark)](https://pypi.org/project/optimum-benchmark/)
+[![PyPI - Implementation](https://img.shields.io/pypi/implementation/optimum-benchmark)](https://pypi.org/project/optimum-benchmark/)
+[![PyPI - Format](https://img.shields.io/pypi/format/optimum-benchmark)](https://pypi.org/project/optimum-benchmark/)
+[![PyPI - License](https://img.shields.io/pypi/l/optimum-benchmark)](https://pypi.org/project/optimum-benchmark/)
+
 Optimum-Benchmark is a unified [multi-backend & multi-device](#backends--devices-) utility for benchmarking [Transformers](https://github.com/huggingface/transformers), [Diffusers](https://github.com/huggingface/diffusers), [PEFT](https://github.com/huggingface/peft), [TIMM](https://github.com/huggingface/pytorch-image-models) and [Optimum](https://github.com/huggingface/optimum) libraries, along with all their supported [optimizations & quantization schemes](#backends--devices-), for [inference & training](#scenarios-), in [distributed & non-distributed settings](#launchers-), in the most correct, efficient and scalable way possible.
 
 *News* 📰
 
-- PyPI package is now available for installation: `pip install optimum-benchmark` 🎉 check it out !
+- PyPI package is now available for installation: `pip install optimum-benchmark` 🎉 [check it out](https://pypi.org/project/optimum-benchmark/) !
 - Hosted 4 minimal docker images (`cpu`, `cuda`, `rocm`, `cuda-ort`) in [packages](https://github.com/huggingface/optimum-benchmark/pkgs/container/optimum-benchmark) for testing, benchmarking and reproducibility 🐳
 - Added vLLM backend for benchmarking [vLLM](https://github.com/vllm-project/vllm)'s inference engine 🚀
-- Hosted the codebase of the LLM-Perf Leaderboard [LLM-Perf](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) 🥇
+- Hosted the codebase of the [LLM-Perf Leaderboard](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) 🥇
 - Added Py-TXI backend for benchmarking [Py-TXI](https://github.com/IlyasMoutawwakil/py-txi/tree/main) 🚀
 - Introduced a Python API for running isolated benchmarks from the comfort of your Python scripts 🐍
 - Simplified the CLI interface for running benchmarks using the Hydra CLI 🧪
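For reviewers who have not used the new Python API mentioned in the News list, here is a minimal sketch, assuming the 0.2.x entry points (`Benchmark.launch` and the `BenchmarkConfig` / `ProcessConfig` / `InferenceConfig` / `PyTorchConfig` dataclasses exported at the package root); the README of the installed release is the authority on exact names:

```python
# Minimal sketch of the Python API referenced in the News list above.
# Assumes the 0.2.x entry points; treat names as illustrative, not normative.
from optimum_benchmark import (
    Benchmark,
    BenchmarkConfig,
    InferenceConfig,
    ProcessConfig,
    PyTorchConfig,
)

if __name__ == "__main__":
    # a benchmark is assembled from a launcher, a scenario and a backend
    launcher_config = ProcessConfig()  # isolate the run in a spawned process
    scenario_config = InferenceConfig(latency=True, memory=True)
    backend_config = PyTorchConfig(model="gpt2", device="cpu", no_weights=True)

    benchmark_config = BenchmarkConfig(
        name="pytorch_gpt2",
        launcher=launcher_config,
        scenario=scenario_config,
        backend=backend_config,
    )

    # runs the benchmark through the launcher and returns a BenchmarkReport
    benchmark_report = Benchmark.launch(benchmark_config)
```

The simplified Hydra CLI drives the same machinery from YAML configs, e.g. `optimum-benchmark --config-dir examples/ --config-name pytorch_bert`.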
diff --git a/llm_perf/utils.py b/llm_perf/utils.py
index e51d8065..1eea17b0 100644
--- a/llm_perf/utils.py
+++ b/llm_perf/utils.py
@@ -4,13 +4,11 @@
 from optimum_benchmark.report import BenchmarkReport
 
-OPEN_LLM_LEADERBOARD = pd.read_csv("hf://datasets/optimum-benchmark/open-llm-leaderboard/open-llm-leaderboard.csv")
-
-
 INPUT_SHAPES = {"batch_size": 1, "sequence_length": 256}
 GENERATE_KWARGS = {"max_new_tokens": 64, "min_new_tokens": 64}
 
+OPEN_LLM_LEADERBOARD = pd.read_csv("hf://datasets/optimum-benchmark/llm-perf-leaderboard/llm-df.csv")
 OPEN_LLM_LIST = OPEN_LLM_LEADERBOARD.drop_duplicates(subset=["Model"])["Model"].tolist()
 PRETRAINED_OPEN_LLM_LIST = (
     OPEN_LLM_LEADERBOARD[OPEN_LLM_LEADERBOARD["Type"] == "pretrained"]
@@ -44,7 +42,9 @@
 #     "Qwen",
 # ],
 # ]
-# CANONICAL_PRETRAINED_OPEN_LLM_LIST = [model for model in PRETRAINED_OPEN_LLM_LIST if model.split("/")[0] in CANONICAL_ORGANIZATIONS]
+# CANONICAL_PRETRAINED_OPEN_LLM_LIST = [
+#     model for model in PRETRAINED_OPEN_LLM_LIST if model.split("/")[0] in CANONICAL_ORGANIZATIONS
+# ]
 CANONICAL_PRETRAINED_OPEN_LLM_LIST = [
     "01-ai/Yi-34B",
     "01-ai/Yi-6B",
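One behavior worth flagging in the `llm_perf/utils.py` hunk above: the leaderboard CSV is now fetched from the `optimum-benchmark/llm-perf-leaderboard` dataset over the `hf://` protocol, which pandas resolves through `huggingface_hub`'s fsspec filesystem. A standalone sketch of what that read amounts to (column names taken from the diff itself):

```python
import pandas as pd

# pandas delegates "hf://" URLs to huggingface_hub's fsspec-based HfFileSystem,
# so huggingface_hub must be installed; HF_TOKEN is honored for gated repos
df = pd.read_csv("hf://datasets/optimum-benchmark/llm-perf-leaderboard/llm-df.csv")

# the same post-processing the module applies at import time
open_llm_list = df.drop_duplicates(subset=["Model"])["Model"].tolist()
pretrained_df = df[df["Type"] == "pretrained"]
print(len(open_llm_list), "models,", len(pretrained_df), "pretrained rows")
```

Since the read happens at module level, any job that imports `llm_perf.utils` performs a network call before benchmarking starts.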
diff --git a/optimum_benchmark/version.py b/optimum_benchmark/version.py
index d9aa7d8b..0b959d42 100644
--- a/optimum_benchmark/version.py
+++ b/optimum_benchmark/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.2.0"
+__version__ = "0.2.1"
diff --git a/setup.py b/setup.py
index 900ce99b..b024a738 100644
--- a/setup.py
+++ b/setup.py
@@ -98,26 +98,25 @@
     extras_require=EXTRAS_REQUIRE,
     entry_points={"console_scripts": ["optimum-benchmark=optimum_benchmark.cli:main"]},
     description="Optimum-Benchmark is a unified multi-backend utility for benchmarking "
-    "Transformers, Timm, Diffusers and Sentence-Transformers with full support of Optimum's "
-    "hardware optimizations & quantization schemes.",
-    long_description=open("README.md", "r", encoding="utf-8").read(),
-    long_description_content_type="text/markdown",
+    "Transformers, Timm, Diffusers and Sentence-Transformers with full support of "
+    "Optimum's hardware optimizations & quantization schemes.",
+    url="https://github.com/huggingface/optimum-benchmark",
     classifiers=[
-        "License :: OSI Approved :: Apache Software License",
-        "Intended Audience :: Developers",
         "Intended Audience :: Education",
+        "Intended Audience :: Developers",
+        "Operating System :: POSIX :: Linux",
         "Intended Audience :: Science/Research",
-        "Operating System :: OS Independent",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
+        "License :: OSI Approved :: Apache Software License",
         "Topic :: Scientific/Engineering :: Artificial Intelligence",
     ],
     keywords="benchmaek, transformers, quantization, pruning, optimization, training, inference, onnx, onnx runtime, intel, "
     "habana, graphcore, neural compressor, ipex, ipu, hpu, llm-swarm, py-txi, vllm, auto-gptq, autoawq, "
     "sentence-transformers, bitsandbytes, codecarbon, flash-attn, deepspeed, diffusers, timm, peft",
-    url="https://github.com/huggingface/optimum-benchmark",
+    long_description=open("README.md", "r", encoding="utf-8").read(),
+    long_description_content_type="text/markdown",
     author="HuggingFace Inc. Special Ops Team",
     include_package_data=True,
     name="optimum-benchmark",
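Because `update_llm_perf_cuda_pytorch.yaml` now installs the published package instead of the editable checkout, the `0.2.1` bump above is what the benchmark jobs will actually run. A quick sanity check, relying only on the standard library and the `__version__` constant defined in `optimum_benchmark/version.py`:

```python
from importlib.metadata import version

from optimum_benchmark.version import __version__  # the constant bumped in this diff

# the installed distribution's metadata should agree with the module constant;
# once this release is on PyPI, both should read "0.2.1"
assert version("optimum-benchmark") == __version__ == "0.2.1"
print(f"running optimum-benchmark {__version__}")
```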