diff --git a/.github/workflows/conformance_ptq.yml b/.github/workflows/conformance_ptq.yml
new file mode 100644
index 00000000000..74d3b68f2e7
--- /dev/null
+++ b/.github/workflows/conformance_ptq.yml
@@ -0,0 +1,74 @@
+name: Post Training Quantization
+permissions: read-all
+
+on:
+  workflow_call:
+  workflow_dispatch:
+    inputs:
+      pull_request_number:
+        description: 'The pull request number'
+        default: ''
+      pytest_args:
+        description: 'Pytest arguments'
+        default: ''
+
+jobs:
+  examples-cpu:
+    name: Post Training Quantization [${{ matrix.group }}/6]
+    runs-on: ubuntu-latest-16-cores
+    timeout-minutes: 180
+    strategy:
+      fail-fast: false
+      matrix:
+        group: [1, 2, 3, 4, 5, 6]
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          lfs: true
+          fetch-depth: 0 # Fetch full history to allow checking out any branch or PR
+      - name: Fetch and Checkout the Pull Request Branch
+        if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.pull_request_number != '' }}
+        run: |
+          git fetch origin pull/${{ github.event.inputs.pull_request_number }}/head:pr-${{ github.event.inputs.pull_request_number }}
+          git checkout pr-${{ github.event.inputs.pull_request_number }}
+      - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
+        with:
+          python-version: 3.10.14
+      - name: cpuinfo
+        run: cat /proc/cpuinfo
+      - name: Install NNCF and test requirements
+        run: pip install . -r tests/post_training/requirements.txt
+      - name: Print installed modules
+        run: pip list
+      - name: Run PTQ conformance test scope
+        run: |
+          ret=0
+          python -m pytest -s -ra tests/post_training/test_quantize_conformance.py::test_ptq_quantization \
+            --junit-xml=pytest-results.xml \
+            --durations-path=tests/post_training/data/ptq_test_durations.json \
+            --splitting-algorithm=least_duration \
+            --splits 6 \
+            --group ${{ matrix.group }} \
+            ${{ github.event.inputs.pytest_args || '' }} || ret=$?
+          [ $ret -eq 5 ] && [ -n "${{ github.event.inputs.pytest_args || '' }}" ] && exit 0 || exit $ret
+        env:
+          TQDM_DISABLE: 1
+          HF_HOME: "/home/runner/hf_home"
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+      - name: Print results.csv
+        if: ${{ !cancelled() }}
+        run: column -s, -t < tmp/results.csv || echo "no file"
+      - name: Upload artifact
+        if: ${{ !cancelled() }}
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
+        with:
+          name: ptq_results_${{ matrix.group }}
+          path: tmp/results.csv
+      - name: Test Summary
+        if: ${{ !cancelled() }}
+        run: |
+          pip install defusedxml==0.7.1
+          python .github/scripts/pytest_md_summary.py pytest-results.xml >> $GITHUB_STEP_SUMMARY
diff --git a/tests/post_training/README.md b/tests/post_training/README.md
index e1a84aafdb1..703d20e65f8 100644
--- a/tests/post_training/README.md
+++ b/tests/post_training/README.md
@@ -23,17 +23,13 @@ pip install -r requirements.txt
 
 ## Data preparation
 
-## Imagenet
+Using datasets from Hugging Face requires the HF_TOKEN environment variable to be set.
+To use imagenet-1k, you must first accept the license at https://huggingface.co/datasets/mlx-vision/imagenet-1k.
 
-/imagenet/val - name of path
-Since Torchvision `ImageFolder` class is used to work with data the ImageNet validation dataset should be structured accordingly. Below is an example of the `val` folder:
-
-```text
-n01440764
-n01695060
-n01843383
-...
-```
+> [!IMPORTANT]
+> A modified version of the imagenet-1k loader is used to download only the validation subset.
+> To avoid conflicts with the full dataset, set a separate cache directory for this test:
+> https://huggingface.co/docs/datasets/en/cache#cache-directory ## Usage diff --git a/tests/post_training/data/ptq_reference_data.yaml b/tests/post_training/data/ptq_reference_data.yaml index 54b03f8c2c7..209a580584c 100644 --- a/tests/post_training/data/ptq_reference_data.yaml +++ b/tests/post_training/data/ptq_reference_data.yaml @@ -37,7 +37,7 @@ torchvision/resnet18_backend_OV: torchvision/resnet18_backend_ONNX: metric_value: 0.6948 torchvision/resnet18_backend_TORCH: - metric_value: 0.69152 + metric_value: 0.69404 torchvision/resnet18_backend_CUDA_TORCH: metric_value: 0.69152 torchvision/resnet18_backend_FX_TORCH: @@ -55,9 +55,9 @@ torchvision/resnet18_backend_CUDA_FX_TORCH: torchvision/mobilenet_v3_small_BC_backend_FP32: metric_value: 0.6766 torchvision/mobilenet_v3_small_BC_backend_OV: - metric_value: 0.6681 + metric_value: 0.66498 torchvision/mobilenet_v3_small_BC_backend_ONNX: - metric_value: 0.6679 + metric_value: 0.66556 torchvision/mobilenet_v3_small_BC_backend_FX_TORCH: metric_value: 0.6679 exception_xfail_reason: @@ -109,19 +109,19 @@ timm/crossvit_9_240_backend_FP32: timm/crossvit_9_240_backend_ONNX: metric_value: 0.73484 timm/crossvit_9_240_backend_OV: - metric_value: 0.72788 + metric_value: 0.73346 timm/crossvit_9_240_backend_TORCH: - metric_value: 0.72744 + metric_value: 0.73364 timm/darknet53_backend_CUDA_TORCH: metric_value: 0.79176 timm/darknet53_backend_FP32: metric_value: 0.80006 timm/darknet53_backend_ONNX: - metric_value: 0.79176 + metric_value: 0.79578 timm/darknet53_backend_OV: - metric_value: 0.79216 + metric_value: 0.79604 timm/darknet53_backend_TORCH: - metric_value: 0.79094 + metric_value: 0.79578 timm/deit3_small_patch16_224_backend_CUDA_TORCH: metric_value: 0.81246 timm/deit3_small_patch16_224_backend_FP32: @@ -141,7 +141,7 @@ timm/dla34_backend_ONNX: timm/dla34_backend_OV: metric_value: 0.74532 timm/dla34_backend_TORCH: - metric_value: 0.74256 + metric_value: 0.74362 timm/dpn68_backend_CUDA_TORCH: metric_value: 0.75786 timm/dpn68_backend_FP32: @@ -151,23 +151,23 @@ timm/dpn68_backend_ONNX: timm/dpn68_backend_OV: metric_value: 0.75972 timm/dpn68_backend_TORCH: - metric_value: 0.75868 + metric_value: 0.7597 timm/efficientnet_b0_BC_backend_FP32: metric_value: 0.77698 timm/efficientnet_b0_BC_backend_ONNX: - metric_value: 0.77132 + metric_value: 0.77328 timm/efficientnet_b0_BC_backend_OV: - metric_value: 0.77166 + metric_value: 0.77374 timm/efficientnet_b0_backend_CUDA_TORCH: metric_value: 0.77124 timm/efficientnet_b0_backend_FP32: metric_value: 0.77698 timm/efficientnet_b0_backend_ONNX: - metric_value: 0.7719 + metric_value: 0.77352 timm/efficientnet_b0_backend_OV: - metric_value: 0.77104 + metric_value: 0.77342 timm/efficientnet_b0_backend_TORCH: - metric_value: 0.77042 + metric_value: 0.77196 timm/efficientnet_lite0_backend_CUDA_TORCH: metric_value: 0.75162 timm/efficientnet_lite0_backend_FP32: @@ -175,7 +175,7 @@ timm/efficientnet_lite0_backend_FP32: timm/efficientnet_lite0_backend_ONNX: metric_value: 0.75184 timm/efficientnet_lite0_backend_OV: - metric_value: 0.75176 + metric_value: 0.75284 timm/efficientnet_lite0_backend_TORCH: metric_value: 0.7517 timm/hrnet_w18_backend_CUDA_TORCH: @@ -183,11 +183,11 @@ timm/hrnet_w18_backend_CUDA_TORCH: timm/hrnet_w18_backend_FP32: metric_value: 0.78124 timm/hrnet_w18_backend_ONNX: - metric_value: 0.7743 + metric_value: 0.77666 timm/hrnet_w18_backend_OV: - metric_value: 0.7743 + metric_value: 0.77636 timm/hrnet_w18_backend_TORCH: - metric_value: 0.7722 + metric_value: 0.77558 
timm/inception_resnet_v2_backend_CUDA_TORCH: metric_value: 0.80334 timm/inception_resnet_v2_backend_FP32: @@ -195,7 +195,7 @@ timm/inception_resnet_v2_backend_FP32: timm/inception_resnet_v2_backend_ONNX: metric_value: 0.80396 timm/inception_resnet_v2_backend_OV: - metric_value: 0.80422 + metric_value: 0.80304 timm/inception_resnet_v2_backend_TORCH: metric_value: 0.80334 timm/mobilenetv2_050_BC_backend_FP32: @@ -203,7 +203,7 @@ timm/mobilenetv2_050_BC_backend_FP32: timm/mobilenetv2_050_BC_backend_ONNX: metric_value: 0.65486 timm/mobilenetv2_050_BC_backend_OV: - metric_value: 0.65332 + metric_value: 0.65454 timm/mobilenetv2_050_backend_CUDA_TORCH: metric_value: 0.6534 timm/mobilenetv2_050_backend_FP32: @@ -219,27 +219,27 @@ timm/mobilenetv3_small_050_backend_CUDA_TORCH: timm/mobilenetv3_small_050_backend_FP32: metric_value: 0.57906 timm/mobilenetv3_small_050_backend_ONNX: - metric_value: 0.41828 + metric_value: 0.42322 timm/mobilenetv3_small_050_backend_OV: - metric_value: 0.41874 + metric_value: 0.42372 timm/mobilenetv3_small_050_backend_TORCH: - metric_value: 0.4267 + metric_value: 0.4309 timm/mobilenetv3_small_050_BC_backend_FP32: metric_value: 0.57906 timm/mobilenetv3_small_050_BC_backend_ONNX: metric_value: 0.56556 timm/mobilenetv3_small_050_BC_backend_OV: - metric_value: 0.5655 + metric_value: 0.56658 timm/regnetx_002_backend_CUDA_TORCH: metric_value: 0.68596 timm/regnetx_002_backend_FP32: metric_value: 0.68756 timm/regnetx_002_backend_ONNX: - metric_value: 0.6854 + metric_value: 0.68422 timm/regnetx_002_backend_OV: metric_value: 0.6852 timm/regnetx_002_backend_TORCH: - metric_value: 0.68576 + metric_value: 0.6845 timm/resnest14d_backend_CUDA_TORCH: metric_value: 0.74898 timm/resnest14d_backend_FP32: @@ -253,7 +253,7 @@ timm/resnest14d_backend_TORCH: timm/swin_base_patch4_window7_224_backend_FP32: metric_value: 0.85274 timm/swin_base_patch4_window7_224_backend_OV: - metric_value: 0.83566 + metric_value: 0.8454 timm/swin_base_patch4_window7_224_no_sq_backend_FP32: metric_value: 0.85274 timm/swin_base_patch4_window7_224_no_sq_backend_CUDA_TORCH: @@ -271,7 +271,7 @@ timm/tf_inception_v3_backend_ONNX: timm/tf_inception_v3_backend_OV: metric_value: 0.77748 timm/tf_inception_v3_backend_TORCH: - metric_value: 0.77586 + metric_value: 0.77732 timm/vgg11_backend_CUDA_TORCH: metric_value: 0.688 timm/vgg11_backend_FP32: diff --git a/tests/post_training/data/ptq_test_durations.json b/tests/post_training/data/ptq_test_durations.json new file mode 100644 index 00000000000..ec07210f371 --- /dev/null +++ b/tests/post_training/data/ptq_test_durations.json @@ -0,0 +1,136 @@ +{ + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[hf/bert-base-uncased_backend_CUDA_TORCH]": 68, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[hf/bert-base-uncased_backend_FP32]": 68, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[hf/bert-base-uncased_backend_ONNX]": 68, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[hf/bert-base-uncased_backend_OPTIMUM]": 68, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[hf/bert-base-uncased_backend_OV]": 68, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[hf/bert-base-uncased_backend_TORCH]": 68, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[hf/hf-internal-testing/tiny-random-GPTNeoXForCausalLM_statefull_backend_FP32]": 27, + 
"tests/post_training/test_quantize_conformance.py::test_ptq_quantization[hf/hf-internal-testing/tiny-random-GPTNeoXForCausalLM_stateless_backend_FP32]": 28, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[hf/hf-internal-testing/tiny-random-GPTNeoXForCausalLM_statefull_backend_OPTIMUM]": 27, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[hf/hf-internal-testing/tiny-random-GPTNeoXForCausalLM_stateless_backend_OPTIMUM]": 28, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[hf/hf-internal-testing/tiny-random-gpt2_backend_FP32]": 29, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[hf/hf-internal-testing/tiny-random-gpt2_backend_OPTIMUM]": 29, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[hf/hf-internal-testing/tiny-random-gpt2_backend_OV]": 29, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[hf/hf-internal-testing/tiny-random-gpt2_backend_TORCH]": 29, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/resnet18_backend_FP32]": 284, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/resnet18_backend_OV]": 284, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/resnet18_backend_ONNX]": 284, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/resnet18_backend_TORCH]": 284, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/resnet18_backend_CUDA_TORCH]": 284, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/resnet18_backend_FX_TORCH]": 284, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/resnet18_backend_CUDA_FX_TORCH]": 284, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/mobilenet_v3_small_BC_backend_FP32]": 187, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/mobilenet_v3_small_BC_backend_OV]": 187, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/mobilenet_v3_small_BC_backend_ONNX]": 187, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/mobilenet_v3_small_BC_backend_FX_TORCH]": 187, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/mobilenet_v3_small_BC_backend_CUDA_FX_TORCH]": 187, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/vit_b_16_backend_FP32]": 511, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/vit_b_16_backend_OV]": 511, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/vit_b_16_backend_FX_TORCH]": 511, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/vit_b_16_backend_CUDA_FX_TORCH]": 511, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/swin_v2_s_backend_FP32]": 2024, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/swin_v2_s_backend_OV]": 2024, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/swin_v2_s_backend_FX_TORCH]": 2024, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[torchvision/swin_v2_s_backend_CUDA_FX_TORCH]": 2024, + 
"tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/crossvit_9_240_backend_CUDA_TORCH]": 424, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/crossvit_9_240_backend_FP32]": 424, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/crossvit_9_240_backend_ONNX]": 424, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/crossvit_9_240_backend_OV]": 424, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/crossvit_9_240_backend_TORCH]": 424, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/darknet53_backend_CUDA_TORCH]": 1180, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/darknet53_backend_FP32]": 1180, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/darknet53_backend_ONNX]": 1180, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/darknet53_backend_OV]": 1180, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/darknet53_backend_TORCH]": 1180, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/deit3_small_patch16_224_backend_CUDA_TORCH]": 624, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/deit3_small_patch16_224_backend_FP32]": 624, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/deit3_small_patch16_224_backend_ONNX]": 624, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/deit3_small_patch16_224_backend_OV]": 624, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/deit3_small_patch16_224_backend_TORCH]": 624, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/dla34_backend_CUDA_TORCH]": 418, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/dla34_backend_FP32]": 418, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/dla34_backend_ONNX]": 418, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/dla34_backend_OV]": 418, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/dla34_backend_TORCH]": 418, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/dpn68_backend_CUDA_TORCH]": 914, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/dpn68_backend_FP32]": 914, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/dpn68_backend_ONNX]": 914, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/dpn68_backend_OV]": 914, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/dpn68_backend_TORCH]": 914, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/efficientnet_b0_BC_backend_FP32]": 322, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/efficientnet_b0_BC_backend_ONNX]": 322, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/efficientnet_b0_BC_backend_OV]": 322, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/efficientnet_b0_backend_CUDA_TORCH]": 514, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/efficientnet_b0_backend_FP32]": 514, + 
"tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/efficientnet_b0_backend_ONNX]": 514, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/efficientnet_b0_backend_OV]": 514, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/efficientnet_b0_backend_TORCH]": 514, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/efficientnet_lite0_backend_CUDA_TORCH]": 479, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/efficientnet_lite0_backend_FP32]": 479, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/efficientnet_lite0_backend_ONNX]": 479, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/efficientnet_lite0_backend_OV]": 479, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/efficientnet_lite0_backend_TORCH]": 479, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/hrnet_w18_backend_CUDA_TORCH]": 696, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/hrnet_w18_backend_FP32]": 696, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/hrnet_w18_backend_ONNX]": 696, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/hrnet_w18_backend_OV]": 696, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/hrnet_w18_backend_TORCH]": 696, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/inception_resnet_v2_backend_CUDA_TORCH]": 1757, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/inception_resnet_v2_backend_FP32]": 1757, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/inception_resnet_v2_backend_ONNX]": 1757, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/inception_resnet_v2_backend_OV]": 1757, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/inception_resnet_v2_backend_TORCH]": 1757, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv2_050_BC_backend_FP32]": 435, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv2_050_BC_backend_ONNX]": 435, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv2_050_BC_backend_OV]": 435, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv2_050_backend_CUDA_TORCH]": 530, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv2_050_backend_FP32]": 530, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv2_050_backend_ONNX]": 530, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv2_050_backend_OV]": 530, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv2_050_backend_TORCH]": 530, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv3_small_050_backend_CUDA_TORCH]": 404, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv3_small_050_backend_FP32]": 404, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv3_small_050_backend_ONNX]": 404, + 
"tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv3_small_050_backend_OV]": 404, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv3_small_050_backend_TORCH]": 404, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv3_small_050_BC_backend_FP32]": 298, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv3_small_050_BC_backend_ONNX]": 298, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/mobilenetv3_small_050_BC_backend_OV]": 298, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/regnetx_002_backend_CUDA_TORCH]": 397, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/regnetx_002_backend_FP32]": 397, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/regnetx_002_backend_ONNX]": 397, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/regnetx_002_backend_OV]": 397, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/regnetx_002_backend_TORCH]": 397, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/resnest14d_backend_CUDA_TORCH]": 888, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/resnest14d_backend_FP32]": 888, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/resnest14d_backend_ONNX]": 888, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/resnest14d_backend_OV]": 888, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/resnest14d_backend_TORCH]": 888, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/swin_base_patch4_window7_224_backend_FP32]": 2300, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/swin_base_patch4_window7_224_backend_OV]": 2300, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/swin_base_patch4_window7_224_no_sq_backend_FP32]": 2941, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/swin_base_patch4_window7_224_no_sq_backend_CUDA_TORCH]": 2941, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/swin_base_patch4_window7_224_no_sq_backend_ONNX]": 2941, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/swin_base_patch4_window7_224_no_sq_backend_TORCH]": 2941, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/tf_inception_v3_backend_CUDA_TORCH]": 911, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/tf_inception_v3_backend_FP32]": 911, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/tf_inception_v3_backend_ONNX]": 911, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/tf_inception_v3_backend_OV]": 911, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/tf_inception_v3_backend_TORCH]": 911, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/vgg11_backend_CUDA_TORCH]": 742, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/vgg11_backend_FP32]": 742, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/vgg11_backend_ONNX]": 742, + 
"tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/vgg11_backend_OV]": 742, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/vgg11_backend_TORCH]": 742, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/visformer_small_backend_CUDA_TORCH]": 428, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/visformer_small_backend_FP32]": 428, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/visformer_small_backend_ONNX]": 428, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/visformer_small_backend_OV]": 428, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/visformer_small_backend_TORCH]": 428, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/wide_resnet50_2_backend_CUDA_TORCH]": 1011, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/wide_resnet50_2_backend_FP32]": 1011, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/wide_resnet50_2_backend_ONNX]": 1011, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/wide_resnet50_2_backend_OV]": 1011, + "tests/post_training/test_quantize_conformance.py::test_ptq_quantization[timm/wide_resnet50_2_backend_TORCH]": 1011 +} diff --git a/tests/post_training/pipelines/image_classification_base.py b/tests/post_training/pipelines/image_classification_base.py index b3ba58f8a36..3b20ee73707 100644 --- a/tests/post_training/pipelines/image_classification_base.py +++ b/tests/post_training/pipelines/image_classification_base.py @@ -11,25 +11,79 @@ import copy import os +import types import numpy as np import openvino as ov import torch +from datasets import Split +from datasets import SplitGenerator +from datasets import load_dataset_builder from sklearn.metrics import accuracy_score -from torchvision import datasets +from torchvision import transforms import nncf from nncf.common.logging.track_progress import track from tests.post_training.pipelines.base import DEFAULT_VAL_THREADS from tests.post_training.pipelines.base import FX_BACKENDS +from tests.post_training.pipelines.base import PT_BACKENDS +from tests.post_training.pipelines.base import BackendType from tests.post_training.pipelines.base import PTQTestPipeline +def hf_imagenet_1k_val(model_transform): + """ + Download only VAL subset of ImageNet-1k dataset from Hugging Face. + load_dataset("imagenet-1k") loads full dataset, which is not needed. 
+    """
+
+    builder_instance = load_dataset_builder("imagenet-1k")
+
+    def val_split_generators(self, dl_manager):
+        DATA_URL_VAL = {
+            "val": ["data/val_images.tar.gz"],
+        }
+        archives = dl_manager.download(DATA_URL_VAL)
+
+        return [
+            SplitGenerator(
+                name=Split.VALIDATION,
+                gen_kwargs={
+                    "archives": [dl_manager.iter_archive(archive) for archive in archives["val"]],
+                    "split": "validation",
+                },
+            ),
+        ]
+
+    builder_instance._split_generators = types.MethodType(val_split_generators, builder_instance)
+    builder_instance.download_and_prepare()
+    dataset = builder_instance.as_dataset(split=Split.VALIDATION)
+
+    def transform_fn(examples):
+        def f(image):
+            """If the input image is grayscale, convert it to RGB."""
+            if len(image.getbands()) < 3:
+                return transforms.Grayscale(num_output_channels=3)(image)
+            return image
+
+        transform = transforms.Compose(
+            [
+                transforms.Lambda(f),
+                model_transform,
+            ]
+        )
+        examples["image"] = [transform(img) for img in examples["image"]]
+        return examples
+
+    dataset.set_transform(transform_fn)
+    return dataset
+
+
 class ImageClassificationBase(PTQTestPipeline):
     """Base pipeline for Image Classification models"""
 
     def prepare_calibration_dataset(self):
-        dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform)
+        dataset = hf_imagenet_1k_val(self.transform)
 
         loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, num_workers=2, shuffle=False)
         self.calibration_dataset = nncf.Dataset(loader, self.get_transform_calibration_fn())
@@ -63,7 +117,9 @@ def process_result(request, userdata):
 
         infer_queue.set_callback(process_result)
 
-        for i, (images, target) in enumerate(val_loader):
+        for i, data in enumerate(val_loader):
+            images = data["image"]
+            target = data["label"]
             # W/A for memory leaks when using torch DataLoader and OpenVINO
             image_copies = copy.deepcopy(images.numpy())
             infer_queue.start_async(image_copies, userdata=i)
@@ -76,7 +132,9 @@ def _validate_torch_compile(
         self, val_loader: torch.utils.data.DataLoader, predictions: np.ndarray, references: np.ndarray
     ):
         compiled_model = torch.compile(self.compressed_model.cpu(), backend="openvino")
-        for i, (images, target) in enumerate(val_loader):
+        for i, data in enumerate(val_loader):
+            images = data["image"]
+            target = data["label"]
             # W/A for memory leaks when using torch DataLoader and OpenVINO
             pred = compiled_model(images)
             pred = torch.argmax(pred, dim=1)
@@ -85,7 +143,7 @@ def _validate_torch_compile(
         return predictions, references
 
     def _validate(self) -> None:
-        val_dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform)
+        val_dataset = hf_imagenet_1k_val(self.transform)
 
         val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=2, shuffle=False)
 
         dataset_size = len(val_loader)
@@ -103,3 +161,19 @@ def _validate(self) -> None:
 
         self.run_info.metric_name = "Acc@1"
         self.run_info.metric_value = acc_top1
+
+    def get_transform_calibration_fn(self):
+        if self.backend in FX_BACKENDS + PT_BACKENDS:
+            device = torch.device(
+                "cuda" if self.backend in [BackendType.CUDA_TORCH, BackendType.CUDA_FX_TORCH] else "cpu"
+            )
+
+            def transform_fn(data_item):
+                return data_item["image"].to(device)
+
+        else:
+
+            def transform_fn(data_item):
+                return {self.input_name: np.array(data_item["image"], dtype=np.float32)}
+
+        return transform_fn
diff --git a/tests/post_training/pipelines/image_classification_timm.py b/tests/post_training/pipelines/image_classification_timm.py
index 4327f3a91f1..f18a3d9dadb 100644
--- 
a/tests/post_training/pipelines/image_classification_timm.py +++ b/tests/post_training/pipelines/image_classification_timm.py @@ -9,7 +9,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import onnx import openvino as ov import timm @@ -89,19 +88,3 @@ def prepare_preprocessor(self) -> None: mean=config["mean"], std=config["std"], ) - - def get_transform_calibration_fn(self): - if self.backend in PT_BACKENDS: - device = torch.device("cuda" if self.backend == BackendType.CUDA_TORCH else "cpu") - - def transform_fn(data_item): - images, _ = data_item - return images.to(device) - - else: - - def transform_fn(data_item): - images, _ = data_item - return {self.input_name: np.array(images, dtype=np.float32)} - - return transform_fn diff --git a/tests/post_training/pipelines/image_classification_torchvision.py b/tests/post_training/pipelines/image_classification_torchvision.py index 386ff834b86..9dc0c5ca914 100644 --- a/tests/post_training/pipelines/image_classification_torchvision.py +++ b/tests/post_training/pipelines/image_classification_torchvision.py @@ -12,7 +12,6 @@ from dataclasses import dataclass from typing import Any, Callable, Tuple -import numpy as np import onnx import openvino as ov import torch @@ -142,21 +141,3 @@ def _dump_model_fp32(self) -> None: def prepare_preprocessor(self) -> None: self.transform = self.model_params.weights.transforms() - - def get_transform_calibration_fn(self): - if self.backend in FX_BACKENDS + PT_BACKENDS: - device = torch.device( - "cuda" if self.backend in [BackendType.CUDA_TORCH, BackendType.CUDA_FX_TORCH] else "cpu" - ) - - def transform_fn(data_item): - images, _ = data_item - return images.to(device) - - else: - - def transform_fn(data_item): - images, _ = data_item - return {self.input_name: np.array(images, dtype=np.float32)} - - return transform_fn diff --git a/tests/post_training/test_quantize_conformance.py b/tests/post_training/test_quantize_conformance.py index feeb28cc3f8..d2c0d21337a 100644 --- a/tests/post_training/test_quantize_conformance.py +++ b/tests/post_training/test_quantize_conformance.py @@ -267,7 +267,6 @@ def run_pipeline( def test_ptq_quantization( ptq_reference_data: dict, test_case_name: str, - data_dir: Path, output_dir: Path, result_data: Dict[str, RunInfo], no_eval: bool, @@ -287,7 +286,7 @@ def test_ptq_quantization( PTQ_TEST_CASES, result_data, output_dir, - data_dir, + None, no_eval, batch_size, run_fp32_backend,
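For local reproduction, the dataset access that the README note and the workflow's `HF_TOKEN`/`HF_HOME` variables describe can be set up as follows — a minimal sketch, assuming a token that has accepted the imagenet-1k license; the token value and cache path are placeholders, not part of this change:

```python
import os

# Placeholders: substitute a token with imagenet-1k access and a cache path of
# your choice. A dedicated HF_HOME keeps the validation-only download from
# colliding with a previously cached copy of the full dataset.
os.environ["HF_TOKEN"] = "hf_xxx"
os.environ["HF_HOME"] = "/home/runner/hf_home"

# Import after the environment is set so the cache location takes effect.
from datasets import load_dataset_builder

builder = load_dataset_builder("imagenet-1k")  # resolves under the new HF_HOME
```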
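The calibration wiring in `image_classification_base.py` then comes together as sketched below, assuming the diff above is applied. `ResNet18_Weights.DEFAULT.transforms()` stands in for the pipeline's `self.transform`, and only the plain CPU Torch branch of the calibration transform is shown:

```python
import torch
import nncf
from torchvision.models import ResNet18_Weights

from tests.post_training.pipelines.image_classification_base import hf_imagenet_1k_val

# After set_transform(), each item is a dict with "image" and "label" keys,
# and the default DataLoader collation yields dicts of batched tensors.
dataset = hf_imagenet_1k_val(ResNet18_Weights.DEFAULT.transforms())
loader = torch.utils.data.DataLoader(dataset, batch_size=1, num_workers=2, shuffle=False)

# Torch backends extract the image tensor (and move it to the target device);
# the OV/ONNX branch would instead map it to the model's input name as float32.
calibration_dataset = nncf.Dataset(loader, lambda data_item: data_item["image"].to("cpu"))
```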