Skip to content

Commit 6a83349

Browse files
committed
test2
1 parent dcd86f7 commit 6a83349

10 files changed

Lines changed: 47 additions & 43 deletions

File tree

.github/workflows/linux.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@ env:
3333
BASE_PRODUCT_TYPE: public_linux_ubuntu_22_04_x86_64
3434
GENAI_WHEELS_ARTIFACT_NAME: 'genai_wheels'
3535
GENAI_ARCHIVE_ARTIFACT_BASE_NAME: 'genai_archive'
36-
HF_DATASETS_CACHE: /mount/caches/pytest/datasets
37-
HF_HUB_DOWNLOAD_TIMEOUT: 60
3836

3937
jobs:
4038
smart_ci:
@@ -544,11 +542,19 @@ jobs:
544542
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG.test }}
545543
timeout: 30
546544
- name: 'WWB tests'
545+
env:
546+
HF_DATASETS_CACHE: /mount/caches/pytest/datasets
547+
HF_HUB_DOWNLOAD_TIMEOUT: 60
547548
cmd: 'python -m pytest -v ./tools/who_what_benchmark/tests -m "not nanollava"'
548549
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }}
549550
timeout: 120
550551
- name: 'WWB tests (nanollava)'
552+
env:
553+
HF_DATASETS_CACHE: /mount/caches/pytest/datasets
554+
HF_HUB_DOWNLOAD_TIMEOUT: 60
551555
cmd: |
556+
echo "HF_HUB_DOWNLOAD_TIMEOUT=$HF_HUB_DOWNLOAD_TIMEOUT"
557+
echo "HF_DATASETS_CACHE=$HF_DATASETS_CACHE"
552558
python -m pip install transformers==4.48.0
553559
python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava
554560
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }}

.github/workflows/mac.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ env:
2525
OV_CACHE: ~/.cache/ov_cache/
2626
CLEANUP_CACHE: 1
2727
OPENVINO_LOG_LEVEL: 4
28-
HF_DATASETS_CACHE: ~/.cache/pytest/datasets
29-
HF_HUB_DOWNLOAD_TIMEOUT: 60
3028

3129
jobs:
3230
smart_ci:
@@ -470,10 +468,16 @@ jobs:
470468
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG.test }}
471469
timeout: 30
472470
- name: 'WWB tests'
471+
env:
472+
HF_DATASETS_CACHE: ~/.cache/pytest/datasets
473+
HF_HUB_DOWNLOAD_TIMEOUT: 60
473474
cmd: 'python -m pytest -v ./tools/who_what_benchmark/tests -m "not nanollava"'
474475
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }}
475476
timeout: 120
476477
- name: 'WWB tests (nanollava)'
478+
env:
479+
HF_DATASETS_CACHE: ~/.cache/pytest/datasets
480+
HF_HUB_DOWNLOAD_TIMEOUT: 60
477481
cmd: |
478482
python -m pip install transformers==4.48.0
479483
python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava

.github/workflows/manylinux_2_28.yml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Manylinux 2_28
1+
name: Manylinux 2_28
22
on:
33
workflow_dispatch:
44
pull_request:
@@ -32,9 +32,7 @@ env:
3232
ARTIFACTS_SHARE: '/mount/build-artifacts'
3333
BASE_PRODUCT_TYPE: public_manylinux_2_28_x86_64
3434
GENAI_WHEELS_ARTIFACT_NAME: 'genai_wheels'
35-
GENAI_ARCHIVE_ARTIFACT_BASE_NAME: 'genai_archive'
36-
HF_DATASETS_CACHE: /mount/caches/pytest/datasets
37-
HF_HUB_DOWNLOAD_TIMEOUT: 60
35+
GENAI_ARCHIVE_ARTIFACT_BASE_NAME: 'genai_archive'
3836

3937
jobs:
4038
smart_ci:
@@ -485,10 +483,16 @@ jobs:
485483
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG.test }}
486484
timeout: 30
487485
- name: 'WWB tests'
486+
env:
487+
HF_DATASETS_CACHE: /mount/caches/pytest/datasets
488+
HF_HUB_DOWNLOAD_TIMEOUT: 60
488489
cmd: 'python -m pytest -v ./tools/who_what_benchmark/tests -m "not nanollava"'
489490
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }}
490491
timeout: 120
491492
- name: 'WWB tests (nanollava)'
493+
env:
494+
HF_DATASETS_CACHE: /mount/caches/pytest/datasets
495+
HF_HUB_DOWNLOAD_TIMEOUT: 60
492496
cmd: |
493497
python -m pip install transformers==4.48.0
494498
python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava

.github/workflows/windows.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,8 +631,14 @@ jobs:
631631
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG.test }}
632632
timeout: 30
633633
- name: 'WWB tests'
634+
env:
635+
HF_DATASETS_CACHE: C:/mount/caches/datasets
636+
HF_HUB_DOWNLOAD_TIMEOUT: 60
634637
cmd: 'python -m pytest -s -v tools/who_what_benchmark/tests -m "not nanollava"'
635638
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }}
639+
env:
640+
HF_DATASETS_CACHE: C:/mount/caches/datasets
641+
HF_HUB_DOWNLOAD_TIMEOUT: 60
636642
timeout: 120
637643
- name: 'WWB tests (nanollava)'
638644
cmd: |

tools/who_what_benchmark/tests/conftest.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,7 @@ def run_wwb(args, env=None):
122122
if env:
123123
base_env.update(env)
124124
try:
125-
return subprocess.check_output(
126-
command,
127-
stderr=subprocess.STDOUT,
128-
encoding="utf-8",
129-
env=base_env,
130-
)
125+
return subprocess.check_output(command, stderr=subprocess.STDOUT, encoding="utf-8", env=base_env, errors="replace")
131126
except subprocess.CalledProcessError as error:
132127
logger.error(f"'{' '.join(map(str, command))}' returned {error.returncode}. Output:\n{error.output}")
133128
raise

tools/who_what_benchmark/whowhatbench/embeddings_evaluator.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,10 @@
1212
from transformers import set_seed
1313

1414
from filelock import FileLock
15-
from .utils import load_dataset_with_retry
15+
from .utils import load_dataset_with_retry, LOCK_PATH, LOCK_MAX_TIMEOUT
1616
from .whowhat_metrics import EmbedsSimilarity
1717
from .registry import register_evaluator, BaseEvaluator
1818

19-
20-
lock_path = os.environ.get("HF_DATASETS_CACHE", '.')
21-
lock_file_name = "emb_dataset.lock"
2219
DEFAULT_MAX_LENGTH = 200
2320

2421

@@ -27,8 +24,8 @@ def prepare_default_data(num_samples=None):
2724
DATASET_NAME = "microsoft/ms_marco"
2825
NUM_SAMPLES = num_samples if num_samples else 24
2926
set_seed(42)
30-
lock = FileLock(os.path.join(lock_path, lock_file_name))
31-
with lock.acquire(timeout=300):
27+
lock = FileLock(os.path.join(LOCK_PATH, "emb_dataset_load.lock"))
28+
with lock.acquire(timeout=LOCK_MAX_TIMEOUT):
3229
default_dataset = datasets.load_dataset(
3330
DATASET_NAME, 'v2.1', split="test", streaming=True
3431
).shuffle(42).take(NUM_SAMPLES)

tools/who_what_benchmark/whowhatbench/im2im_evaluator.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,11 @@
1010

1111
from filelock import FileLock
1212
from .registry import register_evaluator
13-
from .utils import load_dataset_with_retry
13+
from .utils import load_dataset_with_retry, LOCK_PATH, LOCK_MAX_TIMEOUT
1414
from .whowhat_metrics import ImageSimilarity
1515
from .text2image_evaluator import Text2ImageEvaluator
1616

1717

18-
from .whowhat_metrics import ImageSimilarity
19-
20-
lock_path = os.environ.get("HF_DATASETS_CACHE", '.')
21-
lock_file_name = "im2im_dataset.lock"
22-
2318
def preprocess_fn(example):
2419
return {
2520
"prompts": example["Instruction_VLM-LLM"],
@@ -32,8 +27,8 @@ def prepare_default_data(num_samples=None):
3227
DATASET_NAME = "paint-by-inpaint/PIPE"
3328
NUM_SAMPLES = 10 if num_samples is None else num_samples
3429
set_seed(42)
35-
lock = FileLock(os.path.join(lock_path, lock_file_name))
36-
with lock.acquire(timeout=300):
30+
lock = FileLock(os.path.join(LOCK_PATH, "im2im_dataset.lock"))
31+
with lock.acquire(timeout=LOCK_MAX_TIMEOUT):
3732
default_dataset = datasets.load_dataset(
3833
DATASET_NAME, split="test", streaming=True
3934
).filter(lambda example: example["Instruction_VLM-LLM"] != "").take(NUM_SAMPLES)

tools/who_what_benchmark/whowhatbench/inpaint_evaluator.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,9 @@
1616
from .registry import register_evaluator
1717
from .text2image_evaluator import Text2ImageEvaluator
1818

19-
from .utils import load_dataset_with_retry
19+
from .utils import load_dataset_with_retry, LOCK_PATH, LOCK_MAX_TIMEOUT
2020
from .whowhat_metrics import ImageSimilarity
2121

22-
lock_path = os.environ.get("HF_DATASETS_CACHE", '.')
23-
lock_file_name = "inpainting_dataset.lock"
24-
2522

2623
# monkey patch of Parquet._generate_tables to avoid issue https://github.com/huggingface/datasets/issues/7357
2724
@contextmanager
@@ -47,8 +44,8 @@ def prepare_default_data(num_samples=None):
4744
DATASET_NAME = "phiyodr/InpaintCOCO"
4845
NUM_SAMPLES = 10 if num_samples is None else num_samples
4946
set_seed(42)
50-
lock = FileLock(os.path.join(lock_path, lock_file_name))
51-
with lock.acquire(timeout=300):
47+
lock = FileLock(os.path.join(LOCK_PATH, "inpainting_dataset_load.lock"))
48+
with lock.acquire(timeout=LOCK_MAX_TIMEOUT):
5249
default_dataset = datasets.load_dataset(
5350
DATASET_NAME, split="test", streaming=True,
5451
).filter(lambda example: example["inpaint_caption"] != "").take(NUM_SAMPLES)

tools/who_what_benchmark/whowhatbench/reranking_evaluator.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from transformers import set_seed
1212
import datasets
1313
import numpy as np
14-
from .utils import load_dataset_with_retry
14+
from .utils import load_dataset_with_retry, LOCK_PATH, LOCK_MAX_TIMEOUT
1515

1616

1717
# we would like to evalute score for all documents
@@ -20,8 +20,6 @@
2020
DEFAULT_TOP_K = 100
2121
DEFAULT_MAX_LENGTH = 200
2222
DEFAULT_MAX_LENGTH_QWEN = 8192
23-
lock_path = os.environ.get("HF_DATASETS_CACHE", '.')
24-
lock_file_name = "reranker_dataset.lock"
2523

2624

2725
def is_qwen3(config):
@@ -44,8 +42,8 @@ def prepare_default_data(num_samples=None):
4442
DATASET_NAME = "microsoft/ms_marco"
4543
NUM_SAMPLES = num_samples if num_samples else 24
4644
set_seed(42)
47-
lock = FileLock(os.path.join(lock_path, lock_file_name))
48-
with lock.acquire(timeout=300):
45+
lock = FileLock(os.path.join(LOCK_PATH, "reranker_dataset_load.lock"))
46+
with lock.acquire(timeout=LOCK_MAX_TIMEOUT):
4947
default_dataset = datasets.load_dataset(
5048
DATASET_NAME, 'v2.1', split="test", streaming=True
5149
).shuffle(42).take(NUM_SAMPLES)

tools/who_what_benchmark/whowhatbench/utils.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
logging.basicConfig(level=logging.INFO)
2727
logger = logging.getLogger(__name__)
2828

29+
# for dataset download regulation mechanism
30+
LOCK_PATH = os.environ.get("HF_DATASETS_CACHE", ".")
31+
LOCK_MAX_TIMEOUT = 300
32+
2933

3034
def new_randn_tensor(
3135
shape: Union[tuple, list],
@@ -176,13 +180,11 @@ def preprocess_fn(example):
176180

177181
@load_dataset_with_retry(retries=3, delay=5)
178182
def prepare_default_data_image(num_samples=None):
179-
lock_path = os.environ.get("HF_DATASETS_CACHE", '.')
180-
lock_file_name = "vlm_dataset.lock"
181183
DATASET_NAME = "ucla-contextual/contextual_test"
182184
NUM_SAMPLES = 24 if num_samples is None else num_samples
183185
set_seed(42)
184-
lock = FileLock(os.path.join(lock_path, lock_file_name))
185-
with lock.acquire(timeout=300):
186+
lock = FileLock(os.path.join(LOCK_PATH, "vlm_dataset_load.lock"))
187+
with lock.acquire(timeout=LOCK_MAX_TIMEOUT):
186188
default_dataset = datasets.load_dataset(
187189
DATASET_NAME, split="test", streaming=True
188190
).shuffle(42).take(NUM_SAMPLES)

0 commit comments

Comments
 (0)