Skip to content
This repository was archived by the owner on Apr 30, 2026. It is now read-only.

Commit 4b77d1f

Browse files
committed
Only require submodlib-py on Linux systems
Subset selection currently works only on Linux with CUDA devices, so don't add submodlib-py as a requirement except on Linux machines. Signed-off-by: Ben Browning <bbrownin@redhat.com>
1 parent 37ef74b commit 4b77d1f

3 files changed

Lines changed: 11 additions & 5 deletions

File tree

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ openai>=1.13.3,<2.0.0
1414
numba
1515
sentencepiece>=0.2.0
1616
# Note: this dependency has to be built from source
17-
submodlib-py==0.0.1
17+
submodlib-py==0.0.1; sys_platform == 'linux'
1818
tabulate>=0.9.0
1919

2020
# Note: this dependency goes along with langchain-text-splitters and may be

src/instructlab/sdg/subset_selection.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
# Third Party
1515
from datasets import concatenate_datasets, load_dataset
1616
from jinja2 import BaseLoader, Environment
17-
from submodlib import FacilityLocationFunction
1817
from tqdm import tqdm
1918
import h5py
2019
import numpy as np
@@ -908,6 +907,10 @@ def process_folds_with_gpu(args):
908907
testing_mode,
909908
) = args
910909

910+
# Third Party
911+
# pylint: disable=import-error, import-outside-toplevel
912+
from submodlib import FacilityLocationFunction
913+
911914
try:
912915
if torch.cuda.is_available():
913916
torch.cuda.set_device(gpu_id)

tests/functional/test_subset_selection.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@
1414
import pytest
1515
import torch
1616

17-
# First Party
18-
from instructlab.sdg.subset_selection import subset_datasets
19-
2017

2118
def create_test_data(num_samples=50):
2219
"""Create synthetic conversation data similar to the real dataset."""
@@ -85,8 +82,14 @@ def map(self, func, iterable):
8582
return [mock_process_folds_with_gpu(item) for item in iterable]
8683

8784

85+
@pytest.mark.gpu
8886
def test_subset_datasets_functional():
8987
"""Functional test for subset_datasets."""
88+
89+
# Lazy import down here to not trigger the import except when we're running GPU tests
90+
# First Party
91+
from instructlab.sdg.subset_selection import subset_datasets
92+
9093
logger = logging.getLogger(__name__)
9194

9295
# Create a mock encoder class

0 commit comments

Comments
 (0)