Skip to content

Commit 73b97df

Browse files
committed
[bugfix] fix datasets cache hash (#9284)
1 parent 088860e commit 73b97df

1 file changed

Lines changed: 5 additions & 6 deletions

File tree

swift/dataset/preprocessor/core.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@
2121

2222
logger = get_logger()
2323

24-
datasets_4 = version.parse(datasets.__version__) >= version.parse('4.0')
2524
_pair_keys = ['messages', 'images', 'videos', 'audios', 'tools', 'objects']
2625

2726

@@ -55,6 +54,7 @@ def __init__(self,
5554
self.traceback_limit = traceback_limit
5655
self._traceback_counter = 0
5756
self.dataset_sample = dataset_sample
57+
self.datasets_4 = version.parse(datasets.__version__) >= version.parse('4.0')
5858
if not isinstance(random_state, np.random.RandomState):
5959
random_state = np.random.RandomState(random_state)
6060
self.random_state = random_state
@@ -244,17 +244,16 @@ def remove_useless_columns(dataset: DATASET_TYPE) -> DATASET_TYPE:
244244
dataset = dataset.select_columns(k_list)
245245
return dataset
246246

247-
@staticmethod
248247
@contextmanager
249-
def _patch_arrow_writer():
248+
def _patch_arrow_writer(self):
250249
# fix AI-ModelScope/ms_agent_for_agentfabric:all
251250
from datasets.arrow_writer import ArrowWriter
252251

253-
def _new_init(self, schema=None, features=None, *args, **kwargs):
252+
def _new_init(_self, schema=None, features=None, *args, **kwargs):
254253

255254
if features is not None:
256255

257-
if datasets_4:
256+
if self.datasets_4:
258257
from datasets.features import Json, List
259258
messages_feature = List(Json())
260259
for key in ['messages', 'rejected_messages', 'positive_messages', 'negative_messages']:
@@ -283,7 +282,7 @@ def _new_init(self, schema=None, features=None, *args, **kwargs):
283282
'bbox_type': Value(dtype='string'),
284283
'image_id': Sequence(feature=Value(dtype='int64'), length=-1),
285284
}
286-
ArrowWriter.__origin_init__(self, schema, features, *args, **kwargs)
285+
ArrowWriter.__origin_init__(_self, schema, features, *args, **kwargs)
287286

288287
ArrowWriter.__origin_init__ = ArrowWriter.__init__
289288
ArrowWriter.__init__ = _new_init

0 commit comments

Comments
 (0)