File tree: 2 files changed, +7 -1 lines changed
apache_beam/ml/rag/chunking
2 files changed, +7 -1 lines changed
Original file line number | Diff line number | Diff line change
@@ -167,7 +167,7 @@ def test_huggingface_tokenizer_splitter(self):
167
167
def check_token_lengths (chunks ):
168
168
for chunk in chunks :
169
169
# Verify each chunk's token length is within limits
170
- num_tokens = len (tokenizer .encode (chunk .content .text ))
170
+ num_tokens = len (tokenizer .tokenize (chunk .content .text ))
171
171
if not num_tokens <= 10 :
172
172
raise BeamAssertException (
173
173
f"Chunk has { num_tokens } tokens, expected <= 10" )
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,8 @@ commands =
105
105
106
106
[testenv:py{39,310,311}-ml]
107
107
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
108
+ deps =
109
+ accelerate>=1.6.0
108
110
setenv =
109
111
extras = test,gcp,dataframe,ml_test
110
112
commands =
@@ -115,6 +117,8 @@ commands =
115
117
[testenv:py312-ml]
116
118
# many packages do not support py3.12
117
119
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
120
+ deps =
121
+ accelerate>=1.6.0
118
122
setenv =
119
123
extras = test,gcp,dataframe,p312_ml_test
120
124
commands =
@@ -466,6 +470,7 @@ deps =
466
470
448: torch>=2.0.0,<2.1.0
467
471
latest: transformers>=4.48.0
468
472
latest: torch>=2.0.0
473
+ latest: accelerate>=1.6.0
469
474
tensorflow ==2.12.0
470
475
protobuf ==4.25.5
471
476
extras = test,gcp,ml_test
@@ -494,6 +499,7 @@ commands =
494
499
[testenv:py{39,310}-embeddings]
495
500
deps =
496
501
sentence-transformers ==3.3.1
502
+ accelerate>=1.6.0
497
503
passenv = HF_INFERENCE_TOKEN
498
504
extras = test,gcp
499
505
commands =
You can’t perform that action at this time.
0 commit comments