Fix PostCommit Python Dependency and PreCommit Python ML tests (#34559)

akashorabek · web-flow · commit aba093df025d · 2025-04-07T15:12:12.000-04:00
diff --git a/sdks/python/apache_beam/ml/rag/chunking/langchain_test.py b/sdks/python/apache_beam/ml/rag/chunking/langchain_test.py
@@ -167,7 +167,7 @@ def test_huggingface_tokenizer_splitter(self):
       def check_token_lengths(chunks):
         for chunk in chunks:
           # Verify each chunk's token length is within limits
-          num_tokens = len(tokenizer.encode(chunk.content.text))
+          num_tokens = len(tokenizer.tokenize(chunk.content.text))
           if not num_tokens <= 10:
             raise BeamAssertException(
                 f"Chunk has {num_tokens} tokens, expected <= 10")
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
@@ -105,6 +105,8 @@ commands =
 
 [testenv:py{39,310,311}-ml]
 # Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
+deps =
+  accelerate>=1.6.0
 setenv =
 extras = test,gcp,dataframe,ml_test
 commands =
@@ -115,6 +117,8 @@ commands =
 [testenv:py312-ml]
 # many packages do not support py3.12
 # Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
+deps =
+  accelerate>=1.6.0
 setenv =
 extras = test,gcp,dataframe,p312_ml_test
 commands =
@@ -466,6 +470,7 @@ deps =
   448: torch>=2.0.0,<2.1.0
   latest: transformers>=4.48.0
   latest: torch>=2.0.0
+  latest: accelerate>=1.6.0
   tensorflow==2.12.0
   protobuf==4.25.5
 extras = test,gcp,ml_test
@@ -494,6 +499,7 @@ commands =
 [testenv:py{39,310}-embeddings]
 deps =
   sentence-transformers==3.3.1
+  accelerate>=1.6.0
 passenv = HF_INFERENCE_TOKEN
 extras = test,gcp
 commands =