@@ -678,6 +678,37 @@ def test_lmcache_local_storage(self):
678678 ])
679679 client .run ("vllm_lmcache llama3-8b-lmcache-local-storage" .split ())
680680
def test_lmcache_s3(self):
    """Launch llama3-8b with the S3 LMCache config and run the client check.

    The container is started with ``LMCACHE_CONFIG_FILE`` pointing at
    ``lmcache_s3.yaml`` and a fixed ``PYTHONHASHSEED``, then the
    ``vllm_lmcache`` client handler is run against it.
    """
    model_name = "llama3-8b-lmcache-s3"
    launch_env = [
        "LMCACHE_CONFIG_FILE=/opt/ml/model/test/lmcache_s3.yaml",
        "PYTHONHASHSEED=0",
    ]
    with Runner("lmi", model_name) as r:
        prepare.build_vllm_async_model(model_name)
        r.launch(env_vars=launch_env)
        # Same argv the original produced by splitting the command string.
        client.run(["vllm_lmcache", model_name])
689+
def test_lmcache_redis(self):
    """Launch llama3-8b with the Redis LMCache config and run the client check.

    A throwaway ``redis:alpine`` container is published on host port 6379
    for the duration of the test and stopped in ``finally``.

    Raises:
        subprocess.CalledProcessError: if the Redis container cannot be
            started (for example because port 6379 is already bound).
    """
    # `docker run -d` prints the container id and exits. run() waits for it
    # (no unreaped child / open pipe), and check=True fails the test right
    # away instead of continuing with an empty container id.
    started = subprocess.run(
        ["docker", "run", "-d", "--rm", "-p", "6379:6379", "redis:alpine"],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        check=True)
    container_id = started.stdout.decode().strip()

    try:
        # Inside the try so an interrupt during the wait still cleans up.
        time.sleep(3)  # Wait for Redis to start

        with Runner("lmi", "llama3-8b-lmcache-redis") as r:
            prepare.build_vllm_async_model("llama3-8b-lmcache-redis")
            r.launch(env_vars=[
                "LMCACHE_CONFIG_FILE=/opt/ml/model/test/lmcache_redis.yaml"
            ])
            client.run("vllm_lmcache llama3-8b-lmcache-redis".split())
    finally:
        # Cleanup Redis container (best effort; --rm removes it once stopped)
        subprocess.run(["docker", "stop", container_id],
                       stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL)
711+
681712 def test_lmcache_missing_role (self ):
682713 with Runner ("lmi" , "llama3-8b-lmcache-missing-role" ) as r :
683714 prepare .build_vllm_async_model ("llama3-8b-lmcache-missing-role" )
@@ -714,6 +745,40 @@ def test_lmcache_performance_local_storage(self):
714745 "vllm_lmcache_performance llama3-8b-lmcache-local-storage" .
715746 split ())
716747
748+ @pytest .mark .vllm
749+ @pytest .mark .gpu_4
750+ class TestVllmLmcachePerformanceBenchmarks_g6 :
def test_lmcache_performance_s3(self):
    """Run the ``vllm_lmcache_performance`` client check with the S3 config.

    The llama3-8b container is launched with ``LMCACHE_CONFIG_FILE``
    pointing at ``lmcache_s3.yaml``.
    """
    model_name = "llama3-8b-lmcache-s3"
    launch_env = ["LMCACHE_CONFIG_FILE=/opt/ml/model/test/lmcache_s3.yaml"]
    with Runner("lmi", model_name) as r:
        prepare.build_vllm_async_model(model_name)
        r.launch(env_vars=launch_env)
        # Same argv the original produced by splitting the command string.
        client.run(["vllm_lmcache_performance", model_name])
758+
def test_lmcache_performance_redis(self):
    """Run the ``vllm_lmcache_performance`` client check with the Redis config.

    A throwaway ``redis:alpine`` container is published on host port 6379
    for the duration of the test and stopped in ``finally``.

    Raises:
        subprocess.CalledProcessError: if the Redis container cannot be
            started (for example because port 6379 is already bound).
    """
    # `docker run -d` prints the container id and exits. run() waits for it
    # (no unreaped child / open pipe), and check=True fails the test right
    # away instead of continuing with an empty container id.
    started = subprocess.run(
        ["docker", "run", "-d", "--rm", "-p", "6379:6379", "redis:alpine"],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        check=True)
    container_id = started.stdout.decode().strip()

    try:
        # Inside the try so an interrupt during the wait still cleans up.
        time.sleep(3)  # Wait for Redis to start

        with Runner("lmi", "llama3-8b-lmcache-redis") as r:
            prepare.build_vllm_async_model("llama3-8b-lmcache-redis")
            r.launch(env_vars=[
                "LMCACHE_CONFIG_FILE=/opt/ml/model/test/lmcache_redis.yaml"
            ])
            client.run(
                "vllm_lmcache_performance llama3-8b-lmcache-redis".split())
    finally:
        # Cleanup Redis container (best effort; --rm removes it once stopped)
        subprocess.run(["docker", "stop", container_id],
                       stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL)
781+
717782 def test_lmcache_long_doc_qa_qwen (self ):
718783 """Run the lmcache long_doc_qa benchmark inside the container
719784
@@ -755,6 +820,100 @@ def test_lmcache_long_doc_qa_qwen(self):
755820 raise RuntimeError (
756821 f"Benchmark failed with return code { result } " )
757822
def test_lmcache_s3_benchmark(self):
    """
    Test LMCache with S3 storage backend for long document QA.
    This benchmark tests S3 performance for distributed caching scenarios.

    Raises:
        RuntimeError: if the benchmark script exits with a non-zero code.
    """
    # Local import: this block did not previously depend on subprocess.
    import subprocess

    with Runner('lmi', 'qwen3-8b-lmcache-s3') as r:
        prepare.build_vllm_async_model("qwen3-8b-lmcache-s3")

        r.launch(
            env_vars=[
                "LMCACHE_CONFIG_FILE=/opt/ml/model/test/lmcache_s3.yaml",
                "PYTHONHASHSEED=0"
            ])

        # Run benchmark with same config for comparison
        benchmark_script = "lmcache_configs/djl_long_doc_qa_clean.py"
        benchmark_args = [
            "python", benchmark_script,
            "--model", "Qwen/Qwen3-8B",
            "--host", "localhost",
            "--port", "8080",
            "--num-documents", "46",
            "--document-length", "10000",
            "--output-len", "100",
            "--repeat-count", "1",
            "--repeat-mode", "tile",
            "--max-inflight-requests", "4",
        ]
        # Shell-style rendering kept only for the log line below.
        benchmark_cmd = "PYTHONHASHSEED=0 " + " ".join(benchmark_args)

        logging.info(
            f"Running S3 storage benchmark from host: {benchmark_cmd}")
        # os.system() returns an encoded wait status on POSIX, so the old
        # error message reported e.g. 256 for exit code 1. subprocess.run
        # with an argv list avoids the shell and yields the real exit code.
        result = subprocess.run(
            benchmark_args,
            env=dict(os.environ, PYTHONHASHSEED="0")).returncode

        if result != 0:
            raise RuntimeError(
                f"S3 storage benchmark failed with return code {result}")
        logging.info("S3 benchmark PASSED")
862+
def test_lmcache_redis_benchmark(self):
    """
    Test LMCache with Redis storage backend for long document QA.
    This benchmark tests Redis performance for distributed caching scenarios.

    A throwaway ``redis:alpine`` container is published on host port 6379
    for the duration of the test and stopped in ``finally``.

    Raises:
        subprocess.CalledProcessError: if the Redis container cannot be
            started (for example because port 6379 is already bound).
        RuntimeError: if the benchmark script exits with a non-zero code.
    """
    # `docker run -d` prints the container id and exits. run() waits for it
    # (no unreaped child / open pipe), and check=True fails the test right
    # away instead of continuing with an empty container id.
    started = subprocess.run(
        ["docker", "run", "-d", "--rm", "-p", "6379:6379", "redis:alpine"],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        check=True)
    container_id = started.stdout.decode().strip()

    try:
        # Inside the try so an interrupt during the wait still cleans up.
        time.sleep(3)  # Wait for Redis to start

        with Runner('lmi', 'qwen3-8b-lmcache-redis') as r:
            prepare.build_vllm_async_model("qwen3-8b-lmcache-redis")

            r.launch(
                env_vars=[
                    "LMCACHE_CONFIG_FILE=/opt/ml/model/test/lmcache_redis.yaml",
                    "PYTHONHASHSEED=0"
                ])

            # Run benchmark with same config for comparison
            benchmark_script = "lmcache_configs/djl_long_doc_qa_clean.py"
            benchmark_args = [
                "python", benchmark_script,
                "--model", "Qwen/Qwen3-8B",
                "--host", "localhost",
                "--port", "8080",
                "--num-documents", "46",
                "--document-length", "10000",
                "--output-len", "100",
                "--repeat-count", "1",
                "--repeat-mode", "tile",
                "--max-inflight-requests", "4",
            ]
            # Shell-style rendering kept only for the log line below.
            benchmark_cmd = "PYTHONHASHSEED=0 " + " ".join(benchmark_args)

            logging.info(
                f"Running Redis storage benchmark from host: {benchmark_cmd}")
            # os.system() returns an encoded wait status on POSIX, so the
            # old error message reported e.g. 256 for exit code 1.
            # subprocess.run with an argv list avoids the shell and yields
            # the real exit code.
            result = subprocess.run(
                benchmark_args,
                env=dict(os.environ, PYTHONHASHSEED="0")).returncode

            if result != 0:
                raise RuntimeError(
                    f"Redis storage benchmark failed with return code {result}"
                )
            logging.info("Redis benchmark PASSED")
    finally:
        # Cleanup Redis container (best effort; --rm removes it once stopped)
        subprocess.run(["docker", "stop", container_id],
                       stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL)
916+
758917 def test_lmcache_ebs_benchmark (self ):
759918 """
760919 Test LMCache with disk storage backend (EBS) instead of NVMe.
@@ -1011,48 +1170,72 @@ class TestVllmLmcacheScaling_g6:
10111170
def test_qwen25_1_5b(self):
    """Test 1A: 200 docs × 128K document length with the Redis LMCache config.

    A throwaway ``redis:alpine`` container is published on host port 6379
    for the duration of the test. It is stopped in ``finally``; without
    that the container outlives the test and keeps port 6379 bound, so the
    next test's ``docker run -p 6379:6379`` cannot start.

    Raises:
        subprocess.CalledProcessError: if the Redis container cannot be
            started.
    """
    # `docker run -d` prints the container id and exits. run() waits for it,
    # and check=True fails fast instead of running without Redis.
    started = subprocess.run(
        ["docker", "run", "-d", "--rm", "-p", "6379:6379", "redis:alpine"],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        check=True)
    container_id = started.stdout.decode().strip()

    try:
        time.sleep(3)  # Wait for Redis to start

        with Runner("lmi", "qwen2.5-1.5b-1a") as r:
            prepare.build_vllm_async_model("qwen2.5-1.5b-lmcache")
            r.launch(env_vars=[
                "LMCACHE_CONFIG_FILE=/opt/ml/model/test/lmcache_redis.yaml",
                "PYTHONHASHSEED=0", "CUDA_VISIBLE_DEVICES=0"
            ])
            benchmark_cmd = (
                "python lmcache_configs/djl_long_doc_qa_clean.py "
                "--model Qwen/Qwen2.5-1.5B --host localhost --port 8080 "
                "--num-documents 200 --document-length 128000 --output-len 100 "
                "--repeat-count 1 --repeat-mode tile --max-inflight-requests 4"
            )
            # NOTE(review): the exit status is ignored, so a failing
            # benchmark does not fail this test - confirm that is intended.
            os.system(benchmark_cmd)
    finally:
        # Cleanup Redis container (best effort; --rm removes it once stopped)
        subprocess.run(["docker", "stop", container_id],
                       stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL)
10271194
def test_qwen25_7b(self):
    """Test 2A: 24 docs × 128K document length with the Redis LMCache config.

    A throwaway ``redis:alpine`` container is published on host port 6379
    for the duration of the test. It is stopped in ``finally``; without
    that the container outlives the test and keeps port 6379 bound, so the
    next test's ``docker run -p 6379:6379`` cannot start.

    Raises:
        subprocess.CalledProcessError: if the Redis container cannot be
            started.
    """
    # `docker run -d` prints the container id and exits. run() waits for it,
    # and check=True fails fast instead of running without Redis.
    started = subprocess.run(
        ["docker", "run", "-d", "--rm", "-p", "6379:6379", "redis:alpine"],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        check=True)
    container_id = started.stdout.decode().strip()

    try:
        time.sleep(5)  # Wait for Redis to start

        with Runner("lmi", "qwen2.5-7b-2a") as r:
            prepare.build_vllm_async_model("qwen2.5-7b-lmcache")
            r.launch(env_vars=[
                "LMCACHE_CONFIG_FILE=/opt/ml/model/test/lmcache_redis.yaml",
                "PYTHONHASHSEED=0", "CUDA_VISIBLE_DEVICES=0"
            ])
            benchmark_cmd = (
                "python lmcache_configs/djl_long_doc_qa_clean.py "
                "--model Qwen/Qwen2.5-7B --host localhost --port 8080 "
                "--num-documents 24 --document-length 128000 --output-len 100 "
                "--repeat-count 1 --repeat-mode tile --max-inflight-requests 4"
            )
            # NOTE(review): the exit status is ignored, so a failing
            # benchmark does not fail this test - confirm that is intended.
            os.system(benchmark_cmd)
    finally:
        # Cleanup Redis container (best effort; --rm removes it once stopped)
        subprocess.run(["docker", "stop", container_id],
                       stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL)
10431218
def test_qwen25_72b(self):
    """Test 3A: 40 docs × 20K document length with the Redis LMCache config.

    Runs on four GPUs (``CUDA_VISIBLE_DEVICES=0,1,2,3``). A throwaway
    ``redis:alpine`` container is published on host port 6379 for the
    duration of the test. It is stopped in ``finally``; without that the
    container outlives the test and keeps port 6379 bound for later tests.

    Raises:
        subprocess.CalledProcessError: if the Redis container cannot be
            started.
    """
    # `docker run -d` prints the container id and exits. run() waits for it,
    # and check=True fails fast instead of running without Redis.
    started = subprocess.run(
        ["docker", "run", "-d", "--rm", "-p", "6379:6379", "redis:alpine"],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        check=True)
    container_id = started.stdout.decode().strip()

    try:
        time.sleep(5)  # Wait for Redis to start

        with Runner("lmi", "qwen2.5-72b-3a-lmcache") as r:
            prepare.build_vllm_async_model("qwen2.5-72b-lmcache")
            r.launch(env_vars=[
                "LMCACHE_CONFIG_FILE=/opt/ml/model/test/lmcache_redis.yaml",
                "PYTHONHASHSEED=0", "CUDA_VISIBLE_DEVICES=0,1,2,3"
            ])
            benchmark_cmd = (
                "python lmcache_configs/djl_long_doc_qa_clean.py "
                "--model Qwen/Qwen2.5-72B --host localhost --port 8080 "
                "--num-documents 40 --document-length 20000 --output-len 100 "
                "--repeat-count 1 --repeat-mode tile --max-inflight-requests 4"
            )
            # NOTE(review): the exit status is ignored, so a failing
            # benchmark does not fail this test - confirm that is intended.
            os.system(benchmark_cmd)
    finally:
        # Cleanup Redis container (best effort; --rm removes it once stopped)
        subprocess.run(["docker", "stop", container_id],
                       stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL)
0 commit comments