Skip to content

Commit 27b8792

Browse files
committed
Update meta/llama models to latest supported and add AI functions tests
+ Update all llama3 8B models spec to use meta/llama3-8b-instruct
+ Add ai functions tests

Change-Id: I06a427bf5bd90aff6cc8e0c7ab8c810b6dc07dd1
Reviewed-on: https://review.couchbase.org/c/perfrunner/+/234284
Reviewed-by: Daniel Nagy <daniel.nagy@couchbase.com>
Tested-by: Build Bot <build@couchbase.com>
1 parent 069b476 commit 27b8792

15 files changed

Lines changed: 272 additions & 47 deletions

cloud/infrastructure/ai_services/capella_aws_1c_1llm_g6_xlarge_llama_3_2_3b_instruct.spec renamed to cloud/infrastructure/ai_services/capella_aws_1c_1llm_g6_xlarge_llama_3_8b_instruct.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ volume_size = 100
2424
iops = 3000
2525

2626
[text-generation]
27-
model_name = meta/llama-3.2-3b-instruct
27+
model_name = meta/llama3-8b-instruct
2828
instance_type = g6.xlarge
2929
instance_capacity = 1
3030

cloud/infrastructure/ai_services/capella_aws_1c_1llm_g6e_12xlarge_llama_3_1_8b_instruct.spec renamed to cloud/infrastructure/ai_services/capella_aws_1c_1llm_g6e_12xlarge_llama_3_8b_instruct.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ volume_size = 100
2424
iops = 3000
2525

2626
[text-generation]
27-
model_name = meta/llama-3.1-8b-instruct
27+
model_name = meta/llama3-8b-instruct
2828
instance_type = g6e.12xlarge
2929
instance_capacity = 1
3030

cloud/infrastructure/ai_services/capella_aws_1c_1llm_g6e_xlarge_llama_3_1_8b_instruct.spec

Lines changed: 0 additions & 34 deletions
This file was deleted.

cloud/infrastructure/ai_services/capella_aws_1c_1llm_g6e_xlarge_llama_3_2_3b_instruct.spec renamed to cloud/infrastructure/ai_services/capella_aws_1c_1llm_g6e_xlarge_llama_3_8b_instruct.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ volume_size = 100
2424
iops = 3000
2525

2626
[text-generation]
27-
model_name = meta/llama-3.2-3b-instruct
27+
model_name = meta/llama3-8b-instruct
2828
instance_type = g6e.xlarge
2929
instance_capacity = 1
3030

cloud/infrastructure/ai_services/capella_aws_1c_1llm_p4de_24xlarge_llama_3_1_8b_instruct.spec renamed to cloud/infrastructure/ai_services/capella_aws_1c_1llm_p4de_24xlarge_llama_3_3_70b_instruct.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ volume_size = 100
2424
iops = 3000
2525

2626
[text-generation]
27-
model_name = meta-llama/Llama-3.1-8B-Instruct
27+
model_name = meta/llama3.3-70b-instruct
2828
instance_type = p4de.24xlarge
2929
instance_capacity = 1
3030

cloud/infrastructure/ai_services/capella_aws_1c_1llm_p4de_24xlarge_llama_3_70b_instruct.spec renamed to cloud/infrastructure/ai_services/capella_aws_1c_1llm_p4de_24xlarge_llama_3_8b_instruct.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ volume_size = 100
2424
iops = 3000
2525

2626
[text-generation]
27-
model_name = meta/llama3-70b-instruct
27+
model_name = meta/llama3-8b-instruct
2828
instance_type = p4de.24xlarge
2929
instance_capacity = 1
3030

cloud/infrastructure/ai_services/capella_aws_1c_1llm_p5_48xlarge_llama_3_70b_instruct.spec renamed to cloud/infrastructure/ai_services/capella_aws_1c_1llm_p5_48xlarge_llama_3_3_70b_instruct.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ volume_size = 100
2424
iops = 3000
2525

2626
[text-generation]
27-
model_name = meta/llama3-70b-instruct
27+
model_name = meta/llama3.3-70b-instruct
2828
instance_type = p5.48xlarge
2929
instance_capacity = 1
3030

cloud/infrastructure/ai_services/capella_aws_7s_1c_c5_2xlarge_1llm_g6e_xlarge_llama_3_1_8b_instruct.spec renamed to cloud/infrastructure/ai_services/capella_aws_7s_1c_c5_2xlarge_1llm_g6e_xlarge_llama_3_8b_instruct.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ data = var/cb/data
5151
source = default_capella
5252

5353
[text-generation]
54-
model_name = meta/llama-3.1-8b-instruct
54+
model_name = meta/llama3-8b-instruct
5555
instance_type = g6e.xlarge
5656
instance_capacity = 1
5757

perfrunner/helpers/monitor.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1870,3 +1870,28 @@ def wait_for_ai_gateway_models_health(self, model_status_func: Callable[[], dict
18701870
time.sleep(self.POLLING_INTERVAL)
18711871

18721872
logger.info(f"AI Gateway models are healthy: {misc.pretty_dict(models_status)}")
1873+
1874+
def wait_for_ai_functions_healthy(self, host: str, deployed_functions: list[str]):
    """Poll the AI functions listing until every deployed function is healthy.

    Each attempt calls ``self.rest.list_ai_functions(host)`` and collects the
    ``data.name`` of every entry whose ``data.functionStatus`` equals
    ``"healthy"``. The wait succeeds once all names in ``deployed_functions``
    appear in that collection.

    Args:
        host: Node passed through to ``self.rest.list_ai_functions``.
        deployed_functions: Names of the functions that must report healthy.
            An empty list returns immediately without polling.

    Raises:
        Exception: If the requested functions are still not all healthy after
            ``self.MAX_RETRY`` polling attempts.
    """
    logger.info(f"Waiting for AI functions to be healthy: {deployed_functions}")
    expected_functions = set(deployed_functions)
    retries = 0
    while deployed_functions:
        try:
            functions = self.rest.list_ai_functions(host)
            healthy_functions = [
                f.get("data", {}).get("name")
                for f in functions
                if f.get("data", {}).get("functionStatus") == "healthy"
            ]
            # Decide by name, not by count: the listing may contain healthy
            # functions that were not requested here, so comparing lengths can
            # report success too early or never report it at all.
            pending_functions = sorted(expected_functions.difference(healthy_functions))
            if not pending_functions:
                logger.info(f"All AI functions are healthy: {healthy_functions}")
                return

            # Throttle the progress message to one in every 60 attempts.
            if retries % 60 == 0:
                logger.info(f"AI functions not healthy yet: {pending_functions}")
        except Exception as e:
            # Best effort: a failed or malformed status response is logged and
            # consumes one attempt instead of aborting the wait.
            logger.error(f"Error while checking AI functions health status: {e}")
        retries += 1
        if retries >= self.MAX_RETRY:
            raise Exception(f"AI functions are not healthy after {retries} retries.")
        time.sleep(self.MONITORING_DELAY)

perfrunner/tests/ai_services.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -563,16 +563,23 @@ def create_openai_integration(self, cluster_uuid: str):
563563
)
564564
logger.info(f"Created openAI integration: {self.openai_integration_id}")
565565

566+
@timeit
566567
def deploy_ai_functions(self):
567568
uuid = self.cluster_spec.infrastructure_settings.get("uuid", uuid4().hex[:6])
568569
self.create_openai_integration(uuid)
569570
payload = self._create_ai_functions_payload()
570571
logger.info(f"Deploying AI functions with payload: {pretty_dict(payload)}")
571572
self.rest.create_ai_functions(self.master_node, payload)
572-
# Can't deterministically monitor deployment due to AV-108636, so wait for 30 seconds
573-
sleep(30)
573+
self.monitor.wait_for_ai_functions_healthy(
574+
self.master_node, self.ai_services_settings.functions_names
575+
)
574576

575577
def run(self):
578+
functions_deployment_time = self.deploy_ai_functions()
579+
logger.info(f"AI Functions deployment time: {functions_deployment_time} seconds")
580+
# Workaround for AV-110058
581+
self.rest.refresh_cluster_allowlist(self.master_node)
582+
576583
self.load()
577584
self.wait_for_persistence()
578585
self.check_num_items()
@@ -581,10 +588,6 @@ def run(self):
581588
self.wait_for_indexing()
582589
self.store_plans()
583590

584-
self.deploy_ai_functions()
585-
# Workaround for AV-110058
586-
self.rest.refresh_cluster_allowlist(self.master_node)
587-
588591
self.access_bg()
589592
self.access()
590593

0 commit comments

Comments
 (0)