@@ -327,14 +327,9 @@ def test_recommend_full_flow(self, mock_httpx: MagicMock) -> None:
327327
328328 @patch ("rhoai_mcp.composites.neuralnav.client.httpx" )
329329 def test_recommend_with_overrides (self , mock_httpx : MagicMock ) -> None :
330- """Overrides replace extracted intent values ."""
330+ """When both use_case and user_count overrides are provided, extraction is skipped ."""
331331 mock_client = MagicMock ()
332332
333- extract_resp = MagicMock ()
334- extract_resp .status_code = 200
335- extract_resp .json .return_value = SAMPLE_INTENT
336- extract_resp .raise_for_status = MagicMock ()
337-
338333 slo_resp = MagicMock ()
339334 slo_resp .status_code = 200
340335 slo_resp .json .return_value = SAMPLE_SLO_DEFAULTS
@@ -355,7 +350,8 @@ def test_recommend_with_overrides(self, mock_httpx: MagicMock) -> None:
355350 ranked_resp .json .return_value = SAMPLE_RANKED_RESPONSE
356351 ranked_resp .raise_for_status = MagicMock ()
357352
358- mock_client .post .side_effect = [extract_resp , ranked_resp ]
353+ # Only one POST (ranked-recommend), extraction is skipped
354+ mock_client .post .side_effect = [ranked_resp ]
359355 mock_client .get .side_effect = [slo_resp , workload_resp , rps_resp ]
360356
361357 mock_httpx .Client .return_value .__enter__ = MagicMock (return_value = mock_client )
@@ -372,6 +368,8 @@ def test_recommend_with_overrides(self, mock_httpx: MagicMock) -> None:
372368 # Verify the overridden use_case was used for SLO defaults fetch
373369 get_calls = mock_client .get .call_args_list
374370 assert "code_completion" in get_calls [0 ].args [0 ]
371+ # Extraction was skipped — only one POST call (ranked-recommend)
372+ assert mock_client .post .call_count == 1
375373
376374 @patch ("rhoai_mcp.composites.neuralnav.client.httpx" )
377375 def test_recommend_api_error (self , mock_httpx : MagicMock ) -> None :
@@ -625,6 +623,150 @@ def test_recommend_forwards_constraints(self, mock_httpx: MagicMock) -> None:
625623 assert payload ["percentile" ] == "p99"
626624
627625
class TestNeuralNavClientRecommendExtractionBypass:
    """Tests for skipping extraction when overrides are sufficient.

    Every test patches ``httpx`` in the client module and queues canned
    responses on a mocked client.  The length of the POST queue encodes the
    expectation: one entry (ranked-recommend only) when extraction should be
    bypassed, two entries (extract, then ranked-recommend) when it should run.
    """

    @staticmethod
    def _ok_response(payload: object) -> MagicMock:
        """Return a mocked HTTP 200 response whose ``json()`` yields *payload*."""
        response = MagicMock()
        response.status_code = 200
        response.json.return_value = payload
        response.raise_for_status = MagicMock()
        return response

    @classmethod
    def _install_client(cls, mock_httpx: MagicMock, *, with_extraction: bool) -> MagicMock:
        """Wire ``mock_httpx.Client()`` to yield a client with queued responses.

        Args:
            mock_httpx: The patched ``httpx`` module injected by ``@patch``.
            with_extraction: When true, an extract response is queued ahead of
                the ranked-recommend response on ``post``; otherwise only the
                ranked-recommend response is queued.

        Returns:
            The mocked client handed out by the ``httpx.Client`` context manager.
        """
        mock_client = MagicMock()

        post_queue = [cls._ok_response(SAMPLE_RANKED_RESPONSE)]
        if with_extraction:
            post_queue.insert(0, cls._ok_response(SAMPLE_INTENT))
        mock_client.post.side_effect = post_queue

        # GET responses are consumed in this order: SLO defaults, workload
        # profile, expected RPS.
        mock_client.get.side_effect = [
            cls._ok_response(SAMPLE_SLO_DEFAULTS),
            cls._ok_response(SAMPLE_WORKLOAD_PROFILE),
            cls._ok_response(SAMPLE_EXPECTED_RPS),
        ]

        mock_httpx.Client.return_value.__enter__ = MagicMock(return_value=mock_client)
        mock_httpx.Client.return_value.__exit__ = MagicMock(return_value=False)
        return mock_client

    @patch("rhoai_mcp.composites.neuralnav.client.httpx")
    def test_recommend_skips_extraction_when_all_overrides_provided(
        self, mock_httpx: MagicMock
    ) -> None:
        """When all overrides are provided, extraction is skipped."""
        mock_client = self._install_client(mock_httpx, with_extraction=False)

        client = NeuralNavClient("http://localhost:8000")
        result = client.recommend(
            "I need a chatbot for 1000 users",
            use_case_override="chatbot_conversational",
            user_count_override=1000,
            gpu_types_override=["A100"],
        )

        # Only one POST call was made (ranked-recommend, not extract)
        assert mock_client.post.call_count == 1
        assert result.specification["use_case"] == "chatbot_conversational"
        assert result.specification["user_count"] == 1000

    @patch("rhoai_mcp.composites.neuralnav.client.httpx")
    def test_recommend_still_extracts_when_only_use_case_override(
        self, mock_httpx: MagicMock
    ) -> None:
        """When only use_case override is provided, extraction still runs for user_count."""
        mock_client = self._install_client(mock_httpx, with_extraction=True)

        client = NeuralNavClient("http://localhost:8000")
        result = client.recommend(
            "I need a chatbot",
            use_case_override="code_completion",
        )

        # Two POST calls: extract + ranked-recommend
        assert mock_client.post.call_count == 2
        # Use case override is applied
        assert result.specification["use_case"] == "code_completion"

    @patch("rhoai_mcp.composites.neuralnav.client.httpx")
    def test_recommend_skips_extraction_uses_gpu_override(self, mock_httpx: MagicMock) -> None:
        """When extraction is skipped, gpu_types_override is used."""
        mock_client = self._install_client(mock_httpx, with_extraction=False)

        client = NeuralNavClient("http://localhost:8000")
        client.recommend(
            "I need a chatbot",
            use_case_override="chatbot_conversational",
            user_count_override=1000,
            gpu_types_override=["H100"],
        )

        # Extraction was skipped: the single POST is the ranked-recommend call
        assert mock_client.post.call_count == 1
        # Verify the GPU override was forwarded in that call's JSON body
        payload = mock_client.post.call_args.kwargs["json"]
        assert payload["preferred_gpu_types"] == ["H100"]
768+
769+
628770class TestNeuralNavClientRequestErrors :
629771 """Tests for _request error handling edge cases."""
630772
0 commit comments