@@ -39,7 +39,6 @@
 import infer_util as iu
 import test_util as tu
 import threading
-import concurrent.futures
 
 import tritonclient.grpc as grpcclient
 import tritonclient.http as httpclient
@@ -2626,122 +2625,6 @@ def test_load_gpu_limit(self):
         except Exception as ex:
             self.assertTrue(False, "unexpected error {}".format(ex))
 
-    def test_concurrent_load_speedup(self):
-        # Initialize client
-        try:
-            triton_client = grpcclient.InferenceServerClient("localhost:8001",
-                                                             verbose=True)
-        except Exception as ex:
-            self.assertTrue(False, "unexpected error {}".format(ex))
-        # Load both models concurrently
-        model_names = ["identity_zero_1_int32_1", "identity_zero_1_int32_2"]
-        threads = []
-        for model_name in model_names:
-            threads.append(
-                threading.Thread(target=triton_client.load_model,
-                                 args=(model_name,)))
-        start_time = time.time()
-        for thread in threads:
-            thread.start()
-        for thread in threads:
-            thread.join()
-        end_time = time.time()
-        loading_time = end_time - start_time
-        # Each of the two models has a minimum loading delay of 10 seconds
-        # Speedup is observed when the concurrent loading time < 20 seconds but
-        # use a tighter bound of 15 seconds
-        self.assertLess(loading_time, 15.0,
-                        "Concurrent loading speedup not observed")
-        # Concurrent loading time cannot be < 10 seconds
-        self.assertGreaterEqual(loading_time, 10.0,
-                                "Invalid concurrent loading time")
-        # Make sure the models are loaded
-        self.assertTrue(triton_client.is_server_live())
-        self.assertTrue(triton_client.is_server_ready())
-        for model_name in model_names:
-            self.assertTrue(triton_client.is_model_ready(model_name))
-
-    def test_concurrent_load(self):
-        # Initialize client
-        try:
-            triton_client = grpcclient.InferenceServerClient("localhost:8001",
-                                                             verbose=True)
-        except Exception as ex:
-            self.assertTrue(False, "unexpected error {}".format(ex))
-        # Load same model concurrently
-        with concurrent.futures.ThreadPoolExecutor() as pool:
-            thread_1 = pool.submit(triton_client.load_model,
-                                   "identity_zero_1_int32")
-            time.sleep(2)  # wait between load and unload
-            thread_2 = pool.submit(triton_client.load_model,
-                                   "identity_zero_1_int32")
-            thread_1.result()
-            with self.assertRaises(Exception) as ex:
-                thread_2.result()
-            self.assertEqual(
-                str(ex.exception),
-                "[StatusCode.INVALID_ARGUMENT] a related model 'identity_zero_1_int32' to a load/unload request is currently loading or unloading"
-            )
-        self.assertTrue(triton_client.is_server_live())
-        self.assertTrue(triton_client.is_server_ready())
-        self.assertTrue(triton_client.is_model_ready("identity_zero_1_int32"))
-
-    def test_concurrent_load_unload(self):
-        # Initialize client
-        try:
-            triton_client = grpcclient.InferenceServerClient("localhost:8001",
-                                                             verbose=True)
-        except Exception as ex:
-            self.assertTrue(False, "unexpected error {}".format(ex))
-        # Load identity_zero_1_int32 and unload it while it is loading
-        # The unload operation should have no effect
-        with concurrent.futures.ThreadPoolExecutor() as pool:
-            load_thread = pool.submit(triton_client.load_model,
-                                      "identity_zero_1_int32")
-            time.sleep(2)  # wait between load and unload
-            unload_thread = pool.submit(triton_client.unload_model,
-                                        "identity_zero_1_int32")
-            load_thread.result()
-            with self.assertRaises(Exception) as ex:
-                unload_thread.result()
-            self.assertEqual(
-                str(ex.exception),
-                "[StatusCode.INVALID_ARGUMENT] a related model 'identity_zero_1_int32' to a load/unload request is currently loading or unloading"
-            )
-        self.assertTrue(triton_client.is_server_live())
-        self.assertTrue(triton_client.is_server_ready())
-        self.assertTrue(triton_client.is_model_ready("identity_zero_1_int32"))
-        # Load ensemble_zero_1_float32 and unload its dependency while it is loading
-        # The unload operation should have no effect
-        with concurrent.futures.ThreadPoolExecutor() as pool:
-            load_thread = pool.submit(triton_client.load_model,
-                                      "ensemble_zero_1_float32")
-            time.sleep(2)  # wait between load and unload
-            unload_thread = pool.submit(triton_client.unload_model,
-                                        "custom_zero_1_float32")
-            load_thread.result()
-            with self.assertRaises(Exception) as ex:
-                unload_thread.result()
-            self.assertEqual(
-                str(ex.exception),
-                "[StatusCode.INVALID_ARGUMENT] a related model 'custom_zero_1_float32' to a load/unload request is currently loading or unloading"
-            )
-        self.assertTrue(triton_client.is_server_live())
-        self.assertTrue(triton_client.is_server_ready())
-        self.assertTrue(triton_client.is_model_ready("ensemble_zero_1_float32"))
-        self.assertTrue(triton_client.is_model_ready("custom_zero_1_float32"))
-        # Unload models concurrently
-        model_names = ["identity_zero_1_int32", "ensemble_zero_1_float32"]
-        with concurrent.futures.ThreadPoolExecutor() as pool:
-            threads = []
-            for model_name in model_names:
-                threads.append(
-                    pool.submit(triton_client.unload_model, model_name))
-            for thread in concurrent.futures.as_completed(threads):
-                thread.result()
-        for model_name in model_names:
-            self.assertFalse(triton_client.is_model_ready(model_name))
-
 
 if __name__ == '__main__':
     unittest.main()
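For reference, the deleted tests all exercise the same pattern: overlapping load/unload requests are issued from a thread pool, and the request that arrives while the first is still in flight is expected to be rejected while the first completes normally. Below is a minimal standalone sketch of that pattern, not part of the file above; it assumes a Triton server started with explicit model control, reachable at localhost:8001, serving the identity_zero_1_int32 model named in the diff.

import concurrent.futures
import time

import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

# Endpoint and model name are taken from the tests above; adjust to your setup.
client = grpcclient.InferenceServerClient("localhost:8001")

with concurrent.futures.ThreadPoolExecutor() as pool:
    first = pool.submit(client.load_model, "identity_zero_1_int32")
    time.sleep(2)  # let the first load get underway before the second request
    second = pool.submit(client.load_model, "identity_zero_1_int32")
    first.result()  # the first load should complete normally
    try:
        second.result()  # the overlapping request is expected to be rejected
    except InferenceServerException as e:
        print("overlapping request rejected: {}".format(e))

# The model should still end up loaded by the first request.
assert client.is_model_ready("identity_zero_1_int32")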