Skip to content

Commit 3cfeca2

Browse files
nv-kmcgill53 and mc-nv
authored and committed
Revert "Add concurrent loading speedup test (#5032)" (#5179)
This reverts commit 8a1a015.
1 parent c800283 commit 3cfeca2

File tree

2 files changed

+0
-217
lines changed

2 files changed

+0
-217
lines changed

qa/L0_lifecycle/lifecycle_test.py

-117
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
import infer_util as iu
4040
import test_util as tu
4141
import threading
42-
import concurrent.futures
4342

4443
import tritonclient.grpc as grpcclient
4544
import tritonclient.http as httpclient
@@ -2626,122 +2625,6 @@ def test_load_gpu_limit(self):
26262625
except Exception as ex:
26272626
self.assertTrue(False, "unexpected error {}".format(ex))
26282627

2629-
def test_concurrent_load_speedup(self):
    """Verify that loading two models concurrently overlaps their load delays.

    Per the original timing comments, each model has a minimum loading
    delay of 10 seconds, so sequential loading would take >= 20 seconds;
    concurrent loading is expected to finish in the [10, 15) second range.
    """
    # Initialize client.
    try:
        triton_client = grpcclient.InferenceServerClient("localhost:8001",
                                                         verbose=True)
    except Exception as ex:
        # self.fail() is the idiomatic unittest way to abort with a
        # message (assertTrue(False, ...) obscures the intent).
        self.fail("unexpected error {}".format(ex))
    # Load both models concurrently.
    model_names = ["identity_zero_1_int32_1", "identity_zero_1_int32_2"]
    threads = [
        threading.Thread(target=triton_client.load_model, args=(model_name,))
        for model_name in model_names
    ]
    # Use a monotonic clock for elapsed time: time.time() is wall-clock
    # and can jump on NTP/clock adjustments, which would corrupt the
    # timing bounds asserted below.
    start_time = time.monotonic()
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    loading_time = time.monotonic() - start_time
    # Each of the two models has a minimum loading delay of 10 seconds.
    # Speedup is observed when the concurrent loading time < 20 seconds,
    # but use a tighter bound of 15 seconds.
    self.assertLess(loading_time, 15.0,
                    "Concurrent loading speedup not observed")
    # Concurrent loading time cannot be < 10 seconds.
    self.assertGreaterEqual(loading_time, 10.0,
                            "Invalid concurrent loading time")
    # Make sure the models are loaded.
    self.assertTrue(triton_client.is_server_live())
    self.assertTrue(triton_client.is_server_ready())
    for model_name in model_names:
        self.assertTrue(triton_client.is_model_ready(model_name))
def test_concurrent_load(self):
    """Verify that a second load request for a model that is already
    loading is rejected, while the first load request succeeds and the
    model ends up ready."""
    # Initialize client.
    try:
        triton_client = grpcclient.InferenceServerClient("localhost:8001",
                                                         verbose=True)
    except Exception as ex:
        # self.fail() is clearer than assertTrue(False, ...).
        self.fail("unexpected error {}".format(ex))
    # Load the same model concurrently; the second request must be
    # rejected because the model is still loading.
    with concurrent.futures.ThreadPoolExecutor() as pool:
        thread_1 = pool.submit(triton_client.load_model,
                               "identity_zero_1_int32")
        time.sleep(2)  # wait between load and unload
        thread_2 = pool.submit(triton_client.load_model,
                               "identity_zero_1_int32")
        thread_1.result()
        # Use a distinct name (ctx) so the outer `ex` from the except
        # clause above is not shadowed.
        with self.assertRaises(Exception) as ctx:
            thread_2.result()
        self.assertEqual(
            str(ctx.exception),
            "[StatusCode.INVALID_ARGUMENT] a related model 'identity_zero_1_int32' to a load/unload request is currently loading or unloading"
        )
    # The first load must have completed successfully.
    self.assertTrue(triton_client.is_server_live())
    self.assertTrue(triton_client.is_server_ready())
    self.assertTrue(triton_client.is_model_ready("identity_zero_1_int32"))
def test_concurrent_load_unload(self):
    """Verify that unload requests issued while a related model is still
    loading are rejected and have no effect, and that concurrent unloads
    of independent models succeed."""
    # Initialize client.
    try:
        triton_client = grpcclient.InferenceServerClient("localhost:8001",
                                                         verbose=True)
    except Exception as ex:
        # self.fail() is clearer than assertTrue(False, ...).
        self.fail("unexpected error {}".format(ex))
    # Load identity_zero_1_int32 and unload it while it is loading.
    # The unload operation should have no effect.
    with concurrent.futures.ThreadPoolExecutor() as pool:
        load_thread = pool.submit(triton_client.load_model,
                                  "identity_zero_1_int32")
        time.sleep(2)  # wait between load and unload
        unload_thread = pool.submit(triton_client.unload_model,
                                    "identity_zero_1_int32")
        load_thread.result()
        # Use a distinct name (ctx) so the outer `ex` from the except
        # clause above is not shadowed.
        with self.assertRaises(Exception) as ctx:
            unload_thread.result()
        self.assertEqual(
            str(ctx.exception),
            "[StatusCode.INVALID_ARGUMENT] a related model 'identity_zero_1_int32' to a load/unload request is currently loading or unloading"
        )
    self.assertTrue(triton_client.is_server_live())
    self.assertTrue(triton_client.is_server_ready())
    self.assertTrue(triton_client.is_model_ready("identity_zero_1_int32"))
    # Load ensemble_zero_1_float32 and unload its dependency while it is
    # loading. The unload operation should have no effect.
    with concurrent.futures.ThreadPoolExecutor() as pool:
        load_thread = pool.submit(triton_client.load_model,
                                  "ensemble_zero_1_float32")
        time.sleep(2)  # wait between load and unload
        unload_thread = pool.submit(triton_client.unload_model,
                                    "custom_zero_1_float32")
        load_thread.result()
        with self.assertRaises(Exception) as ctx:
            unload_thread.result()
        self.assertEqual(
            str(ctx.exception),
            "[StatusCode.INVALID_ARGUMENT] a related model 'custom_zero_1_float32' to a load/unload request is currently loading or unloading"
        )
    self.assertTrue(triton_client.is_server_live())
    self.assertTrue(triton_client.is_server_ready())
    self.assertTrue(triton_client.is_model_ready("ensemble_zero_1_float32"))
    self.assertTrue(triton_client.is_model_ready("custom_zero_1_float32"))
    # Unload models concurrently; both unloads are expected to succeed.
    model_names = ["identity_zero_1_int32", "ensemble_zero_1_float32"]
    with concurrent.futures.ThreadPoolExecutor() as pool:
        threads = []
        for model_name in model_names:
            threads.append(
                pool.submit(triton_client.unload_model, model_name))
        for thread in concurrent.futures.as_completed(threads):
            thread.result()
    for model_name in model_names:
        self.assertFalse(triton_client.is_model_ready(model_name))
27452628

27462629
if __name__ == '__main__':
27472630
unittest.main()

qa/L0_lifecycle/test.sh

-100
Original file line numberDiff line numberDiff line change
@@ -1650,8 +1650,6 @@ fi
16501650
kill $SERVER_PID
16511651
wait $SERVER_PID
16521652

1653-
LOG_IDX=$((LOG_IDX+1))
1654-
16551653
# LifeCycleTest.test_load_gpu_limit
16561654
# dependency of the Python model to be used
16571655
pip install cuda-python
@@ -1675,8 +1673,6 @@ elif [ `grep -c "expects device ID >= 0, got -1" $SERVER_LOG` == "0" ]; then
16751673
RET=1
16761674
fi
16771675

1678-
LOG_IDX=$((LOG_IDX+1))
1679-
16801676
SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit --model-load-gpu-limit 0:-0.4"
16811677
SERVER_LOG="./inference_server_$LOG_IDX.log"
16821678
run_server
@@ -1691,8 +1687,6 @@ elif [ `grep -c "expects limit fraction to be in range \[0.0, 1.0\], got -0.4" $
16911687
RET=1
16921688
fi
16931689

1694-
LOG_IDX=$((LOG_IDX+1))
1695-
16961690
# Run server to stop model loading if > 60% of GPU 0 memory is used
16971691
SERVER_ARGS="--model-repository=`pwd`/models --model-control-mode=explicit --model-load-gpu-limit 0:0.6"
16981692
SERVER_LOG="./inference_server_$LOG_IDX.log"
@@ -1711,100 +1705,6 @@ set -e
17111705
kill $SERVER_PID
17121706
wait $SERVER_PID
17131707

1714-
LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_concurrent_load_speedup
# Build a fresh repository with two copies of the identity model so both
# can be loaded at the same time.
rm -rf models
mkdir models
cp -r identity_zero_1_int32 models/identity_zero_1_int32_1 && mkdir -p models/identity_zero_1_int32_1/1
cp -r models/identity_zero_1_int32_1 models/identity_zero_1_int32_2
sed -i "s/identity_zero_1_int32/identity_zero_1_int32_1/" models/identity_zero_1_int32_1/config.pbtxt
sed -i "s/identity_zero_1_int32/identity_zero_1_int32_2/" models/identity_zero_1_int32_2/config.pbtxt

SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=explicit"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
python $LC_TEST LifeCycleTest.test_concurrent_load_speedup >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_concurrent_load
# A single identity model; the test loads it twice concurrently.
rm -rf models
mkdir models
cp -r identity_zero_1_int32 models && mkdir -p models/identity_zero_1_int32/1

SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=explicit"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
python $LC_TEST LifeCycleTest.test_concurrent_load >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

LOG_IDX=$((LOG_IDX+1))

# LifeCycleTest.test_concurrent_load_unload
# Identity model, an ensemble, and its custom dependency with a 10 second
# creation delay so the unload-while-loading race can be exercised.
rm -rf models
mkdir models
cp -r identity_zero_1_int32 models && mkdir -p models/identity_zero_1_int32/1
cp -r ensemble_zero_1_float32 models && mkdir -p models/ensemble_zero_1_float32/1
cp -r ../custom_models/custom_zero_1_float32 models/. && \
    mkdir -p models/custom_zero_1_float32/1 && \
    (cd models/custom_zero_1_float32 && \
        echo "parameters [" >> config.pbtxt && \
        echo "{ key: \"creation_delay_sec\"; value: { string_value: \"10\" }}" >> config.pbtxt && \
        echo "]" >> config.pbtxt)

SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=explicit"
SERVER_LOG="./inference_server_$LOG_IDX.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e
python $LC_TEST LifeCycleTest.test_concurrent_load_unload >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID
18081708
if [ $RET -eq 0 ]; then
18091709
echo -e "\n***\n*** Test Passed\n***"
18101710
fi

0 commit comments

Comments
 (0)