|
1 | 1 | import io |
2 | 2 | import math |
| 3 | +import os |
3 | 4 | import tempfile |
4 | 5 |
|
5 | 6 | from PIL import Image |
@@ -431,36 +432,57 @@ def predict(compressed_spectrogram_id: int): |
431 | 432 | return label, score, confs |
432 | 433 |
|
433 | 434 |
|
434 | | -def predict_compressed(image_file): |
| 435 | +def _fully_local_inference(image_file, use_mlflow_model): |
435 | 436 | import json |
436 | | - import os |
437 | 437 |
|
438 | 438 | import onnx |
439 | 439 | import onnxruntime as ort |
440 | 440 | import tqdm |
441 | 441 |
|
442 | 442 | img = Image.open(image_file) |
443 | 443 |
|
444 | | - relative = ('..',) * 3 |
445 | | - asset_path = os.path.abspath(os.path.join(__file__, *relative, 'assets')) |
446 | | - |
447 | | - onnx_filename = os.path.join(asset_path, 'model.mobilenet.onnx') |
448 | | - assert os.path.exists(onnx_filename) |
449 | | - |
450 | | - session = ort.InferenceSession( |
451 | | - onnx_filename, |
452 | | - providers=[ |
453 | | - ( |
454 | | - 'CUDAExecutionProvider', |
455 | | - { |
456 | | - 'cudnn_conv_use_max_workspace': '1', |
457 | | - 'device_id': 0, |
458 | | - 'cudnn_conv_algo_search': 'HEURISTIC', |
459 | | - }, |
460 | | - ), |
461 | | - 'CPUExecutionProvider', |
462 | | - ], |
463 | | - ) |
| 444 | + if not use_mlflow_model: |
| 445 | + relative = ('..',) * 3 |
| 446 | + asset_path = os.path.abspath(os.path.join(__file__, *relative, 'assets')) |
| 447 | + |
| 448 | + onnx_filename = os.path.join(asset_path, 'model.mobilenet.onnx') |
| 449 | + assert os.path.exists(onnx_filename) |
| 450 | + |
| 451 | + session = ort.InferenceSession( |
| 452 | + onnx_filename, |
| 453 | + providers=[ |
| 454 | + ( |
| 455 | + 'CUDAExecutionProvider', |
| 456 | + { |
| 457 | + 'cudnn_conv_use_max_workspace': '1', |
| 458 | + 'device_id': 0, |
| 459 | + 'cudnn_conv_algo_search': 'HEURISTIC', |
| 460 | + }, |
| 461 | + ), |
| 462 | + 'CPUExecutionProvider', |
| 463 | + ], |
| 464 | + ) |
| 465 | + else: |
| 466 | + import mlflow |
| 467 | + import mlflow.onnx |
| 468 | + |
| 469 | + MODEL_URI = 'models:/prototype/1' |
| 470 | + mlflow.set_tracking_uri(settings.MLFLOW_ENDPOINT) |
| 471 | + model = mlflow.onnx.load_model(model_uri=MODEL_URI) |
| 472 | + session = ort.InferenceSession( |
| 473 | + model.SerializeToString(), |
| 474 | + providers=[ |
| 475 | + ( |
| 476 | + 'CUDAExecutionProvider', |
| 477 | + { |
| 478 | + 'cudnn_conv_use_max_workspace': '1', |
| 479 | + 'device_id': 0, |
| 480 | + 'cudnn_conv_algo_search': 'HEURISTIC', |
| 481 | + }, |
| 482 | + ), |
| 483 | + 'CPUExecutionProvider', |
| 484 | + ], |
| 485 | + ) |
464 | 486 |
|
465 | 487 | img = np.array(img) |
466 | 488 |
|
@@ -507,6 +529,19 @@ def predict_compressed(image_file): |
507 | 529 | return label, score, confs |
508 | 530 |
|
509 | 531 |
|
def predict_compressed(image_file):
    """Dispatch inference on ``image_file`` according to ``INFERENCE_MODE``.

    The ``INFERENCE_MODE`` environment variable selects the backend:

    * ``0`` (default, and any other unrecognised integer): use the local
      ONNX file and run inference with it.
    * ``1``: fetch the model from MLflow and run inference locally.
    * ``2``: run inference against a deployed MLflow model (not implemented).

    Args:
        image_file: Forwarded unchanged to ``_fully_local_inference``.

    Returns:
        Whatever ``_fully_local_inference`` returns (presumably the
        ``(label, score, confs)`` tuple -- confirm against that function).

    Raises:
        ValueError: If ``INFERENCE_MODE`` is set to a non-integer value.
        NotImplementedError: If ``INFERENCE_MODE`` is ``2``.
    """
    raw_mode = os.getenv('INFERENCE_MODE', '0')
    try:
        inference_mode = int(raw_mode)
    except ValueError as err:
        # Same exception type as the bare int() call, but name the variable
        # so a misconfigured deployment is easy to diagnose.
        raise ValueError(
            f'INFERENCE_MODE must be an integer (0, 1 or 2), got {raw_mode!r}'
        ) from err

    if inference_mode == 1:
        # Previously a bare `pass`, which silently returned None.
        return _fully_local_inference(image_file, True)

    if inference_mode == 2:
        # Fail loudly rather than returning None to a caller that expects
        # a prediction result.
        raise NotImplementedError(
            'INFERENCE_MODE=2 (inference from a deployed mlflow model) '
            'is not implemented yet'
        )

    # Mode 0 and any unrecognised integer keep the original fallback:
    # inference with the bundled local model file.
    return _fully_local_inference(image_file, False)
| 543 | + |
| 544 | + |
510 | 545 | def train_body(experiment_name: str): |
511 | 546 | import mlflow |
512 | 547 | from mlflow.models import infer_signature |
|
0 commit comments