|
1 | | -// Copyright (C) 2023-2024 Intel Corporation |
| 1 | +// Copyright (C) 2023-2025 Intel Corporation |
2 | 2 | // SPDX-License-Identifier: Apache-2.0 |
3 | 3 | // |
4 | 4 | #include "compiled_model.hpp" |
|
21 | 21 | #include "openvino/util/common_util.hpp" |
22 | 22 | #include "partitioning/patterns/opt.hpp" |
23 | 23 | #include "plugin.hpp" |
| 24 | +#include "serialization.hpp" |
24 | 25 | #include "unfold_sync_infer_request.hpp" |
25 | 26 | #include "util.hpp" |
26 | 27 |
|
@@ -486,6 +487,222 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model, |
486 | 487 | report_io(); |
487 | 488 | } |
488 | 489 |
|
| 490 | +ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model, |
| 491 | + const std::shared_ptr<const ov::IPlugin>& plugin, |
| 492 | + const bool serialized) |
| 493 | + : ov::npuw::ICompiledModel(model, plugin), |
| 494 | + m_options_desc(std::make_shared<::intel_npu::OptionsDesc>()), |
| 495 | + m_cfg(m_options_desc), |
| 496 | + m_name(model->get_friendly_name()), |
| 497 | + m_loaded_from_cache(serialized) { |
| 498 | + ::intel_npu::registerNPUWOptions(*m_options_desc); |
| 499 | + NPUW_ASSERT(serialized && "This constructor should only be utilized during deserialization!"); |
| 500 | + LOG_DEBUG("CompiledModel is being deserialized, skipping the full constructor flow..."); |
| 501 | +} |
| 502 | + |
// Serialize one submodel descriptor into `stream`.
// NOTE(review): the sequence of write() calls below is the de-facto wire
// format - CompiledModelDesc::deserialize() must read exactly the same
// fields in exactly the same order. Keep the two functions in sync.
void ov::npuw::CompiledModel::CompiledModelDesc::serialize(std::ostream& stream) const {
    using namespace ov::npuw::s11n;

    LOG_DEBUG("Serializing CompiledModelDesc...");
    LOG_BLOCK();

    // Index of the function "body" submodel this call was replaced by, if any
    write(stream, replaced_by);

    write(stream, param_base);
    write(stream, forced_to_fcall);

    // Host-gather parameter indices
    write(stream, host_gather.dst_idx);
    write(stream, host_gather.src_idx);
    write(stream, host_gather.idx_idx);

    write(stream, spatial);

    write(stream, scales);
    write(stream, zerops);
    write(stream, is_remote);

    // NOTE: for closure only serialize uids - full flow
    write(stream, closure_uid);

    // Some tensors might be present in CPU closure already - need to serialize as is
    // FIXME: When weightless serialization is introduced, this should be handled differently
    write(stream, closure.size());
    std::vector<ov::Tensor> cpu_closures;
    std::vector<std::size_t> cpu_closure_ids;
    for (std::size_t cidx = 0; cidx < closure.size(); ++cidx) {
        if (closure_uid[cidx] == -1) {  // CPU closure, not in the bank
            cpu_closure_ids.push_back(cidx);
            cpu_closures.push_back(closure[cidx]);
        }
    }

    // Indices first, then the tensors themselves - deserialize() relies on this order
    write(stream, cpu_closure_ids);

    for (const auto& tensor : cpu_closures) {
        write(stream, tensor);
    }

    // FIXME: support weightless flow!

    LOG_DEBUG("DONE.");
}
| 549 | + |
// Restore one submodel descriptor from `stream`.
// NOTE(review): must mirror CompiledModelDesc::serialize() field-for-field,
// in the same order - any divergence silently corrupts all subsequent reads.
void ov::npuw::CompiledModel::CompiledModelDesc::deserialize(std::istream& stream) {
    using namespace ov::npuw::s11n;

    LOG_DEBUG("Deserializing CompiledModelDesc...");
    LOG_BLOCK();

    read(stream, replaced_by);

    read(stream, param_base);
    read(stream, forced_to_fcall);

    read(stream, host_gather.dst_idx);
    read(stream, host_gather.src_idx);
    read(stream, host_gather.idx_idx);

    read(stream, spatial);

    read(stream, scales);
    read(stream, zerops);
    read(stream, is_remote);

    // NOTE: for closure only deserialize uids - full flow
    read(stream, closure_uid);

    // Some tensors might be present in CPU closure already - need to deserialize as is
    // FIXME: When weightless serialization is introduced, this should be handled differently
    std::size_t closure_size = 0;
    read(stream, closure_size);
    std::vector<std::size_t> cpu_closure_ids;
    read(stream, cpu_closure_ids);
    // Resize to the full closure size; only the CPU-held slots are filled here.
    // Bank-held slots stay empty and are presumably repopulated later via
    // reconstruct_closure() - verify the caller does invoke it.
    closure.resize(closure_size);
    for (const auto& cidx : cpu_closure_ids) {
        read(stream, closure[cidx]);
    }

    // FIXME: support weightless flow!

    LOG_DEBUG("DONE.");
}
| 589 | + |
// Serialize the whole NPUW compiled model (metadata, config, and every
// compiled submodel) into `stream`.
// NOTE(review): the write() order below is the wire format - the static
// CompiledModel::deserialize() must mirror it exactly.
void ov::npuw::CompiledModel::serialize(std::ostream& stream) const {
    LOG_INFO("Serializing CompiledModel...");
    LOG_BLOCK();

    using namespace ov::npuw::s11n;

    // Serialize name
    write(stream, m_name);

    // Serialize inputs and outputs
    write(stream, inputs());
    write(stream, outputs());

    // Serialize meta
    write(stream, m_inputs_to_submodels_inputs);
    write(stream, m_outputs_to_submodels_outputs);
    write(stream, m_param_subscribers);
    write(stream, m_submodels_input_to_prev_output);

    // Write device list
    write(stream, m_dev_list);

    // Write config
    write(stream, m_cfg);

    // Serialize compiled submodels
    write(stream, m_compiled_submodels.size());
    for (const auto& subm : m_compiled_submodels) {
        // Write device idx (stored as an offset into m_dev_list, written above)
        std::size_t device_idx = subm.device_it - m_dev_list.begin();
        write(stream, device_idx);
        // Write ICompiledModel if it's there
        if (subm.compiled_model) {
            write(stream, true);
            // FIXME: workaround for import/export model since import model seem to reset the file pointer
            // The intermediate stringstream buffers the entire exported blob in
            // memory before it is written out - costly for large submodels.
            std::stringstream ss;
            subm.compiled_model->export_model(ss);
            write(stream, ss.str());
        } else {
            write(stream, false);
        }
        // Write the rest of the submodel desc
        subm.serialize(stream);
    }

    LOG_INFO("Done.");
}
| 637 | + |
// Static factory: rebuild a CompiledModel from a stream previously produced
// by CompiledModel::serialize(). Reads must mirror the writes exactly.
// NOTE(review): closure tensors held in the weights bank are NOT restored
// here - presumably the caller invokes reconstruct_closure() once the bank
// is available; verify against the import path.
std::shared_ptr<ov::npuw::CompiledModel> ov::npuw::CompiledModel::deserialize(
    std::istream& stream,
    const std::shared_ptr<const ov::IPlugin>& plugin) {
    LOG_INFO("Deserializing CompiledModel...");
    LOG_BLOCK();

    using namespace ov::npuw::s11n;

    // Deserialize model name first
    std::string model_name;
    read(stream, model_name);

    // Create a dummy CompiledModel with an empty ov::Model - this will skip the constructor flow
    // to continue deserialization
    ov::ParameterVector parameters;
    ov::NodeVector results;

    read(stream, parameters);
    read(stream, results);

    auto ov_model = std::make_shared<ov::Model>(results, parameters, model_name);

    // `true` selects the serialized-only constructor (skips compilation)
    auto compiled = std::make_shared<ov::npuw::CompiledModel>(ov_model, plugin, true);

    // Deserialize meta
    compiled->m_name = model_name;
    read(stream, compiled->m_inputs_to_submodels_inputs);
    read(stream, compiled->m_outputs_to_submodels_outputs);
    read(stream, compiled->m_param_subscribers);
    read(stream, compiled->m_submodels_input_to_prev_output);

    // Deserialize device list
    read(stream, compiled->m_dev_list);

    // Deserialize config
    read(stream, compiled->m_cfg);

    // Deserialize compiled submodels
    std::size_t subm_size = 0;
    read(stream, subm_size);
    compiled->m_compiled_submodels.resize(subm_size);
    for (std::size_t i = 0; i < subm_size; ++i) {
        std::size_t device_idx = 0;
        read(stream, device_idx);

        bool has_compiled_model = false;
        read(stream, has_compiled_model);
        if (has_compiled_model) {
            // Import model from the plugin
            // FIXME: workaround for import/export model since import model seems to reset the file pointer
            std::string buf;
            read(stream, buf);
            std::stringstream buffer(buf);
            compiled->m_compiled_submodels[i].compiled_model =
                plugin->get_core()->import_model(buffer, compiled->m_dev_list[device_idx]);
        }
        // Rebind the device iterator to this instance's own m_dev_list
        compiled->m_compiled_submodels[i].device_it = compiled->m_dev_list.begin() + device_idx;
        compiled->m_compiled_submodels[i].deserialize(stream);
    }

    compiled->implement_properties();
    compiled->report_io();

    LOG_INFO("Done.");

    return compiled;
}
| 705 | + |
489 | 706 | void ov::npuw::CompiledModel::finalize_weights_bank() { |
490 | 707 | LOG_INFO("Finalizing weights bank..."); |
491 | 708 | // Register lazy tensors |
@@ -541,6 +758,33 @@ void ov::npuw::CompiledModel::finalize_weights_bank() { |
541 | 758 | LOG_INFO("Done."); |
542 | 759 | } |
543 | 760 |
|
| 761 | +void ov::npuw::CompiledModel::reconstruct_closure() { |
| 762 | + for (size_t idx = 0; idx < m_compiled_submodels.size(); ++idx) { |
| 763 | + auto& comp_model_desc = m_compiled_submodels[idx]; |
| 764 | + |
| 765 | + // Skip optimized out and non-functions |
| 766 | + if (!comp_model_desc.compiled_model && !comp_model_desc.replaced_by) { |
| 767 | + continue; |
| 768 | + } |
| 769 | + |
| 770 | + const auto real_idx = comp_model_desc.replaced_by.value_or(idx); |
| 771 | + auto& func_desc = m_compiled_submodels[real_idx]; |
| 772 | + |
| 773 | + // At this point closure size should have already been deserialized |
| 774 | + NPUW_ASSERT(!comp_model_desc.closure.empty() && "Closure shouldn't be empty at this point!"); |
| 775 | + for (std::size_t cidx = 0; cidx < comp_model_desc.closure.size(); ++cidx) { |
| 776 | + if (comp_model_desc.closure[cidx]) { |
| 777 | + // host-side closure - already set, do nothing |
| 778 | + NPUW_ASSERT(!comp_model_desc.is_remote[cidx]); |
| 779 | + continue; |
| 780 | + } |
| 781 | + NPUW_ASSERT(comp_model_desc.closure_uid[cidx] != -1); |
| 782 | + comp_model_desc.closure[cidx] = |
| 783 | + m_weights_bank->get(comp_model_desc.closure_uid[cidx], *func_desc.device_it); |
| 784 | + } |
| 785 | + } |
| 786 | +} |
| 787 | + |
544 | 788 | void ov::npuw::CompiledModel::detach_memory() { |
545 | 789 | LOG_INFO("Detaching model & weight memory..."); |
546 | 790 | LOG_BLOCK(); |
|
0 commit comments