
[TEST][GPU]qwen3 moe support #30448


Open · wants to merge 73 commits into base: master

Commits (73)
aa10cbe
add perf
luo-cheng2021 Feb 11, 2025
3a80081
perf for sdpa/pa
luo-cheng2021 Feb 13, 2025
d209f03
add git ignore
luo-cheng2021 Mar 26, 2025
45bf16e
insert if for moe expert
luo-cheng2021 Mar 31, 2025
795e323
add moeexpert support
luo-cheng2021 Apr 2, 2025
d7f2602
fix moeexpert precision is always f32
luo-cheng2021 Apr 2, 2025
7f4b901
add moeexpert support for gpu
luo-cheng2021 Apr 2, 2025
76a7d5b
opt: 1, simplify subgraph inside moeexpert; 2, only compute skip flag…
luo-cheng2021 Apr 4, 2025
6079269
opt: remove nonzero->split from subgraph into moeexpert for gpu
luo-cheng2021 Apr 5, 2025
c385c8f
Support Qwen3 rms kernel for input with dynamic padding
riverlijunjie Mar 31, 2025
ada754a
Add test case
riverlijunjie Mar 31, 2025
38ded44
WA: moe_expert wait all inputs ready
luo-cheng2021 Apr 7, 2025
f84303e
fix incorrect output shape computation
luo-cheng2021 Apr 8, 2025
df0ca20
add fast path for expert mask computation if no padding
luo-cheng2021 Apr 8, 2025
00e7d9a
qwen3 moe compile model opt, from 150s to 70s in LNL (#66)
riverlijunjie Apr 9, 2025
019262b
Move FuseMoeExpert2 ahead of CommonOptimizations to decrease compilin…
ceciliapeng2011 Apr 9, 2025
994c094
not use subgraph for moeexpert
luo-cheng2021 Apr 10, 2025
22c93ee
fix scale/zp layout; first expert should not be inplace
luo-cheng2021 Apr 11, 2025
7c872a0
merge all experts into one op
luo-cheng2021 Apr 12, 2025
75b9683
Optimize gather and index_add performance
riverlijunjie Apr 13, 2025
2d8eb4e
fix out_of_resource error on lunarlake
luo-cheng2021 Apr 14, 2025
06e436c
Move weights from usm_host to usm_device memory
riverlijunjie Apr 16, 2025
02f2331
Add ITT for MoE
riverlijunjie Apr 17, 2025
b6b5f1d
Optimize BMG first token due to index_add kernel
riverlijunjie Apr 17, 2025
9383141
opt: merge all experts into one for batch1
luo-cheng2021 Apr 18, 2025
c7ef4ea
opt: cl code for mlp_*
luo-cheng2021 Apr 18, 2025
76e6ed7
change weight back to ba
luo-cheng2021 Apr 19, 2025
8471f6b
small tune for lunarlake
luo-cheng2021 Apr 21, 2025
818ba1b
fuse onehot into moe
luo-cheng2021 Apr 21, 2025
eed40eb
not wait gpu for batch1
luo-cheng2021 Apr 21, 2025
bd8e5f6
optimize mlp 2nd token bandwidth
usstq Apr 22, 2025
b7278d9
minor fix
luo-cheng2021 Apr 22, 2025
caa1f6e
Optimize moe_reduce for BMG
riverlijunjie Apr 24, 2025
e2812a4
add cm support
luo-cheng2021 Apr 23, 2025
0d2e996
moe expert cm kernel
luo-cheng2021 Apr 24, 2025
e08f8af
moe cm group 128 ok
luo-cheng2021 Apr 25, 2025
45618fb
cm moe zp ok(env: CM_MASK=3)
luo-cheng2021 Apr 25, 2025
27cbccf
default enable moe_up, disable moe_down
luo-cheng2021 Apr 25, 2025
100ca20
minor: reduce the parameter number of cm moe_up
luo-cheng2021 Apr 25, 2025
a8bcc42
Add perf
riverlijunjie Apr 24, 2025
e79494d
use i32 for paged_attention
luo-cheng2021 Apr 27, 2025
c409cfb
fuse softmax-topk
peterchen-intel Apr 27, 2025
7a3d3e4
fix bugs in softmax_topk fusion (add env NO_SOFTTOPK)
peterchen-intel Apr 28, 2025
3bca3f3
not alloc mem for cm if CM_MASK==0
luo-cheng2021 Apr 28, 2025
7930a66
Disable perf by default
riverlijunjie Apr 29, 2025
8e67325
Remove some logs
riverlijunjie May 6, 2025
378d56e
Remove cpu moe code
riverlijunjie May 6, 2025
548f1c2
Merge branch 'master' into gpu/qwen3_moe_cm
riverlijunjie May 6, 2025
7c06a5d
revert use i32 for pa
riverlijunjie May 7, 2025
c252879
Cleanup unused code
riverlijunjie May 7, 2025
3aa1d73
Move test case to common
riverlijunjie May 7, 2025
0722290
refine moe_expert_opt
riverlijunjie May 7, 2025
4b10730
simplify transform part code(TODO: accuracy)
luo-cheng2021 May 7, 2025
bf32356
fix gpu unit test(use cpu as reference)
luo-cheng2021 May 8, 2025
21efc98
Solve usm indirect memory access
riverlijunjie May 8, 2025
c7f5dab
more checks for pattern match
luo-cheng2021 May 9, 2025
a9c5a4f
use framework's intermediate buffer mechanism
luo-cheng2021 May 9, 2025
6a2dd26
Use direct memory access replace indirect access mode
riverlijunjie May 9, 2025
f717543
update cm kernel
riverlijunjie May 10, 2025
b50d233
update for CM_MASK
riverlijunjie May 11, 2025
a72b880
remove perf tool
riverlijunjie May 11, 2025
851babf
minor update
riverlijunjie May 11, 2025
210f1d4
add cache for moe_expert(temporarily disable cm due to additional scal…
luo-cheng2021 May 12, 2025
b86c8f3
minor cleanup
luo-cheng2021 May 12, 2025
b9e91da
revert optimization for shared_ops_optimization
riverlijunjie May 12, 2025
a011264
fix ci error
luo-cheng2021 May 12, 2025
0d9354f
fix CI failure
luo-cheng2021 May 12, 2025
8d1f344
Move cm kernel to cm directory
riverlijunjie May 19, 2025
6df53a1
Merge branch 'master' into gpu/qwen3_moe_cm
riverlijunjie May 20, 2025
ff0c74f
apply review comments
luo-cheng2021 May 20, 2025
18312f2
Some reviewer comments
riverlijunjie May 20, 2025
e06f8de
Remove CM kernel and move to the following PR
riverlijunjie May 21, 2025
f43d457
apply review comments
luo-cheng2021 May 21, 2025
77 changes: 77 additions & 0 deletions src/common/transformations/include/ov_ops/moe.hpp
@@ -0,0 +1,77 @@
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <array>
#include <memory>

#include "openvino/core/node.hpp"
#include "openvino/core/type/element_type.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/op.hpp"
#include "transformations_visibility.hpp"

namespace ov::op::internal {
///
/// \brief MOE experts
class TRANSFORMATIONS_API MOE : public ov::op::Op {
public:
OPENVINO_OP("MOE", "ie_internal_opset");

MOE() = default;

struct Config {
size_t topk = 0;
size_t expert_num = 0;
size_t hidden_size = 0;
size_t intermediate_size = 0;
bool fused_router_logic = false;
size_t group_size = 0;  // quantization group size; 0 means no grouping. Same for gate/up/down
ov::element::Type weight_type = ov::element::dynamic; // same for gate/up/down
ov::element::Type scale_type = ov::element::dynamic; // same for gate/up/down
ov::element::Type zp_type = ov::element::dynamic; // same for gate/up/down
bool operator==(const Config& rhs) const {
#define CMP(x) (x == rhs.x)
return CMP(topk) && CMP(expert_num) && CMP(hidden_size) && CMP(intermediate_size) &&
CMP(fused_router_logic) && CMP(group_size) && CMP(weight_type) && CMP(scale_type) && CMP(zp_type);
#undef CMP
}
};

// 0: weight, 1: scale, 2: zp
struct ConstsPerExpert {
std::array<std::shared_ptr<ov::op::v0::Constant>, 3> gates;
std::array<std::shared_ptr<ov::op::v0::Constant>, 3> ups;
std::array<std::shared_ptr<ov::op::v0::Constant>, 3> downs;
};
struct Attributes {
// expert config
Config config;
// expert weight/scale/zp
std::vector<ConstsPerExpert> consts;
};

MOE(const OutputVector& args, const Attributes& attrs);

const Config& get_config() const;
void set_config(const Config& config);
const std::vector<ConstsPerExpert>& get_consts() const {
return m_attrs.consts;
}

void add_consts(size_t expert_no, const ConstsPerExpert& consts) {
OPENVINO_ASSERT(expert_no == m_attrs.consts.size());
m_attrs.consts.push_back(consts);
}

bool visit_attributes(AttributeVisitor& visitor) override;
void validate_and_infer_types() override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

private:
Attributes m_attrs;
};

} // namespace ov::op::internal
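
Not part of the diff: a minimal sketch of how a fusion pass might populate MOE::Config and build the op from this header. The input meanings (hidden_states, router_logits) and all numeric values are illustrative assumptions based on typical Qwen3-MoE settings, not taken from this PR.

// Illustrative sketch only; input semantics and config values are assumptions.
#include <memory>

#include "ov_ops/moe.hpp"

std::shared_ptr<ov::op::internal::MOE> make_moe_example(const ov::Output<ov::Node>& hidden_states,
                                                        const ov::Output<ov::Node>& router_logits) {
    ov::op::internal::MOE::Attributes attrs;
    attrs.config.topk = 8;                        // experts activated per token (assumed)
    attrs.config.expert_num = 128;                // total experts (assumed)
    attrs.config.hidden_size = 2048;
    attrs.config.intermediate_size = 768;
    attrs.config.group_size = 128;                // per-group weight quantization
    attrs.config.weight_type = ov::element::u4;
    attrs.config.scale_type = ov::element::f16;
    attrs.config.zp_type = ov::element::u4;

    // validate_and_infer_types() accepts 2 or 4 inputs; output 0 mirrors input 0.
    auto moe = std::make_shared<ov::op::internal::MOE>(ov::OutputVector{hidden_states, router_logits}, attrs);

    // Per-expert {weight, scale, zp} constants for gate/up/down are attached afterwards,
    // in expert order, e.g. moe->add_consts(expert_no, consts);
    return moe;
}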
29 changes: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/pass/matcher_pass.hpp"
#include "transformations_visibility.hpp"

namespace ov {
namespace pass {

class TRANSFORMATIONS_API FuseMOE;
class TRANSFORMATIONS_API FuseMOERouter;

} // namespace pass
} // namespace ov

class ov::pass::FuseMOE : public ov::pass::MatcherPass {
public:
OPENVINO_MATCHER_PASS_RTTI("FuseMOE");
FuseMOE();
};

class ov::pass::FuseMOERouter : public ov::pass::MatcherPass {
public:
OPENVINO_MATCHER_PASS_RTTI("FuseMOERouter");
FuseMOERouter();
};
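
Not part of the diff: a sketch of how these matcher passes could be applied to an ov::Model through ov::pass::Manager. The standalone manager is for illustration only; where the passes actually run in the plugin pipeline is not shown here (the commit history above moves the fusion ahead of CommonOptimizations to cut compile time).

// Sketch only: the real pipeline placement is not shown in this diff.
#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
// plus the header above that declares ov::pass::FuseMOE / ov::pass::FuseMOERouter (path not shown)

void fuse_moe_example(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::FuseMOE>();        // collapse the per-expert subgraphs into one MOE op
    manager.register_pass<ov::pass::FuseMOERouter>();  // fold the softmax/top-k router logic into MOE
    manager.run_passes(model);
}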
66 changes: 66 additions & 0 deletions src/common/transformations/src/ov_ops/moe.cpp
@@ -0,0 +1,66 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ov_ops/moe.hpp"

#include "itt.hpp"

namespace ov {
namespace op {
namespace internal {

MOE::MOE(const OutputVector& args, const Attributes& attrs) : Op(args), m_attrs(attrs) {
constructor_validate_and_infer_types();
}

const MOE::Config& MOE::get_config() const {
return m_attrs.config;
}

void MOE::set_config(const Config& config) {
m_attrs.config = config;
}

std::shared_ptr<ov::Node> MOE::clone_with_new_inputs(const ov::OutputVector& new_args) const {
INTERNAL_OP_SCOPE(internal_MOE_clone_with_new_inputs);
check_new_args_count(this, new_args);

return std::make_shared<MOE>(new_args, m_attrs);
}

void MOE::validate_and_infer_types() {
INTERNAL_OP_SCOPE(internal_MOE_validate_and_infer_types);
OPENVINO_ASSERT(get_input_size() == 2 || get_input_size() == 4,
"MOE must have 2/4 inputs whereas it has ",
get_input_size());

set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
}

bool MOE::visit_attributes(ov::AttributeVisitor& visitor) {
INTERNAL_OP_SCOPE(internal_MOE_visit_attributes);
visitor.start_structure("config");

visitor.on_attribute("topk", m_attrs.config.topk);
visitor.on_attribute("expert_num", m_attrs.config.expert_num);
visitor.on_attribute("hidden_size", m_attrs.config.hidden_size);
visitor.on_attribute("intermediate_size", m_attrs.config.intermediate_size);
visitor.on_attribute("group_size", m_attrs.config.group_size);
visitor.on_attribute("fused_router_logic", m_attrs.config.fused_router_logic);
visitor.on_attribute("weight_type", m_attrs.config.weight_type);
visitor.on_attribute("scale_type", m_attrs.config.scale_type);
visitor.on_attribute("zp_type", m_attrs.config.zp_type);
visitor.finish_structure();
m_attrs.consts.resize(m_attrs.config.expert_num);
for (size_t i = 0; i < m_attrs.config.expert_num; i++) {
for (size_t j = 0; j < 3; j++) {
m_attrs.consts[i].gates[j]->visit_attributes(visitor);
}
}
return true;
}

} // namespace internal
} // namespace op
} // namespace ov
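
Not part of the diff: validate_and_infer_types() above forwards input 0's element type and partial shape to output 0, so a dynamic token dimension is preserved. A small sketch of that behaviour, assuming input 0 carries the hidden states (the input layout is an assumption):

// Sketch: MOE's output 0 mirrors input 0's element type and partial shape.
#include <memory>

#include "openvino/core/except.hpp"
#include "openvino/op/parameter.hpp"
#include "ov_ops/moe.hpp"

void moe_shape_example() {
    const auto dyn = ov::Dimension::dynamic();
    auto hidden = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{dyn, 2048});
    auto logits = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{dyn, 128});

    ov::op::internal::MOE::Attributes attrs;  // default config is enough for shape inference
    auto moe = std::make_shared<ov::op::internal::MOE>(ov::OutputVector{hidden, logits}, attrs);

    // Output keeps f16 and {?, 2048} regardless of topk/expert_num.
    const ov::PartialShape expected{dyn, 2048};
    OPENVINO_ASSERT(moe->get_output_element_type(0) == ov::element::f16);
    OPENVINO_ASSERT(moe->get_output_partial_shape(0) == expected);
}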