diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index b6886733ba1..72f6144708a 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -134,9 +134,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) { node_output = node->view_src; } - m_output_names.push_back(node_output_name); - m_outputs[node_output_name] = node_output; - current_node_info.node = node; current_node_info.node_name = node_name; current_node_info.node_output = node_output; @@ -150,8 +147,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) { continue; } std::string src_name = std::string(src->name); - m_input_names.push_back(src_name); - m_inputs[src_name] = src; current_node_info.node_inputs[src_name] = src; current_node_info.node_inputs_names.push_back(src_name); @@ -167,6 +162,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) { if (m_model_inputs.find(src_name) != m_model_inputs.end()) { continue; } + m_inputs[src_name] = src; auto param_node = std::make_shared(get_ov_type(src), get_graph_input_shape(node, src)); param_node->set_friendly_name(src_name); @@ -739,58 +735,37 @@ ov::element::Type GgmlOvDecoder::get_ov_type(const ggml_tensor * tensor) { } } -ov::PartialShape GgmlOvDecoder::get_input_shape(const std::string & name) const { - return ov::PartialShape(get_shape(m_inputs.at(name))); -} - ov::PartialShape GgmlOvDecoder::get_input_shape(int node_idx, const std::string & name) const { return ov::PartialShape(get_shape(m_node_info_list[node_idx].node_inputs.at(name))); } -std::vector GgmlOvDecoder::get_input_stride(const std::string & name) const { - return get_stride(m_inputs.at(name)); -} - std::vector GgmlOvDecoder::get_input_stride(int node_idx, const std::string & name) const { return get_stride(m_node_info_list[node_idx].node_inputs.at(name)); } -ov::element::Type GgmlOvDecoder::get_input_type(const std::string & name) const { - return get_ov_type(m_inputs.at(name)); +ov::element::Type GgmlOvDecoder::get_input_type(int node_idx, const std::string & name) const { + return get_ov_type(m_node_info_list[node_idx].node_inputs.at(name)); } size_t GgmlOvDecoder::get_input_size() const { - return m_input_names.size(); + return m_model_inputs.size(); } size_t GgmlOvDecoder::get_input_size(int node_idx) const { return m_node_info_list[node_idx].node_inputs_names.size(); } -std::vector GgmlOvDecoder::get_input_names() const { - return m_input_names; -} - std::vector GgmlOvDecoder::get_input_names(int node_idx) const { return m_node_info_list[node_idx].node_inputs_names; } -ov::PartialShape GgmlOvDecoder::get_output_shape(const std::string & name) const { - auto * ggml_tensor = m_outputs.at(name); - return ov::PartialShape(get_shape(ggml_tensor)); -} - ov::PartialShape GgmlOvDecoder::get_output_shape(int node_idx) const { auto * ggml_tensor = m_node_info_list[node_idx].node_output; return ov::PartialShape(get_shape(ggml_tensor)); } -ov::element::Type GgmlOvDecoder::get_output_type(const std::string & name) const { - return get_ov_type(m_outputs.at(name)); -} - -std::vector GgmlOvDecoder::get_output_names() const { - return m_output_names; +ov::element::Type GgmlOvDecoder::get_output_type(const int node_idx) const { + return get_ov_type(m_node_info_list[node_idx].node); } std::vector GgmlOvDecoder::get_output_names(int node_idx) const { @@ -806,18 +781,10 @@ const std::string & GgmlOvDecoder::get_op_name(int node_idx) const { return m_node_info_list[node_idx].node_name; } -int32_t * GgmlOvDecoder::get_input_op_params(const std::string & name) const { - return m_inputs.at(name)->op_params; -} - int32_t * GgmlOvDecoder::get_input_op_params(int node_idx, const std::string & name) const { return m_node_info_list[node_idx].node_inputs.at(name)->op_params; } -int32_t * GgmlOvDecoder::get_output_op_params(const std::string & name) const { - return m_outputs.at(name)->op_params; -} - int32_t * GgmlOvDecoder::get_output_op_params(int node_idx) const { return m_node_info_list[node_idx].node->op_params; } diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h index 51f314f17cd..1e51a7e1a8d 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.h +++ b/ggml/src/ggml-openvino/ggml-decoder.h @@ -77,15 +77,11 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { GGML_UNUSED(name); } - virtual ov::PartialShape get_input_shape(const std::string & name) const override; - virtual ov::PartialShape get_input_shape(int node_idx, const std::string & name) const override; - virtual std::vector get_input_stride(const std::string & name) const override; - virtual std::vector get_input_stride(int node_idx, const std::string & name) const override; - virtual ov::element::Type get_input_type(const std::string & name) const override; + virtual ov::element::Type get_input_type(int node_idx, const std::string & name) const override; virtual size_t get_input_size() const override; @@ -101,26 +97,16 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { GGML_UNUSED(producer_output_port_index); } - virtual std::vector get_input_names() const override; - virtual std::vector get_input_names(int node_idx) const override; - virtual ov::PartialShape get_output_shape(const std::string & name) const override; - virtual ov::PartialShape get_output_shape(int node_idx) const override; - virtual ov::element::Type get_output_type(const std::string & name) const override; - - virtual int32_t * get_input_op_params(const std::string & name) const override; + virtual ov::element::Type get_output_type(const int node_idx) const override; virtual int32_t * get_input_op_params(int node_idx, const std::string & name) const override; - virtual int32_t * get_output_op_params(const std::string & name) const override; - virtual int32_t * get_output_op_params(int node_idx) const override; - virtual std::vector get_output_names() const override; - virtual std::vector get_output_names(int node_idx) const override; virtual const std::string & get_op_type() const override; @@ -135,8 +121,6 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { ggml_tensor * get_input_ggml_tensor(const std::string & name) const { return m_inputs.at(name); } - ggml_tensor * get_output_ggml_tensor(const std::string & name) const { return m_outputs.at(name); } - virtual int get_op_case(int node_idx) const override { return m_node_info_list[node_idx].node_op_case; } virtual const std::map> & get_model_inputs() const override { @@ -237,9 +221,6 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { ggml_cgraph * m_cgraph = nullptr; std::vector m_nodes; std::map m_inputs; - std::vector m_input_names; - std::map m_outputs; - std::vector m_output_names; std::map> m_model_inputs; std::map> m_model_extra_inputs; diff --git a/ggml/src/ggml-openvino/openvino/decoder.hpp b/ggml/src/ggml-openvino/openvino/decoder.hpp index 54fe890fd1a..1603c7fd201 100644 --- a/ggml/src/ggml-openvino/openvino/decoder.hpp +++ b/ggml/src/ggml-openvino/openvino/decoder.hpp @@ -14,15 +14,11 @@ class GgmlDecoder : public DecoderBase { public: virtual ov::Any get_attribute(const std::string& name) const = 0; - virtual PartialShape get_input_shape(const std::string& name) const = 0; - virtual PartialShape get_input_shape(int node_idx, const std::string& name) const = 0; - virtual std::vector get_input_stride(const std::string& name) const = 0; - virtual std::vector get_input_stride(int node_idx, const std::string& name) const = 0; - virtual element::Type get_input_type(const std::string& name) const = 0; + virtual element::Type get_input_type(int node_idx, const std::string& name) const = 0; virtual size_t get_input_size() const = 0; @@ -33,26 +29,16 @@ class GgmlDecoder : public DecoderBase { std::string& producer_output_port_name, size_t& producer_output_port_index) const = 0; - virtual std::vector get_input_names() const = 0; - virtual std::vector get_input_names(int node_idx) const = 0; - virtual PartialShape get_output_shape(const std::string& name) const = 0; - virtual PartialShape get_output_shape(int node_idx) const = 0; - virtual element::Type get_output_type(const std::string& name) const = 0; - - virtual int32_t* get_input_op_params(const std::string& name) const = 0; + virtual element::Type get_output_type(const int node_idx) const = 0; virtual int32_t* get_input_op_params(int node_idx, const std::string& name) const = 0; - virtual int32_t* get_output_op_params(const std::string& name) const = 0; - virtual int32_t * get_output_op_params(int node_idx) const = 0; - virtual std::vector get_output_names() const = 0; - virtual std::vector get_output_names(int node_idx) const = 0; virtual const std::string& get_op_type() const = 0; diff --git a/ggml/src/ggml-openvino/openvino/node_context.hpp b/ggml/src/ggml-openvino/openvino/node_context.hpp index 42d950c3eb4..a0666b21ac3 100644 --- a/ggml/src/ggml-openvino/openvino/node_context.hpp +++ b/ggml/src/ggml-openvino/openvino/node_context.hpp @@ -40,11 +40,11 @@ class NodeContext : public frontend::NodeContext { } ov::element::Type get_input_type(size_t index) const { - return m_decoder->get_input_type(m_input_names[index]); + return m_decoder->get_input_type(m_node_idx, m_input_names[index]); } - PartialShape get_input_shape(size_t index) const { - return m_decoder->get_input_shape(m_node_idx, m_input_names[index]); + PartialShape get_input_shape(size_t input_index) const { + return m_decoder->get_input_shape(m_node_idx, m_input_names[input_index]); } std::vector get_input_stride(size_t index) const { @@ -61,8 +61,8 @@ class NodeContext : public frontend::NodeContext { int32_t * get_output_op_params() const { return m_decoder->get_output_op_params(m_node_idx); } - ov::element::Type get_output_type(size_t index) const { - return m_decoder->get_output_type(m_output_names[index]); + ov::element::Type get_output_type() const { + return m_decoder->get_output_type(m_node_idx); } Output get_input(int idx) const override { diff --git a/ggml/src/ggml-openvino/openvino/op/cpy.cpp b/ggml/src/ggml-openvino/openvino/op/cpy.cpp index d5186cddee7..ded2f0ca788 100644 --- a/ggml/src/ggml-openvino/openvino/op/cpy.cpp +++ b/ggml/src/ggml-openvino/openvino/op/cpy.cpp @@ -11,7 +11,7 @@ namespace ggml { namespace op { OutputVector translate_cpy(const NodeContext & context) { - auto res = std::make_shared(context.get_input(0), context.get_output_type(0)); + auto res = std::make_shared(context.get_input(0), context.get_output_type()); return rename_outputs_with_suffix({res}, context.get_name()); } diff --git a/ggml/src/ggml-openvino/openvino/op/get_rows.cpp b/ggml/src/ggml-openvino/openvino/op/get_rows.cpp index ace79c33a9b..dc8454a1998 100644 --- a/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +++ b/ggml/src/ggml-openvino/openvino/op/get_rows.cpp @@ -42,8 +42,8 @@ OutputVector translate_get_rows(const NodeContext & context) { res = std::make_shared(data, indices, axis); } - if (res.get_element_type() != context.get_output_type(0)) { - res = std::make_shared(res, context.get_output_type(0)); + if (res.get_element_type() != context.get_output_type()) { + res = std::make_shared(res, context.get_output_type()); } res = std::make_shared(res, ov::op::v0::Constant::create(ov::element::i64, {1}, {0})); return rename_outputs_with_suffix({res}, context.get_name()); diff --git a/ggml/src/ggml-openvino/openvino/op/set_rows.cpp b/ggml/src/ggml-openvino/openvino/op/set_rows.cpp index eb128f04a36..4ceb55589ea 100644 --- a/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +++ b/ggml/src/ggml-openvino/openvino/op/set_rows.cpp @@ -32,7 +32,7 @@ OutputVector translate_set_rows(const NodeContext & context) { auto indices = context.get_input(1); auto dst = context.get_input(2); - data = std::make_shared(data, context.get_output_type(0)); + data = std::make_shared(data, context.get_output_type()); auto dst_shape = context.get_output_shape().to_shape(); diff --git a/ggml/src/ggml-openvino/openvino/op/softmax.cpp b/ggml/src/ggml-openvino/openvino/op/softmax.cpp index 921475e51ae..782fdf078d7 100644 --- a/ggml/src/ggml-openvino/openvino/op/softmax.cpp +++ b/ggml/src/ggml-openvino/openvino/op/softmax.cpp @@ -63,8 +63,8 @@ OutputVector translate_soft_max(const NodeContext & context) { mask_node_sliced = std::make_shared(mask_node, zero, token_len, one, one); } - if (mask_node_sliced.get_element_type() != context.get_output_type(0)) { - mask_node_sliced = std::make_shared(mask_node_sliced, context.get_output_type(0)); + if (mask_node_sliced.get_element_type() != context.get_output_type()) { + mask_node_sliced = std::make_shared(mask_node_sliced, context.get_output_type()); } Output slope_mask; diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp index 8ce50c332a4..f52b2cb07c3 100644 --- a/ggml/src/ggml-openvino/utils.cpp +++ b/ggml/src/ggml-openvino/utils.cpp @@ -480,9 +480,9 @@ ov::Tensor convert_ggml_input_to_ov(std::shared_ptr ggml_decoder, // This case is added to make test-backend-ops work input_shape = ggml_decoder->get_shape(ggml_tensor->view_src); } else { - input_shape = ggml_decoder->get_input_shape(name).to_shape(); + input_shape = ggml_decoder->get_shape(ggml_tensor); } - auto input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), input_shape, input_data); + auto input_tensor = ov::Tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape, input_data); return input_tensor; } } // namespace @@ -506,7 +506,7 @@ ov::Tensor get_ov_input_tensor_static_decode(std::shared_ptr ggml (op->op == GGML_OP_SET_ROWS && op->src[1] == ggml_tensor)) { assert(ggml_tensor->ne[0] == 1); ov::Shape input_shape = {1, 1, 1, 1}; - ov::Tensor input_tensor(ggml_decoder->get_input_type(param_name), input_shape); + ov::Tensor input_tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape); if (ggml_tensor->type == GGML_TYPE_I32) { *input_tensor.data() = *((int32_t *) ggml_tensor->data); } else if (ggml_tensor->type == GGML_TYPE_I64) { @@ -519,7 +519,7 @@ ov::Tensor get_ov_input_tensor_static_decode(std::shared_ptr ggml if (param_name == "inp_out_ids") { ov::Shape input_shape = {1, 1, 1, 1}; - ov::Tensor input_tensor(ggml_decoder->get_input_type(param_name), input_shape); + ov::Tensor input_tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape); int32_t inp_out_id = *((int32_t *) ggml_tensor->data); assert(ggml_tensor->ne[0] == 1); assert(inp_out_id == 0); @@ -553,7 +553,7 @@ ov::Tensor get_ov_input_tensor_static_prefill(std::shared_ptr ggm if (param_name == "inp_pos" || param_name == "inp_tokens" || (op->op == GGML_OP_SET_ROWS && op->src[1] == ggml_tensor)) { ov::Shape input_shape = {1, 1, 1, chunk_size}; - ov::Tensor input_tensor(ggml_decoder->get_input_type(param_name), input_shape); + ov::Tensor input_tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape); // copy the chunk_index-th chunk from ggml_tensor size_t element_size = ggml_type_size(ggml_tensor->type); void * input_data = (char *) ggml_tensor->data + chunk_index * chunk_size * element_size; @@ -581,7 +581,7 @@ ov::Tensor get_ov_input_tensor_static_prefill(std::shared_ptr ggm if (param_name == "inp_out_ids") { size_t output_len = ggml_decoder->get_compute_params().output_len; ov::Shape input_shape = {1, 1, 1, output_len}; - ov::Tensor input_tensor(ggml_decoder->get_input_type(param_name), input_shape); + ov::Tensor input_tensor(ggml_decoder->get_ov_type(ggml_tensor), input_shape); if (ggml_tensor->ne[0] == 0) { *input_tensor.data() = 0; } else {