Skip to content

Commit f81e880

Browse files
committed
Add option normalize support
1 parent 427753f commit f81e880

File tree

3 files changed

+33
-15
lines changed

3 files changed

+33
-15
lines changed

src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1101,15 +1101,6 @@ std::optional<NPUDesc> extract_npu_descriptor(const std::shared_ptr<const ov::IP
11011101
return std::make_optional(std::move(desc));
11021102
}
11031103

1104-
// Looks up `option_name` in `config`. When the key is present, its entry is erased from
// `config` and a copy of the stored value is returned; otherwise `config` is left untouched
// and std::nullopt is returned.
std::optional<ov::Any> pop_option(ov::AnyMap& config, const std::string& option_name) {
    auto pos = config.find(option_name);
    const bool is_present = pos != config.end();
    if (!is_present) {
        return std::nullopt;
    }

    // Take the copy first: the entry no longer exists once erase() has run.
    auto popped = std::make_optional(pos->second);
    config.erase(pos);
    return popped;
}
1112-
11131104
void apply_weights_bank_name(ov::AnyMap& config, const std::string& bank_name) {
11141105
auto it = config.find("NPUW_WEIGHTS_BANK");
11151106
if (it != config.end()) {
@@ -1537,8 +1528,7 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& m
15371528
ov::npuw::util::prepare_text_embedding_model(kvcache_model, seq_len_dim);
15381529
}
15391530

1540-
auto post_type = pop_option(other_props, std::string("NPUW_TEXT_EMBED_POST_TYPE"));
1541-
ov::npuw::util::create_text_embedding_post_model(kvcache_model, text_embedding_post_model, post_type);
1531+
ov::npuw::util::create_text_embedding_post_model(kvcache_model, text_embedding_post_model, other_props);
15421532
} else {
15431533
LOG_DEBUG("Transform kvcache model from stateful to stateless.");
15441534
ov::pass::StatefulToStateless().run_on_model(kvcache_model);

src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model_utils.cpp

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,15 @@ class CachePositionInput : public ov::pass::MatcherPass {
647647
# pragma GCC diagnostic pop
648648
#endif
649649

650+
/// Removes the entry named `option_name` from `config` and returns a copy of its value.
///
/// @param config       Property map to search; modified only when the key is found.
/// @param option_name  Key of the option to extract.
/// @return The value that was stored under `option_name`, or std::nullopt when the key is absent.
std::optional<ov::Any> ov::npuw::util::pop_option(ov::AnyMap& config, const std::string& option_name) {
    const auto entry = config.find(option_name);
    if (entry == config.end()) {
        return std::nullopt;
    }

    // The value must be copied out before the entry is erased from the map.
    std::optional<ov::Any> value = std::make_optional(entry->second);
    config.erase(entry);
    return value;
}
658+
650659
bool ov::npuw::util::has_input(const std::shared_ptr<ov::Model>& model, const std::string& name) {
651660
auto inputs = model->inputs();
652661
auto it = std::find_if(inputs.begin(), inputs.end(), [&](const auto& port) {
@@ -837,11 +846,21 @@ std::shared_ptr<ov::op::Op> get_last_token_pooling_op(std::shared_ptr<ov::Model>
837846
return std::make_shared<op::v8::Gather>(last_hidden_state_node, subtract, one, 1);
838847
}
839848

849+
// Appends an L2 normalization to `last_hidden_state_node` and returns the new op.
// The normalization runs over axis 1 (a one-element i32 constant) with an epsilon of
// 1e-7 (converted to float) applied in EpsMode::MAX.
std::shared_ptr<ov::op::Op> normalize_output(std::shared_ptr<ov::op::Op> last_hidden_state_node) {
    using namespace ov;

    const float epsilon = static_cast<float>(1e-7);
    const std::vector<int> axes{1};

    const auto reduction_axes = std::make_shared<op::v0::Constant>(element::i32, Shape{1}, axes);
    return std::make_shared<op::v0::NormalizeL2>(last_hidden_state_node,
                                                 reduction_axes,
                                                 epsilon,
                                                 op::EpsMode::MAX);
}
858+
840859
} // namespace
841860

842861
void ov::npuw::util::create_text_embedding_post_model(std::shared_ptr<ov::Model> model,
843862
std::shared_ptr<ov::Model>& post_model,
844-
std::optional<ov::Any>& post_type_any) {
863+
ov::AnyMap& config) {
845864
auto output_node = model->outputs()[0];
846865
auto input_param =
847866
std::make_shared<ov::op::v0::Parameter>(output_node.get_element_type(), output_node.get_partial_shape());
@@ -850,7 +869,9 @@ void ov::npuw::util::create_text_embedding_post_model(std::shared_ptr<ov::Model>
850869
auto attention_mask = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::PartialShape{-1, -1});
851870
set_node_name(attention_mask, "attention_mask");
852871

853-
auto post_type = post_type_any.value_or(std::string("last_token")).as<std::string>();
872+
auto post_type_opt = pop_option(config, std::string("NPUW_TEXT_EMBED_POST_TYPE"));
873+
auto post_type = post_type_opt.value_or(std::string("last_token")).as<std::string>();
874+
854875
std::shared_ptr<ov::op::Op> post_output;
855876
if (post_type == "cls") {
856877
post_output = get_cls_pooling_op(input_param);
@@ -859,9 +880,14 @@ void ov::npuw::util::create_text_embedding_post_model(std::shared_ptr<ov::Model>
859880
} else if (post_type == "last_token") {
860881
post_output = get_last_token_pooling_op(model, input_param, attention_mask);
861882
}
862-
863883
OPENVINO_ASSERT(post_output != nullptr);
864884

885+
auto is_to_normalize_opt = pop_option(config, std::string("NPUW_TEXT_EMBED_NORMALIZE"));
886+
auto is_to_normalize = is_to_normalize_opt.value_or(true).as<bool>();
887+
if (is_to_normalize) {
888+
post_output = normalize_output(post_output);
889+
}
890+
865891
auto result_node = std::make_shared<ov::op::v0::Result>(post_output);
866892
post_model =
867893
std::make_shared<ov::Model>(ov::OutputVector{result_node}, ov::ParameterVector{input_param, attention_mask});

src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model_utils.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,16 @@ bool optimize_value_tensors(std::shared_ptr<ov::Model> model, bool isPrefill);
3333
void prepare_text_embedding_model(std::shared_ptr<ov::Model> model, uint32_t seq_len_dim);
3434
void create_text_embedding_post_model(std::shared_ptr<ov::Model> model,
3535
std::shared_ptr<ov::Model>& post_model,
36-
std::optional<ov::Any>& post_type);
36+
ov::AnyMap& config);
3737

3838
std::shared_ptr<ov::Model> prepare_whisper_prefill_model(std::shared_ptr<ov::Model>& model,
3939
const uint32_t& max_prompt_size,
4040
const uint32_t& lhs_seq_size);
4141

4242
std::shared_ptr<ov::Model> prepare_whisper_kvcache_model(std::shared_ptr<ov::Model>& model);
4343

44+
// Removes `option_name` from `config` and returns a copy of its value;
// returns std::nullopt (leaving `config` unchanged) when the key is not present.
std::optional<ov::Any> pop_option(ov::AnyMap& config, const std::string& option_name);
45+
4446
// clang-format off
4547
} // namespace ov
4648
// clang-format on

0 commit comments

Comments
 (0)