@@ -647,6 +647,15 @@ class CachePositionInput : public ov::pass::MatcherPass {
647647# pragma GCC diagnostic pop
648648#endif
649649
650+ std::optional<ov::Any> ov::npuw::util::pop_option (ov::AnyMap& config, const std::string& option_name) {
651+ if (auto it = config.find (option_name); it != config.end ()) {
652+ std::optional<ov::Any> found = std::make_optional (it->second );
653+ config.erase (it);
654+ return found;
655+ }
656+ return std::nullopt ;
657+ }
658+
650659bool ov::npuw::util::has_input (const std::shared_ptr<ov::Model>& model, const std::string& name) {
651660 auto inputs = model->inputs ();
652661 auto it = std::find_if (inputs.begin (), inputs.end (), [&](const auto & port) {
@@ -837,11 +846,21 @@ std::shared_ptr<ov::op::Op> get_last_token_pooling_op(std::shared_ptr<ov::Model>
837846 return std::make_shared<op::v8::Gather>(last_hidden_state_node, subtract, one, 1 );
838847}
839848
849+ std::shared_ptr<ov::op::Op> normalize_output (std::shared_ptr<ov::op::Op> last_hidden_state_node) {
850+ using namespace ov ;
851+
852+ auto axis_const = std::make_shared<op::v0::Constant>(ov::element::i32 , ov::Shape{1 }, std::vector{1 });
853+ return std::make_shared<op::v0::NormalizeL2>(last_hidden_state_node,
854+ axis_const,
855+ static_cast <float >(1e-7 ),
856+ op::EpsMode::MAX);
857+ }
858+
840859} // namespace
841860
842861void ov::npuw::util::create_text_embedding_post_model (std::shared_ptr<ov::Model> model,
843862 std::shared_ptr<ov::Model>& post_model,
844- std::optional< ov::Any>& post_type_any ) {
863+ ov::AnyMap& config ) {
845864 auto output_node = model->outputs ()[0 ];
846865 auto input_param =
847866 std::make_shared<ov::op::v0::Parameter>(output_node.get_element_type (), output_node.get_partial_shape ());
@@ -850,7 +869,9 @@ void ov::npuw::util::create_text_embedding_post_model(std::shared_ptr<ov::Model>
850869 auto attention_mask = std::make_shared<ov::op::v0::Parameter>(ov::element::i64 , ov::PartialShape{-1 , -1 });
851870 set_node_name (attention_mask, " attention_mask" );
852871
853- auto post_type = post_type_any.value_or (std::string (" last_token" )).as <std::string>();
872+ auto post_type_opt = pop_option (config, std::string (" NPUW_TEXT_EMBED_POST_TYPE" ));
873+ auto post_type = post_type_opt.value_or (std::string (" last_token" )).as <std::string>();
874+
854875 std::shared_ptr<ov::op::Op> post_output;
855876 if (post_type == " cls" ) {
856877 post_output = get_cls_pooling_op (input_param);
@@ -859,9 +880,14 @@ void ov::npuw::util::create_text_embedding_post_model(std::shared_ptr<ov::Model>
859880 } else if (post_type == " last_token" ) {
860881 post_output = get_last_token_pooling_op (model, input_param, attention_mask);
861882 }
862-
863883 OPENVINO_ASSERT (post_output != nullptr );
864884
885+ auto is_to_normalize_opt = pop_option (config, std::string (" NPUW_TEXT_EMBED_NORMALIZE" ));
886+ auto is_to_normalize = is_to_normalize_opt.value_or (true ).as <bool >();
887+ if (is_to_normalize) {
888+ post_output = normalize_output (post_output);
889+ }
890+
865891 auto result_node = std::make_shared<ov::op::v0::Result>(post_output);
866892 post_model =
867893 std::make_shared<ov::Model>(ov::OutputVector{result_node}, ov::ParameterVector{input_param, attention_mask});
0 commit comments