From eba47dd2273ab29f5579eb1e463dfc17ec8dbab6 Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad <vahid.tavanashad@intel.com>
Date: Tue, 11 Feb 2025 08:07:25 -0800
Subject: [PATCH 01/10] implement bitwise_count

---
 dpnp/backend/extensions/ufunc/CMakeLists.txt  |   1 +
 .../elementwise_functions/bitwise_count.cpp   | 137 +++++++++++
 .../elementwise_functions/bitwise_count.hpp   |  35 +++
 .../ufunc/elementwise_functions/common.cpp    |   2 +
 .../elementwise_functions/bitwise_count.hpp   |  54 +++++
 dpnp/dpnp_iface_bitwise.py                    |  68 ++++++
 dpnp/tests/test_bitwise.py                    | 214 +++++++-----------
 dpnp/tests/test_sycl_queue.py                 | 143 +++++++-----
 dpnp/tests/test_usm_type.py                   |  17 +-
 9 files changed, 474 insertions(+), 197 deletions(-)
 create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp
 create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.hpp
 create mode 100644 dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp

diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt
index 5f892506b81c..d363910f74df 100644
--- a/dpnp/backend/extensions/ufunc/CMakeLists.txt
+++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt
@@ -24,6 +24,7 @@
 # *****************************************************************************
 
 set(_elementwise_sources
+    ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/bitwise_count.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/common.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/degrees.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fabs.cpp
diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp
new file mode 100644
index 000000000000..2fe60d2a5efe
--- /dev/null
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp
@@ -0,0 +1,137 @@
+//*****************************************************************************
+// Copyright (c) 2025, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#include <sycl/sycl.hpp>
+
+#include "dpctl4pybind11.hpp"
+
+#include "bitwise_count.hpp"
+#include "kernels/elementwise_functions/bitwise_count.hpp"
+#include "populate.hpp"
+
+// include a local copy of elementwise common header from dpctl tensor:
+// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp
+// TODO: replace by including dpctl header once available
+#include "../../elementwise_functions/elementwise_functions.hpp"
+
+// dpctl tensor headers
+#include "kernels/elementwise_functions/common.hpp"
+#include "utils/type_dispatch.hpp"
+
+namespace dpnp::extensions::ufunc
+{
+namespace py = pybind11;
+namespace py_int = dpnp::extensions::py_internal;
+
+namespace impl
+{
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+/**
+ * @brief A factory to define pairs of supported types for which
+ * the bitwise_count kernel (built on sycl::popcount) is available.
+ *
+ * @tparam T Type of input vector `a`; result vector `y` is std::uint8_t.
+ */
+template <typename T>
+struct OutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, std::uint8_t, std::uint8_t>,
+        td_ns::TypeMapResultEntry<T, std::int8_t, std::uint8_t>,
+        td_ns::TypeMapResultEntry<T, std::uint16_t, std::uint8_t>,
+        td_ns::TypeMapResultEntry<T, std::int16_t, std::uint8_t>,
+        td_ns::TypeMapResultEntry<T, std::uint32_t, std::uint8_t>,
+        td_ns::TypeMapResultEntry<T, std::int32_t, std::uint8_t>,
+        td_ns::TypeMapResultEntry<T, std::uint64_t, std::uint8_t>,
+        td_ns::TypeMapResultEntry<T, std::int64_t, std::uint8_t>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+};
+
+using dpnp::kernels::bitwise_count::BitwiseCountFunctor;
+
+template <typename argT,
+          typename resT = argT,
+          unsigned int vec_sz = 4,
+          unsigned int n_vecs = 2,
+          bool enable_sg_loadstore = true>
+using ContigFunctor =
+    ew_cmn_ns::UnaryContigFunctor<argT,
+                                  resT,
+                                  BitwiseCountFunctor<argT, resT>,
+                                  vec_sz,
+                                  n_vecs,
+                                  enable_sg_loadstore>;
+
+template <typename argTy, typename resTy, typename IndexerT>
+using StridedFunctor =
+    ew_cmn_ns::UnaryStridedFunctor<argTy,
+                                   resTy,
+                                   IndexerT,
+                                   BitwiseCountFunctor<argTy, resTy>>;
+
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+static unary_contig_impl_fn_ptr_t
+    bitwise_count_contig_dispatch_vector[td_ns::num_types];
+static int bitwise_count_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    bitwise_count_strided_dispatch_vector[td_ns::num_types];
+
+MACRO_POPULATE_DISPATCH_VECTORS(bitwise_count);
+} // namespace impl
+
+void init_bitwise_count(py::module_ m)
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+    {
+        impl::populate_bitwise_count_dispatch_vectors();
+        using impl::bitwise_count_contig_dispatch_vector;
+        using impl::bitwise_count_output_typeid_vector;
+        using impl::bitwise_count_strided_dispatch_vector;
+        // captureless lambdas: they outlive this scope once bound below
+        auto bitwise_count_pyapi = [](const arrayT &src, const arrayT &dst,
+                                      sycl::queue &exec_q,
+                                      const event_vecT &depends = {}) {
+            return py_int::py_unary_ufunc(
+                src, dst, exec_q, depends, bitwise_count_output_typeid_vector,
+                bitwise_count_contig_dispatch_vector,
+                bitwise_count_strided_dispatch_vector);
+        };
+        m.def("_bitwise_count", bitwise_count_pyapi, "", py::arg("src"),
+              py::arg("dst"), py::arg("sycl_queue"),
+              py::arg("depends") = py::list());
+        // result-type query backed by the same output type-id table
+        auto bitwise_count_result_type_pyapi = [](const py::dtype &dtype) {
+            return py_int::py_unary_ufunc_result_type(
+                dtype, bitwise_count_output_typeid_vector);
+        };
+        m.def("_bitwise_count_result_type", bitwise_count_result_type_pyapi);
+    }
+}
+} // namespace dpnp::extensions::ufunc
diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.hpp
new file mode 100644
index 000000000000..53afc55f2709
--- /dev/null
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.hpp
@@ -0,0 +1,35 @@
+//*****************************************************************************
+// Copyright (c) 2025, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpnp::extensions::ufunc
+{
+void init_bitwise_count(py::module_ m);
+} // namespace dpnp::extensions::ufunc
diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp
index f9d179d5ca4e..8ff89a1b03b6 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp
@@ -25,6 +25,7 @@
 
 #include <pybind11/pybind11.h>
 
+#include "bitwise_count.hpp"
 #include "degrees.hpp"
 #include "fabs.hpp"
 #include "fix.hpp"
@@ -52,6 +53,7 @@ namespace dpnp::extensions::ufunc
  */
 void init_elementwise_functions(py::module_ m)
 {
+    init_bitwise_count(m);
     init_degrees(m);
     init_fabs(m);
     init_fix(m);
diff --git a/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp b/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp
new file mode 100644
index 000000000000..cccab02c2403
--- /dev/null
+++ b/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp
@@ -0,0 +1,54 @@
+//*****************************************************************************
+// Copyright (c) 2025, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <sycl/sycl.hpp>
+
+namespace dpnp::kernels::bitwise_count
+{
+template <typename argT, typename resT>
+struct BitwiseCountFunctor
+{
+    // is function constant for given argT
+    using is_constant = typename std::false_type;
+    // is function defined for sycl::vec
+    using supports_vec = typename std::false_type;
+    // do both argT and resT support subgroup store/load operation
+    using supports_sg_loadstore = typename std::true_type;
+
+    // Returns the number of 1-bits in |x|. The magnitude is formed in
+    // unsigned arithmetic, as sycl::abs is undefined when |x| does not fit.
+    resT operator()(const argT &x) const
+    {
+        using uT = std::make_unsigned_t<argT>;
+        const uT ux = static_cast<uT>(x);
+        if constexpr (std::is_signed_v<argT>) {
+            return sycl::popcount(x < 0 ? static_cast<uT>(uT{0} - ux) : ux);
+        }
+        return sycl::popcount(ux);
+    }
+};
+} // namespace dpnp::kernels::bitwise_count
diff --git a/dpnp/dpnp_iface_bitwise.py b/dpnp/dpnp_iface_bitwise.py
index f7f19f42a085..462bde846d5a 100644
--- a/dpnp/dpnp_iface_bitwise.py
+++ b/dpnp/dpnp_iface_bitwise.py
@@ -43,11 +43,13 @@
 import dpctl.tensor._tensor_elementwise_impl as ti
 import numpy
 
+import dpnp.backend.extensions.ufunc._ufunc_impl as ufi
 from dpnp.dpnp_algo.dpnp_elementwise_common import DPNPBinaryFunc, DPNPUnaryFunc
 
 __all__ = [
     "binary_repr",
     "bitwise_and",
+    "bitwise_count",
     "bitwise_invert",
     "bitwise_left_shift",
     "bitwise_not",
@@ -84,6 +86,7 @@ def binary_repr(num, width=None):
         at least a sufficient number of bits for `num` to be represented in the
         designated form. If the `width` value is insufficient, an error is
         raised.
+
         Default: ``None``.
 
     Returns
@@ -145,9 +148,11 @@ def binary_repr(num, width=None):
 out : {None, dpnp.ndarray, usm_ndarray}, optional
     Output array to populate.
     Array must have the correct shape and the expected data type.
+
     Default: ``None``.
 order : {"C", "F", "A", "K"}, optional
     Memory layout of the newly output array, if parameter `out` is ``None``.
+
     Default: ``"K"``.
 
 Returns
@@ -212,6 +217,59 @@ def binary_repr(num, width=None):
 )
 
 
+_BITWISE_COUNT_DOCSTRING = """
+Computes the number of 1-bits in the absolute value of `x`.
+
+For full documentation refer to :obj:`numpy.bitwise_count`.
+
+Parameters
+----------
+x : {dpnp.ndarray, usm_ndarray}
+    Input array, expected to have integer or boolean data type.
+out : {None, dpnp.ndarray, usm_ndarray}, optional
+    Output array to populate.
+    Array must have the correct shape and the expected data type.
+
+    Default: ``None``.
+order : {"C", "F", "A", "K"}, optional
+    Memory layout of the newly output array, if parameter `out` is ``None``.
+
+    Default: ``"K"``.
+
+Returns
+-------
+out : dpnp.ndarray
+    The corresponding number of 1-bits in the input. Returns ``uint8`` for all
+    integer types.
+
+Limitations
+-----------
+Parameters `where` and `subok` are supported with their default values.
+Keyword argument `kwargs` is currently unsupported.
+Otherwise ``NotImplementedError`` exception will be raised.
+
+Examples
+--------
+>>> import dpnp as np
+>>> a = np.array(1023)
+>>> np.bitwise_count(a)
+array(10, dtype=uint8)
+
+>>> a = np.array([2**i - 1 for i in range(16)])
+>>> np.bitwise_count(a)
+array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
+      dtype=uint8)
+
+"""
+
+bitwise_count = DPNPUnaryFunc(
+    "bitwise_count",
+    ufi._bitwise_count_result_type,
+    ufi._bitwise_count,
+    _BITWISE_COUNT_DOCSTRING,
+)
+
+
 _BITWISE_OR_DOCSTRING = """
 Computes the bitwise OR of the underlying binary representation of each
 element `x1_i` of the input array `x1` with the respective element `x2_i`
@@ -232,9 +290,11 @@ def binary_repr(num, width=None):
 out : {None, dpnp.ndarray, usm_ndarray}, optional
     Output array to populate.
     Array must have the correct shape and the expected data type.
+
     Default: ``None``.
 order : {"C", "F", "A", "K"}, optional
     Memory layout of the newly output array, if parameter `out` is ``None``.
+
     Default: ``"K"``.
 
 Returns
@@ -310,9 +370,11 @@ def binary_repr(num, width=None):
 out : {None, dpnp.ndarray, usm_ndarray}, optional
     Output array to populate.
     Array must have the correct shape and the expected data type.
+
     Default: ``None``.
 order : {"C", "F", "A", "K"}, optional
     Memory layout of the newly output array, if parameter `out` is ``None``.
+
     Default: ``"K"``.
 
 Returns
@@ -386,9 +448,11 @@ def binary_repr(num, width=None):
 out : {None, dpnp.ndarray, usm_ndarray}, optional
     Output array to populate.
     Array must have the correct shape and the expected data type.
+
     Default: ``None``.
 order : {"C", "F", "A", "K"}, optional
     Memory layout of the newly output array, if parameter `out` is ``None``.
+
     Default: ``"K"``.
 
 Returns
@@ -472,9 +536,11 @@ def binary_repr(num, width=None):
 out : {None, dpnp.ndarray, usm_ndarray}, optional
     Output array to populate.
     Array must have the correct shape and the expected data type.
+
     Default: ``None``.
 order : {"C", "F", "A", "K"}, optional
     Memory layout of the newly output array, if parameter `out` is ``None``.
+
     Default: ``"K"``.
 Returns
 -------
@@ -549,9 +615,11 @@ def binary_repr(num, width=None):
 out : {None, dpnp.ndarray, usm_ndarray}, optional
     Output array to populate.
     Array must have the correct shape and the expected data type.
+
     Default: ``None``.
 order : {"C", "F", "A", "K"}, optional
     Memory layout of the newly output array, if parameter `out` is ``None``.
+
     Default: ``"K"``.
 
 Returns
diff --git a/dpnp/tests/test_bitwise.py b/dpnp/tests/test_bitwise.py
index 220a7a1eacde..a7c6a34a1f6a 100644
--- a/dpnp/tests/test_bitwise.py
+++ b/dpnp/tests/test_bitwise.py
@@ -2,7 +2,7 @@
 import pytest
 from numpy.testing import assert_array_equal
 
-import dpnp as inp
+import dpnp
 
 from .helper import assert_dtype_allclose, get_integer_dtypes
 
@@ -23,167 +23,67 @@
         3,
     ],
 )
-@pytest.mark.parametrize("dtype", [inp.bool] + get_integer_dtypes())
-class TestBitwise:
+@pytest.mark.parametrize("dtype", [dpnp.bool] + get_integer_dtypes())
+class TestBitwiseBinary:
     @staticmethod
     def array_or_scalar(xp, data, dtype=None):
         if numpy.isscalar(data):
-            if dtype == inp.bool:
+            if dtype == dpnp.bool:
                 return numpy.dtype(dtype).type(data)
             return data
 
         return xp.array(data, dtype=dtype)
 
-    def _test_unary_int(self, name, data, dtype):
-        if numpy.isscalar(data):
-            pytest.skip("Input can't be scalar")
-        dp_a = self.array_or_scalar(inp, data, dtype=dtype)
-        result = getattr(inp, name)(dp_a)
-
-        np_a = self.array_or_scalar(numpy, data, dtype=dtype)
-        expected = getattr(numpy, name)(np_a)
-
-        assert_array_equal(result, expected)
-        return (dp_a, np_a)
-
-    def _test_binary_int(self, name, lhs, rhs, dtype):
-        if name in ("left_shift", "right_shift") and dtype == inp.bool:
-            pytest.skip("A shift operation isn't implemented for bool type")
-        elif numpy.isscalar(lhs) and numpy.isscalar(rhs):
+    def _test_binary(self, name, lhs, rhs, dtype):
+        if numpy.isscalar(lhs) and numpy.isscalar(rhs):
             pytest.skip("Both inputs can't be scalars")
 
-        dp_a = self.array_or_scalar(inp, lhs, dtype=dtype)
-        dp_b = self.array_or_scalar(inp, rhs, dtype=dtype)
-        result = getattr(inp, name)(dp_a, dp_b)
+        ia = self.array_or_scalar(dpnp, lhs, dtype=dtype)
+        ib = self.array_or_scalar(dpnp, rhs, dtype=dtype)
+        a = self.array_or_scalar(numpy, lhs, dtype=dtype)
+        b = self.array_or_scalar(numpy, rhs, dtype=dtype)
 
-        np_a = self.array_or_scalar(numpy, lhs, dtype=dtype)
-        np_b = self.array_or_scalar(numpy, rhs, dtype=dtype)
-        expected = getattr(numpy, name)(np_a, np_b)
+        result = getattr(dpnp, name)(ia, ib)
+        expected = getattr(numpy, name)(a, b)
+        assert_array_equal(result, expected)
 
+        iout = dpnp.empty_like(result)
+        result = getattr(dpnp, name)(ia, ib, out=iout)
+        assert result is iout
         assert_array_equal(result, expected)
-        return (dp_a, dp_b, np_a, np_b)
+
+        return (ia, ib, a, b)
 
     def test_bitwise_and(self, lhs, rhs, dtype):
-        dp_a, dp_b, np_a, np_b = self._test_binary_int(
-            "bitwise_and", lhs, rhs, dtype
-        )
-        assert_array_equal(dp_a & dp_b, np_a & np_b)
-
-        if (
-            not (inp.isscalar(dp_a) or inp.isscalar(dp_b))
-            and dp_a.shape == dp_b.shape
-        ):
-            dp_a &= dp_b
-            np_a &= np_b
-            assert_array_equal(dp_a, np_a)
+        ia, ib, a, b = self._test_binary("bitwise_and", lhs, rhs, dtype)
+        assert_array_equal(ia & ib, a & b)
 
     def test_bitwise_or(self, lhs, rhs, dtype):
-        dp_a, dp_b, np_a, np_b = self._test_binary_int(
-            "bitwise_or", lhs, rhs, dtype
-        )
-        assert_array_equal(dp_a | dp_b, np_a | np_b)
-
-        if (
-            not (inp.isscalar(dp_a) or inp.isscalar(dp_b))
-            and dp_a.shape == dp_b.shape
-        ):
-            dp_a |= dp_b
-            np_a |= np_b
-            assert_array_equal(dp_a, np_a)
+        ia, ib, a, b = self._test_binary("bitwise_or", lhs, rhs, dtype)
+        assert_array_equal(ia | ib, a | b)
 
     def test_bitwise_xor(self, lhs, rhs, dtype):
-        dp_a, dp_b, np_a, np_b = self._test_binary_int(
-            "bitwise_xor", lhs, rhs, dtype
-        )
-        assert_array_equal(dp_a ^ dp_b, np_a ^ np_b)
-
-        if (
-            not (inp.isscalar(dp_a) or inp.isscalar(dp_b))
-            and dp_a.shape == dp_b.shape
-        ):
-            dp_a ^= dp_b
-            np_a ^= np_b
-            assert_array_equal(dp_a, np_a)
-
-    def test_invert(self, lhs, rhs, dtype):
-        dp_a, np_a = self._test_unary_int("invert", lhs, dtype)
-        assert_array_equal(~dp_a, ~np_a)
+        ia, ib, a, b = self._test_binary("bitwise_xor", lhs, rhs, dtype)
+        assert_array_equal(ia ^ ib, a ^ b)
 
     def test_left_shift(self, lhs, rhs, dtype):
-        dp_a, dp_b, np_a, np_b = self._test_binary_int(
-            "left_shift", lhs, rhs, dtype
-        )
-        assert_array_equal(dp_a << dp_b, np_a << np_b)
-
-        if (
-            not (inp.isscalar(dp_a) or inp.isscalar(dp_b))
-            and dp_a.shape == dp_b.shape
-        ):
-            dp_a <<= dp_b
-            np_a <<= np_b
-            assert_array_equal(dp_a, np_a)
+        _ = self._test_binary("bitwise_left_shift", lhs, rhs, dtype)
+        ia, ib, a, b = self._test_binary("left_shift", lhs, rhs, dtype)
+        assert_array_equal(ia << ib, a << b)
 
     def test_right_shift(self, lhs, rhs, dtype):
-        dp_a, dp_b, np_a, np_b = self._test_binary_int(
-            "right_shift", lhs, rhs, dtype
-        )
-        assert_array_equal(dp_a >> dp_b, np_a >> np_b)
-
-        if (
-            not (inp.isscalar(dp_a) or inp.isscalar(dp_b))
-            and dp_a.shape == dp_b.shape
-        ):
-            dp_a >>= dp_b
-            np_a >>= np_b
-            assert_array_equal(dp_a, np_a)
-
-    def test_bitwise_aliase1(self, lhs, rhs, dtype):
-        if numpy.isscalar(lhs):
-            pytest.skip("Input can't be scalar")
-        dp_a = self.array_or_scalar(inp, lhs, dtype=dtype)
-        result1 = inp.invert(dp_a)
-        result2 = inp.bitwise_invert(dp_a)
-        assert_array_equal(result1, result2)
-
-        result2 = inp.bitwise_not(dp_a)
-        assert_array_equal(result1, result2)
-
-    def test_bitwise_aliase2(self, lhs, rhs, dtype):
-        if dtype == inp.bool:
-            pytest.skip("A shift operation isn't implemented for bool type")
-        elif numpy.isscalar(lhs) and numpy.isscalar(rhs):
-            pytest.skip("Both inputs can't be scalars")
+        _ = self._test_binary("bitwise_right_shift", lhs, rhs, dtype)
+        ia, ib, a, b = self._test_binary("right_shift", lhs, rhs, dtype)
+        assert_array_equal(ia >> ib, a >> b)
 
-        dp_a = self.array_or_scalar(inp, lhs, dtype=dtype)
-        dp_b = self.array_or_scalar(inp, rhs, dtype=dtype)
-        result1 = inp.left_shift(dp_a, dp_b)
-        result2 = inp.bitwise_left_shift(dp_a, dp_b)
-        assert_array_equal(result1, result2)
 
-        result1 = inp.right_shift(dp_a, dp_b)
-        result2 = inp.bitwise_right_shift(dp_a, dp_b)
-        assert_array_equal(result1, result2)
-
-
-@pytest.mark.parametrize("dtype", get_integer_dtypes())
-def test_invert_out(dtype):
-    np_a = numpy.arange(-5, 5, dtype=dtype)
-    dp_a = inp.array(np_a)
-
-    expected = numpy.invert(np_a)
-    dp_out = inp.empty(expected.shape, dtype=expected.dtype)
-    result = inp.invert(dp_a, out=dp_out)
-    assert result is dp_out
-    assert_dtype_allclose(result, expected)
-
-
-@pytest.mark.parametrize("dtype1", [inp.bool] + get_integer_dtypes())
-@pytest.mark.parametrize("dtype2", [inp.bool] + get_integer_dtypes())
+@pytest.mark.parametrize("dtype1", [dpnp.bool] + get_integer_dtypes())
+@pytest.mark.parametrize("dtype2", [dpnp.bool] + get_integer_dtypes())
 class TestBitwiseInplace:
     def test_bitwise_and(self, dtype1, dtype2):
         a = numpy.array([[-7, 6, -3, 2, -1], [0, -3, 4, 5, -6]], dtype=dtype1)
         b = numpy.array([5, -2, 0, 1, 0], dtype=dtype2)
-        ia, ib = inp.array(a), inp.array(b)
+        ia, ib = dpnp.array(a), dpnp.array(b)
 
         a &= True
         ia &= True
@@ -203,7 +103,7 @@ def test_bitwise_and(self, dtype1, dtype2):
     def test_bitwise_or(self, dtype1, dtype2):
         a = numpy.array([[-7, 6, -3, 2, -1], [0, -3, 4, 5, -6]], dtype=dtype1)
         b = numpy.array([5, -2, 0, 1, 0], dtype=dtype2)
-        ia, ib = inp.array(a), inp.array(b)
+        ia, ib = dpnp.array(a), dpnp.array(b)
 
         a |= False
         ia |= False
@@ -223,7 +123,7 @@ def test_bitwise_or(self, dtype1, dtype2):
     def test_bitwise_xor(self, dtype1, dtype2):
         a = numpy.array([[-7, 6, -3, 2, -1], [0, -3, 4, 5, -6]], dtype=dtype1)
         b = numpy.array([5, -2, 0, 1, 0], dtype=dtype2)
-        ia, ib = inp.array(a), inp.array(b)
+        ia, ib = dpnp.array(a), dpnp.array(b)
 
         a ^= False
         ia ^= False
@@ -231,7 +131,7 @@ def test_bitwise_xor(self, dtype1, dtype2):
 
         a = numpy.array([[-7, 6, -3, 2, -1], [0, -3, 4, 5, -6]], dtype=dtype1)
         b = numpy.array([5, -2, 0, 1, 0], dtype=dtype2)
-        ia, ib = inp.array(a), inp.array(b)
+        ia, ib = dpnp.array(a), dpnp.array(b)
         if numpy.can_cast(dtype2, dtype1, casting="same_kind"):
             a ^= b
             ia ^= ib
@@ -250,7 +150,7 @@ class TestBitwiseShiftInplace:
     def test_bitwise_left_shift(self, dtype1, dtype2):
         a = numpy.array([[-7, 6, -3, 2, -1], [0, -3, 4, 5, -6]], dtype=dtype1)
         b = numpy.array([5, 2, 0, 1, 0], dtype=dtype2)
-        ia, ib = inp.array(a), inp.array(b)
+        ia, ib = dpnp.array(a), dpnp.array(b)
 
         a <<= True
         ia <<= True
@@ -270,7 +170,7 @@ def test_bitwise_left_shift(self, dtype1, dtype2):
     def test_bitwise_right_shift(self, dtype1, dtype2):
         a = numpy.array([[-7, 6, -3, 2, -1], [0, -3, 4, 5, -6]], dtype=dtype1)
         b = numpy.array([5, 2, 0, 1, 0], dtype=dtype2)
-        ia, ib = inp.array(a), inp.array(b)
+        ia, ib = dpnp.array(a), dpnp.array(b)
 
         a >>= True
         ia >>= True
@@ -286,3 +186,41 @@ def test_bitwise_right_shift(self, dtype1, dtype2):
 
             with pytest.raises(ValueError):
                 ia >>= ib
+
+
+@pytest.mark.parametrize(
+    "val",
+    [
+        [[-7, -6, -5, -4, -3, -2, -1], [0, 1, 2, 3, 4, 5, 6]],
+        [-3, -2, -1, 0, 1, 2, 3],
+    ],
+)
+@pytest.mark.parametrize("dtype", [dpnp.bool] + get_integer_dtypes())
+class TestBitwiseUnary:
+    """
+    Compare unary bitwise functions of dpnp with their numpy counterparts.
+    """
+
+    def _test_unary(self, name, data, dtype):
+        a = numpy.array(data, dtype=dtype)
+        ia = dpnp.array(a)
+
+        result = getattr(dpnp, name)(ia)
+        expected = getattr(numpy, name)(a)
+        assert_array_equal(result, expected)
+
+        iout = dpnp.empty_like(result)
+        result = getattr(dpnp, name)(ia, out=iout)
+        assert result is iout
+        assert_array_equal(result, expected)
+
+        return (ia, a)
+
+    def test_bitwise_count(self, val, dtype):
+        _ = self._test_unary("bitwise_count", val, dtype)
+
+    def test_invert(self, val, dtype):
+        _ = self._test_unary("invert", val, dtype)
+        _ = self._test_unary("bitwise_invert", val, dtype)
+        ia, a = self._test_unary("bitwise_not", val, dtype)
+        assert_array_equal(~ia, ~a)
diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py
index c1041760823d..a7018ac2e000 100644
--- a/dpnp/tests/test_sycl_queue.py
+++ b/dpnp/tests/test_sycl_queue.py
@@ -580,6 +580,51 @@ def test_1in_1out(func, data, device):
     assert_sycl_queue_equal(result_queue, expected_queue)
 
 
+@pytest.mark.parametrize(
+    "op", ["bitwise_count", "bitwise_not"], ids=["bitwise_count", "bitwise_not"]
+)
+@pytest.mark.parametrize(
+    "device",
+    valid_devices,
+    ids=[device.filter_string for device in valid_devices],
+)
+def test_bitwise_op_1in(op, device):
+    x = dpnp.arange(-10, 10, device=device)
+    z = getattr(dpnp, op)(x)
+
+    assert_sycl_queue_equal(x.sycl_queue, z.sycl_queue)
+
+
+@pytest.mark.parametrize(
+    "op",
+    ["bitwise_and", "bitwise_or", "bitwise_xor", "left_shift", "right_shift"],
+    ids=[
+        "bitwise_and",
+        "bitwise_or",
+        "bitwise_xor",
+        "left_shift",
+        "right_shift",
+    ],
+)
+@pytest.mark.parametrize(
+    "device",
+    valid_devices,
+    ids=[device.filter_string for device in valid_devices],
+)
+def test_bitwise_op_2in(op, device):
+    x = dpnp.arange(25, device=device)
+    y = dpnp.arange(25, device=device)[::-1]
+
+    z = getattr(dpnp, op)(x, y)
+    zx = getattr(dpnp, op)(x, 7)
+    zy = getattr(dpnp, op)(12, y)
+
+    assert_sycl_queue_equal(z.sycl_queue, x.sycl_queue)
+    assert_sycl_queue_equal(z.sycl_queue, y.sycl_queue)
+    assert_sycl_queue_equal(zx.sycl_queue, x.sycl_queue)
+    assert_sycl_queue_equal(zy.sycl_queue, y.sycl_queue)
+
+
 @pytest.mark.parametrize(
     "op",
     [
@@ -603,15 +648,51 @@ def test_logic_op_1in(op, device):
         [-dpnp.inf, -1.0, 0.0, 1.0, dpnp.inf, dpnp.nan], device=device
     )
     result = getattr(dpnp, op)(x)
+    assert_sycl_queue_equal(x.sycl_queue, result.sycl_queue)
 
-    x_orig = dpnp.asnumpy(x)
-    expected = getattr(numpy, op)(x_orig)
-    assert_dtype_allclose(result, expected)
 
-    expected_queue = x.sycl_queue
-    result_queue = result.sycl_queue
+@pytest.mark.parametrize(
+    "op",
+    [
+        "array_equal",
+        "array_equiv",
+        "equal",
+        "greater",
+        "greater_equal",
+        "isclose",
+        "less",
+        "less_equal",
+        "logical_and",
+        "logical_or",
+        "logical_xor",
+        "not_equal",
+    ],
+)
+@pytest.mark.parametrize(
+    "device",
+    valid_devices,
+    ids=[device.filter_string for device in valid_devices],
+)
+def test_logic_op_2in(op, device):
+    x1 = dpnp.array(
+        [-dpnp.inf, -1.0, 0.0, 1.0, dpnp.inf, dpnp.nan], device=device
+    )
+    x2 = dpnp.array(
+        [dpnp.inf, 1.0, 0.0, -1.0, -dpnp.inf, dpnp.nan], device=device
+    )
+    # Remove NaN value from input arrays because numpy raises RuntimeWarning
+    if op in [
+        "greater",
+        "greater_equal",
+        "less",
+        "less_equal",
+    ]:
+        x1 = x1[:-1]
+        x2 = x2[:-1]
 
-    assert_sycl_queue_equal(result_queue, expected_queue)
+    result = getattr(dpnp, op)(x1, x2)
+    assert_sycl_queue_equal(result.sycl_queue, x1.sycl_queue)
+    assert_sycl_queue_equal(result.sycl_queue, x2.sycl_queue)
 
 
 @pytest.mark.parametrize(
@@ -858,56 +939,6 @@ def test_2in_1out(func, data1, data2, device):
     assert_sycl_queue_equal(result.sycl_queue, x2.sycl_queue)
 
 
-@pytest.mark.parametrize(
-    "op",
-    [
-        "array_equal",
-        "array_equiv",
-        "equal",
-        "greater",
-        "greater_equal",
-        "isclose",
-        "less",
-        "less_equal",
-        "logical_and",
-        "logical_or",
-        "logical_xor",
-        "not_equal",
-    ],
-)
-@pytest.mark.parametrize(
-    "device",
-    valid_devices,
-    ids=[device.filter_string for device in valid_devices],
-)
-def test_logic_op_2in(op, device):
-    x1 = dpnp.array(
-        [-dpnp.inf, -1.0, 0.0, 1.0, dpnp.inf, dpnp.nan], device=device
-    )
-    x2 = dpnp.array(
-        [dpnp.inf, 1.0, 0.0, -1.0, -dpnp.inf, dpnp.nan], device=device
-    )
-    # Remove NaN value from input arrays because numpy raises RuntimeWarning
-    if op in [
-        "greater",
-        "greater_equal",
-        "less",
-        "less_equal",
-    ]:
-        x1 = x1[:-1]
-        x2 = x2[:-1]
-    result = getattr(dpnp, op)(x1, x2)
-
-    x1_orig = dpnp.asnumpy(x1)
-    x2_orig = dpnp.asnumpy(x2)
-    expected = getattr(numpy, op)(x1_orig, x2_orig)
-
-    assert_dtype_allclose(result, expected)
-
-    assert_sycl_queue_equal(result.sycl_queue, x1.sycl_queue)
-    assert_sycl_queue_equal(result.sycl_queue, x2.sycl_queue)
-
-
 @pytest.mark.parametrize(
     "func, data, scalar",
     [
diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py
index b3452e0ccf72..610ed8bf39a8 100644
--- a/dpnp/tests/test_usm_type.py
+++ b/dpnp/tests/test_usm_type.py
@@ -394,7 +394,7 @@ def test_tril_triu(func, usm_type):
     ],
 )
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
-def test_coerced_usm_types_logic_op_1in(op, usm_type_x):
+def test_logic_op_1in(op, usm_type_x):
     x = dp.arange(-10, 10, usm_type=usm_type_x)
     res = getattr(dp, op)(x)
 
@@ -420,7 +420,7 @@ def test_coerced_usm_types_logic_op_1in(op, usm_type_x):
 )
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
 @pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
-def test_coerced_usm_types_logic_op_2in(op, usm_type_x, usm_type_y):
+def test_logic_op_2in(op, usm_type_x, usm_type_y):
     x = dp.arange(100, usm_type=usm_type_x)
     y = dp.arange(100, usm_type=usm_type_y)[::-1]
 
@@ -433,6 +433,17 @@ def test_coerced_usm_types_logic_op_2in(op, usm_type_x, usm_type_y):
     assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
 
 
+@pytest.mark.parametrize(
+    "op", ["bitwise_count", "bitwise_not"], ids=["bitwise_count", "bitwise_not"]
+)
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+def test_bitwise_op_1in(op, usm_type_x):
+    x = dp.arange(-10, 10, usm_type=usm_type_x)
+    res = getattr(dp, op)(x)
+
+    assert x.usm_type == res.usm_type == usm_type_x
+
+
 @pytest.mark.parametrize(
     "op",
     ["bitwise_and", "bitwise_or", "bitwise_xor", "left_shift", "right_shift"],
@@ -446,7 +457,7 @@ def test_coerced_usm_types_logic_op_2in(op, usm_type_x, usm_type_y):
 )
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
 @pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
-def test_coerced_usm_types_bitwise_op(op, usm_type_x, usm_type_y):
+def test_bitwise_op_2in(op, usm_type_x, usm_type_y):
     x = dp.arange(25, usm_type=usm_type_x)
     y = dp.arange(25, usm_type=usm_type_y)[::-1]
 

From bf95b3866c89c3f67bb369ee5789a104a359efb3 Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad <vahid.tavanashad@intel.com>
Date: Tue, 11 Feb 2025 12:08:19 -0800
Subject: [PATCH 02/10] fix pre-commit: disable pylint no-name-in-module

---
 dpnp/dpnp_iface_bitwise.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpnp/dpnp_iface_bitwise.py b/dpnp/dpnp_iface_bitwise.py
index 462bde846d5a..c79555f9b377 100644
--- a/dpnp/dpnp_iface_bitwise.py
+++ b/dpnp/dpnp_iface_bitwise.py
@@ -38,7 +38,7 @@
 """
 
 # pylint: disable=protected-access
-
+# pylint: disable=no-name-in-module
 
 import dpctl.tensor._tensor_elementwise_impl as ti
 import numpy

From a660cb7d91e9cfe6509a40098e85cc80e96f66c8 Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad <vahid.tavanashad@intel.com>
Date: Tue, 11 Feb 2025 13:30:22 -0800
Subject: [PATCH 03/10] using std::is_unsigned_v

---
 dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp b/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp
index cccab02c2403..f4240f028c66 100644
--- a/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp
+++ b/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp
@@ -43,7 +43,7 @@ struct BitwiseCountFunctor
 
     resT operator()(const argT &x) const
     {
-        if constexpr (std::is_unsigned<argT>::value) {
+        if constexpr (std::is_unsigned_v<argT>) {
             return sycl::popcount(x);
         }
         else {

From 1dafb3b18dc98666e1ba21147ffd5ad1bd22aa0e Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad <vahid.tavanashad@intel.com>
Date: Tue, 11 Feb 2025 13:31:43 -0800
Subject: [PATCH 04/10] update tests for numpy>=2

---
 dpnp/tests/test_bitwise.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/dpnp/tests/test_bitwise.py b/dpnp/tests/test_bitwise.py
index a7c6a34a1f6a..71821dd76d00 100644
--- a/dpnp/tests/test_bitwise.py
+++ b/dpnp/tests/test_bitwise.py
@@ -4,7 +4,8 @@
 
 import dpnp
 
-from .helper import assert_dtype_allclose, get_integer_dtypes
+from .helper import assert_dtype_allclose, get_integer_dtypes, numpy_version
+from .third_party.cupy import testing
 
 
 @pytest.mark.parametrize(
@@ -67,12 +68,14 @@ def test_bitwise_xor(self, lhs, rhs, dtype):
         assert_array_equal(ia ^ ib, a ^ b)
 
     def test_left_shift(self, lhs, rhs, dtype):
-        _ = self._test_binary("bitwise_left_shift", lhs, rhs, dtype)
+        if numpy_version() >= "2.0.0":
+            _ = self._test_binary("bitwise_left_shift", lhs, rhs, dtype)
         ia, ib, a, b = self._test_binary("left_shift", lhs, rhs, dtype)
         assert_array_equal(ia << ib, a << b)
 
     def test_right_shift(self, lhs, rhs, dtype):
-        _ = self._test_binary("bitwise_right_shift", lhs, rhs, dtype)
+        if numpy_version() >= "2.0.0":
+            _ = self._test_binary("bitwise_right_shift", lhs, rhs, dtype)
         ia, ib, a, b = self._test_binary("right_shift", lhs, rhs, dtype)
         assert_array_equal(ia >> ib, a >> b)
 
@@ -216,11 +219,13 @@ def _test_unary(self, name, data, dtype):
 
         return (ia, a)
 
+    @testing.with_requires("numpy>=2.0")
     def test_bitwise_count(self, val, dtype):
         _ = self._test_unary("bitwise_count", val, dtype)
 
     def test_invert(self, val, dtype):
-        _ = self._test_unary("invert", val, dtype)
-        _ = self._test_unary("bitwise_invert", val, dtype)
-        ia, a = self._test_unary("bitwise_not", val, dtype)
+        if numpy_version() >= "2.0.0":
+            _ = self._test_unary("bitwise_not", val, dtype)
+            _ = self._test_unary("bitwise_invert", val, dtype)
+        ia, a = self._test_unary("invert", val, dtype)
         assert_array_equal(~ia, ~a)

From b5fc64a88945520a306aa6c755f05cb07c00ee8b Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad <vahid.tavanashad@intel.com>
Date: Wed, 12 Feb 2025 06:39:48 -0800
Subject: [PATCH 05/10] remove unnecessary ids

---
 dpnp/tests/test_sycl_queue.py | 25 ++++++-------------------
 dpnp/tests/test_usm_type.py   | 31 +++++++------------------------
 2 files changed, 13 insertions(+), 43 deletions(-)

diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py
index a7018ac2e000..8b2bb4b27e41 100644
--- a/dpnp/tests/test_sycl_queue.py
+++ b/dpnp/tests/test_sycl_queue.py
@@ -580,9 +580,7 @@ def test_1in_1out(func, data, device):
     assert_sycl_queue_equal(result_queue, expected_queue)
 
 
-@pytest.mark.parametrize(
-    "op", ["bitwise_count", "bitwise_not"], ids=["bitwise_count", "bitwise_not"]
-)
+@pytest.mark.parametrize("op", ["bitwise_count", "bitwise_not"])
 @pytest.mark.parametrize(
     "device",
     valid_devices,
@@ -598,13 +596,6 @@ def test_bitwise_op_1in(op, device):
 @pytest.mark.parametrize(
     "op",
     ["bitwise_and", "bitwise_or", "bitwise_xor", "left_shift", "right_shift"],
-    ids=[
-        "bitwise_and",
-        "bitwise_or",
-        "bitwise_xor",
-        "left_shift",
-        "right_shift",
-    ],
 )
 @pytest.mark.parametrize(
     "device",
@@ -1909,11 +1900,7 @@ def test_norm(device, ord, axis):
         "(1, 0, 3)",
     ],
 )
-@pytest.mark.parametrize(
-    "mode",
-    ["r", "raw", "complete", "reduced"],
-    ids=["r", "raw", "complete", "reduced"],
-)
+@pytest.mark.parametrize("mode", ["r", "raw", "complete", "reduced"])
 @pytest.mark.parametrize(
     "device",
     valid_devices,
@@ -1945,8 +1932,8 @@ def test_qr(shape, mode, device):
     valid_devices,
     ids=[device.filter_string for device in valid_devices],
 )
-@pytest.mark.parametrize("full_matrices", [True, False], ids=["True", "False"])
-@pytest.mark.parametrize("compute_uv", [True, False], ids=["True", "False"])
+@pytest.mark.parametrize("full_matrices", [True, False])
+@pytest.mark.parametrize("compute_uv", [True, False])
 @pytest.mark.parametrize(
     "shape",
     [
@@ -2454,7 +2441,7 @@ def test_take_along_axis(data, ind, axis, device):
     valid_devices,
     ids=[device.filter_string for device in valid_devices],
 )
-@pytest.mark.parametrize("sparse", [True, False], ids=["True", "False"])
+@pytest.mark.parametrize("sparse", [True, False])
 def test_indices(device, sparse):
     sycl_queue = dpctl.SyclQueue(device)
     grid = dpnp.indices((2, 3), sparse=sparse, sycl_queue=sycl_queue)
@@ -2946,7 +2933,7 @@ def test_unique(axis, device):
         assert_sycl_queue_equal(iv_queue, ia.sycl_queue)
 
 
-@pytest.mark.parametrize("copy", [True, False], ids=["True", "False"])
+@pytest.mark.parametrize("copy", [True, False])
 @pytest.mark.parametrize(
     "device",
     valid_devices,
diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py
index 610ed8bf39a8..8660126509c1 100644
--- a/dpnp/tests/test_usm_type.py
+++ b/dpnp/tests/test_usm_type.py
@@ -372,7 +372,7 @@ def test_linspace_arrays(usm_type_start, usm_type_stop):
     )
 
 
-@pytest.mark.parametrize("func", ["tril", "triu"], ids=["tril", "triu"])
+@pytest.mark.parametrize("func", ["tril", "triu"])
 @pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types)
 def test_tril_triu(func, usm_type):
     x0 = dp.ones((3, 3), usm_type=usm_type)
@@ -433,9 +433,7 @@ def test_logic_op_2in(op, usm_type_x, usm_type_y):
     assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
 
 
-@pytest.mark.parametrize(
-    "op", ["bitwise_count", "bitwise_not"], ids=["bitwise_count", "bitwise_not"]
-)
+@pytest.mark.parametrize("op", ["bitwise_count", "bitwise_not"])
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
 def test_bitwise_op_1in(op, usm_type_x):
     x = dp.arange(-10, 10, usm_type=usm_type_x)
@@ -447,13 +445,6 @@ def test_bitwise_op_1in(op, usm_type_x):
 @pytest.mark.parametrize(
     "op",
     ["bitwise_and", "bitwise_or", "bitwise_xor", "left_shift", "right_shift"],
-    ids=[
-        "bitwise_and",
-        "bitwise_or",
-        "bitwise_xor",
-        "left_shift",
-        "right_shift",
-    ],
 )
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
 @pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
@@ -1153,7 +1144,7 @@ def test_grid(usm_type, func):
 
 
 @pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types)
-@pytest.mark.parametrize("sparse", [True, False], ids=["True", "False"])
+@pytest.mark.parametrize("sparse", [True, False])
 def test_indices_sparse(usm_type, sparse):
     x = dp.indices((2, 3), sparse=sparse, usm_type=usm_type)
     for i in x:
@@ -1469,12 +1460,8 @@ def test_inv(shape, is_empty, usm_type):
 
 
 @pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types)
-@pytest.mark.parametrize(
-    "full_matrices_param", [True, False], ids=["True", "False"]
-)
-@pytest.mark.parametrize(
-    "compute_uv_param", [True, False], ids=["True", "False"]
-)
+@pytest.mark.parametrize("full_matrices_param", [True, False])
+@pytest.mark.parametrize("compute_uv_param", [True, False])
 @pytest.mark.parametrize(
     "shape",
     [
@@ -1595,11 +1582,7 @@ def test_pinv(shape, hermitian, usm_type):
         "(1, 0, 3)",
     ],
 )
-@pytest.mark.parametrize(
-    "mode",
-    ["r", "raw", "complete", "reduced"],
-    ids=["r", "raw", "complete", "reduced"],
-)
+@pytest.mark.parametrize("mode", ["r", "raw", "complete", "reduced"])
 def test_qr(shape, mode, usm_type):
     count_elems = numpy.prod(shape)
     a = dp.arange(count_elems, usm_type=usm_type).reshape(shape)
@@ -1782,7 +1765,7 @@ def test_unique(axis, usm_type):
         assert x.usm_type == usm_type
 
 
-@pytest.mark.parametrize("copy", [True, False], ids=["True", "False"])
+@pytest.mark.parametrize("copy", [True, False])
 @pytest.mark.parametrize("usm_type_a", list_of_usm_types, ids=list_of_usm_types)
 def test_nan_to_num(copy, usm_type_a):
     a = dp.array([-dp.nan, -1, 0, 1, dp.nan], usm_type=usm_type_a)

From 220cf9afc6ad974abb3c4ddebb9ad86826196ccf Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad <vahid.tavanashad@intel.com>
Date: Wed, 12 Feb 2025 13:19:05 -0800
Subject: [PATCH 06/10] add vector implementation of the kernel

---
 .../elementwise_functions/bitwise_count.hpp   | 40 ++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp b/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp
index f4240f028c66..c25ac26cf60e 100644
--- a/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp
+++ b/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp
@@ -27,8 +27,15 @@
 
 #include <sycl/sycl.hpp>
 
+#include "dpctl4pybind11.hpp"
+
+// dpctl tensor headers
+#include "utils/type_utils.hpp"
+
 namespace dpnp::kernels::bitwise_count
 {
+namespace tu_ns = dpctl::tensor::type_utils;
+
 template <typename argT, typename resT>
 struct BitwiseCountFunctor
 {
@@ -37,7 +44,7 @@ struct BitwiseCountFunctor
     // constant value, if constant
     // constexpr resT constant_value = resT{};
     // is function defined for sycl::vec
-    using supports_vec = typename std::false_type;
+    using supports_vec = typename std::true_type;
     // do both argT and resT support subgroup store/load operation
     using supports_sg_loadstore = typename std::true_type;
 
@@ -50,5 +57,36 @@ struct BitwiseCountFunctor
             return sycl::popcount(sycl::abs(x));
         }
     }
+
+    template <int vec_sz>
+    sycl::vec<resT, vec_sz> operator()(const sycl::vec<argT, vec_sz> &x) const
+    {
+        if constexpr (std::is_unsigned_v<argT>) {
+            auto const &res_vec = sycl::popcount(x);
+
+            using deducedT = typename std::remove_cv_t<
+                std::remove_reference_t<decltype(res_vec)>>::element_type;
+
+            if constexpr (std::is_same_v<resT, deducedT>) {
+                return res_vec;
+            }
+            else {
+                return tu_ns::vec_cast<std::uint8_t, deducedT, vec_sz>(res_vec);
+            }
+        }
+        else {
+            auto const &res_vec = sycl::popcount(x);
+
+            using deducedT = typename std::remove_cv_t<
+                std::remove_reference_t<decltype(res_vec)>>::element_type;
+
+            if constexpr (std::is_same_v<resT, deducedT>) {
+                return res_vec;
+            }
+            else {
+                return tu_ns::vec_cast<std::uint8_t, deducedT, vec_sz>(res_vec);
+            }
+        }
+    }
 };
 } // namespace dpnp::kernels::bitwise_count

From f85f07a092abb114d5a414b544edcbf2ccdd9465 Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad <vahid.tavanashad@intel.com>
Date: Wed, 12 Feb 2025 13:22:40 -0800
Subject: [PATCH 07/10] remove unused include; apply abs() before popcount in signed vector branch

---
 dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp b/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp
index c25ac26cf60e..eac7971429fb 100644
--- a/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp
+++ b/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp
@@ -27,8 +27,6 @@
 
 #include <sycl/sycl.hpp>
 
-#include "dpctl4pybind11.hpp"
-
 // dpctl tensor headers
 #include "utils/type_utils.hpp"
 
@@ -75,7 +73,7 @@ struct BitwiseCountFunctor
             }
         }
         else {
-            auto const &res_vec = sycl::popcount(x);
+            auto const &res_vec = sycl::popcount(sycl::abs(x));
 
             using deducedT = typename std::remove_cv_t<
                 std::remove_reference_t<decltype(res_vec)>>::element_type;

From be9d9e6ff2e44176e53abd3c5320dec5049586d6 Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad <vahid.tavanashad@intel.com>
Date: Tue, 18 Feb 2025 06:43:43 -0800
Subject: [PATCH 08/10] remove skip leftovers

---
 dpnp/tests/test_umath.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/dpnp/tests/test_umath.py b/dpnp/tests/test_umath.py
index 72e69b428b3e..115fc32fbbae 100644
--- a/dpnp/tests/test_umath.py
+++ b/dpnp/tests/test_umath.py
@@ -78,8 +78,6 @@ def get_id(val):
 @pytest.mark.parametrize("test_cases", test_cases, ids=get_id)
 def test_umaths(test_cases):
     umath, args_str = test_cases
-    if umath in new_umaths_numpy_20:
-        pytest.skip("new umaths from numpy 2.0 are not supported yet")
 
     if umath in ["matmul", "matvec", "vecmat"]:
         sh = (4, 4)

From d021892a0fa849105f9654bb69067b82cbd0b78b Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad <vahid.tavanashad@intel.com>
Date: Tue, 4 Mar 2025 08:46:44 -0800
Subject: [PATCH 09/10] remove vector overload for sycl op

---
 .../elementwise_functions/bitwise_count.hpp   | 33 +------------------
 1 file changed, 1 insertion(+), 32 deletions(-)

diff --git a/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp b/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp
index eac7971429fb..0d42f30ae9ec 100644
--- a/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp
+++ b/dpnp/backend/kernels/elementwise_functions/bitwise_count.hpp
@@ -42,7 +42,7 @@ struct BitwiseCountFunctor
     // constant value, if constant
     // constexpr resT constant_value = resT{};
     // is function defined for sycl::vec
-    using supports_vec = typename std::true_type;
+    using supports_vec = typename std::false_type;
     // do both argT and resT support subgroup store/load operation
     using supports_sg_loadstore = typename std::true_type;
 
@@ -55,36 +55,5 @@ struct BitwiseCountFunctor
             return sycl::popcount(sycl::abs(x));
         }
     }
-
-    template <int vec_sz>
-    sycl::vec<resT, vec_sz> operator()(const sycl::vec<argT, vec_sz> &x) const
-    {
-        if constexpr (std::is_unsigned_v<argT>) {
-            auto const &res_vec = sycl::popcount(x);
-
-            using deducedT = typename std::remove_cv_t<
-                std::remove_reference_t<decltype(res_vec)>>::element_type;
-
-            if constexpr (std::is_same_v<resT, deducedT>) {
-                return res_vec;
-            }
-            else {
-                return tu_ns::vec_cast<std::uint8_t, deducedT, vec_sz>(res_vec);
-            }
-        }
-        else {
-            auto const &res_vec = sycl::popcount(sycl::abs(x));
-
-            using deducedT = typename std::remove_cv_t<
-                std::remove_reference_t<decltype(res_vec)>>::element_type;
-
-            if constexpr (std::is_same_v<resT, deducedT>) {
-                return res_vec;
-            }
-            else {
-                return tu_ns::vec_cast<std::uint8_t, deducedT, vec_sz>(res_vec);
-            }
-        }
-    }
 };
 } // namespace dpnp::kernels::bitwise_count

From 3451a1e0c169f7fb05626bc6782d6d8127c1b5f3 Mon Sep 17 00:00:00 2001
From: Vahid Tavanashad <vahid.tavanashad@intel.com>
Date: Tue, 4 Mar 2025 10:41:58 -0800
Subject: [PATCH 10/10] rename a test

---
 dpnp/tests/test_usm_type.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py
index 00f3c46d3e7a..48b21bfd023f 100644
--- a/dpnp/tests/test_usm_type.py
+++ b/dpnp/tests/test_usm_type.py
@@ -386,7 +386,7 @@ def test_bitwise_op_1in(op, usm_type):
 )
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types)
 @pytest.mark.parametrize("usm_type_y", list_of_usm_types)
-def test_bitwise_op(op, usm_type_x, usm_type_y):
+def test_bitwise_op_2in(op, usm_type_x, usm_type_y):
     x = dpnp.arange(25, usm_type=usm_type_x)
     y = dpnp.arange(25, usm_type=usm_type_y)[::-1]