@@ -946,9 +946,9 @@ dpct_memcpy(sycl::ext::oneapi::experimental::image_mem_handle src,
946946 sycl::range<3 >(w_offset_src / ele_size, h_offset_src, 0 );
947947 const auto dest_offset = sycl::range<3 >(0 , 0 , 0 );
948948 const auto dest_extend = sycl::range<3 >(p / ele_size, 0 , 0 );
949- const auto copy_extend = sycl::range<3 >(w / ele_size, h, 0 );
949+ const auto copy_extend = sycl::range<3 >(w / ele_size, h, 1 );
950950 return q.ext_oneapi_copy (src, src_offset, desc_src, dest, dest_offset,
951- dest_extend , copy_extend);
951+ desc_src, p , copy_extend);
952952}
953953
954954static inline std::vector<sycl::event> dpct_memcpy_to_host (
@@ -966,7 +966,7 @@ static inline std::vector<sycl::event> dpct_memcpy_to_host(
966966 const auto dest_offset = sycl::range<3 >(offset_dest / ele_size, 0 , 0 );
967967 const auto dest_extend = sycl::range<3 >(0 , 0 , 0 );
968968 const auto copy_extend =
969- sycl::range<3 >((w - w_offset_src) / ele_size, 1 , 0 );
969+ sycl::range<3 >((w - w_offset_src) / ele_size, 1 , 1 );
970970 event_list.push_back (q.ext_oneapi_copy (src, src_offset, desc_src,
971971 dest_host_ptr, dest_offset,
972972 dest_extend, copy_extend));
@@ -979,7 +979,7 @@ static inline std::vector<sycl::event> dpct_memcpy_to_host(
979979 sycl::range<3 >(w_offset_src / ele_size, h_offset_src, 0 );
980980 const auto dest_offset = sycl::range<3 >(offset_dest / ele_size, 0 , 0 );
981981 const auto dest_extend = sycl::range<3 >(0 , 0 , 0 );
982- const auto copy_extend = sycl::range<3 >((s - offset_dest) / ele_size, 1 , 0 );
982+ const auto copy_extend = sycl::range<3 >((s - offset_dest) / ele_size, 1 , 1 );
983983 event_list.push_back (q.ext_oneapi_copy (src, src_offset, desc_src,
984984 dest_host_ptr, dest_offset,
985985 dest_extend, copy_extend));
@@ -1018,7 +1018,7 @@ dpct_memcpy(const void *src,
10181018 const auto src_extend = sycl::range<3 >(p / ele_size, 0 , 0 );
10191019 const auto dest_offset =
10201020 sycl::range<3 >(w_offset_dest / ele_size, h_offset_dest, 0 );
1021- const auto copy_extend = sycl::range<3 >(w / ele_size, h, 0 );
1021+ const auto copy_extend = sycl::range<3 >(w / ele_size, h, 1 );
10221022 // TODO: Remove const_cast after refining the signature of ext_oneapi_copy.
10231023 return q.ext_oneapi_copy (const_cast <void *>(src), src_offset, src_extend,
10241024 dest, dest_offset, desc_dest, copy_extend);
@@ -1039,7 +1039,7 @@ static inline std::vector<sycl::event> dpct_memcpy_from_host(
10391039 const auto dest_offset =
10401040 sycl::range<3 >(w_offset_dest / ele_size, h_offset_dest, 0 );
10411041 const auto copy_extend =
1042- sycl::range<3 >((w - w_offset_dest) / ele_size, 1 , 0 );
1042+ sycl::range<3 >((w - w_offset_dest) / ele_size, 1 , 1 );
10431043 // TODO: Remove const_cast after refining the signature of ext_oneapi_copy.
10441044 event_list.push_back (q.ext_oneapi_copy (
10451045 const_cast <void *>(src_host_ptr), src_offset, src_extend, dest,
@@ -1053,7 +1053,7 @@ static inline std::vector<sycl::event> dpct_memcpy_from_host(
10531053 const auto src_extend = sycl::range<3 >(0 , 0 , 0 );
10541054 const auto dest_offset =
10551055 sycl::range<3 >(w_offset_dest / ele_size, h_offset_dest, 0 );
1056- const auto copy_extend = sycl::range<3 >((s - offset_src) / ele_size, 1 , 0 );
1056+ const auto copy_extend = sycl::range<3 >((s - offset_src) / ele_size, 1 , 1 );
10571057 // TODO: Remove const_cast after refining the signature of ext_oneapi_copy.
10581058 event_list.push_back (q.ext_oneapi_copy (
10591059 const_cast <void *>(src_host_ptr), src_offset, src_extend, dest,
@@ -1098,7 +1098,7 @@ dpct_memcpy(const image_mem_wrapper *src, const sycl::id<3> &src_id,
10981098 sycl::range<3 >(dest.get_pitch () / ele_size, dest.get_y (), 1 );
10991099 const auto copy_extend = sycl::range<3 >(
11001100 copy_x_size_byte != 0 ? copy_x_size_byte / ele_size : size[0 ], size[1 ],
1101- size[2 ]);
1101+ size[2 ] != 0 ? size[ 2 ] : 1 );
11021102 return q.ext_oneapi_copy (src->get_handle (), src_offset, src->get_desc (),
11031103 dest.get_data_ptr (), dest_offset, dest_extend,
11041104 copy_extend);
@@ -1894,11 +1894,19 @@ static inline void dpct_memcpy(image_mem_wrapper *dest, size_t w_offset_dest,
18941894 size_t w_offset_src, size_t h_offset_src,
18951895 size_t w, size_t h,
18961896 sycl::queue q = get_default_queue()) {
1897- auto temp = (void *)sycl::malloc_device (w * h, q);
1898- // TODO: Need change logic when sycl support image_mem to image_mem copy.
1899- dpct_memcpy (temp, w, src, w_offset_src, h_offset_src, w, h, q);
1900- dpct_memcpy (dest, w_offset_dest, h_offset_dest, temp, w, w, h, q);
1901- sycl::free (temp, q);
1897+ const auto from_ele_size = detail::get_ele_size (src->get_desc ());
1898+ const auto src_offset = sycl::range<3 >(
1899+ w_offset_src != 0 ? w_offset_src / from_ele_size : w_offset_src,
1900+ h_offset_src, 0 );
1901+ const auto to_ele_size = detail::get_ele_size (dest->get_desc ());
1902+ const auto dest_offset = sycl::range<3 >(
1903+ w_offset_dest != 0 ? w_offset_dest / to_ele_size : w_offset_dest,
1904+ h_offset_dest, 0 );
1905+ const auto copy_extent = sycl::range<3 >(w / from_ele_size, h, 1 );
1906+ q.ext_oneapi_copy (src->get_handle (), src_offset, src->get_desc (),
1907+ dest->get_handle (), dest_offset, dest->get_desc (),
1908+ copy_extent)
1909+ .wait ();
19021910}
19031911
19041912// / Synchronously copies from image memory to the image memory, The function
@@ -1916,11 +1924,19 @@ static inline void dpct_memcpy(image_mem_wrapper *dest, size_t w_offset_dest,
19161924 const image_mem_wrapper *src,
19171925 size_t w_offset_src, size_t h_offset_src,
19181926 size_t s, sycl::queue q = get_default_queue()) {
1919- auto temp = (void *)sycl::malloc_device (s, q);
1920- // TODO: Need change logic when sycl support image_mem to image_mem copy.
1921- dpct_memcpy (temp, src, w_offset_src, h_offset_src, s, q);
1922- dpct_memcpy (dest, w_offset_dest, h_offset_dest, temp, s, q);
1923- sycl::free (temp, q);
1927+ const auto from_ele_size = detail::get_ele_size (src->get_desc ());
1928+ const auto src_offset = sycl::range<3 >(
1929+ w_offset_src != 0 ? w_offset_src / from_ele_size : w_offset_src,
1930+ h_offset_src, 0 );
1931+ const auto to_ele_size = detail::get_ele_size (dest->get_desc ());
1932+ const auto dest_offset = sycl::range<3 >(
1933+ w_offset_dest != 0 ? w_offset_dest / to_ele_size : w_offset_dest,
1934+ h_offset_dest, 0 );
1935+ const auto copy_extent = sycl::range<3 >(s / from_ele_size, 1 , 1 );
1936+ q.ext_oneapi_copy (src->get_handle (), src_offset, src->get_desc (),
1937+ dest->get_handle (), dest_offset, dest->get_desc (),
1938+ copy_extent)
1939+ .wait ();
19241940}
19251941// A wrapper for sycl fetch_image function for the byte addressing image.
19261942template <typename DataT, typename HintT = DataT, typename CoordT>
0 commit comments