
Commit c821068

Faster dr::Texture construction
The Dr.Jit texture constructor currently spends a significant amount of time zero-initializing and copying arrays when these are backed by ``dr::DynamicArray<T>`` (i.e., in scalar mode). This commit changes the constructor so that it takes a universal (T&&) reference and automatically copies or moves as needed.
1 parent 583fde5 commit c821068
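For context, a minimal usage sketch of the resulting constructor in scalar mode (the `Texture2f` alias, 2D shape, and channel count are illustrative assumptions, not part of this commit):

    using Texture2f = drjit::Texture<float, 2>;
    using TensorXf  = Texture2f::TensorXf;

    size_t shape[3] = { 512, 512, 3 };   // rows, cols, channels
    TensorXf data(drjit::empty<Texture2f::Storage>(512 * 512 * 3), 3, shape);

    Texture2f tex_copy(data);              // lvalue argument: the tensor is copied
    Texture2f tex_move(std::move(data));   // rvalue argument: the tensor is moved, no copy

With the previous `const TensorXf &` signature, both calls would have copied the underlying array.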

5 files changed (+81, -39 lines)

include/drjit/dynamic.h

+8
@@ -407,6 +407,14 @@ struct DynamicArray
         m_free = true;
     }
 
+    static DynamicArray map_(Value *value, size_t size) {
+        DynamicArray result;
+        result.m_data = value;
+        result.m_size = size;
+        result.m_free = false;
+        return result;
+    }
+
     static auto counter(size_t size) {
         uint32_array_t<DynamicArray> result;
         result.init_(size);
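A brief sketch of what the new `map_()` helper provides (buffer and sizes are illustrative): it wraps an existing pointer in a `DynamicArray` view without allocating, zero-initializing, or taking ownership of the memory.

    #include <drjit/dynamic.h>

    void map_example() {
        float buf[4] = { 1.f, 2.f, 3.f, 4.f };

        // Non-owning view of `buf`: m_free is false, so the destructor
        // will not attempt to release the pointer.
        auto view = drjit::DynamicArray<float>::map_(buf, 4);
        (void) view;   // view.size() == 4, no copy was made
    }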

include/drjit/tensor.h

+12
@@ -161,6 +161,18 @@ struct Tensor
         }
     }
 
+    Tensor(Array &&data, size_t ndim, const size_t *shape)
+        : m_array(std::move(data)), m_shape(shape, shape + ndim) {
+        size_t size = 1;
+        for (size_t i = 0; i < ndim; ++i)
+            size *= shape[i];
+        if (size != m_array.size()) {
+            drjit_fail("Tensor(): invalid size specified (%zu vs %zu)!",
+                       size, m_array.size());
+        }
+    }
+
     Tensor(const void *ptr, size_t ndim, const size_t *shape)
         : m_shape(shape, shape + ndim) {
         size_t size = 1;
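A short sketch of the new constructor (types and shape are illustrative): the array is moved into the tensor rather than copied, and the product of the shape entries must match its size.

    #include <drjit/dynamic.h>
    #include <drjit/tensor.h>
    #include <utility>

    void tensor_move_example() {
        using Array = drjit::DynamicArray<float>;

        Array data = drjit::empty<Array>(6);
        size_t shape[2] = { 2, 3 };

        // `data` is moved, not copied; if 2 * 3 != data.size(),
        // drjit_fail() reports the mismatch.
        drjit::Tensor<Array> t(std::move(data), 2, shape);
        (void) t;
    }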

include/drjit/texture.h

+58, -36
@@ -42,34 +42,34 @@ enum class CudaTextureFormat : uint32_t {
     Float16 = 1, /// Half precision storage format
 };
 
-template <typename _Storage, size_t Dimension> class Texture {
+template <typename Storage_, size_t Dimension> class Texture {
 public:
-    static constexpr bool IsCUDA = is_cuda_v<_Storage>;
-    static constexpr bool IsDiff = is_diff_v<_Storage>;
-    static constexpr bool IsDynamic = is_dynamic_v<_Storage>;
+    static constexpr bool IsCUDA = is_cuda_v<Storage_>;
+    static constexpr bool IsDiff = is_diff_v<Storage_>;
+    static constexpr bool IsDynamic = is_dynamic_v<Storage_>;
     // Only half/single-precision floating-point CUDA textures are supported
-    static constexpr bool IsHalf = std::is_same_v<scalar_t<_Storage>, drjit::half>;
-    static constexpr bool IsSingle = std::is_same_v<scalar_t<_Storage>, float>;
+    static constexpr bool IsHalf = std::is_same_v<scalar_t<Storage_>, drjit::half>;
+    static constexpr bool IsSingle = std::is_same_v<scalar_t<Storage_>, float>;
     static constexpr bool HasCudaTexture = (IsHalf || IsSingle) && IsCUDA;
     static constexpr int CudaFormat = HasCudaTexture ?
         IsHalf ? (int)CudaTextureFormat::Float16 : (int)CudaTextureFormat::Float32 : -1;
 
-    using Int32 = int32_array_t<_Storage>;
-    using UInt32 = uint32_array_t<_Storage>;
-    using Storage = std::conditional_t<IsDynamic, _Storage, DynamicArray<_Storage>>;
-    using Packet = std::conditional_t<is_jit_v<_Storage>,
-                                      DynamicArray<_Storage>, _Storage*>;
+    using Int32 = int32_array_t<Storage_>;
+    using UInt32 = uint32_array_t<Storage_>;
+    using Storage = std::conditional_t<IsDynamic, Storage_, DynamicArray<Storage_>>;
+    using Packet = std::conditional_t<is_jit_v<Storage_>,
+                                      DynamicArray<Storage_>, Storage_*>;
     using TensorXf = Tensor<Storage>;
 
 #define DR_TEX_ALLOC_PACKET(name, size)                      \
     Packet _packet;                                          \
-    _Storage* name;                                          \
+    Storage_* name;                                          \
                                                              \
     if constexpr (is_jit_v<Value>) {                         \
         _packet = empty<Packet>(m_channels_storage);         \
         name = _packet.data();                               \
     } else {                                                 \
-        name = (_Storage*) alloca(sizeof(_Storage) * size);  \
+        name = (Storage_*) alloca(sizeof(Storage_) * size);  \
         (void) _packet;                                      \
     }
 

@@ -125,15 +125,16 @@ template <typename _Storage, size_t Dimension> class Texture {
      * Both the \c filter_mode and \c wrap_mode have the same defaults and
      * behaviors as for the previous constructor.
      */
-    Texture(const TensorXf &tensor, bool use_accel = true, bool migrate = true,
+    template <typename TensorT>
+    Texture(TensorT &&tensor, bool use_accel = true, bool migrate = true,
             FilterMode filter_mode = FilterMode::Linear,
             WrapMode wrap_mode = WrapMode::Clamp) {
         if (tensor.ndim() != Dimension + 1)
             jit_raise("Texture::Texture(): tensor dimension must equal "
                       "texture dimension plus one.");
         init(tensor.shape().data(), tensor.shape(Dimension), use_accel,
              filter_mode, wrap_mode);
-        set_tensor(tensor, migrate);
+        set_tensor(std::forward<TensorT>(tensor), migrate);
     }
 
     Texture(Texture &&other) noexcept {
@@ -209,16 +210,21 @@ template <typename _Storage, size_t Dimension> class Texture {
      * When \c migrate is set to \c true on CUDA mode, the texture information
      * is *fully* migrated to GPU texture memory to avoid redundant storage.
      */
-    void set_value(const Storage &value, bool migrate=false) {
-        if constexpr (!is_jit_v<_Storage>) {
+    template <typename StorageT>
+    void set_value(StorageT &&value, bool migrate = false) {
+        static_assert(
+            std::is_same_v<std::decay_t<StorageT>, Storage>,
+            "Texture::set_value(): argument has an unsupported type!");
+
+        if constexpr (!is_jit_v<Storage_>) {
             if (value.size() != m_size)
                 jit_raise("Texture::set_value(): unexpected array size!");
-            m_value.array() = value;
+            m_value.array() = std::forward<StorageT>(value);
         } else /* JIT variant */ {
             Storage padded_value;
 
             if (m_channels_storage != m_channels) {
-                using Mask = mask_t<_Storage>;
+                using Mask = mask_t<Storage_>;
                 UInt32 idx = arange<UInt32>(m_size);
                 UInt32 pixels_idx = idx / m_channels_storage;
                 UInt32 channel_idx = idx % m_channels_storage;
@@ -230,7 +236,9 @@ template <typename _Storage, size_t Dimension> class Texture {
             }
 
             if (padded_value.size() != m_size)
-                jit_raise("Texture::set_value(): unexpected array size!");
+                jit_raise(
+                    "Texture::set_value(): unexpected array size (%zu vs %zu)!",
+                    padded_value.size(), m_size);
 
             // We can always re-compute the unpadded values from the padded
             // ones. However, if we systematically do that, users will not be
@@ -242,9 +250,11 @@ template <typename _Storage, size_t Dimension> class Texture {
             // the correct gradient value.
             // To solve this issue, we store the AD index now, and re-attach
             // it to the output of `tensor()` on every call.
-            if constexpr (IsDiff)
-                m_unpadded_value.array() =
-                    replace_grad(m_unpadded_value.array(), value);
+            if constexpr (IsDiff) {
+                if (grad_enabled(value))
+                    m_unpadded_value.array() =
+                        replace_grad(m_unpadded_value.array(), value);
+            }
 
             if constexpr (HasCudaTexture) {
                 if (m_use_accel) {
@@ -286,12 +296,13 @@ template <typename _Storage, size_t Dimension> class Texture {
      * When \c migrate is set to \c true on CUDA mode, the texture information
      * is *fully* migrated to GPU texture memory to avoid redundant storage.
      */
-    void set_tensor(const TensorXf &tensor, bool migrate=false) {
+    template <typename TensorT>
+    void set_tensor(TensorT &&tensor, bool migrate = false) {
         if (tensor.ndim() != Dimension + 1)
             jit_raise("Texture::set_tensor(): tensor dimension must equal "
-                    "texture dimension plus one (channels).");
+                      "texture dimension plus one (channels).");
 
-        if (&tensor == &m_unpadded_value) {
+        if ((void *) &tensor == (void *) &m_unpadded_value) {
             jit_log(::LogLevel::Warn,
                     "Texture::set_tensor(): the `tensor` argument is a "
                     "reference to this texture's own tensor representation "
@@ -311,9 +322,12 @@ template <typename _Storage, size_t Dimension> class Texture {
 
         // Only update tensors & CUDA texture if shape changed
         init(tensor.shape().data(), tensor.shape(Dimension),
-            m_use_accel, m_filter_mode, m_wrap_mode, shape_changed);
+             m_use_accel, m_filter_mode, m_wrap_mode, shape_changed);
 
-        set_value(tensor.array(), migrate);
+        if constexpr (std::is_lvalue_reference_v<TensorT>)
+            set_value(tensor.array(), migrate);
+        else
+            set_value(std::move(tensor.array()), migrate);
     }
 
     /**
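The lvalue/rvalue branch at the end of `set_tensor()` relies on standard forwarding-reference deduction. A generic sketch of the pattern (the `sink()` function is hypothetical and not part of Dr.Jit):

    #include <type_traits>
    #include <utility>
    #include <vector>

    void sink(std::vector<float> v) { (void) v; }   // hypothetical by-value consumer

    template <typename T> void forward_to_sink(T &&x) {
        if constexpr (std::is_lvalue_reference_v<T>)
            sink(x);             // lvalue argument: T = U&, copy, caller keeps its data
        else
            sink(std::move(x));  // rvalue argument: T = U, safe to move from
    }

`set_tensor()` applies the same test to decide whether `tensor.array()` may be moved into `set_value()` or must be copied.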
@@ -342,7 +356,7 @@ template <typename _Storage, size_t Dimension> class Texture {
             }
         }
 
-        if constexpr (!is_jit_v<_Storage>) {
+        if constexpr (!is_jit_v<Storage_>) {
             if (shape_changed)
                 init(m_unpadded_value.shape().data(),
                      m_unpadded_value.shape(Dimension), m_use_accel, m_filter_mode,
@@ -371,7 +385,7 @@ template <typename _Storage, size_t Dimension> class Texture {
      * \brief Return the texture data as a tensor object
      */
     const TensorXf &tensor() const {
-        if constexpr (!is_jit_v<_Storage>) {
+        if constexpr (!is_jit_v<Storage_>) {
             return m_value;
         } else {
             sync_device_data();
@@ -412,7 +426,7 @@ template <typename _Storage, size_t Dimension> class Texture {
      */
     TensorXf &tensor() {
         return const_cast<TensorXf &>(
-            const_cast<const Texture<_Storage, Dimension> *>(this)->tensor());
+            const_cast<const Texture<Storage_, Dimension> *>(this)->tensor());
     }
 
     /**
@@ -1386,7 +1400,7 @@ template <typename _Storage, size_t Dimension> class Texture {
         m_channels = channels;
 
         // Determine padding used for channels depending on backend
-        if constexpr (is_jit_v<_Storage>) {
+        if constexpr (is_jit_v<Storage_>) {
             m_channels_storage = 1;
             while (m_channels_storage < m_channels)
                 m_channels_storage <<= 1;
@@ -1413,10 +1427,18 @@ template <typename _Storage, size_t Dimension> class Texture {
         m_wrap_mode = wrap_mode;
 
         if (init_tensor) {
-            m_value =
-                TensorXf(empty<Storage>(m_size), Dimension + 1, tensor_shape);
-            m_unpadded_value =
-                TensorXf(empty<Storage>(unpadded_size), Dimension + 1, m_shape);
+            if constexpr (is_jit_v<Storage_>) {
+                m_value =
+                    TensorXf(empty<Storage>(m_size), Dimension + 1, tensor_shape);
+                m_unpadded_value =
+                    TensorXf(empty<Storage>(unpadded_size), Dimension + 1, m_shape);
+            } else {
+                // Don't allocate memory in scalar modes
+                m_value =
+                    TensorXf(Storage::map_(nullptr, m_size), Dimension + 1, tensor_shape);
+                m_unpadded_value =
+                    TensorXf(Storage::map_(nullptr, unpadded_size), Dimension + 1, m_shape);
+            }
         }
 
         if constexpr (HasCudaTexture) {

src/python/texture.h

+2, -2
@@ -26,8 +26,8 @@ void bind_texture(nb::module_ &m, const char *name) {
              "filter_mode"_a = dr::FilterMode::Linear,
              "wrap_mode"_a = dr::WrapMode::Clamp,
              doc_Texture_init_tensor)
-        .def("set_value", &Tex::set_value, "value"_a, "migrate"_a = false, doc_Texture_set_value)
-        .def("set_tensor", &Tex::set_tensor, "tensor"_a, "migrate"_a = false, doc_Texture_set_tensor)
+        .def("set_value", &Tex::template set_value<const typename Tex::Storage &>, "value"_a, "migrate"_a = false, doc_Texture_set_value)
+        .def("set_tensor", &Tex::template set_tensor<const typename Tex::TensorXf &>, "tensor"_a, "migrate"_a = false, doc_Texture_set_tensor)
         .def("inplace_update", &Tex::inplace_update, "migrate"_a = false, doc_Texture_inplace_update)
         .def("value", &Tex::value, nb::rv_policy::reference_internal, doc_Texture_value)
         .def("tensor",
