
Commit c821068

Faster dr::Texture construction
The Dr.Jit texture constructor currently spends a significant amount of time zero-initializing and copying arrays when these are backed by ``dr::DynamicArray<T>`` (i.e., in scalar mode). This commit changes the constructor so that it takes a universal (T&&) reference and automatically copies or moves as needed.
1 parent 583fde5 commit c821068
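For context, a minimal usage sketch of the resulting constructor in scalar mode (the `Texture2f` alias, 2D shape, and channel count are illustrative assumptions, not part of this commit):

    using Texture2f = drjit::Texture<float, 2>;
    using TensorXf  = Texture2f::TensorXf;

    size_t shape[3] = { 512, 512, 3 };   // rows, cols, channels
    TensorXf data(drjit::empty<Texture2f::Storage>(512 * 512 * 3), 3, shape);

    Texture2f tex_copy(data);              // lvalue argument: the tensor is copied
    Texture2f tex_move(std::move(data));   // rvalue argument: the tensor is moved, no copy

With the previous `const TensorXf &` signature, both calls would have copied the underlying array.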

5 files changed (+81, -39 lines)

include/drjit/dynamic.h

+8
@@ -407,6 +407,14 @@ struct DynamicArray
         m_free = true;
     }
 
+    static DynamicArray map_(Value *value, size_t size) {
+        DynamicArray result;
+        result.m_data = value;
+        result.m_size = size;
+        result.m_free = false;
+        return result;
+    }
+
     static auto counter(size_t size) {
         uint32_array_t<DynamicArray> result;
         result.init_(size);
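A brief sketch of what the new `map_()` helper provides (buffer and sizes are illustrative): it wraps an existing pointer in a `DynamicArray` view without allocating, zero-initializing, or taking ownership of the memory.

    #include <drjit/dynamic.h>

    void map_example() {
        float buf[4] = { 1.f, 2.f, 3.f, 4.f };

        // Non-owning view of `buf`: m_free is false, so the destructor
        // will not attempt to release the pointer.
        auto view = drjit::DynamicArray<float>::map_(buf, 4);
        (void) view;   // view.size() == 4, no copy was made
    }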

include/drjit/tensor.h

+12
@@ -161,6 +161,18 @@ struct Tensor
         }
     }
 
+    Tensor(Array &&data, size_t ndim, const size_t *shape)
+        : m_array(std::move(data)), m_shape(shape, shape + ndim) {
+        size_t size = 1;
+        for (size_t i = 0; i < ndim; ++i)
+            size *= shape[i];
+        if (size != m_array.size()) {
+            drjit_fail("Tensor(): invalid size specified (%zu vs %zu)!",
+                       size, m_array.size());
+        }
+    }
+
     Tensor(const void *ptr, size_t ndim, const size_t *shape)
         : m_shape(shape, shape + ndim) {
         size_t size = 1;
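A short sketch of the new constructor (types and shape are illustrative): the array is moved into the tensor rather than copied, and the product of the shape entries must match its size.

    #include <drjit/dynamic.h>
    #include <drjit/tensor.h>
    #include <utility>

    void tensor_move_example() {
        using Array = drjit::DynamicArray<float>;

        Array data = drjit::empty<Array>(6);
        size_t shape[2] = { 2, 3 };

        // `data` is moved, not copied; if 2 * 3 != data.size(),
        // drjit_fail() reports the mismatch.
        drjit::Tensor<Array> t(std::move(data), 2, shape);
        (void) t;
    }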

include/drjit/texture.h

+58, -36
@@ -42,34 +42,34 @@ enum class CudaTextureFormat : uint32_t {
     Float16 = 1, /// Half precision storage format
 };
 
-template <typename _Storage, size_t Dimension> class Texture {
+template <typename Storage_, size_t Dimension> class Texture {
 public:
-    static constexpr bool IsCUDA = is_cuda_v<_Storage>;
-    static constexpr bool IsDiff = is_diff_v<_Storage>;
-    static constexpr bool IsDynamic = is_dynamic_v<_Storage>;
+    static constexpr bool IsCUDA = is_cuda_v<Storage_>;
+    static constexpr bool IsDiff = is_diff_v<Storage_>;
+    static constexpr bool IsDynamic = is_dynamic_v<Storage_>;
     // Only half/single-precision floating-point CUDA textures are supported
-    static constexpr bool IsHalf = std::is_same_v<scalar_t<_Storage>, drjit::half>;
-    static constexpr bool IsSingle = std::is_same_v<scalar_t<_Storage>, float>;
+    static constexpr bool IsHalf = std::is_same_v<scalar_t<Storage_>, drjit::half>;
+    static constexpr bool IsSingle = std::is_same_v<scalar_t<Storage_>, float>;
     static constexpr bool HasCudaTexture = (IsHalf || IsSingle) && IsCUDA;
     static constexpr int CudaFormat = HasCudaTexture ?
         IsHalf ? (int)CudaTextureFormat::Float16 : (int)CudaTextureFormat::Float32 : -1;
 
-    using Int32 = int32_array_t<_Storage>;
-    using UInt32 = uint32_array_t<_Storage>;
-    using Storage = std::conditional_t<IsDynamic, _Storage, DynamicArray<_Storage>>;
-    using Packet = std::conditional_t<is_jit_v<_Storage>,
-                                      DynamicArray<_Storage>, _Storage*>;
+    using Int32 = int32_array_t<Storage_>;
+    using UInt32 = uint32_array_t<Storage_>;
+    using Storage = std::conditional_t<IsDynamic, Storage_, DynamicArray<Storage_>>;
+    using Packet = std::conditional_t<is_jit_v<Storage_>,
+                                      DynamicArray<Storage_>, Storage_*>;
     using TensorXf = Tensor<Storage>;
 
 #define DR_TEX_ALLOC_PACKET(name, size)                      \
     Packet _packet;                                          \
-    _Storage* name;                                          \
+    Storage_* name;                                          \
                                                              \
     if constexpr (is_jit_v<Value>) {                         \
         _packet = empty<Packet>(m_channels_storage);         \
         name = _packet.data();                               \
     } else {                                                 \
-        name = (_Storage*) alloca(sizeof(_Storage) * size);  \
+        name = (Storage_*) alloca(sizeof(Storage_) * size);  \
         (void) _packet;                                      \
     }
 

@@ -125,15 +125,16 @@ template <typename _Storage, size_t Dimension> class Texture {
      * Both the \c filter_mode and \c wrap_mode have the same defaults and
      * behaviors as for the previous constructor.
      */
-    Texture(const TensorXf &tensor, bool use_accel = true, bool migrate = true,
+    template <typename TensorT>
+    Texture(TensorT &&tensor, bool use_accel = true, bool migrate = true,
             FilterMode filter_mode = FilterMode::Linear,
             WrapMode wrap_mode = WrapMode::Clamp) {
         if (tensor.ndim() != Dimension + 1)
             jit_raise("Texture::Texture(): tensor dimension must equal "
                       "texture dimension plus one.");
         init(tensor.shape().data(), tensor.shape(Dimension), use_accel,
              filter_mode, wrap_mode);
-        set_tensor(tensor, migrate);
+        set_tensor(std::forward<TensorT>(tensor), migrate);
     }
 
     Texture(Texture &&other) noexcept {
@@ -209,16 +210,21 @@ template <typename _Storage, size_t Dimension> class Texture {
      * When \c migrate is set to \c true on CUDA mode, the texture information
      * is *fully* migrated to GPU texture memory to avoid redundant storage.
      */
-    void set_value(const Storage &value, bool migrate=false) {
-        if constexpr (!is_jit_v<_Storage>) {
+    template <typename StorageT>
+    void set_value(StorageT &&value, bool migrate = false) {
+        static_assert(
+            std::is_same_v<std::decay_t<StorageT>, Storage>,
+            "Texture::set_value(): argument has an unsupported type!");
+
+        if constexpr (!is_jit_v<Storage_>) {
             if (value.size() != m_size)
                 jit_raise("Texture::set_value(): unexpected array size!");
-            m_value.array() = value;
+            m_value.array() = std::forward<StorageT>(value);
         } else /* JIT variant */ {
             Storage padded_value;
 
             if (m_channels_storage != m_channels) {
-                using Mask = mask_t<_Storage>;
+                using Mask = mask_t<Storage_>;
                 UInt32 idx = arange<UInt32>(m_size);
                 UInt32 pixels_idx = idx / m_channels_storage;
                 UInt32 channel_idx = idx % m_channels_storage;
@@ -230,7 +236,9 @@ template <typename _Storage, size_t Dimension> class Texture {
             }
 
             if (padded_value.size() != m_size)
-                jit_raise("Texture::set_value(): unexpected array size!");
+                jit_raise(
+                    "Texture::set_value(): unexpected array size (%zu vs %zu)!",
+                    padded_value.size(), m_size);
 
             // We can always re-compute the unpadded values from the padded
             // ones. However, if we systematically do that, users will not be
@@ -242,9 +250,11 @@ template <typename _Storage, size_t Dimension> class Texture {
             // the correct gradient value.
             // To solve this issue, we store the AD index now, and re-attach
             // it to the output of `tensor()` on every call.
-            if constexpr (IsDiff)
-                m_unpadded_value.array() =
-                    replace_grad(m_unpadded_value.array(), value);
+            if constexpr (IsDiff) {
+                if (grad_enabled(value))
+                    m_unpadded_value.array() =
+                        replace_grad(m_unpadded_value.array(), value);
+            }
 
             if constexpr (HasCudaTexture) {
                 if (m_use_accel) {
@@ -286,12 +296,13 @@ template <typename _Storage, size_t Dimension> class Texture {
      * When \c migrate is set to \c true on CUDA mode, the texture information
      * is *fully* migrated to GPU texture memory to avoid redundant storage.
      */
-    void set_tensor(const TensorXf &tensor, bool migrate=false) {
+    template <typename TensorT>
+    void set_tensor(TensorT &&tensor, bool migrate = false) {
         if (tensor.ndim() != Dimension + 1)
             jit_raise("Texture::set_tensor(): tensor dimension must equal "
-                    "texture dimension plus one (channels).");
+                      "texture dimension plus one (channels).");
 
-        if (&tensor == &m_unpadded_value) {
+        if ((void *) &tensor == (void *) &m_unpadded_value) {
             jit_log(::LogLevel::Warn,
                     "Texture::set_tensor(): the `tensor` argument is a "
                     "reference to this texture's own tensor representation "
@@ -311,9 +322,12 @@ template <typename _Storage, size_t Dimension> class Texture {
 
         // Only update tensors & CUDA texture if shape changed
         init(tensor.shape().data(), tensor.shape(Dimension),
-            m_use_accel, m_filter_mode, m_wrap_mode, shape_changed);
+             m_use_accel, m_filter_mode, m_wrap_mode, shape_changed);
 
-        set_value(tensor.array(), migrate);
+        if constexpr (std::is_lvalue_reference_v<TensorT>)
+            set_value(tensor.array(), migrate);
+        else
+            set_value(std::move(tensor.array()), migrate);
     }
 
     /**
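The lvalue/rvalue branch at the end of `set_tensor()` relies on standard forwarding-reference deduction. A generic sketch of the pattern (the `sink()` function is hypothetical and not part of Dr.Jit):

    #include <type_traits>
    #include <utility>
    #include <vector>

    void sink(std::vector<float> v) { (void) v; }   // hypothetical by-value consumer

    template <typename T> void forward_to_sink(T &&x) {
        if constexpr (std::is_lvalue_reference_v<T>)
            sink(x);             // lvalue argument: T = U&, copy, caller keeps its data
        else
            sink(std::move(x));  // rvalue argument: T = U, safe to move from
    }

`set_tensor()` applies the same test to decide whether `tensor.array()` may be moved into `set_value()` or must be copied.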
@@ -342,7 +356,7 @@ template <typename _Storage, size_t Dimension> class Texture {
             }
         }
 
-        if constexpr (!is_jit_v<_Storage>) {
+        if constexpr (!is_jit_v<Storage_>) {
             if (shape_changed)
                 init(m_unpadded_value.shape().data(),
                      m_unpadded_value.shape(Dimension), m_use_accel, m_filter_mode,
@@ -371,7 +385,7 @@ template <typename _Storage, size_t Dimension> class Texture {
      * \brief Return the texture data as a tensor object
      */
     const TensorXf &tensor() const {
-        if constexpr (!is_jit_v<_Storage>) {
+        if constexpr (!is_jit_v<Storage_>) {
             return m_value;
         } else {
             sync_device_data();
@@ -412,7 +426,7 @@ template <typename _Storage, size_t Dimension> class Texture {
      */
     TensorXf &tensor() {
         return const_cast<TensorXf &>(
-            const_cast<const Texture<_Storage, Dimension> *>(this)->tensor());
+            const_cast<const Texture<Storage_, Dimension> *>(this)->tensor());
     }
 
     /**
@@ -1386,7 +1400,7 @@ template <typename _Storage, size_t Dimension> class Texture {
         m_channels = channels;
 
         // Determine padding used for channels depending on backend
-        if constexpr (is_jit_v<_Storage>) {
+        if constexpr (is_jit_v<Storage_>) {
             m_channels_storage = 1;
             while (m_channels_storage < m_channels)
                 m_channels_storage <<= 1;
@@ -1413,10 +1427,18 @@ template <typename _Storage, size_t Dimension> class Texture {
         m_wrap_mode = wrap_mode;
 
         if (init_tensor) {
-            m_value =
-                TensorXf(empty<Storage>(m_size), Dimension + 1, tensor_shape);
-            m_unpadded_value =
-                TensorXf(empty<Storage>(unpadded_size), Dimension + 1, m_shape);
+            if constexpr (is_jit_v<Storage_>) {
+                m_value =
+                    TensorXf(empty<Storage>(m_size), Dimension + 1, tensor_shape);
+                m_unpadded_value =
+                    TensorXf(empty<Storage>(unpadded_size), Dimension + 1, m_shape);
+            } else {
+                // Don't allocate memory in scalar modes
+                m_value =
+                    TensorXf(Storage::map_(nullptr, m_size), Dimension + 1, tensor_shape);
+                m_unpadded_value =
+                    TensorXf(Storage::map_(nullptr, unpadded_size), Dimension + 1, m_shape);
+            }
         }
 
         if constexpr (HasCudaTexture) {

src/python/texture.h

+2, -2
@@ -26,8 +26,8 @@ void bind_texture(nb::module_ &m, const char *name) {
              "filter_mode"_a = dr::FilterMode::Linear,
              "wrap_mode"_a = dr::WrapMode::Clamp,
              doc_Texture_init_tensor)
-        .def("set_value", &Tex::set_value, "value"_a, "migrate"_a = false, doc_Texture_set_value)
-        .def("set_tensor", &Tex::set_tensor, "tensor"_a, "migrate"_a = false, doc_Texture_set_tensor)
+        .def("set_value", &Tex::template set_value<const typename Tex::Storage &>, "value"_a, "migrate"_a = false, doc_Texture_set_value)
+        .def("set_tensor", &Tex::template set_tensor<const typename Tex::TensorXf &>, "tensor"_a, "migrate"_a = false, doc_Texture_set_tensor)
         .def("inplace_update", &Tex::inplace_update, "migrate"_a = false, doc_Texture_inplace_update)
         .def("value", &Tex::value, nb::rv_policy::reference_internal, doc_Texture_value)
         .def("tensor",
