Skip to content

Commit d073178

Browse files
committed
speed up CUDA enabled TSD builds by splitting computeScalarRange into TUs
1 parent c93877a commit d073178

9 files changed

+184
-79
lines changed

tsd/src/tsd/CMakeLists.txt

+12-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@ project_add_library(STATIC)
66
project_sources(
77
PRIVATE
88
algorithms/computeScalarRange.cpp
9+
algorithms/detail/computeScalarRange_fixed8.cpp
10+
algorithms/detail/computeScalarRange_fixed16.cpp
11+
algorithms/detail/computeScalarRange_ufixed8.cpp
12+
algorithms/detail/computeScalarRange_ufixed16.cpp
13+
algorithms/detail/computeScalarRange_float32.cpp
14+
algorithms/detail/computeScalarRange_float64.cpp
915
authoring/importers/detail/HDRImage.cpp
1016
authoring/importers/detail/importer_common.cpp
1117
authoring/importers/import_ASSIMP.cpp
@@ -80,7 +86,12 @@ if (TSD_USE_CUDA)
8086
project_compile_definitions(PUBLIC -DTSD_USE_CUDA=1)
8187
project_link_libraries(PUBLIC CUDA::cudart)
8288
set_source_files_properties(
83-
algorithms/computeScalarRange.cpp
89+
algorithms/detail/computeScalarRange_fixed8.cpp
90+
algorithms/detail/computeScalarRange_fixed16.cpp
91+
algorithms/detail/computeScalarRange_ufixed8.cpp
92+
algorithms/detail/computeScalarRange_ufixed16.cpp
93+
algorithms/detail/computeScalarRange_float32.cpp
94+
algorithms/detail/computeScalarRange_float64.cpp
8495
objects/SpatialField.cpp
8596
PROPERTIES
8697
COMPILE_FLAGS "--extended-lambda --expt-relaxed-constexpr"

tsd/src/tsd/algorithms/computeScalarRange.cpp

+29-78
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,13 @@
11
// Copyright 2024-2025 NVIDIA Corporation
22
// SPDX-License-Identifier: Apache-2.0
33

4-
#ifndef TSD_USE_CUDA
5-
#define TSD_USE_CUDA 1
6-
#endif
7-
84
#include "tsd/algorithms/computeScalarRange.hpp"
95
#include "tsd/core/Context.hpp"
10-
// std
11-
#include <algorithm>
12-
#include <limits>
13-
#if TSD_USE_CUDA
14-
// thrust
15-
#include <cuda_runtime.h>
16-
#include <thrust/device_ptr.h>
17-
#include <thrust/extrema.h>
18-
#endif
19-
20-
namespace tsd::algorithm {
21-
22-
namespace detail {
23-
24-
// NOTE(jda): This is a reduced version of anari::anariTypeInvoke() to lower
25-
// Thrust/CUDA compile times
26-
template <typename R, template <int> class F, typename... Args>
27-
inline R scalarTypeInvoke(ANARIDataType type, Args &&...args)
28-
{
29-
// clang-format off
30-
switch (type) {
31-
case ANARI_UFIXED8: return F<ANARI_UFIXED8>()(std::forward<Args>(args)...);
32-
case ANARI_UFIXED16: return F<ANARI_UFIXED16>()(std::forward<Args>(args)...);
33-
case ANARI_FIXED8: return F<ANARI_FIXED8>()(std::forward<Args>(args)...);
34-
case ANARI_FIXED16: return F<ANARI_FIXED16>()(std::forward<Args>(args)...);
35-
case ANARI_FLOAT32: return F<ANARI_FLOAT32>()(std::forward<Args>(args)...);
36-
case ANARI_FLOAT64: return F<ANARI_FLOAT64>()(std::forward<Args>(args)...);
37-
default:
38-
return F<ANARI_UNKNOWN>()(std::forward<Args>(args)...);
39-
}
40-
// clang-format on
41-
}
42-
43-
template <int ANARI_ENUM_T>
44-
struct ComputeScalarRange
45-
{
46-
using properties_t = anari::ANARITypeProperties<ANARI_ENUM_T>;
47-
using base_t = typename properties_t::base_type;
6+
#include "tsd/core/Logging.hpp"
487

49-
tsd::float2 operator()(const Array &a)
50-
{
51-
tsd::float4 min_out{0.f, 0.f, 0.f, 0.f};
52-
tsd::float4 max_out{0.f, 0.f, 0.f, 0.f};
8+
#include "tsd/algorithms/detail/computeScalarRangeImpl.hpp"
539

54-
const auto *begin = a.dataAs<base_t>();
55-
const auto *end = begin + a.size();
56-
#if TSD_USE_CUDA
57-
if (a.kind() == Array::MemoryKind::CUDA) {
58-
const auto minmax = thrust::minmax_element(
59-
thrust::device_pointer_cast(begin), thrust::device_pointer_cast(end));
60-
const base_t min_v = *minmax.first;
61-
const base_t max_v = *minmax.second;
62-
properties_t::toFloat4(&min_out.x, &min_v);
63-
properties_t::toFloat4(&max_out.x, &max_v);
64-
} else {
65-
#endif
66-
const auto minmax = std::minmax_element(begin, end);
67-
const auto min_v = *minmax.first;
68-
const auto max_v = *minmax.second;
69-
properties_t::toFloat4(&min_out.x, &min_v);
70-
properties_t::toFloat4(&max_out.x, &max_v);
71-
#if TSD_USE_CUDA
72-
}
73-
#endif
74-
75-
return {min_out.x, max_out.x};
76-
}
77-
};
78-
79-
} // namespace detail
80-
81-
///////////////////////////////////////////////////////////////////////////////
82-
///////////////////////////////////////////////////////////////////////////////
83-
///////////////////////////////////////////////////////////////////////////////
10+
namespace tsd::algorithm {
8411

8512
tsd::float2 computeScalarRange(const Array &a)
8613
{
@@ -103,8 +30,32 @@ tsd::float2 computeScalarRange(const Array &a)
10330
retval.y = std::max(retval.y, subRange.y);
10431
});
10532
} else if (elementsAreScalars) {
106-
retval = detail::scalarTypeInvoke<tsd::float2, detail::ComputeScalarRange>(
107-
type, a);
33+
switch (type) {
34+
case ANARI_UFIXED8:
35+
retval = detail::computeScalarRange_ufixed8(a);
36+
break;
37+
case ANARI_UFIXED16:
38+
retval = detail::computeScalarRange_ufixed16(a);
39+
break;
40+
case ANARI_FIXED8:
41+
retval = detail::computeScalarRange_fixed8(a);
42+
break;
43+
case ANARI_FIXED16:
44+
retval = detail::computeScalarRange_fixed16(a);
45+
break;
46+
case ANARI_FLOAT32:
47+
retval = detail::computeScalarRange_float32(a);
48+
break;
49+
case ANARI_FLOAT64:
50+
retval = detail::computeScalarRange_float64(a);
51+
break;
52+
default:
53+
logWarning(
54+
"computeScalarRange() called on an "
55+
"array with incompatible element type '%s'",
56+
anari::toString(type));
57+
break;
58+
}
10859
}
10960

11061
return retval;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// Copyright 2025 NVIDIA Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
#pragma once
5+
6+
#ifndef TSD_USE_CUDA
7+
#define TSD_USE_CUDA 1
8+
#endif
9+
10+
#include "tsd/core/TSDMath.hpp"
11+
#include "tsd/objects/Array.hpp"
12+
// std
13+
#include <algorithm>
14+
#include <limits>
15+
#if TSD_USE_CUDA
16+
// thrust
17+
#include <cuda_runtime.h>
18+
#include <thrust/device_ptr.h>
19+
#include <thrust/extrema.h>
20+
#endif
21+
22+
namespace tsd::algorithm::detail {
23+
24+
template <int ANARI_ENUM_T>
25+
inline tsd::float2 computeScalarRangeImpl(const Array &a)
26+
{
27+
using properties_t = anari::ANARITypeProperties<ANARI_ENUM_T>;
28+
using base_t = typename properties_t::base_type;
29+
tsd::float4 min_out{0.f, 0.f, 0.f, 0.f};
30+
tsd::float4 max_out{0.f, 0.f, 0.f, 0.f};
31+
32+
const auto *begin = a.dataAs<base_t>();
33+
const auto *end = begin + a.size();
34+
#if TSD_USE_CUDA
35+
if (a.kind() == Array::MemoryKind::CUDA) {
36+
const auto minmax = thrust::minmax_element(
37+
thrust::device_pointer_cast(begin), thrust::device_pointer_cast(end));
38+
const base_t min_v = *minmax.first;
39+
const base_t max_v = *minmax.second;
40+
properties_t::toFloat4(&min_out.x, &min_v);
41+
properties_t::toFloat4(&max_out.x, &max_v);
42+
} else {
43+
#endif
44+
const auto minmax = std::minmax_element(begin, end);
45+
const auto min_v = *minmax.first;
46+
const auto max_v = *minmax.second;
47+
properties_t::toFloat4(&min_out.x, &min_v);
48+
properties_t::toFloat4(&max_out.x, &max_v);
49+
#if TSD_USE_CUDA
50+
}
51+
#endif
52+
53+
return {min_out.x, max_out.x};
54+
}
55+
56+
// NOTE(jda) - Expand the template in separate TUs due to long Thrust compile
57+
// times.
58+
tsd::float2 computeScalarRange_ufixed8(const Array &a);
59+
tsd::float2 computeScalarRange_ufixed16(const Array &a);
60+
tsd::float2 computeScalarRange_fixed8(const Array &a);
61+
tsd::float2 computeScalarRange_fixed16(const Array &a);
62+
tsd::float2 computeScalarRange_float32(const Array &a);
63+
tsd::float2 computeScalarRange_float64(const Array &a);
64+
65+
} // namespace tsd::algorithm::detail
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2025 NVIDIA Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
#include "computeScalarRangeImpl.hpp"
5+
6+
namespace tsd::algorithm::detail {
7+
8+
// Non-template entry point for ANARI_FIXED16 arrays: the range template is
// instantiated here so this element type compiles in its own source file.
tsd::float2 computeScalarRange_fixed16(const Array &a)
{
  constexpr int elementType = ANARI_FIXED16;
  return computeScalarRangeImpl<elementType>(a);
}
12+
13+
} // namespace tsd::algorithm::detail
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2025 NVIDIA Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
#include "computeScalarRangeImpl.hpp"
5+
6+
namespace tsd::algorithm::detail {
7+
8+
// Non-template entry point for ANARI_FIXED8 arrays: the range template is
// instantiated here so this element type compiles in its own source file.
tsd::float2 computeScalarRange_fixed8(const Array &a)
{
  constexpr int elementType = ANARI_FIXED8;
  return computeScalarRangeImpl<elementType>(a);
}
12+
13+
} // namespace tsd::algorithm::detail
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2025 NVIDIA Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
#include "computeScalarRangeImpl.hpp"
5+
6+
namespace tsd::algorithm::detail {
7+
8+
// Non-template entry point for ANARI_FLOAT32 arrays: the range template is
// instantiated here so this element type compiles in its own source file.
tsd::float2 computeScalarRange_float32(const Array &a)
{
  constexpr int elementType = ANARI_FLOAT32;
  return computeScalarRangeImpl<elementType>(a);
}
12+
13+
} // namespace tsd::algorithm::detail
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2025 NVIDIA Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
#include "computeScalarRangeImpl.hpp"
5+
6+
namespace tsd::algorithm::detail {
7+
8+
// Non-template entry point for ANARI_FLOAT64 arrays: the range template is
// instantiated here so this element type compiles in its own source file.
tsd::float2 computeScalarRange_float64(const Array &a)
{
  constexpr int elementType = ANARI_FLOAT64;
  return computeScalarRangeImpl<elementType>(a);
}
12+
13+
} // namespace tsd::algorithm::detail
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2025 NVIDIA Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
#include "computeScalarRangeImpl.hpp"
5+
6+
namespace tsd::algorithm::detail {
7+
8+
// Non-template entry point for ANARI_UFIXED16 arrays: the range template is
// instantiated here so this element type compiles in its own source file.
tsd::float2 computeScalarRange_ufixed16(const Array &a)
{
  constexpr int elementType = ANARI_UFIXED16;
  return computeScalarRangeImpl<elementType>(a);
}
12+
13+
} // namespace tsd::algorithm::detail
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2025 NVIDIA Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
#include "computeScalarRangeImpl.hpp"
5+
6+
namespace tsd::algorithm::detail {
7+
8+
// Non-template entry point for ANARI_UFIXED8 arrays: the range template is
// instantiated here so this element type compiles in its own source file.
tsd::float2 computeScalarRange_ufixed8(const Array &a)
{
  constexpr int elementType = ANARI_UFIXED8;
  return computeScalarRangeImpl<elementType>(a);
}
12+
13+
} // namespace tsd::algorithm::detail

0 commit comments

Comments
 (0)