Skip to content

Commit 6ef215a

Browse files
committed
Add option to name CUDA kernels based on stack trace
1 parent f5963ca commit 6ef215a

File tree

1 file changed

+116
-3
lines changed

1 file changed

+116
-3
lines changed

ext/cuda/cuda_utils.jl

Lines changed: 116 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,70 @@ import ClimaCore.DataLayouts
44
import ClimaCore.DataLayouts: empty_kernel_stats
55

66
# In-memory store emptied by `empty_kernel_stats`.
const reported_stats = Dict{Any, Any}()
# In-memory cache of kernel names derived from stack traces (identity-keyed).
const kernel_names = IdDict{Any, Any}()
78
# Empties `reported_stats` (and returns it) when dispatched on a CUDA device.
# Call via ClimaCore.DataLayouts.empty_kernel_stats()
function empty_kernel_stats(::ClimaComms.CUDADevice)
    return empty!(reported_stats)
end
910
# Switch consulted before collecting kernel stats; hard-wired to `false` here.
function collect_kernel_stats()
    return false
end
1011

12+
"""
    _getenv_bool(var::AbstractString; default::Bool = false)

Read the environment variable `var` and interpret it as a boolean.

The value is stripped of surrounding whitespace and compared case-insensitively:

- `"1"`, `"true"`, `"t"`, `"yes"`, `"y"`, `"on"` return `true`;
- `"0"`, `"false"`, `"f"`, `"no"`, `"n"`, `"off"` return `false`;
- any other value that parses as an `Int` returns `true` when non-zero.

Return `default` when the variable is unset. When the value cannot be
interpreted, emit a warning and return `default`.
"""
function _getenv_bool(var::AbstractString; default::Bool = false)
    raw = get(ENV, var, nothing)
    raw === nothing && return default

    s = lowercase(strip(String(raw)))
    s in ("1", "true", "t", "yes", "y", "on") && return true
    s in ("0", "false", "f", "no", "n", "off") && return false

    # Fall back to integer parsing (non-zero -> true). `tryparse` yields
    # `nothing` on failure, so no exception handling is needed here.
    parsed = tryparse(Int, s)
    parsed === nothing || return parsed != 0

    @warn(
        "Unrecognized boolean env var value; using default",
        var = var,
        val = raw,
        default = default,
    )
    return default
end
32+
33+
# Mutable flag (a `Ref`) recording whether kernels should be named from the
# stack trace; starts out disabled.
const NAME_KERNELS_FROM_STACK_TRACE = Ref(false)
36+
37+
# Re-read the environment every time the module is loaded, so that a value
# captured during precompilation does not "stick".
function __init__()
    enabled =
        _getenv_bool("CLIMA_NAME_CUDA_KERNELS_FROM_STACK_TRACE"; default = false)
    NAME_KERNELS_FROM_STACK_TRACE[] = enabled
end
43+
44+
# Current value of the `NAME_KERNELS_FROM_STACK_TRACE` flag.
function name_kernels_from_stack_trace()
    return NAME_KERNELS_FROM_STACK_TRACE[]
end
45+
46+
# Root-module names whose stack frames are skipped when building a kernel
# name from a stack trace
const IGNORE_MODULES =
    (:Base, :Core, :GPUCompiler, :CUDA, :NVTX, :ClimaCoreCUDAExt, :ClimaCore)
56+
57+
"""
    is_relevant_frame(frame::Base.StackTraces.StackFrame) -> Bool

Return `true` when `frame` belongs to a method whose root module is not
listed in `IGNORE_MODULES`.

Frames without a `Core.MethodInstance`, and frames whose method instance is
not backed by a `Method` (its `def` field can be a `Module` instead), return
`false`: there is no method to attribute the kernel name to.
"""
@inline function is_relevant_frame(frame::Base.StackTraces.StackFrame)
    linfo = frame.linfo
    linfo isa Core.MethodInstance || return false
    def = linfo.def
    # `def` is not always a `Method`; reading `def.module` on a `Module`
    # would throw, so skip such frames.
    def isa Method || return false
    # Compare on the root of the module path so that submodules of an
    # ignored package are ignored as well.
    root_name = first(fullname(def.module::Module))
    return root_name ∉ IGNORE_MODULES
end
65+
66+
"""
    fpath_from_method_instance(mi::Core.MethodInstance)

Return, as a `String`, the file recorded on the definition of `mi`.
"""
@inline function fpath_from_method_instance(mi::Core.MethodInstance)
    file_sym = mi.def.file::Symbol
    return String(file_sym)
end
70+
1171
"""
1272
auto_launch!(f!::F!, args,
1373
::Union{
@@ -39,19 +99,72 @@ function auto_launch!(
3999
always_inline = true,
40100
caller = :unknown,
41101
) where {F!}
102+
# If desired, compute a kernel name from the stack trace and store in
103+
# a global Dict, which serves as an in memory cache
104+
kernel_name = nothing
105+
if name_kernels_from_stack_trace()
106+
# Create a key from the method instance and types of the args
107+
key = objectid(CUDA.methodinstance(typeof(f!), typeof(args)))
108+
kernel_name_exists = key in keys(kernel_names)
109+
if !kernel_name_exists
110+
# Construct the kernel name, ignoring modules we don't care about
111+
stack = stacktrace()
112+
first_relevant_index = findfirst(is_relevant_frame, stack)
113+
if !isnothing(first_relevant_index)
114+
# Don't include file if this is inside an NVTX annotation
115+
frame = stack[first_relevant_index]::Base.StackTraces.StackFrame
116+
func_name = string(frame.func)
117+
if contains(func_name, "#")
118+
func_name = split(func_name, "#")[1]
119+
end
120+
fp_split =
121+
splitpath(fpath_from_method_instance(frame.linfo::Core.MethodInstance))
122+
if "NVTX" in fp_split
123+
fp_string = "_NVTX"
124+
line_string = ""
125+
else
126+
# Trim base directory off of file path to shorten
127+
package_index = findfirst(fp_split) do part
128+
startswith(part, "Clima")
129+
end
130+
if isnothing(package_index)
131+
package_index = findfirst(p -> p == ".julia", fp_split)
132+
end
133+
if isnothing(package_index)
134+
package_index = findfirst(p -> p == "src", fp_split)
135+
end
136+
if isnothing(package_index)
137+
package_index = 1
138+
end
139+
fp_string =
140+
"_FILE_" *
141+
string(joinpath(fp_split[package_index:end]...))
142+
line_string = "_L" * string(frame.line)
143+
end
144+
name_str = string(func_name) * fp_string * line_string
145+
kernel_name = replace(name_str, r"[^A-Za-z0-9]" => "_")
146+
end
147+
@debug "Using kernel name: $kernel_name"
148+
kernel_names[key] = kernel_name
149+
end
150+
kernel_name = kernel_names[key]
151+
end
152+
42153
if auto
43154
@assert !isnothing(nitems)
44155
if nitems ≥ 0
45-
kernel = CUDA.@cuda always_inline = true launch = false f!(args...)
156+
# Note: `name = nothing` here will revert to default behavior
157+
kernel = CUDA.@cuda name = kernel_name always_inline = true launch =
158+
false f!(args...)
46159
config = CUDA.launch_configuration(kernel.fun)
47160
threads = min(nitems, config.threads)
48161
blocks = cld(nitems, threads)
49162
kernel(args...; threads, blocks) # This knows to use always_inline from above.
50163
end
51164
else
52165
kernel =
53-
CUDA.@cuda always_inline = always_inline threads = threads_s blocks =
54-
blocks_s f!(args...)
166+
CUDA.@cuda name = kernel_name always_inline = always_inline threads =
167+
threads_s blocks = blocks_s f!(args...)
55168
end
56169

57170
if collect_kernel_stats() # only for development use

0 commit comments

Comments
 (0)