@@ -4,10 +4,70 @@ import ClimaCore.DataLayouts
44import ClimaCore. DataLayouts: empty_kernel_stats
55
# Module-level store that `empty_kernel_stats` clears.
const reported_stats = Dict{Any, Any}()
# In-memory cache of generated kernel names; keys are compared by identity.
const kernel_names = IdDict{Any, Any}()
# Invoke through ClimaCore.DataLayouts.empty_kernel_stats(); this method handles
# the CUDA device and empties `reported_stats` in place.
function empty_kernel_stats(::ClimaComms.CUDADevice)
    return empty!(reported_stats)
end
# Switch consulted by `auto_launch!` before collecting kernel statistics;
# this definition always answers `false`.
function collect_kernel_stats()
    return false
end
1011
"""
    _getenv_bool(var::AbstractString; default::Bool = false)

Read the environment variable `var` and interpret its value as a boolean.

The value is stripped of surrounding whitespace and lowercased before matching:

- `"1"`, `"true"`, `"t"`, `"yes"`, `"y"`, `"on"` give `true`;
- `"0"`, `"false"`, `"f"`, `"no"`, `"n"`, `"off"` give `false`;
- any other value that parses as an `Int` gives `true` when non-zero;
- anything else logs a warning and gives `default`.

If `var` is not set, `default` is returned without a warning.
"""
function _getenv_bool(var::AbstractString; default::Bool = false)
    raw = get(ENV, var, nothing)
    raw === nothing && return default
    s = lowercase(strip(String(raw)))
    s in ("1", "true", "t", "yes", "y", "on") && return true
    s in ("0", "false", "f", "no", "n", "off") && return false
    # Fall back to an integer spelling (non-zero -> true). `tryparse` yields
    # `nothing` for invalid input, so no exception handling is needed and
    # unrelated exceptions are never swallowed.
    n = tryparse(Int, s)
    n === nothing || return n != 0
    @warn(
        "Unrecognized boolean env var value; using default",
        var = var,
        val = raw,
        default = default,
    )
    return default
end
32+
# Mutable flag deciding whether kernels are named from the stack trace.
# Starts out `false`; `__init__` overwrites it at load time.
const NAME_KERNELS_FROM_STACK_TRACE = Ref(false)
36+
# Re-read the environment every time the module is loaded, so that a value
# seen during precompilation does not become permanent.
function __init__()
    enabled = _getenv_bool("CLIMA_NAME_CUDA_KERNELS_FROM_STACK_TRACE"; default = false)
    NAME_KERNELS_FROM_STACK_TRACE[] = enabled
    return enabled
end
43+
# Current value of the flag stored in `NAME_KERNELS_FROM_STACK_TRACE`.
function name_kernels_from_stack_trace()
    return NAME_KERNELS_FROM_STACK_TRACE[]
end
45+
# Root module names whose stack frames are skipped when a kernel name is built
# from a stack trace.
const IGNORE_MODULES =
    (:Base, :Core, :GPUCompiler, :CUDA, :NVTX, :ClimaCoreCUDAExt, :ClimaCore)
56+
# Decide whether a stack frame may be used to name a kernel.
#
# A frame is relevant only when it carries a `Core.MethodInstance` that belongs
# to a `Method` whose root module is not listed in `IGNORE_MODULES`.
# Returns `false` for every other frame.
@inline function is_relevant_frame(frame::Base.StackTraces.StackFrame)
    linfo = frame.linfo
    linfo isa Core.MethodInstance || return false
    # `def` is a `Module` rather than a `Method` for top-level code; such a
    # frame has no method (or source file) to name a kernel after, and
    # reading `def.module` on it would throw, so skip it.
    def = linfo.def
    def isa Method || return false
    # Compare the outermost (root) module name, so submodules of an ignored
    # package are ignored as well.
    mod_name = fullname(def.module)[1]
    return mod_name ∉ IGNORE_MODULES
end
65+
# Return, as a `String`, the source file recorded on the definition of `mi`.
@inline function fpath_from_method_instance(mi::Core.MethodInstance)
    source_file = mi.def.file::Symbol
    return String(source_file)
end
70+
1171"""
1272 auto_launch!(f!::F!, args,
1373 ::Union{
@@ -39,19 +99,72 @@ function auto_launch!(
3999 always_inline = true ,
40100 caller = :unknown ,
41101) where {F!}
102+ # If desired, compute a kernel name from the stack trace and store in
103+ # a global Dict, which serves as an in memory cache
104+ kernel_name = nothing
105+ if name_kernels_from_stack_trace ()
106+ # Create a key from the method instance and types of the args
107+ key = objectid (CUDA. methodinstance (typeof (f!), typeof (args)))
108+ kernel_name_exists = key in keys (kernel_names)
109+ if ! kernel_name_exists
110+ # Construct the kernel name, ignoring modules we don't care about
111+ stack = stacktrace ()
112+ first_relevant_index = findfirst (is_relevant_frame, stack)
113+ if ! isnothing (first_relevant_index)
114+ # Don't include file if this is inside an NVTX annotation
115+ frame = stack[first_relevant_index]:: Base.StackTraces.StackFrame
116+ func_name = string (frame. func)
117+ if contains (func_name, " #" )
118+ func_name = split (func_name, " #" )[1 ]
119+ end
120+ fp_split =
121+ splitpath (fpath_from_method_instance (frame. linfo:: Core.MethodInstance ))
122+ if " NVTX" in fp_split
123+ fp_string = " _NVTX"
124+ line_string = " "
125+ else
126+ # Trim base directory off of file path to shorten
127+ package_index = findfirst (fp_split) do part
128+ startswith (part, " Clima" )
129+ end
130+ if isnothing (package_index)
131+ package_index = findfirst (p -> p == " .julia" , fp_split)
132+ end
133+ if isnothing (package_index)
134+ package_index = findfirst (p -> p == " src" , fp_split)
135+ end
136+ if isnothing (package_index)
137+ package_index = 1
138+ end
139+ fp_string =
140+ " _FILE_" *
141+ string (joinpath (fp_split[package_index: end ]. .. ))
142+ line_string = " _L" * string (frame. line)
143+ end
144+ name_str = string (func_name) * fp_string * line_string
145+ kernel_name = replace (name_str, r" [^A-Za-z0-9]" => " _" )
146+ end
147+ @debug " Using kernel name: $kernel_name "
148+ kernel_names[key] = kernel_name
149+ end
150+ kernel_name = kernel_names[key]
151+ end
152+
42153 if auto
43154 @assert ! isnothing (nitems)
44155 if nitems ≥ 0
45- kernel = CUDA. @cuda always_inline = true launch = false f! (args... )
156+ # Note: `name = nothing` here will revert to default behavior
157+ kernel = CUDA. @cuda name = kernel_name always_inline = true launch =
158+ false f! (args... )
46159 config = CUDA. launch_configuration (kernel. fun)
47160 threads = min (nitems, config. threads)
48161 blocks = cld (nitems, threads)
49162 kernel (args... ; threads, blocks) # This knows to use always_inline from above.
50163 end
51164 else
52165 kernel =
53- CUDA. @cuda always_inline = always_inline threads = threads_s blocks =
54- blocks_s f! (args... )
166+ CUDA. @cuda name = kernel_name always_inline = always_inline threads =
167+ threads_s blocks = blocks_s f! (args... )
55168 end
56169
57170 if collect_kernel_stats () # only for development use
0 commit comments