Skip to content

Commit d68ea75

Browse files
authored
Merge pull request #4055 from CliMA/pb/gpu-perf-2
Update `benchmark_step.jl` for CUDA benchmarking with useful kernel names
2 parents eb298da + 2d3e036 commit d68ea75

File tree

3 files changed

+41
-32
lines changed

.buildkite/Manifest-v1.11.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -408,7 +408,7 @@ weakdeps = ["CUDA", "MPI"]
408408
deps = ["Adapt", "BandedMatrices", "BlockArrays", "ClimaComms", "CubedSphere", "DataStructures", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LazyBroadcast", "LinearAlgebra", "MultiBroadcastFusion", "NVTX", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "StaticArrays", "Statistics", "UnrolledUtilities"]
409409
git-tree-sha1 = "344711aa776e0bbd007ad127e5ba9f2113a1c88b"
410410
uuid = "d414da3d-4745-48bb-8d80-42e94e092884"
411-
version = "0.14.41"
411+
version = "0.14.42"
412412
weakdeps = ["CUDA", "Krylov"]
413413

414414
[deps.ClimaCore.extensions]

.buildkite/pipeline.yml

Lines changed: 5 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ steps:
2020
- label: "init :computer:"
2121
key: "init_cpu_env"
2222
concurrency: 1
23-
concurrency_group: 'depot/climaatmos-ci'
23+
concurrency_group: "depot/climaatmos-ci"
2424
command:
2525
- "echo $$JULIA_DEPOT_PATH"
2626

@@ -41,13 +41,11 @@ steps:
4141

4242
- group: "Reproducibility infrastructure"
4343
steps:
44-
4544
- label: ":computer: Test reproducibility infrastructure"
4645
command: "julia --color=yes --project=.buildkite test/unit_reproducibility_infra.jl"
4746

4847
- group: "Radiation"
4948
steps:
50-
5149
- label: ":computer: single column radiative equilibrium gray"
5250
command: >
5351
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
@@ -98,7 +96,6 @@ steps:
9896

9997
- group: "Gravity wave"
10098
steps:
101-
10299
- label: ":computer: non-orographic gravity wave parameterization unit test 3d"
103100
command: "julia --color=yes --project=.buildkite test/parameterized_tendencies/gravity_wave/non_orographic_gravity_wave/nogw_test_3d.jl"
104101
artifact_paths: "nonorographic_gravity_wave_test_3d/*"
@@ -132,7 +129,6 @@ steps:
132129

133130
- group: "Column Examples"
134131
steps:
135-
136132
- label: ":computer: single column hydrostatic balance float64"
137133
command: >
138134
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
@@ -149,7 +145,6 @@ steps:
149145

150146
- group: "Box Examples"
151147
steps:
152-
153148
- label: ":computer: Box hydrostatic balance"
154149
command: >
155150
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
@@ -188,7 +183,6 @@ steps:
188183

189184
- group: "Plane Examples"
190185
steps:
191-
192186
- label: ":computer: Density current experiment"
193187
command: >
194188
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
@@ -287,7 +281,6 @@ steps:
287281

288282
- group: "Conservation check"
289283
steps:
290-
291284
- label: ":computer: baroclinic wave check conservation"
292285
command: >
293286
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl --config_file $CONFIG_PATH/baroclinic_wave_conservation.yml
@@ -324,7 +317,6 @@ steps:
324317

325318
- group: "Sphere Examples (Dycore)"
326319
steps:
327-
328320
- label: ":computer: hydrostatic balance float64"
329321
command: >
330322
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
@@ -394,7 +386,6 @@ steps:
394386

395387
- group: "Sphere Examples (Aquaplanet)"
396388
steps:
397-
398389
- label: ":umbrella: aquaplanet nonequil allsky monin_obukhov varying insol gravity wave (gfdl_restart) high top 1-moment"
399390
command: >
400391
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
@@ -462,7 +453,6 @@ steps:
462453

463454
- group: "Sphere Examples (Topography)"
464455
steps:
465-
466456
- label: ":computer: baroclinic wave topography (dcmip)"
467457
command: >
468458
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
@@ -489,7 +479,6 @@ steps:
489479

490480
- group: "Restarting"
491481
steps:
492-
493482
- label: ":computer: test restart"
494483
command: >
495484
julia --color=yes --project=.buildkite test/restart.jl
@@ -554,7 +543,6 @@ steps:
554543

555544
- group: "MPI Examples"
556545
steps:
557-
558546
- label: ":computer: Prep restart for MPI"
559547
key: "mpi_baro_wave_make_restart"
560548
command: >
@@ -624,7 +612,6 @@ steps:
624612

625613
- group: "EDOnlyEDMFX"
626614
steps:
627-
628615
- label: ":man_in_business_suit_levitating: EDOnly EDMFX aquaplanet"
629616
command: >
630617
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
@@ -647,7 +634,6 @@ steps:
647634

648635
- group: "Diagnostic EDMFX"
649636
steps:
650-
651637
- label: ":genie: Diagnostic EDMFX test in a box"
652638
command: >
653639
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
@@ -746,14 +732,12 @@ steps:
746732

747733
- group: "Prognostic EDMFX"
748734
steps:
749-
750735
- label: ":genie: Prognostic EDMFX advection test in a column"
751736
command: >
752737
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
753738
--config_file $CONFIG_PATH/prognostic_edmfx_adv_test_column.yml
754739
--job_id prognostic_edmfx_adv_test_column
755740
artifact_paths: "prognostic_edmfx_adv_test_column/output_active/*"
756-
757741
agents:
758742
slurm_mem: 20GB
759743

@@ -948,7 +932,6 @@ steps:
948932

949933
- group: "Autodiff"
950934
steps:
951-
952935
- label: "baroclinic wave moist check conservation float64 sparse autodiff"
953936
command: >
954937
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl --config_file $CONFIG_PATH/baroclinic_wave_equil_conservation_ft64_sparse_autodiff.yml
@@ -1044,7 +1027,6 @@ steps:
10441027

10451028
- group: "GPU"
10461029
steps:
1047-
10481030
- label: "GPU: Gravity waves"
10491031
command: >
10501032
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
@@ -1113,7 +1095,6 @@ steps:
11131095
- "baroclinic_wave"
11141096
- "baroclinic_wave_gpu"
11151097

1116-
11171098
- label: "GPU: baroclinic wave - 2 gpus"
11181099
key: "baroclinic_wave_2gpu"
11191100
command:
@@ -1233,7 +1214,6 @@ steps:
12331214

12341215
- group: "Benchmarks"
12351216
steps:
1236-
12371217
- label: ":computer: Benchmark: CPU baroclinic wave moist"
12381218
command: >
12391219
julia --color=yes --project=.buildkite perf/benchmark_step.jl
@@ -1251,6 +1231,7 @@ steps:
12511231
artifact_paths: "bm_baroclinic_wave_moist_gpu/output_active/*"
12521232
env:
12531233
CLIMACOMMS_DEVICE: "CUDA"
1234+
CLIMA_NAME_CUDA_KERNELS_FROM_STACK_TRACE: "true"
12541235
agents:
12551236
slurm_mem: 16G
12561237
slurm_gpus: 1
@@ -1272,6 +1253,7 @@ steps:
12721253
--job_id bm_default_gpu
12731254
env:
12741255
CLIMACOMMS_DEVICE: "CUDA"
1256+
CLIMA_NAME_CUDA_KERNELS_FROM_STACK_TRACE: "true"
12751257
agents:
12761258
slurm_mem: 24GB
12771259
slurm_gpus: 1
@@ -1284,6 +1266,7 @@ steps:
12841266
--job_id bm_diag_edmf_gpu
12851267
env:
12861268
CLIMACOMMS_DEVICE: "CUDA"
1269+
CLIMA_NAME_CUDA_KERNELS_FROM_STACK_TRACE: "true"
12871270
agents:
12881271
slurm_mem: 24GB
12891272
slurm_gpus: 1
@@ -1296,13 +1279,13 @@ steps:
12961279
--job_id bm_prog_edmf_gpu
12971280
env:
12981281
CLIMACOMMS_DEVICE: "CUDA"
1282+
CLIMA_NAME_CUDA_KERNELS_FROM_STACK_TRACE: "true"
12991283
agents:
13001284
slurm_mem: 24GB
13011285
slurm_gpus: 1
13021286

13031287
- group: "Flame graphs"
13041288
steps:
1305-
13061289
- label: ":fire: Flame graph: gpu job"
13071290
command: >
13081291
julia --color=yes --project=.buildkite perf/flame.jl
@@ -1420,7 +1403,6 @@ steps:
14201403

14211404
- group: "Checkbounds/Inference/Invalidations"
14221405
steps:
1423-
14241406
# TODO: we should somehow decouple this unit test from the perf env / scripts
14251407
# Checkbounds
14261408
- label: ":computer: checkbounds"

perf/benchmark_step.jl

Lines changed: 35 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ ClimaComms.@import_required_backends
1717
import Random
1818
Random.seed!(1234)
1919
import ClimaAtmos as CA
20-
import ClimaComms
20+
import CUDA
2121

2222
include("common.jl")
2323
(; config_file, job_id) = CA.commandline_kwargs()
@@ -26,17 +26,44 @@ config = CA.AtmosConfig(config_file; job_id)
2626
simulation = CA.get_simulation(config)
2727
(; integrator) = simulation;
2828
Y₀ = deepcopy(integrator.u);
29+
# Run one step to compile
2930
@info "Compiling benchmark_step!..."
30-
CA.benchmark_step!(integrator, Y₀); # compile first
31+
CA.benchmark_step!(integrator, Y₀);
3132

3233
@info "Running benchmark_step!..."
33-
n_steps = 10
3434
comms_ctx = ClimaComms.context(integrator.u.c)
3535
device = ClimaComms.device(comms_ctx)
36-
local e
37-
s = CA.@timed_str begin
38-
e = ClimaComms.elapsed(device) do
39-
CA.benchmark_step!(integrator, Y₀, n_steps) # run
36+
37+
# If we're running on CUDA, use CUDA's profiler
38+
if device isa ClimaComms.CUDADevice
39+
e = 0.0
40+
n_steps = 5
41+
use_external_profiler = CUDA.Profile.detect_cupti()
42+
if use_external_profiler
43+
@info "Using external CUDA profiler"
44+
CUDA.@profile external = true begin
45+
e = CUDA.@elapsed begin
46+
CA.benchmark_step!(integrator, Y₀, n_steps)
47+
end
48+
end
49+
else
50+
@info "Using internal CUDA profiler"
51+
res = CUDA.@profile external = false begin
52+
e = CUDA.@elapsed begin
53+
CA.benchmark_step!(integrator, Y₀, n_steps)
54+
end
55+
end
56+
show(IOContext(stdout, :limit => false), res)
57+
end
58+
@info "Ran step! with CUDA $n_steps times in $e s, ($(CA.prettytime(e/n_steps*1e9)) per step)"
59+
else
60+
# Profile with Julia's built-in profiler
61+
n_steps = 10
62+
local e
63+
s = CA.@timed_str begin
64+
e = ClimaComms.elapsed(device) do
65+
CA.benchmark_step!(integrator, Y₀, n_steps) # run
66+
end
4067
end
68+
@info "Ran step! $n_steps times in $s, ($(CA.prettytime(e/n_steps*1e9)) per step)"
4169
end
42-
@info "Ran step! $n_steps times in $s, ($(CA.prettytime(e/n_steps*1e9)) per step)"

0 commit comments

Comments (0)