Skip to content

Commit 1ff2b0e

Browse files
authored
Merge pull request #4093 from CliMA/dr/gpu_perf/manual_sparse_jacobian_1
Improve performance of CUDA kernels generated from manual_sparse_jacobian
2 parents cec0868 + 1202522 commit 1ff2b0e

File tree

2 files changed

+17
-14
lines changed

2 files changed

+17
-14
lines changed

reproducibility_tests/ref_counter.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
274
1+
275
22

33
# **README**
44
#
@@ -20,6 +20,11 @@
2020

2121

2222
#=
23+
275
24+
- Change order of GPU calculations for better performance, but it
25+
results in slightly different floating-point rounding. Artifacts
26+
all look the same.
27+
2328
274
2429
- Remove unused calculation of TKE exchange in mixing length
2530

src/prognostic_equations/implicit/manual_sparse_jacobian.jl

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -626,11 +626,13 @@ function update_jacobian!(alg::ManualSparseJacobian, cache, Y, p, dtγ, t)
626626
FT = eltype(params)
627627
(; vertical_diffusion, smagorinsky_lilly) = p.atmos
628628
(; ᶜp) = p.precomputed
629+
ᶜK_u = p.scratch.ᶜtemp_scalar_4
630+
ᶜK_h = p.scratch.ᶜtemp_scalar_6
629631
if vertical_diffusion isa DecayWithHeightDiffusion
630-
ᶜK_h = ᶜcompute_eddy_diffusivity_coefficient(Y.c.ρ, vertical_diffusion)
632+
ᶜK_h .= ᶜcompute_eddy_diffusivity_coefficient(Y.c.ρ, vertical_diffusion)
631633
ᶜK_u = ᶜK_h
632634
elseif vertical_diffusion isa VerticalDiffusion
633-
ᶜK_h = ᶜcompute_eddy_diffusivity_coefficient(Y.c.uₕ, ᶜp, vertical_diffusion)
635+
ᶜK_h .= ᶜcompute_eddy_diffusivity_coefficient(Y.c.uₕ, ᶜp, vertical_diffusion)
634636
ᶜK_u = ᶜK_h
635637
elseif is_smagorinsky_vertical(smagorinsky_lilly)
636638
set_smagorinsky_lilly_precomputed_quantities!(Y, p, smagorinsky_lilly)
@@ -650,22 +652,18 @@ function update_jacobian!(alg::ManualSparseJacobian, cache, Y, p, dtγ, t)
650652
@. ᶜK_h = eddy_diffusivity(ᶜK_u, ᶜprandtl_nvec)
651653
end
652654

653-
654-
@. p.scratch.ᶜbidiagonal_adjoint_matrix_c3 =
655-
ᶜadvdivᵥ_matrix() ⋅ DiagonalMatrixRow(ᶠinterp(ᶜρ) * ᶠinterp(ᶜK_h))
656-
@. ᶜdiffusion_h_matrix =
657-
p.scratch.ᶜbidiagonal_adjoint_matrix_c3 ⋅ ᶠgradᵥ_matrix()
658-
655+
∂ᶠρχ_dif_flux_∂ᶜχ = ᶠp_grad_matrix
656+
@. ∂ᶠρχ_dif_flux_∂ᶜχ =
657+
DiagonalMatrixRow(ᶠinterp(ᶜρ) * ᶠinterp(ᶜK_h)) ⋅ ᶠgradᵥ_matrix()
658+
@. ᶜdiffusion_h_matrix = ᶜadvdivᵥ_matrix() ⋅ ∂ᶠρχ_dif_flux_∂ᶜχ
659659
if (
660660
MatrixFields.has_field(Y, @name(c.sgs⁰.ρatke)) ||
661661
!isnothing(p.atmos.turbconv_model) ||
662662
!disable_momentum_vertical_diffusion(p.atmos.vertical_diffusion)
663663
)
664-
@. p.scratch.ᶜbidiagonal_adjoint_matrix_c3 =
665-
ᶜadvdivᵥ_matrix() ⋅
666-
DiagonalMatrixRow(ᶠinterp(ᶜρ) * ᶠinterp(ᶜK_u))
667-
@. ᶜdiffusion_u_matrix =
668-
p.scratch.ᶜbidiagonal_adjoint_matrix_c3 ⋅ ᶠgradᵥ_matrix()
664+
@. ∂ᶠρχ_dif_flux_∂ᶜχ =
665+
DiagonalMatrixRow(ᶠinterp(ᶜρ) * ᶠinterp(ᶜK_u)) ⋅ ᶠgradᵥ_matrix()
666+
@. ᶜdiffusion_u_matrix = ᶜadvdivᵥ_matrix() ⋅ ∂ᶠρχ_dif_flux_∂ᶜχ
669667
end
670668

671669
∂ᶜρe_tot_err_∂ᶜρ = matrix[@name(c.ρe_tot), @name(c.ρ)]

0 commit comments

Comments
 (0)