map in +/- for Arrays (#59961)

jishnub · web-flow · commit b05afe0f259f · 2025-11-11T10:12:25.000+04:00
`map` is a simpler operation and uses linear indexing for `Array`s. This often improves performance (occasionally enabling vectorization) and improves TTFX in common cases. It also automatically returns the correct result for 0-D arrays, unlike broadcasting, which returns a scalar. Performance: ```julia julia> A = ones(3,3); julia> @btime $A + $A; 44.622 ns (2 allocations: 144 bytes) # v"1.13.0-DEV.1387" 29.047 ns (2 allocations: 144 bytes) # this PR julia> A = ones(3,3000); julia> @btime $A + $A; 10.095 μs (3 allocations: 70.40 KiB) # v"1.13.0-DEV.1387" 4.787 μs (3 allocations: 70.40 KiB) # this PR julia> @btime A + B + C + D + E + F setup=(A = rand(200,200); B = rand(200,200); C = rand(200,200); D = rand(200,200); E = rand(200,200); F = rand(200,200)); 93.910 μs (3 allocations: 312.59 KiB) # v"1.13.0-DEV.1387" 64.813 μs (9 allocations: 312.77 KiB) # this PR ``` Similarly for `-`. TTFX: ```julia julia> A = ones(3,3); julia> @time A + A; 0.174090 seconds (303.47 k allocations: 14.575 MiB, 99.98% compilation time) # v"1.13.0-DEV.1387" 0.072748 seconds (220.27 k allocations: 11.139 MiB, 99.95% compilation time) # this PR ``` These were measured on ```julia julia> versioninfo() Julia Version 1.13.0-DEV.1388 Commit c5f4927 (2025-10-27 11:44 UTC) Platform Info: OS: Linux (x86_64-linux-gnu) CPU: 8 × Intel(R) Core(TM) i5-10310U CPU @ 1.70GHz WORD_SIZE: 64 LLVM: libLLVM-20.1.8 (ORCJIT, skylake) GC: Built with stock GC Threads: 1 default, 1 interactive, 1 GC (on 8 virtual cores) Environment: LD_LIBRARY_PATH = /usr/local/lib: JULIA_EDITOR = subl ```
diff --git a/base/arraymath.jl b/base/arraymath.jl
@@ -2,26 +2,45 @@
 
 ## Binary arithmetic operators ##
 
+function _broadcast_preserving_zero_d(f, A, B) # generic fallback for untyped arguments
+    broadcast_preserving_zero_d(f, A, B) # forward unchanged to `broadcast_preserving_zero_d`
+end
+
+# Using map over broadcast enables vectorization for wide matrices with few rows.
+# This is because we use linear indexing in `map` as opposed to Cartesian indexing in broadcasting.
+# https://github.com/JuliaLang/julia/issues/47873#issuecomment-1352472461
+function _broadcast_preserving_zero_d(f, A::Array, B::Array) # method for two `Array`s
+    map(f, A, B) # no shape check here; the `+`/`-` callers run `promote_shape` first
+end
+
+function _broadcast_preserving_zero_d(f, A::Array, B::Number) # `Array` with a scalar
+    map(Fix2(f, B), A) # `Fix2` pins `B` as the second argument: each `x` becomes `f(x, B)`
+end
+
+function _broadcast_preserving_zero_d(f, A::Number, B::Array) # scalar with an `Array`
+    map(Fix1(f, A), B) # `Fix1` pins `A` as the first argument: each `x` becomes `f(A, x)`
+end
+
 for f in (:+, :-)
     @eval function ($f)(A::AbstractArray, B::AbstractArray)
         promote_shape(A, B) # check size compatibility
-        broadcast_preserving_zero_d($f, A, B)
+        _broadcast_preserving_zero_d($f, A, B) # `map` for two `Array`s, else the fallback
     end
 end
 
 function +(A::Array, Bs::Array...)
     for B in Bs
         promote_shape(A, B) # check size compatibility
     end
-    broadcast_preserving_zero_d(+, A, Bs...)
+    map(+, A, Bs...) # every argument is an `Array`, so `map` is called directly
 end
 
 for f in (:/, :\, :*)
     if f !== :/
-        @eval ($f)(A::Number, B::AbstractArray) = broadcast_preserving_zero_d($f, A, B)
+        @eval ($f)(A::Number, B::AbstractArray) = _broadcast_preserving_zero_d($f, A, B) # `map` path if `B` is an `Array`
     end
     if f !== :\
-        @eval ($f)(A::AbstractArray, B::Number) = broadcast_preserving_zero_d($f, A, B)
+        @eval ($f)(A::AbstractArray, B::Number) = _broadcast_preserving_zero_d($f, A, B) # `map` path if `A` is an `Array`
     end
 end