From 9a52c18331544ddc9d57752f225c7edeb549574d Mon Sep 17 00:00:00 2001 From: Kunal Mansukhani Date: Sat, 4 Apr 2026 23:58:38 -0700 Subject: [PATCH] Upgrade Mojo starters to 26.2 Breaking changes in Mojo 26 require updates across all starter.mojo files: - stdlib imports must be fully qualified (std.gpu, std.memory, std.math) - gpu.id module removed; block_dim/block_idx/thread_idx now live in std.gpu - UnsafePointer requires an explicit origin; use MutExternalOrigin for pointers crossing the @export C ABI boundary - ctx.enqueue_function[kernel](...) parametric form removed; kernels must first be compiled via ctx.compile_function[kernel, kernel]() and then passed as a runtime arg to ctx.enqueue_function(kernel, ...) - def no longer auto-raises in Mojo 26, so @export functions that call raising APIs (DeviceContext, enqueue_function, synchronize) are now declared as `fn solve(...) raises` instead of `def solve(...)` Verified by compiling several starters (vector_add, 1d_convolution, reduction) against Mojo 26.2 on a Tesla T4 via the accelerated runner. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../19_reverse_array/starter/starter.mojo | 15 ++++++++------- .../easy/1_vector_add/starter/starter.mojo | 15 ++++++++------- challenges/easy/21_relu/starter/starter.mojo | 15 ++++++++------- .../easy/23_leaky_relu/starter/starter.mojo | 15 ++++++++------- .../24_rainbow_table/starter/starter.mojo | 15 ++++++++------- .../starter/starter.mojo | 15 ++++++++------- .../easy/31_matrix_copy/starter/starter.mojo | 15 ++++++++------- .../3_matrix_transpose/starter/starter.mojo | 15 ++++++++------- challenges/easy/52_silu/starter/starter.mojo | 15 ++++++++------- .../easy/54_swiglu/starter/starter.mojo | 15 ++++++++------- .../62_value_clipping/starter/starter.mojo | 15 ++++++++------- .../easy/63_interleave/starter/starter.mojo | 15 ++++++++------- challenges/easy/65_geglu/starter/starter.mojo | 15 ++++++++------- .../66_rgb_to_grayscale/starter/starter.mojo | 15 ++++++++------- .../easy/68_sigmoid/starter/starter.mojo | 15 ++++++++------- .../7_color_inversion/starter/starter.mojo | 15 ++++++++------- .../8_matrix_addition/starter/starter.mojo | 15 ++++++++------- .../9_1d_convolution/starter/starter.mojo | 19 ++++++++++--------- .../starter/starter.mojo | 10 +++++----- .../14_multi_agent_sim/starter/starter.mojo | 10 +++++----- .../hard/15_sorting/starter/starter.mojo | 10 +++++----- .../20_kmeans_clustering/starter/starter.mojo | 10 +++++----- .../hard/36_radix_sort/starter/starter.mojo | 8 ++++---- .../starter/starter.mojo | 10 +++++----- .../46_bfs_shortest_path/starter/starter.mojo | 6 +++--- .../53_casual_attention/starter/starter.mojo | 12 ++++++------ .../56_linear_attention/starter/starter.mojo | 12 ++++++------ .../starter/starter.mojo | 12 ++++++------ .../starter/starter.mojo | 10 +++++----- .../hard/74_gpt2_block/starter/starter.mojo | 10 +++++----- .../10_2d_convolution/starter/starter.mojo | 10 +++++----- .../11_3d_convolution/starter/starter.mojo | 10 +++++----- .../13_histogramming/starter/starter.mojo | 
10 +++++----- .../medium/16_prefix_sum/starter/starter.mojo | 10 +++++----- .../17_dot_product/starter/starter.mojo | 10 +++++----- .../starter/starter.mojo | 10 +++++----- .../medium/22_gemm/starter/starter.mojo | 10 +++++----- .../starter/starter.mojo | 10 +++++----- .../starter/starter.mojo | 10 +++++----- .../28_gaussian_blur/starter/starter.mojo | 10 +++++----- .../29_top_k_selection/starter/starter.mojo | 10 +++++----- .../starter/starter.mojo | 10 +++++----- .../starter/starter.mojo | 10 +++++----- .../starter/starter.mojo | 10 +++++----- .../starter/starter.mojo | 10 +++++----- .../starter/starter.mojo | 10 +++++----- .../37_matrix_power/starter/starter.mojo | 14 +++++++------- .../38_nearest_neighbor/starter/starter.mojo | 4 ++-- .../starter/starter.mojo | 14 +++++++------- .../42_2d_max_pooling/starter/starter.mojo | 12 ++++++------ .../starter/starter.mojo | 10 +++++----- .../starter/starter.mojo | 10 +++++----- .../starter/starter.mojo | 10 +++++----- .../47_subarray_sum/starter/starter.mojo | 10 +++++----- .../48_2d_subarray_sum/starter/starter.mojo | 10 +++++----- .../49_3d_subarray_sum/starter/starter.mojo | 10 +++++----- .../medium/4_reduction/starter/starter.mojo | 10 +++++----- .../50_rms_normalization/starter/starter.mojo | 14 +++++++------- .../51_max_subarray_sum/starter/starter.mojo | 10 +++++----- .../starter/starter.mojo | 12 ++++++------ .../starter/starter.mojo | 10 +++++----- .../58_fp16_dot_product/starter/starter.mojo | 10 +++++----- .../medium/5_softmax/starter/starter.mojo | 10 +++++----- .../60_top_p_sampling/starter/starter.mojo | 12 ++++++------ .../61_rope_embedding/starter/starter.mojo | 10 +++++----- .../starter/starter.mojo | 10 +++++----- .../67_moe_topk_gating/starter/starter.mojo | 10 +++++----- .../69_jacobi_stencil_2d/starter/starter.mojo | 10 +++++----- .../6_softmax_attention/starter/starter.mojo | 12 ++++++------ .../starter/starter.mojo | 10 +++++----- .../71_parallel_merge/starter/starter.mojo | 6 +++--- 
.../72_stream_compaction/starter/starter.mojo | 6 +++--- .../starter/starter.mojo | 10 +++++----- .../medium/78_2d_fft/starter/starter.mojo | 10 +++++----- .../starter/starter.mojo | 16 ++++++++-------- .../81_int4_matmul/starter/starter.mojo | 16 ++++++++-------- .../82_linear_recurrence/starter/starter.mojo | 10 +++++----- .../85_lora_linear/starter/starter.mojo | 18 +++++++++--------- 78 files changed, 459 insertions(+), 441 deletions(-) diff --git a/challenges/easy/19_reverse_array/starter/starter.mojo b/challenges/easy/19_reverse_array/starter/starter.mojo index 2d06cdc1..2463cd49 100644 --- a/challenges/easy/19_reverse_array/starter/starter.mojo +++ b/challenges/easy/19_reverse_array/starter/starter.mojo @@ -1,20 +1,21 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn reverse_array_kernel(input: UnsafePointer[Float32], N: Int32): +fn reverse_array_kernel(input: UnsafePointer[Float32, MutExternalOrigin], N: Int32): pass # input is a device pointer (i.e. 
pointer to memory on the GPU) @export -def solve(input: UnsafePointer[Float32], N: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: var threadsPerBlock: Int32 = 256 var ctx = DeviceContext() var blocksPerGrid = ceildiv(N, threadsPerBlock) - ctx.enqueue_function[reverse_array_kernel]( + var _kernel = ctx.compile_function[reverse_array_kernel, reverse_array_kernel]() + ctx.enqueue_function(_kernel, input, N, grid_dim = blocksPerGrid, block_dim = threadsPerBlock diff --git a/challenges/easy/1_vector_add/starter/starter.mojo b/challenges/easy/1_vector_add/starter/starter.mojo index d88a3777..006047f3 100644 --- a/challenges/easy/1_vector_add/starter/starter.mojo +++ b/challenges/easy/1_vector_add/starter/starter.mojo @@ -1,19 +1,20 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn vector_add_kernel(A: UnsafePointer[Float32], B: UnsafePointer[Float32], C: UnsafePointer[Float32], N: Int32): +fn vector_add_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], N: Int32): pass # A, B, C are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(A: UnsafePointer[Float32], B: UnsafePointer[Float32], C: UnsafePointer[Float32], N: Int32): +fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var num_blocks = ceildiv(N, BLOCK_SIZE) - ctx.enqueue_function[vector_add_kernel]( + var _kernel = ctx.compile_function[vector_add_kernel, vector_add_kernel]() + ctx.enqueue_function(_kernel, A, B, C, N, grid_dim = num_blocks, block_dim = BLOCK_SIZE diff --git a/challenges/easy/21_relu/starter/starter.mojo b/challenges/easy/21_relu/starter/starter.mojo index 88033c23..674d9db9 100644 --- a/challenges/easy/21_relu/starter/starter.mojo +++ b/challenges/easy/21_relu/starter/starter.mojo @@ -1,20 +1,21 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn relu_kernel(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn relu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32): pass # input, output are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: var threadsPerBlock: Int32 = 256 var ctx = DeviceContext() var blocksPerGrid = ceildiv(N, threadsPerBlock) - ctx.enqueue_function[relu_kernel]( + var _kernel = ctx.compile_function[relu_kernel, relu_kernel]() + ctx.enqueue_function(_kernel, input, output, N, grid_dim = blocksPerGrid, block_dim = threadsPerBlock diff --git a/challenges/easy/23_leaky_relu/starter/starter.mojo b/challenges/easy/23_leaky_relu/starter/starter.mojo index 9d7c998d..3d5472aa 100644 --- a/challenges/easy/23_leaky_relu/starter/starter.mojo +++ b/challenges/easy/23_leaky_relu/starter/starter.mojo @@ -1,20 +1,21 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn leaky_relu_kernel(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn leaky_relu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32): pass # input, output are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: var threadsPerBlock: Int32 = 256 var ctx = DeviceContext() var blocksPerGrid = ceildiv(N, threadsPerBlock) - ctx.enqueue_function[leaky_relu_kernel]( + var _kernel = ctx.compile_function[leaky_relu_kernel, leaky_relu_kernel]() + ctx.enqueue_function(_kernel, input, output, N, grid_dim = blocksPerGrid, block_dim = threadsPerBlock diff --git a/challenges/easy/24_rainbow_table/starter/starter.mojo b/challenges/easy/24_rainbow_table/starter/starter.mojo index 41684115..e96fa607 100644 --- a/challenges/easy/24_rainbow_table/starter/starter.mojo +++ b/challenges/easy/24_rainbow_table/starter/starter.mojo @@ -1,7 +1,7 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv fn fnv1a_hash(input: Int32) -> UInt32: alias FNV_PRIME: UInt32 = 16777619 @@ -15,19 +15,20 @@ fn fnv1a_hash(input: Int32) -> UInt32: return hash -fn fnv1a_hash_kernel(input: UnsafePointer[Int32], output: UnsafePointer[UInt32], +fn fnv1a_hash_kernel(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[UInt32, MutExternalOrigin], N: Int32, R: Int32): pass # input, output are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(input: UnsafePointer[Int32], output: UnsafePointer[UInt32], N: Int32, R: Int32): +fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[UInt32, MutExternalOrigin], N: Int32, R: Int32) raises: var threadsPerBlock: Int32 = 256 var ctx = DeviceContext() var blocksPerGrid = ceildiv(N, threadsPerBlock) - ctx.enqueue_function[fnv1a_hash_kernel]( + var _kernel = ctx.compile_function[fnv1a_hash_kernel, fnv1a_hash_kernel]() + ctx.enqueue_function(_kernel, input, output, N, R, grid_dim = blocksPerGrid, block_dim = threadsPerBlock diff --git a/challenges/easy/2_matrix_multiplication/starter/starter.mojo b/challenges/easy/2_matrix_multiplication/starter/starter.mojo index 1c447391..58a105c8 100644 --- a/challenges/easy/2_matrix_multiplication/starter/starter.mojo +++ b/challenges/easy/2_matrix_multiplication/starter/starter.mojo @@ -1,21 +1,22 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math.math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn matrix_multiplication_kernel(A: UnsafePointer[Float32], B: UnsafePointer[Float32], C: UnsafePointer[Float32], M: Int32, N: Int32, K: Int32): +fn matrix_multiplication_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, K: Int32): pass # A, B, C are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(A: UnsafePointer[Float32], B: UnsafePointer[Float32], C: UnsafePointer[Float32], M: Int32, N: Int32, K: Int32): +fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, K: Int32) raises: var BLOCK_SIZE: Int32 = 16 var ctx = DeviceContext() var grid_dim_x = ceildiv(K, BLOCK_SIZE) var grid_dim_y = ceildiv(M, BLOCK_SIZE) - ctx.enqueue_function[matrix_multiplication_kernel]( + var _kernel = ctx.compile_function[matrix_multiplication_kernel, matrix_multiplication_kernel]() + ctx.enqueue_function(_kernel, A, B, C, M, N, K, grid_dim = (grid_dim_x, grid_dim_y), block_dim = (BLOCK_SIZE, BLOCK_SIZE) diff --git a/challenges/easy/31_matrix_copy/starter/starter.mojo b/challenges/easy/31_matrix_copy/starter/starter.mojo index dff118e8..ca585920 100644 --- a/challenges/easy/31_matrix_copy/starter/starter.mojo +++ b/challenges/easy/31_matrix_copy/starter/starter.mojo @@ -1,21 +1,22 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn copy_matrix_kernel(A: UnsafePointer[Float32], B: UnsafePointer[Float32], N: Int32): +fn copy_matrix_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], N: Int32): pass # A, B are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(A: UnsafePointer[Float32], B: UnsafePointer[Float32], N: Int32): +fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: var total = N * N var threadsPerBlock: Int32 = 256 var ctx = DeviceContext() var blocksPerGrid = ceildiv(total, threadsPerBlock) - ctx.enqueue_function[copy_matrix_kernel]( + var _kernel = ctx.compile_function[copy_matrix_kernel, copy_matrix_kernel]() + ctx.enqueue_function(_kernel, A, B, N, grid_dim = blocksPerGrid, block_dim = threadsPerBlock diff --git a/challenges/easy/3_matrix_transpose/starter/starter.mojo b/challenges/easy/3_matrix_transpose/starter/starter.mojo index 8d3bcafb..231d7505 100644 --- a/challenges/easy/3_matrix_transpose/starter/starter.mojo +++ b/challenges/easy/3_matrix_transpose/starter/starter.mojo @@ -1,21 +1,22 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn matrix_transpose_kernel(input: UnsafePointer[Float32], output: UnsafePointer[Float32], rows: Int32, cols: Int32): +fn matrix_transpose_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], rows: Int32, cols: Int32): pass # input, output are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], rows: Int32, cols: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], rows: Int32, cols: Int32) raises: var BLOCK_SIZE: Int32 = 32 var ctx = DeviceContext() var grid_dim_x = ceildiv(cols, BLOCK_SIZE) var grid_dim_y = ceildiv(rows, BLOCK_SIZE) - ctx.enqueue_function[matrix_transpose_kernel]( + var _kernel = ctx.compile_function[matrix_transpose_kernel, matrix_transpose_kernel]() + ctx.enqueue_function(_kernel, input, output, rows, cols, grid_dim = (grid_dim_x, grid_dim_y), block_dim = (BLOCK_SIZE, BLOCK_SIZE) diff --git a/challenges/easy/52_silu/starter/starter.mojo b/challenges/easy/52_silu/starter/starter.mojo index 89e67442..8b5814c0 100644 --- a/challenges/easy/52_silu/starter/starter.mojo +++ b/challenges/easy/52_silu/starter/starter.mojo @@ -1,20 +1,21 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn silu_kernel(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn silu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32): pass # input, output are device pointers @export -def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: var threadsPerBlock: Int32 = 256 var ctx = DeviceContext() var blocksPerGrid = ceildiv(N, threadsPerBlock) - ctx.enqueue_function[silu_kernel]( + var _kernel = ctx.compile_function[silu_kernel, silu_kernel]() + ctx.enqueue_function(_kernel, input, output, 
N, grid_dim = blocksPerGrid, block_dim = threadsPerBlock diff --git a/challenges/easy/54_swiglu/starter/starter.mojo b/challenges/easy/54_swiglu/starter/starter.mojo index 7eda8532..c32b232a 100644 --- a/challenges/easy/54_swiglu/starter/starter.mojo +++ b/challenges/easy/54_swiglu/starter/starter.mojo @@ -1,21 +1,22 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn swiglu_kernel(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn swiglu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32): pass # input, output are device pointers @export -def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var num_blocks = ceildiv(N // 2, BLOCK_SIZE) - ctx.enqueue_function[swiglu_kernel]( + var _kernel = ctx.compile_function[swiglu_kernel, swiglu_kernel]() + ctx.enqueue_function(_kernel, input, output, N, grid_dim = num_blocks, block_dim = BLOCK_SIZE diff --git a/challenges/easy/62_value_clipping/starter/starter.mojo b/challenges/easy/62_value_clipping/starter/starter.mojo index 106457ce..49872678 100644 --- a/challenges/easy/62_value_clipping/starter/starter.mojo +++ b/challenges/easy/62_value_clipping/starter/starter.mojo @@ -1,20 +1,21 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer 
+from std.math import ceildiv -fn clip_kernel(input: UnsafePointer[Float32], output: UnsafePointer[Float32], lo: Float32, hi: Float32, N: Int32): +fn clip_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], lo: Float32, hi: Float32, N: Int32): pass # input, output are device pointers @export -def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], lo: Float32, hi: Float32, N: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], lo: Float32, hi: Float32, N: Int32) raises: var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var num_blocks = ceildiv(N, BLOCK_SIZE) - ctx.enqueue_function[clip_kernel]( + var _kernel = ctx.compile_function[clip_kernel, clip_kernel]() + ctx.enqueue_function(_kernel, input, output, lo, hi, N, grid_dim = num_blocks, block_dim = BLOCK_SIZE diff --git a/challenges/easy/63_interleave/starter/starter.mojo b/challenges/easy/63_interleave/starter/starter.mojo index 3329478f..1b46d0c9 100644 --- a/challenges/easy/63_interleave/starter/starter.mojo +++ b/challenges/easy/63_interleave/starter/starter.mojo @@ -1,19 +1,20 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn interleave_kernel(A: UnsafePointer[Float32], B: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn interleave_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32): pass # A, B, output are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(A: UnsafePointer[Float32], B: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var num_blocks = ceildiv(N, BLOCK_SIZE) - ctx.enqueue_function[interleave_kernel]( + var _kernel = ctx.compile_function[interleave_kernel, interleave_kernel]() + ctx.enqueue_function(_kernel, A, B, output, N, grid_dim = num_blocks, block_dim = BLOCK_SIZE diff --git a/challenges/easy/65_geglu/starter/starter.mojo b/challenges/easy/65_geglu/starter/starter.mojo index 60e54e13..1d77e891 100644 --- a/challenges/easy/65_geglu/starter/starter.mojo +++ b/challenges/easy/65_geglu/starter/starter.mojo @@ -1,21 +1,22 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn geglu_kernel(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn geglu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32): pass # input, output are device pointers @export -def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var num_blocks = ceildiv(N // 2, BLOCK_SIZE) - ctx.enqueue_function[geglu_kernel]( + var _kernel = ctx.compile_function[geglu_kernel, geglu_kernel]() + ctx.enqueue_function(_kernel, input, output, N, grid_dim = num_blocks, block_dim = BLOCK_SIZE diff --git 
a/challenges/easy/66_rgb_to_grayscale/starter/starter.mojo b/challenges/easy/66_rgb_to_grayscale/starter/starter.mojo index 4dde0448..45116afb 100644 --- a/challenges/easy/66_rgb_to_grayscale/starter/starter.mojo +++ b/challenges/easy/66_rgb_to_grayscale/starter/starter.mojo @@ -1,20 +1,21 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn rgb_to_grayscale_kernel(input: UnsafePointer[Float32], output: UnsafePointer[Float32], width: Int32, height: Int32): +fn rgb_to_grayscale_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], width: Int32, height: Int32): pass # input, output are device pointers @export -def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], width: Int32, height: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], width: Int32, height: Int32) raises: var total_pixels = width * height var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var num_blocks = ceildiv(total_pixels, BLOCK_SIZE) - ctx.enqueue_function[rgb_to_grayscale_kernel]( + var _kernel = ctx.compile_function[rgb_to_grayscale_kernel, rgb_to_grayscale_kernel]() + ctx.enqueue_function(_kernel, input, output, width, height, grid_dim = num_blocks, block_dim = BLOCK_SIZE diff --git a/challenges/easy/68_sigmoid/starter/starter.mojo b/challenges/easy/68_sigmoid/starter/starter.mojo index 2ab43d63..cc558cf2 100644 --- a/challenges/easy/68_sigmoid/starter/starter.mojo +++ b/challenges/easy/68_sigmoid/starter/starter.mojo @@ -1,19 +1,20 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from 
std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn sigmoid_kernel(X: UnsafePointer[Float32], Y: UnsafePointer[Float32], N: Int32): +fn sigmoid_kernel(X: UnsafePointer[Float32, MutExternalOrigin], Y: UnsafePointer[Float32, MutExternalOrigin], N: Int32): pass # X, Y are device pointers (i.e. pointers to memory on the GPU) @export -def solve(X: UnsafePointer[Float32], Y: UnsafePointer[Float32], N: Int32): +fn solve(X: UnsafePointer[Float32, MutExternalOrigin], Y: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var num_blocks = ceildiv(N, BLOCK_SIZE) - ctx.enqueue_function[sigmoid_kernel]( + var _kernel = ctx.compile_function[sigmoid_kernel, sigmoid_kernel]() + ctx.enqueue_function(_kernel, X, Y, N, grid_dim = num_blocks, block_dim = BLOCK_SIZE diff --git a/challenges/easy/7_color_inversion/starter/starter.mojo b/challenges/easy/7_color_inversion/starter/starter.mojo index bf46faa5..827589cf 100644 --- a/challenges/easy/7_color_inversion/starter/starter.mojo +++ b/challenges/easy/7_color_inversion/starter/starter.mojo @@ -1,21 +1,22 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn invert_kernel(image: UnsafePointer[UInt8], width: Int32, height: Int32): +fn invert_kernel(image: UnsafePointer[UInt8, MutExternalOrigin], width: Int32, height: Int32): pass # image is a device pointer (i.e. 
pointer to memory on the GPU) @export -def solve(image: UnsafePointer[UInt8], width: Int32, height: Int32): +fn solve(image: UnsafePointer[UInt8, MutExternalOrigin], width: Int32, height: Int32) raises: var threadsPerBlock: Int32 = 256 var ctx = DeviceContext() var total_pixels = width * height var blocksPerGrid = ceildiv(total_pixels, threadsPerBlock) - ctx.enqueue_function[invert_kernel]( + var _kernel = ctx.compile_function[invert_kernel, invert_kernel]() + ctx.enqueue_function(_kernel, image, width, height, grid_dim = blocksPerGrid, block_dim = threadsPerBlock diff --git a/challenges/easy/8_matrix_addition/starter/starter.mojo b/challenges/easy/8_matrix_addition/starter/starter.mojo index 5465984d..c947032e 100644 --- a/challenges/easy/8_matrix_addition/starter/starter.mojo +++ b/challenges/easy/8_matrix_addition/starter/starter.mojo @@ -1,20 +1,21 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn matrix_add_kernel(A: UnsafePointer[Float32], B: UnsafePointer[Float32], C: UnsafePointer[Float32], N: Int32): +fn matrix_add_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], N: Int32): pass # A, B, C are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(A: UnsafePointer[Float32], B: UnsafePointer[Float32], C: UnsafePointer[Float32], N: Int32): +fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var n_elements = N * N var num_blocks = ceildiv(n_elements, BLOCK_SIZE) - ctx.enqueue_function[matrix_add_kernel]( + var _kernel = ctx.compile_function[matrix_add_kernel, matrix_add_kernel]() + ctx.enqueue_function(_kernel, A, B, C, N, grid_dim = num_blocks, block_dim = BLOCK_SIZE diff --git a/challenges/easy/9_1d_convolution/starter/starter.mojo b/challenges/easy/9_1d_convolution/starter/starter.mojo index 745fc0d4..6bc29b7f 100644 --- a/challenges/easy/9_1d_convolution/starter/starter.mojo +++ b/challenges/easy/9_1d_convolution/starter/starter.mojo @@ -1,23 +1,24 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv -fn convolution_1d_kernel(input: UnsafePointer[Float32], kernel: UnsafePointer[Float32], - output: UnsafePointer[Float32], input_size: Int32, kernel_size: Int32): +fn convolution_1d_kernel(input: UnsafePointer[Float32, MutExternalOrigin], kernel: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], input_size: Int32, kernel_size: Int32): pass # input, kernel, output are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(input: UnsafePointer[Float32], kernel: UnsafePointer[Float32], - output: UnsafePointer[Float32], input_size: Int32, kernel_size: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], kernel: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], input_size: Int32, kernel_size: Int32) raises: var output_size = input_size - kernel_size + 1 var threadsPerBlock: Int32 = 256 var ctx = DeviceContext() var blocksPerGrid = ceildiv(output_size, threadsPerBlock) - ctx.enqueue_function[convolution_1d_kernel]( + var _kernel = ctx.compile_function[convolution_1d_kernel, convolution_1d_kernel]() + ctx.enqueue_function(_kernel, input, kernel, output, input_size, kernel_size, grid_dim = blocksPerGrid, block_dim = threadsPerBlock diff --git a/challenges/hard/12_multi_head_attention/starter/starter.mojo b/challenges/hard/12_multi_head_attention/starter/starter.mojo index b6e13df2..1f0884e1 100644 --- a/challenges/hard/12_multi_head_attention/starter/starter.mojo +++ b/challenges/hard/12_multi_head_attention/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(Q: UnsafePointer[Float32], K: UnsafePointer[Float32], V: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32, d_model: Int32, h: Int32): +fn solve(Q: UnsafePointer[Float32, MutExternalOrigin], K: UnsafePointer[Float32, MutExternalOrigin], V: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32, d_model: Int32, h: Int32) raises: pass diff --git a/challenges/hard/14_multi_agent_sim/starter/starter.mojo 
b/challenges/hard/14_multi_agent_sim/starter/starter.mojo index bd28ab96..589c22fe 100644 --- a/challenges/hard/14_multi_agent_sim/starter/starter.mojo +++ b/challenges/hard/14_multi_agent_sim/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(agents: UnsafePointer[Float32], agents_next: UnsafePointer[Float32], N: Int32): +fn solve(agents: UnsafePointer[Float32, MutExternalOrigin], agents_next: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: pass diff --git a/challenges/hard/15_sorting/starter/starter.mojo b/challenges/hard/15_sorting/starter/starter.mojo index bb7cb470..ea451789 100644 --- a/challenges/hard/15_sorting/starter/starter.mojo +++ b/challenges/hard/15_sorting/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(data: UnsafePointer[Float32], N: Int32): +fn solve(data: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: pass diff --git a/challenges/hard/20_kmeans_clustering/starter/starter.mojo b/challenges/hard/20_kmeans_clustering/starter/starter.mojo index 3ac92e83..04610735 100644 --- a/challenges/hard/20_kmeans_clustering/starter/starter.mojo +++ b/challenges/hard/20_kmeans_clustering/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext 
+from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(data_x: UnsafePointer[Float32], data_y: UnsafePointer[Float32], labels: UnsafePointer[Int32], initial_centroid_x: UnsafePointer[Float32], initial_centroid_y: UnsafePointer[Float32], final_centroid_x: UnsafePointer[Float32], final_centroid_y: UnsafePointer[Float32], sample_size: Int32, k: Int32, max_iterations: Int32): +fn solve(data_x: UnsafePointer[Float32, MutExternalOrigin], data_y: UnsafePointer[Float32, MutExternalOrigin], labels: UnsafePointer[Int32, MutExternalOrigin], initial_centroid_x: UnsafePointer[Float32, MutExternalOrigin], initial_centroid_y: UnsafePointer[Float32, MutExternalOrigin], final_centroid_x: UnsafePointer[Float32, MutExternalOrigin], final_centroid_y: UnsafePointer[Float32, MutExternalOrigin], sample_size: Int32, k: Int32, max_iterations: Int32) raises: pass diff --git a/challenges/hard/36_radix_sort/starter/starter.mojo b/challenges/hard/36_radix_sort/starter/starter.mojo index 4dc6cf6b..7484ff7c 100644 --- a/challenges/hard/36_radix_sort/starter/starter.mojo +++ b/challenges/hard/36_radix_sort/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer # input, output are device pointers @export -def solve(input: UnsafePointer[UInt32], output: UnsafePointer[UInt32], N: Int32): +fn solve(input: UnsafePointer[UInt32, MutExternalOrigin], output: UnsafePointer[UInt32, MutExternalOrigin], N: Int32) raises: pass diff --git a/challenges/hard/39_Fast_Fourier_transform/starter/starter.mojo b/challenges/hard/39_Fast_Fourier_transform/starter/starter.mojo index 0e01386a..2fa6da9a 100644 --- a/challenges/hard/39_Fast_Fourier_transform/starter/starter.mojo +++ 
b/challenges/hard/39_Fast_Fourier_transform/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # signal and spectrum are device pointers @export -def solve(signal: UnsafePointer[Float32], spectrum: UnsafePointer[Float32], N: Int32): +fn solve(signal: UnsafePointer[Float32, MutExternalOrigin], spectrum: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: pass diff --git a/challenges/hard/46_bfs_shortest_path/starter/starter.mojo b/challenges/hard/46_bfs_shortest_path/starter/starter.mojo index 712578b5..9bb12c2f 100644 --- a/challenges/hard/46_bfs_shortest_path/starter/starter.mojo +++ b/challenges/hard/46_bfs_shortest_path/starter/starter.mojo @@ -1,7 +1,7 @@ -from memory import UnsafePointer +from std.memory import UnsafePointer # grid, result are device pointers @export -def solve(grid: UnsafePointer[Int32], result: UnsafePointer[Int32], rows: Int32, cols: Int32, - start_row: Int32, start_col: Int32, end_row: Int32, end_col: Int32): +fn solve(grid: UnsafePointer[Int32, MutExternalOrigin], result: UnsafePointer[Int32, MutExternalOrigin], rows: Int32, cols: Int32, + start_row: Int32, start_col: Int32, end_row: Int32, end_col: Int32) raises: pass diff --git a/challenges/hard/53_casual_attention/starter/starter.mojo b/challenges/hard/53_casual_attention/starter/starter.mojo index f0fbc957..196a0a16 100644 --- a/challenges/hard/53_casual_attention/starter/starter.mojo +++ b/challenges/hard/53_casual_attention/starter/starter.mojo @@ -1,10 +1,10 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, 
block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # Q, K, V, output are device pointers (i.e. pointers to memory on the GPU) @export -def solve(Q: UnsafePointer[Float32], K: UnsafePointer[Float32], V: UnsafePointer[Float32], - output: UnsafePointer[Float32], M: Int32, d: Int32): +fn solve(Q: UnsafePointer[Float32, MutExternalOrigin], K: UnsafePointer[Float32, MutExternalOrigin], V: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], M: Int32, d: Int32) raises: pass diff --git a/challenges/hard/56_linear_attention/starter/starter.mojo b/challenges/hard/56_linear_attention/starter/starter.mojo index f0fbc957..196a0a16 100644 --- a/challenges/hard/56_linear_attention/starter/starter.mojo +++ b/challenges/hard/56_linear_attention/starter/starter.mojo @@ -1,10 +1,10 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # Q, K, V, output are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(Q: UnsafePointer[Float32], K: UnsafePointer[Float32], V: UnsafePointer[Float32], - output: UnsafePointer[Float32], M: Int32, d: Int32): +fn solve(Q: UnsafePointer[Float32, MutExternalOrigin], K: UnsafePointer[Float32, MutExternalOrigin], V: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], M: Int32, d: Int32) raises: pass diff --git a/challenges/hard/59_sliding_window_attn/starter/starter.mojo b/challenges/hard/59_sliding_window_attn/starter/starter.mojo index ecd8a37a..035a8680 100644 --- a/challenges/hard/59_sliding_window_attn/starter/starter.mojo +++ b/challenges/hard/59_sliding_window_attn/starter/starter.mojo @@ -1,10 +1,10 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # Q, K, V, output are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(Q: UnsafePointer[Float32], K: UnsafePointer[Float32], V: UnsafePointer[Float32], - output: UnsafePointer[Float32], M: Int32, d: Int32, window_size: Int32): +fn solve(Q: UnsafePointer[Float32, MutExternalOrigin], K: UnsafePointer[Float32, MutExternalOrigin], V: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], M: Int32, d: Int32, window_size: Int32) raises: pass diff --git a/challenges/hard/73_all_pairs_shortest_paths/starter/starter.mojo b/challenges/hard/73_all_pairs_shortest_paths/starter/starter.mojo index 6bfafcbb..71f7058d 100644 --- a/challenges/hard/73_all_pairs_shortest_paths/starter/starter.mojo +++ b/challenges/hard/73_all_pairs_shortest_paths/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # dist, output are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(dist: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn solve(dist: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: pass diff --git a/challenges/hard/74_gpt2_block/starter/starter.mojo b/challenges/hard/74_gpt2_block/starter/starter.mojo index 55275dc8..e7f5ed07 100644 --- a/challenges/hard/74_gpt2_block/starter/starter.mojo +++ b/challenges/hard/74_gpt2_block/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # x, output, weights are device pointers @export -def solve(x: UnsafePointer[Float32], output: UnsafePointer[Float32], weights: UnsafePointer[Float32], seq_len: Int32): +fn solve(x: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], weights: UnsafePointer[Float32, MutExternalOrigin], seq_len: Int32) raises: pass diff --git a/challenges/medium/10_2d_convolution/starter/starter.mojo b/challenges/medium/10_2d_convolution/starter/starter.mojo index 1e19acfc..a1682717 100644 --- a/challenges/medium/10_2d_convolution/starter/starter.mojo +++ b/challenges/medium/10_2d_convolution/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(input: UnsafePointer[Float32], kernel: UnsafePointer[Float32], output: UnsafePointer[Float32], input_rows: Int32, input_cols: Int32, kernel_rows: Int32, 
kernel_cols: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], kernel: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], input_rows: Int32, input_cols: Int32, kernel_rows: Int32, kernel_cols: Int32) raises: pass diff --git a/challenges/medium/11_3d_convolution/starter/starter.mojo b/challenges/medium/11_3d_convolution/starter/starter.mojo index 3f1292a9..09872742 100644 --- a/challenges/medium/11_3d_convolution/starter/starter.mojo +++ b/challenges/medium/11_3d_convolution/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(input: UnsafePointer[Float32], kernel: UnsafePointer[Float32], output: UnsafePointer[Float32], input_depth: Int32, input_rows: Int32, input_cols: Int32, kernel_depth: Int32, kernel_rows: Int32, kernel_cols: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], kernel: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], input_depth: Int32, input_rows: Int32, input_cols: Int32, kernel_depth: Int32, kernel_rows: Int32, kernel_cols: Int32) raises: pass diff --git a/challenges/medium/13_histogramming/starter/starter.mojo b/challenges/medium/13_histogramming/starter/starter.mojo index 795c9451..a3b5cb24 100644 --- a/challenges/medium/13_histogramming/starter/starter.mojo +++ b/challenges/medium/13_histogramming/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer 
+from std.math import ceildiv @export -def solve(input: UnsafePointer[Int32], histogram: UnsafePointer[Int32], N: Int32, num_bins: Int32): +fn solve(input: UnsafePointer[Int32, MutExternalOrigin], histogram: UnsafePointer[Int32, MutExternalOrigin], N: Int32, num_bins: Int32) raises: pass diff --git a/challenges/medium/16_prefix_sum/starter/starter.mojo b/challenges/medium/16_prefix_sum/starter/starter.mojo index 1c4cc5a9..02c2c06f 100644 --- a/challenges/medium/16_prefix_sum/starter/starter.mojo +++ b/challenges/medium/16_prefix_sum/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: pass diff --git a/challenges/medium/17_dot_product/starter/starter.mojo b/challenges/medium/17_dot_product/starter/starter.mojo index 51ba8550..dbc4b7ff 100644 --- a/challenges/medium/17_dot_product/starter/starter.mojo +++ b/challenges/medium/17_dot_product/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(A: UnsafePointer[Float32], B: UnsafePointer[Float32], result: UnsafePointer[Float32], N: Int32): +fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], result: UnsafePointer[Float32, MutExternalOrigin], N: Int32) 
raises: pass diff --git a/challenges/medium/18_sparse_matrix_vector_multiplication/starter/starter.mojo b/challenges/medium/18_sparse_matrix_vector_multiplication/starter/starter.mojo index bdcf2cd1..c593c92a 100644 --- a/challenges/medium/18_sparse_matrix_vector_multiplication/starter/starter.mojo +++ b/challenges/medium/18_sparse_matrix_vector_multiplication/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(A: UnsafePointer[Float32], x: UnsafePointer[Float32], y: UnsafePointer[Float32], M: Int32, N: Int32, nnz: Int32): +fn solve(A: UnsafePointer[Float32, MutExternalOrigin], x: UnsafePointer[Float32, MutExternalOrigin], y: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, nnz: Int32) raises: pass diff --git a/challenges/medium/22_gemm/starter/starter.mojo b/challenges/medium/22_gemm/starter/starter.mojo index 03091aa3..28329886 100644 --- a/challenges/medium/22_gemm/starter/starter.mojo +++ b/challenges/medium/22_gemm/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(A: UnsafePointer[Float16], B: UnsafePointer[Float16], C: UnsafePointer[Float16], M: Int32, N: Int32, K: Int32, alpha: Float32, beta: Float32): +fn solve(A: UnsafePointer[Float16, MutExternalOrigin], B: UnsafePointer[Float16, MutExternalOrigin], C: UnsafePointer[Float16, MutExternalOrigin], M: Int32, N: Int32, K: Int32, alpha: Float32, beta: Float32) raises: pass diff 
--git a/challenges/medium/25_categorical_cross_entropy_loss/starter/starter.mojo b/challenges/medium/25_categorical_cross_entropy_loss/starter/starter.mojo index 5c12cc3e..7d35ecd2 100644 --- a/challenges/medium/25_categorical_cross_entropy_loss/starter/starter.mojo +++ b/challenges/medium/25_categorical_cross_entropy_loss/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(logits: UnsafePointer[Float32], true_labels: UnsafePointer[Int32], loss: UnsafePointer[Float32], N: Int32, C: Int32): +fn solve(logits: UnsafePointer[Float32, MutExternalOrigin], true_labels: UnsafePointer[Int32, MutExternalOrigin], loss: UnsafePointer[Float32, MutExternalOrigin], N: Int32, C: Int32) raises: pass diff --git a/challenges/medium/27_mean_squared_error/starter/starter.mojo b/challenges/medium/27_mean_squared_error/starter/starter.mojo index a7de6d56..6ff28913 100644 --- a/challenges/medium/27_mean_squared_error/starter/starter.mojo +++ b/challenges/medium/27_mean_squared_error/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(predictions: UnsafePointer[Float32], targets: UnsafePointer[Float32], mse: UnsafePointer[Float32], N: Int32): +fn solve(predictions: UnsafePointer[Float32, MutExternalOrigin], targets: UnsafePointer[Float32, MutExternalOrigin], mse: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: pass diff --git 
a/challenges/medium/28_gaussian_blur/starter/starter.mojo b/challenges/medium/28_gaussian_blur/starter/starter.mojo index 1e19acfc..a1682717 100644 --- a/challenges/medium/28_gaussian_blur/starter/starter.mojo +++ b/challenges/medium/28_gaussian_blur/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(input: UnsafePointer[Float32], kernel: UnsafePointer[Float32], output: UnsafePointer[Float32], input_rows: Int32, input_cols: Int32, kernel_rows: Int32, kernel_cols: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], kernel: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], input_rows: Int32, input_cols: Int32, kernel_rows: Int32, kernel_cols: Int32) raises: pass diff --git a/challenges/medium/29_top_k_selection/starter/starter.mojo b/challenges/medium/29_top_k_selection/starter/starter.mojo index 773f4aa3..dfa4381e 100644 --- a/challenges/medium/29_top_k_selection/starter/starter.mojo +++ b/challenges/medium/29_top_k_selection/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32, k: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32, k: Int32) raises: pass diff --git a/challenges/medium/30_batched_matrix_multiplication/starter/starter.mojo 
b/challenges/medium/30_batched_matrix_multiplication/starter/starter.mojo index b87bf7ae..558b9b8c 100644 --- a/challenges/medium/30_batched_matrix_multiplication/starter/starter.mojo +++ b/challenges/medium/30_batched_matrix_multiplication/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(A: UnsafePointer[Float32], B: UnsafePointer[Float32], C: UnsafePointer[Float32], BATCH: Int32, M: Int32, N: Int32, K: Int32): +fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], BATCH: Int32, M: Int32, N: Int32, K: Int32) raises: pass diff --git a/challenges/medium/32_int8_quantized_matmul/starter/starter.mojo b/challenges/medium/32_int8_quantized_matmul/starter/starter.mojo index 0fa98d4d..1f2ca8c3 100644 --- a/challenges/medium/32_int8_quantized_matmul/starter/starter.mojo +++ b/challenges/medium/32_int8_quantized_matmul/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(A: UnsafePointer[Int8], B: UnsafePointer[Int8], C: UnsafePointer[Int8], M: Int32, N: Int32, K: Int32, scale_A: Float32, scale_B: Float32, scale_C: Float32, zero_point_A: Int32, zero_point_B: Int32, zero_point_C: Int32): +fn solve(A: UnsafePointer[Int8, MutExternalOrigin], B: UnsafePointer[Int8, MutExternalOrigin], C: UnsafePointer[Int8, MutExternalOrigin], M: Int32, N: Int32, K: Int32, scale_A: 
Float32, scale_B: Float32, scale_C: Float32, zero_point_A: Int32, zero_point_B: Int32, zero_point_C: Int32) raises: pass diff --git a/challenges/medium/33_ordinary_least_squares/starter/starter.mojo b/challenges/medium/33_ordinary_least_squares/starter/starter.mojo index 2d0b62fb..fefa6e34 100644 --- a/challenges/medium/33_ordinary_least_squares/starter/starter.mojo +++ b/challenges/medium/33_ordinary_least_squares/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # X, y, beta are device pointers (i.e. pointers to memory on the GPU) @export -def solve(X: UnsafePointer[Float32], y: UnsafePointer[Float32], beta: UnsafePointer[Float32], n_samples: Int32, n_features: Int32): +fn solve(X: UnsafePointer[Float32, MutExternalOrigin], y: UnsafePointer[Float32, MutExternalOrigin], beta: UnsafePointer[Float32, MutExternalOrigin], n_samples: Int32, n_features: Int32) raises: pass diff --git a/challenges/medium/34_logistic_regression/starter/starter.mojo b/challenges/medium/34_logistic_regression/starter/starter.mojo index 2d0b62fb..fefa6e34 100644 --- a/challenges/medium/34_logistic_regression/starter/starter.mojo +++ b/challenges/medium/34_logistic_regression/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # X, y, beta are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(X: UnsafePointer[Float32], y: UnsafePointer[Float32], beta: UnsafePointer[Float32], n_samples: Int32, n_features: Int32): +fn solve(X: UnsafePointer[Float32, MutExternalOrigin], y: UnsafePointer[Float32, MutExternalOrigin], beta: UnsafePointer[Float32, MutExternalOrigin], n_samples: Int32, n_features: Int32) raises: pass diff --git a/challenges/medium/35_monte_carlo_integration/starter/starter.mojo b/challenges/medium/35_monte_carlo_integration/starter/starter.mojo index 695c7c8d..f81c4ade 100644 --- a/challenges/medium/35_monte_carlo_integration/starter/starter.mojo +++ b/challenges/medium/35_monte_carlo_integration/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # y_samples, result are device pointers @export -def solve(y_samples: UnsafePointer[Float32], result: UnsafePointer[Float32], a: Float32, b: Float32, n_samples: Int32): +fn solve(y_samples: UnsafePointer[Float32, MutExternalOrigin], result: UnsafePointer[Float32, MutExternalOrigin], a: Float32, b: Float32, n_samples: Int32) raises: pass diff --git a/challenges/medium/37_matrix_power/starter/starter.mojo b/challenges/medium/37_matrix_power/starter/starter.mojo index 3d5bfe3d..187f7efb 100644 --- a/challenges/medium/37_matrix_power/starter/starter.mojo +++ b/challenges/medium/37_matrix_power/starter/starter.mojo @@ -1,11 +1,11 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # input, output are device 
pointers @export -def solve( - input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32, P: Int32 -): +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32, P: Int32 +) raises: pass diff --git a/challenges/medium/38_nearest_neighbor/starter/starter.mojo b/challenges/medium/38_nearest_neighbor/starter/starter.mojo index 523f16d7..d317ef73 100644 --- a/challenges/medium/38_nearest_neighbor/starter/starter.mojo +++ b/challenges/medium/38_nearest_neighbor/starter/starter.mojo @@ -1,6 +1,6 @@ -from memory import UnsafePointer +from std.memory import UnsafePointer # points and indices are device pointers @export -def solve(points: UnsafePointer[Float32], indices: UnsafePointer[Int32], N: Int32): +fn solve(points: UnsafePointer[Float32, MutExternalOrigin], indices: UnsafePointer[Int32, MutExternalOrigin], N: Int32) raises: pass diff --git a/challenges/medium/40_batch_normalization/starter/starter.mojo b/challenges/medium/40_batch_normalization/starter/starter.mojo index b1ffe89d..4e230e1d 100644 --- a/challenges/medium/40_batch_normalization/starter/starter.mojo +++ b/challenges/medium/40_batch_normalization/starter/starter.mojo @@ -1,11 +1,11 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # input, gamma, beta, output are device pointers @export -def solve(input: UnsafePointer[Float32], gamma: UnsafePointer[Float32], - beta: UnsafePointer[Float32], output: UnsafePointer[Float32], - N: Int32, C: Int32, eps: Float32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], gamma: UnsafePointer[Float32, MutExternalOrigin], + beta: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], + 
N: Int32, C: Int32, eps: Float32) raises: pass diff --git a/challenges/medium/42_2d_max_pooling/starter/starter.mojo b/challenges/medium/42_2d_max_pooling/starter/starter.mojo index c2c1dcd9..60759d90 100644 --- a/challenges/medium/42_2d_max_pooling/starter/starter.mojo +++ b/challenges/medium/42_2d_max_pooling/starter/starter.mojo @@ -1,11 +1,11 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # input, output are device pointers (i.e. pointers to memory on the GPU) @export -def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32, C: Int32, H: Int32, W: Int32, - kernel_size: Int32, stride: Int32, padding: Int32): + kernel_size: Int32, stride: Int32, padding: Int32) raises: pass diff --git a/challenges/medium/43_count_array_element/starter/starter.mojo b/challenges/medium/43_count_array_element/starter/starter.mojo index 975790d0..e5271c02 100644 --- a/challenges/medium/43_count_array_element/starter/starter.mojo +++ b/challenges/medium/43_count_array_element/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # input, output are device pointers @export -def solve(input: UnsafePointer[Int32], output: UnsafePointer[Int32], N: Int32, K: Int32): +fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Int32, MutExternalOrigin], N: Int32, K: Int32) raises: pass 
diff --git a/challenges/medium/44_count_2d_array_element/starter/starter.mojo b/challenges/medium/44_count_2d_array_element/starter/starter.mojo index 851b4d9f..d7cab051 100644 --- a/challenges/medium/44_count_2d_array_element/starter/starter.mojo +++ b/challenges/medium/44_count_2d_array_element/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # input, output are device pointers @export -def solve(input: UnsafePointer[Int32], output: UnsafePointer[Int32], N: Int32, M: Int32, K: Int32): +fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Int32, MutExternalOrigin], N: Int32, M: Int32, K: Int32) raises: pass diff --git a/challenges/medium/45_count_3d_array_element/starter/starter.mojo b/challenges/medium/45_count_3d_array_element/starter/starter.mojo index 364c4250..47b6da52 100644 --- a/challenges/medium/45_count_3d_array_element/starter/starter.mojo +++ b/challenges/medium/45_count_3d_array_element/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # input, output are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(input: UnsafePointer[Int32], output: UnsafePointer[Int32], N: Int32, M: Int32, K: Int32, P: Int32): +fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Int32, MutExternalOrigin], N: Int32, M: Int32, K: Int32, P: Int32) raises: pass diff --git a/challenges/medium/47_subarray_sum/starter/starter.mojo b/challenges/medium/47_subarray_sum/starter/starter.mojo index ef227415..1e0d49ea 100644 --- a/challenges/medium/47_subarray_sum/starter/starter.mojo +++ b/challenges/medium/47_subarray_sum/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # input, output are device pointers (i.e. pointers to memory on the GPU) @export -def solve(input: UnsafePointer[Int32], output: UnsafePointer[Int32], N: Int32, S: Int32, E: Int32): +fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Int32, MutExternalOrigin], N: Int32, S: Int32, E: Int32) raises: pass diff --git a/challenges/medium/48_2d_subarray_sum/starter/starter.mojo b/challenges/medium/48_2d_subarray_sum/starter/starter.mojo index b6e74dcc..bfee0535 100644 --- a/challenges/medium/48_2d_subarray_sum/starter/starter.mojo +++ b/challenges/medium/48_2d_subarray_sum/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # input, output are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(input: UnsafePointer[Int32], output: UnsafePointer[Int32], N: Int32, M: Int32, S_ROW: Int32, E_ROW: Int32, S_COL: Int32, E_COL: Int32): +fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Int32, MutExternalOrigin], N: Int32, M: Int32, S_ROW: Int32, E_ROW: Int32, S_COL: Int32, E_COL: Int32) raises: pass diff --git a/challenges/medium/49_3d_subarray_sum/starter/starter.mojo b/challenges/medium/49_3d_subarray_sum/starter/starter.mojo index a1849bad..84864938 100644 --- a/challenges/medium/49_3d_subarray_sum/starter/starter.mojo +++ b/challenges/medium/49_3d_subarray_sum/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # input, output are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(input: UnsafePointer[Int32], output: UnsafePointer[Int32], N: Int32, M: Int32, K: Int32, S_DEP: Int32, E_DEP: Int32, S_ROW: Int32, E_ROW: Int32, S_COL: Int32, E_COL: Int32): +fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Int32, MutExternalOrigin], N: Int32, M: Int32, K: Int32, S_DEP: Int32, E_DEP: Int32, S_ROW: Int32, E_ROW: Int32, S_COL: Int32, E_COL: Int32) raises: pass diff --git a/challenges/medium/4_reduction/starter/starter.mojo b/challenges/medium/4_reduction/starter/starter.mojo index be79c440..d076ebd5 100644 --- a/challenges/medium/4_reduction/starter/starter.mojo +++ b/challenges/medium/4_reduction/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # input, output are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: pass diff --git a/challenges/medium/50_rms_normalization/starter/starter.mojo b/challenges/medium/50_rms_normalization/starter/starter.mojo index 321f7997..a896b6a2 100644 --- a/challenges/medium/50_rms_normalization/starter/starter.mojo +++ b/challenges/medium/50_rms_normalization/starter/starter.mojo @@ -1,11 +1,11 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # input, output are device pointers @export -def solve(input: UnsafePointer[Float32], gamma: Float32, - beta: Float32, output: UnsafePointer[Float32], - N: Int32, eps: Float32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], gamma: Float32, + beta: Float32, output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, eps: Float32) raises: pass diff --git a/challenges/medium/51_max_subarray_sum/starter/starter.mojo b/challenges/medium/51_max_subarray_sum/starter/starter.mojo index 185ffcde..884c0518 100644 --- a/challenges/medium/51_max_subarray_sum/starter/starter.mojo +++ b/challenges/medium/51_max_subarray_sum/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # input, output are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(input: UnsafePointer[Int32], output: UnsafePointer[Int32], N: Int32, window_size: Int32): +fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Int32, MutExternalOrigin], N: Int32, window_size: Int32) raises: pass diff --git a/challenges/medium/55_attn_w_linear_bias/starter/starter.mojo b/challenges/medium/55_attn_w_linear_bias/starter/starter.mojo index 23fef5e0..de895cb5 100644 --- a/challenges/medium/55_attn_w_linear_bias/starter/starter.mojo +++ b/challenges/medium/55_attn_w_linear_bias/starter/starter.mojo @@ -1,10 +1,10 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # Q, K, V, output are device pointers (i.e. pointers to memory on the GPU) @export -def solve(Q: UnsafePointer[Float32], K: UnsafePointer[Float32], V: UnsafePointer[Float32], - output: UnsafePointer[Float32], M: Int32, N: Int32, d: Int32, alpha: Float32): +fn solve(Q: UnsafePointer[Float32, MutExternalOrigin], K: UnsafePointer[Float32, MutExternalOrigin], V: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, d: Int32, alpha: Float32) raises: pass diff --git a/challenges/medium/57_fp16_batched_matmul/starter/starter.mojo b/challenges/medium/57_fp16_batched_matmul/starter/starter.mojo index e4ef6e7e..bda7ea8e 100644 --- a/challenges/medium/57_fp16_batched_matmul/starter/starter.mojo +++ b/challenges/medium/57_fp16_batched_matmul/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, 
thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(A: UnsafePointer[Float16], B: UnsafePointer[Float16], C: UnsafePointer[Float16], BATCH: Int32, M: Int32, N: Int32, K: Int32): +fn solve(A: UnsafePointer[Float16, MutExternalOrigin], B: UnsafePointer[Float16, MutExternalOrigin], C: UnsafePointer[Float16, MutExternalOrigin], BATCH: Int32, M: Int32, N: Int32, K: Int32) raises: pass diff --git a/challenges/medium/58_fp16_dot_product/starter/starter.mojo b/challenges/medium/58_fp16_dot_product/starter/starter.mojo index 07508e50..3668bc38 100644 --- a/challenges/medium/58_fp16_dot_product/starter/starter.mojo +++ b/challenges/medium/58_fp16_dot_product/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # A, B, result are device pointers @export -def solve(A: UnsafePointer[Float16], B: UnsafePointer[Float16], result: UnsafePointer[Float16], N: Int32): +fn solve(A: UnsafePointer[Float16, MutExternalOrigin], B: UnsafePointer[Float16, MutExternalOrigin], result: UnsafePointer[Float16, MutExternalOrigin], N: Int32) raises: pass diff --git a/challenges/medium/5_softmax/starter/starter.mojo b/challenges/medium/5_softmax/starter/starter.mojo index 1c4cc5a9..02c2c06f 100644 --- a/challenges/medium/5_softmax/starter/starter.mojo +++ b/challenges/medium/5_softmax/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(input: 
UnsafePointer[Float32], output: UnsafePointer[Float32], N: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: pass diff --git a/challenges/medium/60_top_p_sampling/starter/starter.mojo b/challenges/medium/60_top_p_sampling/starter/starter.mojo index a9ab08c8..86f4cfe5 100644 --- a/challenges/medium/60_top_p_sampling/starter/starter.mojo +++ b/challenges/medium/60_top_p_sampling/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from memory import UnsafePointer -from gpu.id import block_dim, block_idx, thread_idx +from std.gpu.host import DeviceContext +from std.memory import UnsafePointer +from std.gpu import block_dim, block_idx, thread_idx @export -def solve(logits: UnsafePointer[Float32], p: UnsafePointer[Float32], - seed: UnsafePointer[Int32], sampled_token: UnsafePointer[Int32], - vocab_size: Int32): +fn solve(logits: UnsafePointer[Float32, MutExternalOrigin], p: UnsafePointer[Float32, MutExternalOrigin], + seed: UnsafePointer[Int32, MutExternalOrigin], sampled_token: UnsafePointer[Int32, MutExternalOrigin], + vocab_size: Int32) raises: pass diff --git a/challenges/medium/61_rope_embedding/starter/starter.mojo b/challenges/medium/61_rope_embedding/starter/starter.mojo index 9144d179..b7a01358 100644 --- a/challenges/medium/61_rope_embedding/starter/starter.mojo +++ b/challenges/medium/61_rope_embedding/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # Q, cos, sin, output are device pointers @export -def solve(Q: UnsafePointer[Float32], cos: UnsafePointer[Float32], sin: UnsafePointer[Float32], output: UnsafePointer[Float32], M: Int32, D: Int32): +fn solve(Q: 
UnsafePointer[Float32, MutExternalOrigin], cos: UnsafePointer[Float32, MutExternalOrigin], sin: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], M: Int32, D: Int32) raises: pass diff --git a/challenges/medium/64_weight_dequantization/starter/starter.mojo b/challenges/medium/64_weight_dequantization/starter/starter.mojo index 154b9b84..3b8cb80e 100644 --- a/challenges/medium/64_weight_dequantization/starter/starter.mojo +++ b/challenges/medium/64_weight_dequantization/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # X, S, Y are device pointers @export -def solve(X: UnsafePointer[Float32], S: UnsafePointer[Float32], Y: UnsafePointer[Float32], M: Int32, N: Int32, TILE_SIZE: Int32): +fn solve(X: UnsafePointer[Float32, MutExternalOrigin], S: UnsafePointer[Float32, MutExternalOrigin], Y: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, TILE_SIZE: Int32) raises: pass diff --git a/challenges/medium/67_moe_topk_gating/starter/starter.mojo b/challenges/medium/67_moe_topk_gating/starter/starter.mojo index 23ba2279..0412e2ed 100644 --- a/challenges/medium/67_moe_topk_gating/starter/starter.mojo +++ b/challenges/medium/67_moe_topk_gating/starter/starter.mojo @@ -1,8 +1,8 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv @export -def solve(logits: UnsafePointer[Float32], topk_weights: UnsafePointer[Float32], topk_indices: UnsafePointer[Int32], M: Int32, E: Int32, k: 
Int32): +fn solve(logits: UnsafePointer[Float32, MutExternalOrigin], topk_weights: UnsafePointer[Float32, MutExternalOrigin], topk_indices: UnsafePointer[Int32, MutExternalOrigin], M: Int32, E: Int32, k: Int32) raises: pass diff --git a/challenges/medium/69_jacobi_stencil_2d/starter/starter.mojo b/challenges/medium/69_jacobi_stencil_2d/starter/starter.mojo index 1fba0708..273f1732 100644 --- a/challenges/medium/69_jacobi_stencil_2d/starter/starter.mojo +++ b/challenges/medium/69_jacobi_stencil_2d/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # input, output are device pointers @export -def solve(input: UnsafePointer[Float32], output: UnsafePointer[Float32], rows: Int32, cols: Int32): +fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], rows: Int32, cols: Int32) raises: pass diff --git a/challenges/medium/6_softmax_attention/starter/starter.mojo b/challenges/medium/6_softmax_attention/starter/starter.mojo index 9a158ae3..95736d92 100644 --- a/challenges/medium/6_softmax_attention/starter/starter.mojo +++ b/challenges/medium/6_softmax_attention/starter/starter.mojo @@ -1,10 +1,10 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # Q, K, V, output are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(Q: UnsafePointer[Float32], K: UnsafePointer[Float32], V: UnsafePointer[Float32], - output: UnsafePointer[Float32], M: Int32, N: Int32, d: Int32): +fn solve(Q: UnsafePointer[Float32, MutExternalOrigin], K: UnsafePointer[Float32, MutExternalOrigin], V: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, d: Int32) raises: pass diff --git a/challenges/medium/70_segmented_prefix_sum/starter/starter.mojo b/challenges/medium/70_segmented_prefix_sum/starter/starter.mojo index 668b2eb0..2a233e08 100644 --- a/challenges/medium/70_segmented_prefix_sum/starter/starter.mojo +++ b/challenges/medium/70_segmented_prefix_sum/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # values, flags, output are device pointers @export -def solve(values: UnsafePointer[Float32], flags: UnsafePointer[Int32], output: UnsafePointer[Float32], N: Int32): +fn solve(values: UnsafePointer[Float32, MutExternalOrigin], flags: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: pass diff --git a/challenges/medium/71_parallel_merge/starter/starter.mojo b/challenges/medium/71_parallel_merge/starter/starter.mojo index 22585044..82231020 100644 --- a/challenges/medium/71_parallel_merge/starter/starter.mojo +++ b/challenges/medium/71_parallel_merge/starter/starter.mojo @@ -1,7 +1,7 @@ -from gpu.host import DeviceContext -from memory import UnsafePointer +from std.gpu.host import DeviceContext +from std.memory import UnsafePointer # A, B, C are device pointers (i.e. 
pointers to memory on the GPU) @export -def solve(A: UnsafePointer[Float32], B: UnsafePointer[Float32], C: UnsafePointer[Float32], M: Int32, N: Int32): +fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32) raises: pass diff --git a/challenges/medium/72_stream_compaction/starter/starter.mojo b/challenges/medium/72_stream_compaction/starter/starter.mojo index 57896d71..43aca583 100644 --- a/challenges/medium/72_stream_compaction/starter/starter.mojo +++ b/challenges/medium/72_stream_compaction/starter/starter.mojo @@ -1,7 +1,7 @@ -from gpu.host import DeviceContext -from memory import UnsafePointer +from std.gpu.host import DeviceContext +from std.memory import UnsafePointer # A, out are device pointers @export -def solve(A: UnsafePointer[Float32], N: Int32, out: UnsafePointer[Float32]): +fn solve(A: UnsafePointer[Float32, MutExternalOrigin], N: Int32, out: UnsafePointer[Float32, MutExternalOrigin]) raises: pass diff --git a/challenges/medium/75_sparse_matrix_dense_matrix_multiplication/starter/starter.mojo b/challenges/medium/75_sparse_matrix_dense_matrix_multiplication/starter/starter.mojo index 78690e4b..cfaa9c74 100644 --- a/challenges/medium/75_sparse_matrix_dense_matrix_multiplication/starter/starter.mojo +++ b/challenges/medium/75_sparse_matrix_dense_matrix_multiplication/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # A, B, C are device pointers @export -def solve(A: UnsafePointer[Float32], B: UnsafePointer[Float32], C: UnsafePointer[Float32], M: Int32, N: Int32, K: Int32, nnz: Int32): +fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: 
UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, K: Int32, nnz: Int32) raises: pass diff --git a/challenges/medium/78_2d_fft/starter/starter.mojo b/challenges/medium/78_2d_fft/starter/starter.mojo index e7c944b6..5046919f 100644 --- a/challenges/medium/78_2d_fft/starter/starter.mojo +++ b/challenges/medium/78_2d_fft/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # signal, spectrum are device pointers @export -def solve(signal: UnsafePointer[Float32], spectrum: UnsafePointer[Float32], M: Int32, N: Int32): +fn solve(signal: UnsafePointer[Float32, MutExternalOrigin], spectrum: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32) raises: pass diff --git a/challenges/medium/80_grouped_query_attention/starter/starter.mojo b/challenges/medium/80_grouped_query_attention/starter/starter.mojo index 8698b239..dad131dc 100644 --- a/challenges/medium/80_grouped_query_attention/starter/starter.mojo +++ b/challenges/medium/80_grouped_query_attention/starter/starter.mojo @@ -1,16 +1,16 @@ -from gpu.host import DeviceContext -from memory import UnsafePointer +from std.gpu.host import DeviceContext +from std.memory import UnsafePointer # Q, K, V, output are device pointers @export -def solve( - Q: UnsafePointer[Float32], - K: UnsafePointer[Float32], - V: UnsafePointer[Float32], - output: UnsafePointer[Float32], +fn solve( + Q: UnsafePointer[Float32, MutExternalOrigin], + K: UnsafePointer[Float32, MutExternalOrigin], + V: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], num_q_heads: Int32, num_kv_heads: Int32, seq_len: Int32, head_dim: Int32, -): +) raises: pass diff --git 
a/challenges/medium/81_int4_matmul/starter/starter.mojo b/challenges/medium/81_int4_matmul/starter/starter.mojo index dabd46f4..4aa9fc85 100644 --- a/challenges/medium/81_int4_matmul/starter/starter.mojo +++ b/challenges/medium/81_int4_matmul/starter/starter.mojo @@ -1,16 +1,16 @@ -from gpu.host import DeviceContext -from memory import UnsafePointer +from std.gpu.host import DeviceContext +from std.memory import UnsafePointer # x, w_q, scales, y are device pointers @export -def solve( - x: UnsafePointer[Float16], - w_q: UnsafePointer[UInt8], - scales: UnsafePointer[Float16], - y: UnsafePointer[Float16], +fn solve( + x: UnsafePointer[Float16, MutExternalOrigin], + w_q: UnsafePointer[UInt8, MutExternalOrigin], + scales: UnsafePointer[Float16, MutExternalOrigin], + y: UnsafePointer[Float16, MutExternalOrigin], M: Int32, N: Int32, K: Int32, group_size: Int32, -): +) raises: pass diff --git a/challenges/medium/82_linear_recurrence/starter/starter.mojo b/challenges/medium/82_linear_recurrence/starter/starter.mojo index aa4d66f7..2a75e0c6 100644 --- a/challenges/medium/82_linear_recurrence/starter/starter.mojo +++ b/challenges/medium/82_linear_recurrence/starter/starter.mojo @@ -1,9 +1,9 @@ -from gpu.host import DeviceContext -from gpu.id import block_dim, block_idx, thread_idx -from memory import UnsafePointer -from math import ceildiv +from std.gpu.host import DeviceContext +from std.gpu import block_dim, block_idx, thread_idx +from std.memory import UnsafePointer +from std.math import ceildiv # a, x, h are device pointers @export -def solve(a: UnsafePointer[Float32], x: UnsafePointer[Float32], h: UnsafePointer[Float32], B: Int32, L: Int32): +fn solve(a: UnsafePointer[Float32, MutExternalOrigin], x: UnsafePointer[Float32, MutExternalOrigin], h: UnsafePointer[Float32, MutExternalOrigin], B: Int32, L: Int32) raises: pass diff --git a/challenges/medium/85_lora_linear/starter/starter.mojo b/challenges/medium/85_lora_linear/starter/starter.mojo index 31594f76..8632a375 
100644 --- a/challenges/medium/85_lora_linear/starter/starter.mojo +++ b/challenges/medium/85_lora_linear/starter/starter.mojo @@ -1,18 +1,18 @@ -from gpu.host import DeviceContext -from memory import UnsafePointer +from std.gpu.host import DeviceContext +from std.memory import UnsafePointer # x, W, A, B, output are device pointers @export -def solve( - x: UnsafePointer[Float32], - W: UnsafePointer[Float32], - A: UnsafePointer[Float32], - B: UnsafePointer[Float32], - output: UnsafePointer[Float32], +fn solve( + x: UnsafePointer[Float32, MutExternalOrigin], + W: UnsafePointer[Float32, MutExternalOrigin], + A: UnsafePointer[Float32, MutExternalOrigin], + B: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], batch: Int32, d_in: Int32, d_out: Int32, rank: Int32, lora_scale: Float32, -): +) raises: pass