diff --git a/.github/workflows/create-challenge.yml b/.github/workflows/create-challenge.yml index a6a33efa..2b9cb6f8 100644 --- a/.github/workflows/create-challenge.yml +++ b/.github/workflows/create-challenge.yml @@ -29,7 +29,7 @@ jobs: - name: Install dependencies run: | - pip install pre-commit requests websocket-client + pip install pre-commit requests websocket-client modular pre-commit install - name: Fetch open PRs diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index c87956b3..ce4c31d1 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -76,6 +76,21 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install Modular (mojo) + run: | + python -m pip install --upgrade pip + pip install modular + + - name: Check Mojo formatting with mojo format + run: | + find challenges -name "*.mojo" -type f -print0 | xargs -0 mojo format -q + git diff --exit-code -- '*.mojo' + - name: Check Mojo files exist and are valid run: | echo "Checking Mojo files for basic syntax issues..." diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c1c477e..18a80a39 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,6 +33,15 @@ repos: types_or: [c++, c, cuda] files: \.(cu|cpp|h)$ + # Mojo formatting + - repo: local + hooks: + - id: mojo-format + name: mojo format + entry: mojo format -q + language: system + files: \.mojo$ + # General file checks - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 diff --git a/challenges/easy/19_reverse_array/starter/starter.mojo b/challenges/easy/19_reverse_array/starter/starter.mojo index 2463cd49..5efc7cfd 100644 --- a/challenges/easy/19_reverse_array/starter/starter.mojo +++ b/challenges/easy/19_reverse_array/starter/starter.mojo @@ -3,9 +3,11 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + fn reverse_array_kernel(input: UnsafePointer[Float32, MutExternalOrigin], N: Int32): pass + # input is a device pointer (i.e. pointer to memory on the GPU) @export fn solve(input: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: @@ -15,10 +17,6 @@ fn solve(input: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: var blocksPerGrid = ceildiv(N, threadsPerBlock) var _kernel = ctx.compile_function[reverse_array_kernel, reverse_array_kernel]() - ctx.enqueue_function(_kernel, - input, N, - grid_dim = blocksPerGrid, - block_dim = threadsPerBlock - ) + ctx.enqueue_function(_kernel, input, N, grid_dim=blocksPerGrid, block_dim=threadsPerBlock) ctx.synchronize() diff --git a/challenges/easy/1_vector_add/starter/starter.mojo b/challenges/easy/1_vector_add/starter/starter.mojo index 006047f3..a5482b49 100644 --- a/challenges/easy/1_vector_add/starter/starter.mojo +++ b/challenges/easy/1_vector_add/starter/starter.mojo @@ -3,21 +3,29 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv -fn vector_add_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], N: Int32): + +fn vector_add_kernel( + A: UnsafePointer[Float32, MutExternalOrigin], + B: UnsafePointer[Float32, MutExternalOrigin], + C: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +): pass + # A, B, C are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + A: UnsafePointer[Float32, MutExternalOrigin], + B: UnsafePointer[Float32, MutExternalOrigin], + C: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var num_blocks = ceildiv(N, BLOCK_SIZE) var _kernel = ctx.compile_function[vector_add_kernel, vector_add_kernel]() - ctx.enqueue_function(_kernel, - A, B, C, N, - grid_dim = num_blocks, - block_dim = BLOCK_SIZE - ) + ctx.enqueue_function(_kernel, A, B, C, N, grid_dim=num_blocks, block_dim=BLOCK_SIZE) ctx.synchronize() diff --git a/challenges/easy/21_relu/starter/starter.mojo b/challenges/easy/21_relu/starter/starter.mojo index 674d9db9..6c05b7ab 100644 --- a/challenges/easy/21_relu/starter/starter.mojo +++ b/challenges/easy/21_relu/starter/starter.mojo @@ -3,22 +3,30 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv -fn relu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32): + +fn relu_kernel( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +): pass + # input, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: var threadsPerBlock: Int32 = 256 var ctx = DeviceContext() var blocksPerGrid = ceildiv(N, threadsPerBlock) var _kernel = ctx.compile_function[relu_kernel, relu_kernel]() - ctx.enqueue_function(_kernel, - input, output, N, - grid_dim = blocksPerGrid, - block_dim = threadsPerBlock + ctx.enqueue_function( + _kernel, input, output, N, grid_dim=blocksPerGrid, block_dim=threadsPerBlock ) ctx.synchronize() diff --git a/challenges/easy/23_leaky_relu/starter/starter.mojo b/challenges/easy/23_leaky_relu/starter/starter.mojo index 3d5472aa..ca3b83db 100644 --- a/challenges/easy/23_leaky_relu/starter/starter.mojo +++ b/challenges/easy/23_leaky_relu/starter/starter.mojo @@ -3,22 +3,30 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv -fn leaky_relu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32): + +fn leaky_relu_kernel( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +): pass + # input, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: var threadsPerBlock: Int32 = 256 var ctx = DeviceContext() var blocksPerGrid = ceildiv(N, threadsPerBlock) var _kernel = ctx.compile_function[leaky_relu_kernel, leaky_relu_kernel]() - ctx.enqueue_function(_kernel, - input, output, N, - grid_dim = blocksPerGrid, - block_dim = threadsPerBlock + ctx.enqueue_function( + _kernel, input, output, N, grid_dim=blocksPerGrid, block_dim=threadsPerBlock ) ctx.synchronize() diff --git a/challenges/easy/24_rainbow_table/starter/starter.mojo b/challenges/easy/24_rainbow_table/starter/starter.mojo index e96fa607..faee64d4 100644 --- a/challenges/easy/24_rainbow_table/starter/starter.mojo +++ b/challenges/easy/24_rainbow_table/starter/starter.mojo @@ -3,6 +3,7 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + fn fnv1a_hash(input: Int32) -> UInt32: alias FNV_PRIME: UInt32 = 16777619 alias OFFSET_BASIS: UInt32 = 2166136261 @@ -15,23 +16,32 @@ fn fnv1a_hash(input: Int32) -> UInt32: return hash -fn fnv1a_hash_kernel(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[UInt32, MutExternalOrigin], - N: Int32, R: Int32): + +fn fnv1a_hash_kernel( + input: UnsafePointer[Int32, MutExternalOrigin], + output: UnsafePointer[UInt32, MutExternalOrigin], + N: Int32, + R: Int32, +): pass + # input, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[UInt32, MutExternalOrigin], N: Int32, R: Int32) raises: +fn solve( + input: UnsafePointer[Int32, MutExternalOrigin], + output: UnsafePointer[UInt32, MutExternalOrigin], + N: Int32, + R: Int32, +) raises: var threadsPerBlock: Int32 = 256 var ctx = DeviceContext() var blocksPerGrid = ceildiv(N, threadsPerBlock) var _kernel = ctx.compile_function[fnv1a_hash_kernel, fnv1a_hash_kernel]() - ctx.enqueue_function(_kernel, - input, output, N, R, - grid_dim = blocksPerGrid, - block_dim = threadsPerBlock + ctx.enqueue_function( + _kernel, input, output, N, R, grid_dim=blocksPerGrid, block_dim=threadsPerBlock ) ctx.synchronize() diff --git a/challenges/easy/2_matrix_multiplication/starter/starter.mojo b/challenges/easy/2_matrix_multiplication/starter/starter.mojo index 58a105c8..2c0ba73e 100644 --- a/challenges/easy/2_matrix_multiplication/starter/starter.mojo +++ b/challenges/easy/2_matrix_multiplication/starter/starter.mojo @@ -3,12 +3,28 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv -fn matrix_multiplication_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, K: Int32): + +fn matrix_multiplication_kernel( + A: UnsafePointer[Float32, MutExternalOrigin], + B: UnsafePointer[Float32, MutExternalOrigin], + C: UnsafePointer[Float32, MutExternalOrigin], + M: Int32, + N: Int32, + K: Int32, +): pass + # A, B, C are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, K: Int32) raises: +fn solve( + A: UnsafePointer[Float32, MutExternalOrigin], + B: UnsafePointer[Float32, MutExternalOrigin], + C: UnsafePointer[Float32, MutExternalOrigin], + M: Int32, + N: Int32, + K: Int32, +) raises: var BLOCK_SIZE: Int32 = 16 var ctx = DeviceContext() @@ -16,10 +32,16 @@ fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, var grid_dim_y = ceildiv(M, BLOCK_SIZE) var _kernel = ctx.compile_function[matrix_multiplication_kernel, matrix_multiplication_kernel]() - ctx.enqueue_function(_kernel, - A, B, C, M, N, K, - grid_dim = (grid_dim_x, grid_dim_y), - block_dim = (BLOCK_SIZE, BLOCK_SIZE) + ctx.enqueue_function( + _kernel, + A, + B, + C, + M, + N, + K, + grid_dim=(grid_dim_x, grid_dim_y), + block_dim=(BLOCK_SIZE, BLOCK_SIZE), ) ctx.synchronize() diff --git a/challenges/easy/31_matrix_copy/starter/starter.mojo b/challenges/easy/31_matrix_copy/starter/starter.mojo index ca585920..487126f1 100644 --- a/challenges/easy/31_matrix_copy/starter/starter.mojo +++ b/challenges/easy/31_matrix_copy/starter/starter.mojo @@ -3,12 +3,22 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv -fn copy_matrix_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], N: Int32): + +fn copy_matrix_kernel( + A: UnsafePointer[Float32, MutExternalOrigin], + B: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +): pass + # A, B are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + A: UnsafePointer[Float32, MutExternalOrigin], + B: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: var total = N * N var threadsPerBlock: Int32 = 256 var ctx = DeviceContext() @@ -16,10 +26,6 @@ fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, var blocksPerGrid = ceildiv(total, threadsPerBlock) var _kernel = ctx.compile_function[copy_matrix_kernel, copy_matrix_kernel]() - ctx.enqueue_function(_kernel, - A, B, N, - grid_dim = blocksPerGrid, - block_dim = threadsPerBlock - ) + ctx.enqueue_function(_kernel, A, B, N, grid_dim=blocksPerGrid, block_dim=threadsPerBlock) ctx.synchronize() diff --git a/challenges/easy/3_matrix_transpose/starter/starter.mojo b/challenges/easy/3_matrix_transpose/starter/starter.mojo index 231d7505..2b0f1636 100644 --- a/challenges/easy/3_matrix_transpose/starter/starter.mojo +++ b/challenges/easy/3_matrix_transpose/starter/starter.mojo @@ -3,12 +3,24 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv -fn matrix_transpose_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], rows: Int32, cols: Int32): + +fn matrix_transpose_kernel( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + rows: Int32, + cols: Int32, +): pass + # input, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], rows: Int32, cols: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + rows: Int32, + cols: Int32, +) raises: var BLOCK_SIZE: Int32 = 32 var ctx = DeviceContext() @@ -16,10 +28,14 @@ fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer var grid_dim_y = ceildiv(rows, BLOCK_SIZE) var _kernel = ctx.compile_function[matrix_transpose_kernel, matrix_transpose_kernel]() - ctx.enqueue_function(_kernel, - input, output, rows, cols, - grid_dim = (grid_dim_x, grid_dim_y), - block_dim = (BLOCK_SIZE, BLOCK_SIZE) + ctx.enqueue_function( + _kernel, + input, + output, + rows, + cols, + grid_dim=(grid_dim_x, grid_dim_y), + block_dim=(BLOCK_SIZE, BLOCK_SIZE), ) ctx.synchronize() diff --git a/challenges/easy/52_silu/starter/starter.mojo b/challenges/easy/52_silu/starter/starter.mojo index 8b5814c0..142997c7 100644 --- a/challenges/easy/52_silu/starter/starter.mojo +++ b/challenges/easy/52_silu/starter/starter.mojo @@ -3,22 +3,30 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv -fn silu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32): + +fn silu_kernel( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +): pass + # input, output are device pointers @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: var threadsPerBlock: Int32 = 256 var ctx = DeviceContext() var blocksPerGrid = ceildiv(N, threadsPerBlock) var _kernel = ctx.compile_function[silu_kernel, silu_kernel]() - ctx.enqueue_function(_kernel, - input, output, N, - grid_dim = blocksPerGrid, - block_dim = threadsPerBlock + ctx.enqueue_function( + _kernel, input, output, N, grid_dim=blocksPerGrid, block_dim=threadsPerBlock ) ctx.synchronize() diff --git a/challenges/easy/54_swiglu/starter/starter.mojo b/challenges/easy/54_swiglu/starter/starter.mojo index c32b232a..8d462903 100644 --- a/challenges/easy/54_swiglu/starter/starter.mojo +++ b/challenges/easy/54_swiglu/starter/starter.mojo @@ -4,22 +4,26 @@ from std.memory import UnsafePointer from std.math import ceildiv -fn swiglu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32): +fn swiglu_kernel( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +): pass # input, output are device pointers @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var num_blocks = ceildiv(N // 2, BLOCK_SIZE) var _kernel = ctx.compile_function[swiglu_kernel, swiglu_kernel]() - ctx.enqueue_function(_kernel, - input, output, N, - grid_dim = num_blocks, - block_dim = BLOCK_SIZE - ) + ctx.enqueue_function(_kernel, input, output, N, grid_dim=num_blocks, block_dim=BLOCK_SIZE) ctx.synchronize() diff --git a/challenges/easy/62_value_clipping/starter/starter.mojo b/challenges/easy/62_value_clipping/starter/starter.mojo index 49872678..c96003d5 100644 --- a/challenges/easy/62_value_clipping/starter/starter.mojo +++ b/challenges/easy/62_value_clipping/starter/starter.mojo @@ -3,22 +3,33 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv -fn clip_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], lo: Float32, hi: Float32, N: Int32): + +fn clip_kernel( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + lo: Float32, + hi: Float32, + N: Int32, +): pass # input, output are device pointers @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], lo: Float32, hi: Float32, N: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + lo: Float32, + hi: Float32, + N: Int32, +) raises: var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var num_blocks = ceildiv(N, BLOCK_SIZE) var _kernel = ctx.compile_function[clip_kernel, clip_kernel]() - ctx.enqueue_function(_kernel, - input, output, lo, hi, N, - grid_dim = num_blocks, - block_dim = BLOCK_SIZE + ctx.enqueue_function( + _kernel, input, output, lo, hi, N, grid_dim=num_blocks, block_dim=BLOCK_SIZE ) ctx.synchronize() diff --git a/challenges/easy/63_interleave/starter/starter.mojo b/challenges/easy/63_interleave/starter/starter.mojo index 1b46d0c9..efb14e5f 100644 --- a/challenges/easy/63_interleave/starter/starter.mojo +++ b/challenges/easy/63_interleave/starter/starter.mojo @@ -3,21 +3,29 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv -fn interleave_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32): + +fn interleave_kernel( + A: UnsafePointer[Float32, MutExternalOrigin], + B: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +): pass + # A, B, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + A: UnsafePointer[Float32, MutExternalOrigin], + B: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var num_blocks = ceildiv(N, BLOCK_SIZE) var _kernel = ctx.compile_function[interleave_kernel, interleave_kernel]() - ctx.enqueue_function(_kernel, - A, B, output, N, - grid_dim = num_blocks, - block_dim = BLOCK_SIZE - ) + ctx.enqueue_function(_kernel, A, B, output, N, grid_dim=num_blocks, block_dim=BLOCK_SIZE) ctx.synchronize() diff --git a/challenges/easy/65_geglu/starter/starter.mojo b/challenges/easy/65_geglu/starter/starter.mojo index 1d77e891..e62def36 100644 --- a/challenges/easy/65_geglu/starter/starter.mojo +++ b/challenges/easy/65_geglu/starter/starter.mojo @@ -4,22 +4,26 @@ from std.memory import UnsafePointer from std.math import ceildiv -fn geglu_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32): +fn geglu_kernel( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +): pass # input, output are device pointers @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var num_blocks = ceildiv(N // 2, BLOCK_SIZE) var _kernel = ctx.compile_function[geglu_kernel, geglu_kernel]() - ctx.enqueue_function(_kernel, - input, output, N, - grid_dim = num_blocks, - block_dim = BLOCK_SIZE - ) + ctx.enqueue_function(_kernel, input, output, N, grid_dim=num_blocks, block_dim=BLOCK_SIZE) ctx.synchronize() diff --git a/challenges/easy/66_rgb_to_grayscale/starter/starter.mojo b/challenges/easy/66_rgb_to_grayscale/starter/starter.mojo index 45116afb..07786e77 100644 --- a/challenges/easy/66_rgb_to_grayscale/starter/starter.mojo +++ b/challenges/easy/66_rgb_to_grayscale/starter/starter.mojo @@ -3,22 +3,32 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv -fn rgb_to_grayscale_kernel(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], width: Int32, height: Int32): + +fn rgb_to_grayscale_kernel( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + width: Int32, + height: Int32, +): pass + # input, output are device pointers @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], width: Int32, height: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + width: Int32, + height: Int32, +) raises: var total_pixels = width * height var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var num_blocks = ceildiv(total_pixels, BLOCK_SIZE) var _kernel = ctx.compile_function[rgb_to_grayscale_kernel, rgb_to_grayscale_kernel]() - ctx.enqueue_function(_kernel, - input, output, width, height, - grid_dim = num_blocks, - block_dim = BLOCK_SIZE + ctx.enqueue_function( + _kernel, input, output, width, height, grid_dim=num_blocks, block_dim=BLOCK_SIZE ) ctx.synchronize() diff --git a/challenges/easy/68_sigmoid/starter/starter.mojo b/challenges/easy/68_sigmoid/starter/starter.mojo index cc558cf2..f6ab3ae7 100644 --- a/challenges/easy/68_sigmoid/starter/starter.mojo +++ b/challenges/easy/68_sigmoid/starter/starter.mojo @@ -3,21 +3,27 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv -fn sigmoid_kernel(X: UnsafePointer[Float32, MutExternalOrigin], Y: UnsafePointer[Float32, MutExternalOrigin], N: Int32): + +fn sigmoid_kernel( + X: UnsafePointer[Float32, MutExternalOrigin], + Y: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +): pass + # X, Y are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(X: UnsafePointer[Float32, MutExternalOrigin], Y: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + X: UnsafePointer[Float32, MutExternalOrigin], + Y: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var num_blocks = ceildiv(N, BLOCK_SIZE) var _kernel = ctx.compile_function[sigmoid_kernel, sigmoid_kernel]() - ctx.enqueue_function(_kernel, - X, Y, N, - grid_dim = num_blocks, - block_dim = BLOCK_SIZE - ) + ctx.enqueue_function(_kernel, X, Y, N, grid_dim=num_blocks, block_dim=BLOCK_SIZE) ctx.synchronize() diff --git a/challenges/easy/7_color_inversion/starter/starter.mojo b/challenges/easy/7_color_inversion/starter/starter.mojo index 827589cf..691e0263 100644 --- a/challenges/easy/7_color_inversion/starter/starter.mojo +++ b/challenges/easy/7_color_inversion/starter/starter.mojo @@ -3,9 +3,11 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + fn invert_kernel(image: UnsafePointer[UInt8, MutExternalOrigin], width: Int32, height: Int32): pass + # image is a device pointer (i.e. pointer to memory on the GPU) @export fn solve(image: UnsafePointer[UInt8, MutExternalOrigin], width: Int32, height: Int32) raises: @@ -16,10 +18,8 @@ fn solve(image: UnsafePointer[UInt8, MutExternalOrigin], width: Int32, height: I var blocksPerGrid = ceildiv(total_pixels, threadsPerBlock) var _kernel = ctx.compile_function[invert_kernel, invert_kernel]() - ctx.enqueue_function(_kernel, - image, width, height, - grid_dim = blocksPerGrid, - block_dim = threadsPerBlock + ctx.enqueue_function( + _kernel, image, width, height, grid_dim=blocksPerGrid, block_dim=threadsPerBlock ) ctx.synchronize() diff --git a/challenges/easy/8_matrix_addition/starter/starter.mojo b/challenges/easy/8_matrix_addition/starter/starter.mojo index c947032e..bdd4af4b 100644 --- a/challenges/easy/8_matrix_addition/starter/starter.mojo +++ b/challenges/easy/8_matrix_addition/starter/starter.mojo @@ -3,22 +3,30 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv -fn matrix_add_kernel(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], N: Int32): + +fn matrix_add_kernel( + A: UnsafePointer[Float32, MutExternalOrigin], + B: UnsafePointer[Float32, MutExternalOrigin], + C: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +): pass + # A, B, C are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + A: UnsafePointer[Float32, MutExternalOrigin], + B: UnsafePointer[Float32, MutExternalOrigin], + C: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: var BLOCK_SIZE: Int32 = 256 var ctx = DeviceContext() var n_elements = N * N var num_blocks = ceildiv(n_elements, BLOCK_SIZE) var _kernel = ctx.compile_function[matrix_add_kernel, matrix_add_kernel]() - ctx.enqueue_function(_kernel, - A, B, C, N, - grid_dim = num_blocks, - block_dim = BLOCK_SIZE - ) + ctx.enqueue_function(_kernel, A, B, C, N, grid_dim=num_blocks, block_dim=BLOCK_SIZE) ctx.synchronize() diff --git a/challenges/easy/9_1d_convolution/starter/starter.mojo b/challenges/easy/9_1d_convolution/starter/starter.mojo index 6bc29b7f..fd414d53 100644 --- a/challenges/easy/9_1d_convolution/starter/starter.mojo +++ b/challenges/easy/9_1d_convolution/starter/starter.mojo @@ -3,14 +3,26 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv -fn convolution_1d_kernel(input: UnsafePointer[Float32, MutExternalOrigin], kernel: UnsafePointer[Float32, MutExternalOrigin], - output: UnsafePointer[Float32, MutExternalOrigin], input_size: Int32, kernel_size: Int32): + +fn convolution_1d_kernel( + input: UnsafePointer[Float32, MutExternalOrigin], + kernel: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + input_size: Int32, + kernel_size: Int32, +): pass + # input, kernel, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], kernel: UnsafePointer[Float32, MutExternalOrigin], - output: UnsafePointer[Float32, MutExternalOrigin], input_size: Int32, kernel_size: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + kernel: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + input_size: Int32, + kernel_size: Int32, +) raises: var output_size = input_size - kernel_size + 1 var threadsPerBlock: Int32 = 256 var ctx = DeviceContext() @@ -18,10 +30,15 @@ fn solve(input: UnsafePointer[Float32, MutExternalOrigin], kernel: UnsafePointer var blocksPerGrid = ceildiv(output_size, threadsPerBlock) var _kernel = ctx.compile_function[convolution_1d_kernel, convolution_1d_kernel]() - ctx.enqueue_function(_kernel, - input, kernel, output, input_size, kernel_size, - grid_dim = blocksPerGrid, - block_dim = threadsPerBlock + ctx.enqueue_function( + _kernel, + input, + kernel, + output, + input_size, + kernel_size, + grid_dim=blocksPerGrid, + block_dim=threadsPerBlock, ) ctx.synchronize() diff --git a/challenges/hard/12_multi_head_attention/starter/starter.mojo b/challenges/hard/12_multi_head_attention/starter/starter.mojo index 1f0884e1..9eba26f9 100644 --- a/challenges/hard/12_multi_head_attention/starter/starter.mojo +++ b/challenges/hard/12_multi_head_attention/starter/starter.mojo @@ -3,6 +3,15 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(Q: UnsafePointer[Float32, MutExternalOrigin], K: UnsafePointer[Float32, MutExternalOrigin], V: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32, d_model: Int32, h: Int32) raises: +fn solve( + Q: UnsafePointer[Float32, MutExternalOrigin], + K: UnsafePointer[Float32, MutExternalOrigin], + V: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, + d_model: Int32, + h: Int32, +) raises: pass diff --git a/challenges/hard/14_multi_agent_sim/starter/starter.mojo b/challenges/hard/14_multi_agent_sim/starter/starter.mojo index 589c22fe..221ad7a5 100644 --- a/challenges/hard/14_multi_agent_sim/starter/starter.mojo +++ b/challenges/hard/14_multi_agent_sim/starter/starter.mojo @@ -3,6 +3,11 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(agents: UnsafePointer[Float32, MutExternalOrigin], agents_next: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + agents: UnsafePointer[Float32, MutExternalOrigin], + agents_next: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: pass diff --git a/challenges/hard/15_sorting/starter/starter.mojo b/challenges/hard/15_sorting/starter/starter.mojo index ea451789..8feafacc 100644 --- a/challenges/hard/15_sorting/starter/starter.mojo +++ b/challenges/hard/15_sorting/starter/starter.mojo @@ -3,6 +3,7 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export fn solve(data: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: pass diff --git a/challenges/hard/20_kmeans_clustering/starter/starter.mojo b/challenges/hard/20_kmeans_clustering/starter/starter.mojo index 04610735..bce1debf 100644 --- a/challenges/hard/20_kmeans_clustering/starter/starter.mojo +++ b/challenges/hard/20_kmeans_clustering/starter/starter.mojo @@ -3,6 +3,18 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(data_x: UnsafePointer[Float32, MutExternalOrigin], data_y: UnsafePointer[Float32, MutExternalOrigin], labels: UnsafePointer[Int32, MutExternalOrigin], initial_centroid_x: UnsafePointer[Float32, MutExternalOrigin], initial_centroid_y: UnsafePointer[Float32, MutExternalOrigin], final_centroid_x: UnsafePointer[Float32, MutExternalOrigin], final_centroid_y: UnsafePointer[Float32, MutExternalOrigin], sample_size: Int32, k: Int32, max_iterations: Int32) raises: +fn solve( + data_x: UnsafePointer[Float32, MutExternalOrigin], + data_y: UnsafePointer[Float32, MutExternalOrigin], + labels: UnsafePointer[Int32, MutExternalOrigin], + initial_centroid_x: UnsafePointer[Float32, MutExternalOrigin], + initial_centroid_y: UnsafePointer[Float32, MutExternalOrigin], + final_centroid_x: UnsafePointer[Float32, MutExternalOrigin], + final_centroid_y: UnsafePointer[Float32, MutExternalOrigin], + sample_size: Int32, + k: Int32, + max_iterations: Int32, +) raises: pass diff --git a/challenges/hard/36_radix_sort/starter/starter.mojo b/challenges/hard/36_radix_sort/starter/starter.mojo index 7484ff7c..354f04b8 100644 --- a/challenges/hard/36_radix_sort/starter/starter.mojo +++ b/challenges/hard/36_radix_sort/starter/starter.mojo @@ -2,7 +2,12 @@ from std.gpu.host import DeviceContext from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer + # input, output are device pointers @export -fn solve(input: UnsafePointer[UInt32, MutExternalOrigin], output: UnsafePointer[UInt32, MutExternalOrigin], N: Int32) raises: +fn solve( + input: UnsafePointer[UInt32, MutExternalOrigin], + output: UnsafePointer[UInt32, MutExternalOrigin], + N: Int32, +) raises: pass diff --git a/challenges/hard/39_Fast_Fourier_transform/starter/starter.mojo b/challenges/hard/39_Fast_Fourier_transform/starter/starter.mojo index 2fa6da9a..93440dde 100644 --- a/challenges/hard/39_Fast_Fourier_transform/starter/starter.mojo +++ b/challenges/hard/39_Fast_Fourier_transform/starter/starter.mojo @@ -3,7 +3,12 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # signal and spectrum are device pointers @export -fn solve(signal: UnsafePointer[Float32, MutExternalOrigin], spectrum: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + signal: UnsafePointer[Float32, MutExternalOrigin], + spectrum: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: pass diff --git a/challenges/hard/46_bfs_shortest_path/starter/starter.mojo b/challenges/hard/46_bfs_shortest_path/starter/starter.mojo index 9bb12c2f..aa50e8eb 100644 --- a/challenges/hard/46_bfs_shortest_path/starter/starter.mojo +++ b/challenges/hard/46_bfs_shortest_path/starter/starter.mojo @@ -1,7 +1,16 @@ from std.memory import UnsafePointer + # grid, result are device pointers @export -fn solve(grid: UnsafePointer[Int32, MutExternalOrigin], result: UnsafePointer[Int32, MutExternalOrigin], rows: Int32, cols: Int32, - start_row: Int32, start_col: Int32, end_row: Int32, end_col: Int32) raises: +fn solve( + grid: UnsafePointer[Int32, MutExternalOrigin], + result: UnsafePointer[Int32, MutExternalOrigin], + rows: Int32, + cols: Int32, + start_row: Int32, + start_col: Int32, + end_row: Int32, + end_col: Int32, +) raises: pass diff --git a/challenges/hard/53_casual_attention/starter/starter.mojo b/challenges/hard/53_casual_attention/starter/starter.mojo index 196a0a16..80d27b6d 100644 --- a/challenges/hard/53_casual_attention/starter/starter.mojo +++ b/challenges/hard/53_casual_attention/starter/starter.mojo @@ -3,8 +3,15 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # Q, K, V, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(Q: UnsafePointer[Float32, MutExternalOrigin], K: UnsafePointer[Float32, MutExternalOrigin], V: UnsafePointer[Float32, MutExternalOrigin], - output: UnsafePointer[Float32, MutExternalOrigin], M: Int32, d: Int32) raises: +fn solve( + Q: UnsafePointer[Float32, MutExternalOrigin], + K: UnsafePointer[Float32, MutExternalOrigin], + V: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + M: Int32, + d: Int32, +) raises: pass diff --git a/challenges/hard/56_linear_attention/starter/starter.mojo b/challenges/hard/56_linear_attention/starter/starter.mojo index 196a0a16..80d27b6d 100644 --- a/challenges/hard/56_linear_attention/starter/starter.mojo +++ b/challenges/hard/56_linear_attention/starter/starter.mojo @@ -3,8 +3,15 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # Q, K, V, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(Q: UnsafePointer[Float32, MutExternalOrigin], K: UnsafePointer[Float32, MutExternalOrigin], V: UnsafePointer[Float32, MutExternalOrigin], - output: UnsafePointer[Float32, MutExternalOrigin], M: Int32, d: Int32) raises: +fn solve( + Q: UnsafePointer[Float32, MutExternalOrigin], + K: UnsafePointer[Float32, MutExternalOrigin], + V: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + M: Int32, + d: Int32, +) raises: pass diff --git a/challenges/hard/59_sliding_window_attn/starter/starter.mojo b/challenges/hard/59_sliding_window_attn/starter/starter.mojo index 035a8680..44c56b15 100644 --- a/challenges/hard/59_sliding_window_attn/starter/starter.mojo +++ b/challenges/hard/59_sliding_window_attn/starter/starter.mojo @@ -3,8 +3,16 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # Q, K, V, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(Q: UnsafePointer[Float32, MutExternalOrigin], K: UnsafePointer[Float32, MutExternalOrigin], V: UnsafePointer[Float32, MutExternalOrigin], - output: UnsafePointer[Float32, MutExternalOrigin], M: Int32, d: Int32, window_size: Int32) raises: +fn solve( + Q: UnsafePointer[Float32, MutExternalOrigin], + K: UnsafePointer[Float32, MutExternalOrigin], + V: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + M: Int32, + d: Int32, + window_size: Int32, +) raises: pass diff --git a/challenges/hard/73_all_pairs_shortest_paths/starter/starter.mojo b/challenges/hard/73_all_pairs_shortest_paths/starter/starter.mojo index 71f7058d..a86d900a 100644 --- a/challenges/hard/73_all_pairs_shortest_paths/starter/starter.mojo +++ b/challenges/hard/73_all_pairs_shortest_paths/starter/starter.mojo @@ -3,7 +3,12 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # dist, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(dist: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + dist: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: pass diff --git a/challenges/hard/74_gpt2_block/starter/starter.mojo b/challenges/hard/74_gpt2_block/starter/starter.mojo index e7f5ed07..b338e292 100644 --- a/challenges/hard/74_gpt2_block/starter/starter.mojo +++ b/challenges/hard/74_gpt2_block/starter/starter.mojo @@ -3,7 +3,13 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # x, output, weights are device pointers @export -fn solve(x: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], weights: UnsafePointer[Float32, MutExternalOrigin], seq_len: Int32) raises: +fn solve( + x: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + weights: UnsafePointer[Float32, MutExternalOrigin], + seq_len: Int32, +) raises: pass diff --git a/challenges/medium/10_2d_convolution/starter/starter.mojo b/challenges/medium/10_2d_convolution/starter/starter.mojo index a1682717..55693c03 100644 --- a/challenges/medium/10_2d_convolution/starter/starter.mojo +++ b/challenges/medium/10_2d_convolution/starter/starter.mojo @@ -3,6 +3,15 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], kernel: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], input_rows: Int32, input_cols: Int32, kernel_rows: Int32, kernel_cols: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + kernel: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + input_rows: Int32, + input_cols: Int32, + kernel_rows: Int32, + kernel_cols: Int32, +) raises: pass diff --git a/challenges/medium/11_3d_convolution/starter/starter.mojo b/challenges/medium/11_3d_convolution/starter/starter.mojo index 09872742..1ba87c43 100644 --- a/challenges/medium/11_3d_convolution/starter/starter.mojo +++ b/challenges/medium/11_3d_convolution/starter/starter.mojo @@ -3,6 +3,17 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], kernel: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], input_depth: Int32, input_rows: Int32, input_cols: Int32, kernel_depth: Int32, kernel_rows: Int32, kernel_cols: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + kernel: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + input_depth: Int32, + input_rows: Int32, + input_cols: Int32, + kernel_depth: Int32, + kernel_rows: Int32, + kernel_cols: Int32, +) raises: pass diff --git a/challenges/medium/13_histogramming/starter/starter.mojo b/challenges/medium/13_histogramming/starter/starter.mojo index a3b5cb24..b3446647 100644 --- a/challenges/medium/13_histogramming/starter/starter.mojo +++ b/challenges/medium/13_histogramming/starter/starter.mojo @@ -3,6 +3,12 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(input: UnsafePointer[Int32, MutExternalOrigin], histogram: UnsafePointer[Int32, MutExternalOrigin], N: Int32, num_bins: Int32) raises: +fn solve( + input: UnsafePointer[Int32, MutExternalOrigin], + histogram: UnsafePointer[Int32, MutExternalOrigin], + N: Int32, + num_bins: Int32, +) raises: pass diff --git a/challenges/medium/16_prefix_sum/starter/starter.mojo b/challenges/medium/16_prefix_sum/starter/starter.mojo index 02c2c06f..35305df8 100644 --- a/challenges/medium/16_prefix_sum/starter/starter.mojo +++ b/challenges/medium/16_prefix_sum/starter/starter.mojo @@ -3,6 +3,11 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: pass diff --git a/challenges/medium/17_dot_product/starter/starter.mojo b/challenges/medium/17_dot_product/starter/starter.mojo index dbc4b7ff..628c5906 100644 --- a/challenges/medium/17_dot_product/starter/starter.mojo +++ b/challenges/medium/17_dot_product/starter/starter.mojo @@ -3,6 +3,12 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], result: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + A: UnsafePointer[Float32, MutExternalOrigin], + B: UnsafePointer[Float32, MutExternalOrigin], + result: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: pass diff --git a/challenges/medium/18_sparse_matrix_vector_multiplication/starter/starter.mojo b/challenges/medium/18_sparse_matrix_vector_multiplication/starter/starter.mojo index c593c92a..7f86a849 100644 --- a/challenges/medium/18_sparse_matrix_vector_multiplication/starter/starter.mojo +++ b/challenges/medium/18_sparse_matrix_vector_multiplication/starter/starter.mojo @@ -3,6 +3,14 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(A: UnsafePointer[Float32, MutExternalOrigin], x: UnsafePointer[Float32, MutExternalOrigin], y: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, nnz: Int32) raises: +fn solve( + A: UnsafePointer[Float32, MutExternalOrigin], + x: UnsafePointer[Float32, MutExternalOrigin], + y: UnsafePointer[Float32, MutExternalOrigin], + M: Int32, + N: Int32, + nnz: Int32, +) raises: pass diff --git a/challenges/medium/22_gemm/starter/starter.mojo b/challenges/medium/22_gemm/starter/starter.mojo index 28329886..85c23000 100644 --- a/challenges/medium/22_gemm/starter/starter.mojo +++ b/challenges/medium/22_gemm/starter/starter.mojo @@ -3,6 +3,16 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(A: UnsafePointer[Float16, MutExternalOrigin], B: UnsafePointer[Float16, MutExternalOrigin], C: UnsafePointer[Float16, MutExternalOrigin], M: Int32, N: Int32, K: Int32, alpha: Float32, beta: Float32) raises: +fn solve( + A: UnsafePointer[Float16, MutExternalOrigin], + B: UnsafePointer[Float16, MutExternalOrigin], + C: UnsafePointer[Float16, MutExternalOrigin], + M: Int32, + N: Int32, + K: Int32, + alpha: Float32, + beta: Float32, +) raises: pass diff --git a/challenges/medium/25_categorical_cross_entropy_loss/starter/starter.mojo b/challenges/medium/25_categorical_cross_entropy_loss/starter/starter.mojo index 7d35ecd2..c3c0c3d2 100644 --- a/challenges/medium/25_categorical_cross_entropy_loss/starter/starter.mojo +++ b/challenges/medium/25_categorical_cross_entropy_loss/starter/starter.mojo @@ -3,6 +3,13 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(logits: UnsafePointer[Float32, MutExternalOrigin], true_labels: UnsafePointer[Int32, MutExternalOrigin], loss: UnsafePointer[Float32, MutExternalOrigin], N: Int32, C: Int32) raises: +fn solve( + logits: UnsafePointer[Float32, MutExternalOrigin], + true_labels: UnsafePointer[Int32, MutExternalOrigin], + loss: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, + C: Int32, +) raises: pass diff --git a/challenges/medium/27_mean_squared_error/starter/starter.mojo b/challenges/medium/27_mean_squared_error/starter/starter.mojo index 6ff28913..2ba5fa40 100644 --- a/challenges/medium/27_mean_squared_error/starter/starter.mojo +++ b/challenges/medium/27_mean_squared_error/starter/starter.mojo @@ -3,6 +3,12 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(predictions: UnsafePointer[Float32, MutExternalOrigin], targets: UnsafePointer[Float32, MutExternalOrigin], mse: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + predictions: UnsafePointer[Float32, MutExternalOrigin], + targets: UnsafePointer[Float32, MutExternalOrigin], + mse: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: pass diff --git a/challenges/medium/28_gaussian_blur/starter/starter.mojo b/challenges/medium/28_gaussian_blur/starter/starter.mojo index a1682717..55693c03 100644 --- a/challenges/medium/28_gaussian_blur/starter/starter.mojo +++ b/challenges/medium/28_gaussian_blur/starter/starter.mojo @@ -3,6 +3,15 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], kernel: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], input_rows: Int32, input_cols: Int32, kernel_rows: Int32, kernel_cols: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + kernel: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + input_rows: Int32, + input_cols: Int32, + kernel_rows: Int32, + kernel_cols: Int32, +) raises: pass diff --git a/challenges/medium/29_top_k_selection/starter/starter.mojo b/challenges/medium/29_top_k_selection/starter/starter.mojo index dfa4381e..661de98c 100644 --- a/challenges/medium/29_top_k_selection/starter/starter.mojo +++ b/challenges/medium/29_top_k_selection/starter/starter.mojo @@ -3,6 +3,12 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32, k: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, + k: Int32, +) raises: pass diff --git a/challenges/medium/30_batched_matrix_multiplication/starter/starter.mojo b/challenges/medium/30_batched_matrix_multiplication/starter/starter.mojo index 558b9b8c..525f6a24 100644 --- a/challenges/medium/30_batched_matrix_multiplication/starter/starter.mojo +++ b/challenges/medium/30_batched_matrix_multiplication/starter/starter.mojo @@ -3,6 +3,15 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], BATCH: Int32, M: Int32, N: Int32, K: Int32) raises: +fn solve( + A: UnsafePointer[Float32, MutExternalOrigin], + B: UnsafePointer[Float32, MutExternalOrigin], + C: UnsafePointer[Float32, MutExternalOrigin], + BATCH: Int32, + M: Int32, + N: Int32, + K: Int32, +) raises: pass diff --git a/challenges/medium/32_int8_quantized_matmul/starter/starter.mojo b/challenges/medium/32_int8_quantized_matmul/starter/starter.mojo index 1f2ca8c3..03ca7720 100644 --- a/challenges/medium/32_int8_quantized_matmul/starter/starter.mojo +++ b/challenges/medium/32_int8_quantized_matmul/starter/starter.mojo @@ -3,6 +3,20 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(A: UnsafePointer[Int8, MutExternalOrigin], B: UnsafePointer[Int8, MutExternalOrigin], C: UnsafePointer[Int8, MutExternalOrigin], M: Int32, N: Int32, K: Int32, scale_A: Float32, scale_B: Float32, scale_C: Float32, zero_point_A: Int32, zero_point_B: Int32, zero_point_C: Int32) raises: +fn solve( + A: UnsafePointer[Int8, MutExternalOrigin], + B: UnsafePointer[Int8, MutExternalOrigin], + C: UnsafePointer[Int8, MutExternalOrigin], + M: Int32, + N: Int32, + K: Int32, + scale_A: Float32, + scale_B: Float32, + scale_C: Float32, + zero_point_A: Int32, + zero_point_B: Int32, + zero_point_C: Int32, +) raises: pass diff --git a/challenges/medium/33_ordinary_least_squares/starter/starter.mojo b/challenges/medium/33_ordinary_least_squares/starter/starter.mojo index fefa6e34..18a72242 100644 --- a/challenges/medium/33_ordinary_least_squares/starter/starter.mojo +++ b/challenges/medium/33_ordinary_least_squares/starter/starter.mojo @@ -3,7 +3,14 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # X, y, beta are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(X: UnsafePointer[Float32, MutExternalOrigin], y: UnsafePointer[Float32, MutExternalOrigin], beta: UnsafePointer[Float32, MutExternalOrigin], n_samples: Int32, n_features: Int32) raises: +fn solve( + X: UnsafePointer[Float32, MutExternalOrigin], + y: UnsafePointer[Float32, MutExternalOrigin], + beta: UnsafePointer[Float32, MutExternalOrigin], + n_samples: Int32, + n_features: Int32, +) raises: pass diff --git a/challenges/medium/34_logistic_regression/starter/starter.mojo b/challenges/medium/34_logistic_regression/starter/starter.mojo index fefa6e34..18a72242 100644 --- a/challenges/medium/34_logistic_regression/starter/starter.mojo +++ b/challenges/medium/34_logistic_regression/starter/starter.mojo @@ -3,7 +3,14 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # X, y, beta are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(X: UnsafePointer[Float32, MutExternalOrigin], y: UnsafePointer[Float32, MutExternalOrigin], beta: UnsafePointer[Float32, MutExternalOrigin], n_samples: Int32, n_features: Int32) raises: +fn solve( + X: UnsafePointer[Float32, MutExternalOrigin], + y: UnsafePointer[Float32, MutExternalOrigin], + beta: UnsafePointer[Float32, MutExternalOrigin], + n_samples: Int32, + n_features: Int32, +) raises: pass diff --git a/challenges/medium/35_monte_carlo_integration/starter/starter.mojo b/challenges/medium/35_monte_carlo_integration/starter/starter.mojo index f81c4ade..397fbbcd 100644 --- a/challenges/medium/35_monte_carlo_integration/starter/starter.mojo +++ b/challenges/medium/35_monte_carlo_integration/starter/starter.mojo @@ -3,7 +3,14 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # y_samples, result are device pointers @export -fn solve(y_samples: UnsafePointer[Float32, MutExternalOrigin], result: UnsafePointer[Float32, MutExternalOrigin], a: Float32, b: Float32, n_samples: Int32) raises: +fn solve( + y_samples: UnsafePointer[Float32, MutExternalOrigin], + result: UnsafePointer[Float32, MutExternalOrigin], + a: Float32, + b: Float32, + n_samples: Int32, +) raises: pass diff --git a/challenges/medium/37_matrix_power/starter/starter.mojo b/challenges/medium/37_matrix_power/starter/starter.mojo index 187f7efb..38d5f009 100644 --- a/challenges/medium/37_matrix_power/starter/starter.mojo +++ b/challenges/medium/37_matrix_power/starter/starter.mojo @@ -3,9 +3,13 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # input, output are device pointers @export fn solve( - input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32, P: Int32 + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, + P: Int32, ) raises: pass diff --git a/challenges/medium/38_nearest_neighbor/starter/starter.mojo b/challenges/medium/38_nearest_neighbor/starter/starter.mojo index d317ef73..897643bc 100644 --- a/challenges/medium/38_nearest_neighbor/starter/starter.mojo +++ b/challenges/medium/38_nearest_neighbor/starter/starter.mojo @@ -1,6 +1,11 @@ from std.memory import UnsafePointer + # points and indices are device pointers @export -fn solve(points: UnsafePointer[Float32, MutExternalOrigin], indices: UnsafePointer[Int32, MutExternalOrigin], N: Int32) raises: +fn solve( + points: UnsafePointer[Float32, MutExternalOrigin], + indices: UnsafePointer[Int32, MutExternalOrigin], + N: Int32, +) raises: pass diff --git a/challenges/medium/40_batch_normalization/starter/starter.mojo b/challenges/medium/40_batch_normalization/starter/starter.mojo index 4e230e1d..40d007c8 100644 --- a/challenges/medium/40_batch_normalization/starter/starter.mojo +++ b/challenges/medium/40_batch_normalization/starter/starter.mojo @@ -3,9 +3,16 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # input, gamma, beta, output are device pointers @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], gamma: UnsafePointer[Float32, MutExternalOrigin], - beta: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], - N: Int32, C: Int32, eps: Float32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + gamma: UnsafePointer[Float32, MutExternalOrigin], + beta: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, + C: Int32, + eps: Float32, +) raises: pass diff --git a/challenges/medium/42_2d_max_pooling/starter/starter.mojo b/challenges/medium/42_2d_max_pooling/starter/starter.mojo index 60759d90..781b8ede 100644 --- a/challenges/medium/42_2d_max_pooling/starter/starter.mojo +++ b/challenges/medium/42_2d_max_pooling/starter/starter.mojo @@ -3,9 +3,18 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # input, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], - N: Int32, C: Int32, H: Int32, W: Int32, - kernel_size: Int32, stride: Int32, padding: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, + C: Int32, + H: Int32, + W: Int32, + kernel_size: Int32, + stride: Int32, + padding: Int32, +) raises: pass diff --git a/challenges/medium/43_count_array_element/starter/starter.mojo b/challenges/medium/43_count_array_element/starter/starter.mojo index e5271c02..0b56de43 100644 --- a/challenges/medium/43_count_array_element/starter/starter.mojo +++ b/challenges/medium/43_count_array_element/starter/starter.mojo @@ -3,7 +3,13 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # input, output are device pointers @export -fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Int32, MutExternalOrigin], N: Int32, K: Int32) raises: +fn solve( + input: UnsafePointer[Int32, MutExternalOrigin], + output: UnsafePointer[Int32, MutExternalOrigin], + N: Int32, + K: Int32, +) raises: pass diff --git a/challenges/medium/44_count_2d_array_element/starter/starter.mojo b/challenges/medium/44_count_2d_array_element/starter/starter.mojo index d7cab051..0c85bbf9 100644 --- a/challenges/medium/44_count_2d_array_element/starter/starter.mojo +++ b/challenges/medium/44_count_2d_array_element/starter/starter.mojo @@ -3,7 +3,14 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # input, output are device pointers @export -fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Int32, MutExternalOrigin], N: Int32, M: Int32, K: Int32) raises: +fn solve( + input: UnsafePointer[Int32, MutExternalOrigin], + output: UnsafePointer[Int32, MutExternalOrigin], + N: Int32, + M: Int32, + K: Int32, +) raises: pass diff --git a/challenges/medium/45_count_3d_array_element/starter/starter.mojo b/challenges/medium/45_count_3d_array_element/starter/starter.mojo index 47b6da52..be330dea 100644 --- a/challenges/medium/45_count_3d_array_element/starter/starter.mojo +++ b/challenges/medium/45_count_3d_array_element/starter/starter.mojo @@ -3,7 +3,15 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # input, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Int32, MutExternalOrigin], N: Int32, M: Int32, K: Int32, P: Int32) raises: +fn solve( + input: UnsafePointer[Int32, MutExternalOrigin], + output: UnsafePointer[Int32, MutExternalOrigin], + N: Int32, + M: Int32, + K: Int32, + P: Int32, +) raises: pass diff --git a/challenges/medium/47_subarray_sum/starter/starter.mojo b/challenges/medium/47_subarray_sum/starter/starter.mojo index 1e0d49ea..6231a557 100644 --- a/challenges/medium/47_subarray_sum/starter/starter.mojo +++ b/challenges/medium/47_subarray_sum/starter/starter.mojo @@ -3,7 +3,14 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # input, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Int32, MutExternalOrigin], N: Int32, S: Int32, E: Int32) raises: +fn solve( + input: UnsafePointer[Int32, MutExternalOrigin], + output: UnsafePointer[Int32, MutExternalOrigin], + N: Int32, + S: Int32, + E: Int32, +) raises: pass diff --git a/challenges/medium/48_2d_subarray_sum/starter/starter.mojo b/challenges/medium/48_2d_subarray_sum/starter/starter.mojo index bfee0535..9bbe423d 100644 --- a/challenges/medium/48_2d_subarray_sum/starter/starter.mojo +++ b/challenges/medium/48_2d_subarray_sum/starter/starter.mojo @@ -3,7 +3,17 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # input, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Int32, MutExternalOrigin], N: Int32, M: Int32, S_ROW: Int32, E_ROW: Int32, S_COL: Int32, E_COL: Int32) raises: +fn solve( + input: UnsafePointer[Int32, MutExternalOrigin], + output: UnsafePointer[Int32, MutExternalOrigin], + N: Int32, + M: Int32, + S_ROW: Int32, + E_ROW: Int32, + S_COL: Int32, + E_COL: Int32, +) raises: pass diff --git a/challenges/medium/49_3d_subarray_sum/starter/starter.mojo b/challenges/medium/49_3d_subarray_sum/starter/starter.mojo index 84864938..407aabe8 100644 --- a/challenges/medium/49_3d_subarray_sum/starter/starter.mojo +++ b/challenges/medium/49_3d_subarray_sum/starter/starter.mojo @@ -3,7 +3,20 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # input, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Int32, MutExternalOrigin], N: Int32, M: Int32, K: Int32, S_DEP: Int32, E_DEP: Int32, S_ROW: Int32, E_ROW: Int32, S_COL: Int32, E_COL: Int32) raises: +fn solve( + input: UnsafePointer[Int32, MutExternalOrigin], + output: UnsafePointer[Int32, MutExternalOrigin], + N: Int32, + M: Int32, + K: Int32, + S_DEP: Int32, + E_DEP: Int32, + S_ROW: Int32, + E_ROW: Int32, + S_COL: Int32, + E_COL: Int32, +) raises: pass diff --git a/challenges/medium/4_reduction/starter/starter.mojo b/challenges/medium/4_reduction/starter/starter.mojo index d076ebd5..790f2844 100644 --- a/challenges/medium/4_reduction/starter/starter.mojo +++ b/challenges/medium/4_reduction/starter/starter.mojo @@ -3,7 +3,12 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # input, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: pass diff --git a/challenges/medium/50_rms_normalization/starter/starter.mojo b/challenges/medium/50_rms_normalization/starter/starter.mojo index a896b6a2..8287e6b6 100644 --- a/challenges/medium/50_rms_normalization/starter/starter.mojo +++ b/challenges/medium/50_rms_normalization/starter/starter.mojo @@ -3,9 +3,15 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # input, output are device pointers @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], gamma: Float32, - beta: Float32, output: UnsafePointer[Float32, MutExternalOrigin], - N: Int32, eps: Float32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + gamma: Float32, + beta: Float32, + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, + eps: Float32, +) raises: pass diff --git a/challenges/medium/51_max_subarray_sum/starter/starter.mojo b/challenges/medium/51_max_subarray_sum/starter/starter.mojo index 884c0518..035ed9f0 100644 --- a/challenges/medium/51_max_subarray_sum/starter/starter.mojo +++ b/challenges/medium/51_max_subarray_sum/starter/starter.mojo @@ -3,7 +3,13 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # input, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(input: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Int32, MutExternalOrigin], N: Int32, window_size: Int32) raises: +fn solve( + input: UnsafePointer[Int32, MutExternalOrigin], + output: UnsafePointer[Int32, MutExternalOrigin], + N: Int32, + window_size: Int32, +) raises: pass diff --git a/challenges/medium/55_attn_w_linear_bias/starter/starter.mojo b/challenges/medium/55_attn_w_linear_bias/starter/starter.mojo index de895cb5..bbe086ec 100644 --- a/challenges/medium/55_attn_w_linear_bias/starter/starter.mojo +++ b/challenges/medium/55_attn_w_linear_bias/starter/starter.mojo @@ -3,8 +3,17 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # Q, K, V, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(Q: UnsafePointer[Float32, MutExternalOrigin], K: UnsafePointer[Float32, MutExternalOrigin], V: UnsafePointer[Float32, MutExternalOrigin], - output: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, d: Int32, alpha: Float32) raises: +fn solve( + Q: UnsafePointer[Float32, MutExternalOrigin], + K: UnsafePointer[Float32, MutExternalOrigin], + V: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + M: Int32, + N: Int32, + d: Int32, + alpha: Float32, +) raises: pass diff --git a/challenges/medium/57_fp16_batched_matmul/starter/starter.mojo b/challenges/medium/57_fp16_batched_matmul/starter/starter.mojo index bda7ea8e..6cd244ae 100644 --- a/challenges/medium/57_fp16_batched_matmul/starter/starter.mojo +++ b/challenges/medium/57_fp16_batched_matmul/starter/starter.mojo @@ -3,6 +3,15 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(A: UnsafePointer[Float16, MutExternalOrigin], B: UnsafePointer[Float16, MutExternalOrigin], C: UnsafePointer[Float16, MutExternalOrigin], BATCH: Int32, M: Int32, N: Int32, K: Int32) raises: +fn solve( + A: UnsafePointer[Float16, MutExternalOrigin], + B: UnsafePointer[Float16, MutExternalOrigin], + C: UnsafePointer[Float16, MutExternalOrigin], + BATCH: Int32, + M: Int32, + N: Int32, + K: Int32, +) raises: pass diff --git a/challenges/medium/58_fp16_dot_product/starter/starter.mojo b/challenges/medium/58_fp16_dot_product/starter/starter.mojo index 3668bc38..0778b427 100644 --- a/challenges/medium/58_fp16_dot_product/starter/starter.mojo +++ b/challenges/medium/58_fp16_dot_product/starter/starter.mojo @@ -3,7 +3,13 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # A, B, result are device pointers @export -fn solve(A: UnsafePointer[Float16, MutExternalOrigin], B: UnsafePointer[Float16, MutExternalOrigin], result: UnsafePointer[Float16, MutExternalOrigin], N: Int32) raises: +fn solve( + A: UnsafePointer[Float16, MutExternalOrigin], + B: UnsafePointer[Float16, MutExternalOrigin], + result: UnsafePointer[Float16, MutExternalOrigin], + N: Int32, +) raises: pass diff --git a/challenges/medium/5_softmax/starter/starter.mojo b/challenges/medium/5_softmax/starter/starter.mojo index 02c2c06f..35305df8 100644 --- a/challenges/medium/5_softmax/starter/starter.mojo +++ b/challenges/medium/5_softmax/starter/starter.mojo @@ -3,6 +3,11 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: pass diff --git a/challenges/medium/60_top_p_sampling/starter/starter.mojo b/challenges/medium/60_top_p_sampling/starter/starter.mojo index 86f4cfe5..e53febb9 100644 --- a/challenges/medium/60_top_p_sampling/starter/starter.mojo +++ b/challenges/medium/60_top_p_sampling/starter/starter.mojo @@ -2,8 +2,13 @@ from std.gpu.host import DeviceContext from std.memory import UnsafePointer from std.gpu import block_dim, block_idx, thread_idx + @export -fn solve(logits: UnsafePointer[Float32, MutExternalOrigin], p: UnsafePointer[Float32, MutExternalOrigin], - seed: UnsafePointer[Int32, MutExternalOrigin], sampled_token: UnsafePointer[Int32, MutExternalOrigin], - vocab_size: Int32) raises: +fn solve( + logits: UnsafePointer[Float32, MutExternalOrigin], + p: UnsafePointer[Float32, MutExternalOrigin], + seed: UnsafePointer[Int32, MutExternalOrigin], + sampled_token: UnsafePointer[Int32, MutExternalOrigin], + vocab_size: Int32, +) raises: pass diff --git a/challenges/medium/61_rope_embedding/starter/starter.mojo b/challenges/medium/61_rope_embedding/starter/starter.mojo index b7a01358..a5d579ed 100644 --- a/challenges/medium/61_rope_embedding/starter/starter.mojo +++ b/challenges/medium/61_rope_embedding/starter/starter.mojo @@ -3,7 +3,15 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # Q, cos, sin, output are device pointers @export -fn solve(Q: UnsafePointer[Float32, MutExternalOrigin], cos: UnsafePointer[Float32, MutExternalOrigin], sin: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], M: Int32, D: Int32) raises: +fn solve( + Q: UnsafePointer[Float32, MutExternalOrigin], + cos: UnsafePointer[Float32, MutExternalOrigin], + sin: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + M: Int32, + D: Int32, +) raises: pass diff --git a/challenges/medium/64_weight_dequantization/starter/starter.mojo b/challenges/medium/64_weight_dequantization/starter/starter.mojo index 3b8cb80e..10521090 100644 --- a/challenges/medium/64_weight_dequantization/starter/starter.mojo +++ b/challenges/medium/64_weight_dequantization/starter/starter.mojo @@ -3,7 +3,15 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # X, S, Y are device pointers @export -fn solve(X: UnsafePointer[Float32, MutExternalOrigin], S: UnsafePointer[Float32, MutExternalOrigin], Y: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, TILE_SIZE: Int32) raises: +fn solve( + X: UnsafePointer[Float32, MutExternalOrigin], + S: UnsafePointer[Float32, MutExternalOrigin], + Y: UnsafePointer[Float32, MutExternalOrigin], + M: Int32, + N: Int32, + TILE_SIZE: Int32, +) raises: pass diff --git a/challenges/medium/67_moe_topk_gating/starter/starter.mojo b/challenges/medium/67_moe_topk_gating/starter/starter.mojo index 0412e2ed..a1394ea2 100644 --- a/challenges/medium/67_moe_topk_gating/starter/starter.mojo +++ b/challenges/medium/67_moe_topk_gating/starter/starter.mojo @@ -3,6 +3,14 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + @export -fn solve(logits: UnsafePointer[Float32, MutExternalOrigin], topk_weights: UnsafePointer[Float32, MutExternalOrigin], topk_indices: UnsafePointer[Int32, MutExternalOrigin], M: Int32, E: Int32, k: Int32) raises: +fn solve( + logits: UnsafePointer[Float32, MutExternalOrigin], + topk_weights: UnsafePointer[Float32, MutExternalOrigin], + topk_indices: UnsafePointer[Int32, MutExternalOrigin], + M: Int32, + E: Int32, + k: Int32, +) raises: pass diff --git a/challenges/medium/69_jacobi_stencil_2d/starter/starter.mojo b/challenges/medium/69_jacobi_stencil_2d/starter/starter.mojo index 273f1732..43b64e2d 100644 --- a/challenges/medium/69_jacobi_stencil_2d/starter/starter.mojo +++ b/challenges/medium/69_jacobi_stencil_2d/starter/starter.mojo @@ -3,7 +3,13 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # input, output are device pointers @export -fn solve(input: UnsafePointer[Float32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], rows: Int32, cols: Int32) raises: +fn solve( + input: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + rows: Int32, + cols: Int32, +) raises: pass diff --git a/challenges/medium/6_softmax_attention/starter/starter.mojo b/challenges/medium/6_softmax_attention/starter/starter.mojo index 95736d92..ffc240f4 100644 --- a/challenges/medium/6_softmax_attention/starter/starter.mojo +++ b/challenges/medium/6_softmax_attention/starter/starter.mojo @@ -3,8 +3,16 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # Q, K, V, output are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(Q: UnsafePointer[Float32, MutExternalOrigin], K: UnsafePointer[Float32, MutExternalOrigin], V: UnsafePointer[Float32, MutExternalOrigin], - output: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, d: Int32) raises: +fn solve( + Q: UnsafePointer[Float32, MutExternalOrigin], + K: UnsafePointer[Float32, MutExternalOrigin], + V: UnsafePointer[Float32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + M: Int32, + N: Int32, + d: Int32, +) raises: pass diff --git a/challenges/medium/70_segmented_prefix_sum/starter/starter.mojo b/challenges/medium/70_segmented_prefix_sum/starter/starter.mojo index 2a233e08..ad7b1ac6 100644 --- a/challenges/medium/70_segmented_prefix_sum/starter/starter.mojo +++ b/challenges/medium/70_segmented_prefix_sum/starter/starter.mojo @@ -3,7 +3,13 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # values, flags, output are device pointers @export -fn solve(values: UnsafePointer[Float32, MutExternalOrigin], flags: UnsafePointer[Int32, MutExternalOrigin], output: UnsafePointer[Float32, MutExternalOrigin], N: Int32) raises: +fn solve( + values: UnsafePointer[Float32, MutExternalOrigin], + flags: UnsafePointer[Int32, MutExternalOrigin], + output: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, +) raises: pass diff --git a/challenges/medium/71_parallel_merge/starter/starter.mojo b/challenges/medium/71_parallel_merge/starter/starter.mojo index 82231020..d43c1f92 100644 --- a/challenges/medium/71_parallel_merge/starter/starter.mojo +++ b/challenges/medium/71_parallel_merge/starter/starter.mojo @@ -1,7 +1,14 @@ from std.gpu.host import DeviceContext from std.memory import UnsafePointer + # A, B, C are device pointers (i.e. pointers to memory on the GPU) @export -fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32) raises: +fn solve( + A: UnsafePointer[Float32, MutExternalOrigin], + B: UnsafePointer[Float32, MutExternalOrigin], + C: UnsafePointer[Float32, MutExternalOrigin], + M: Int32, + N: Int32, +) raises: pass diff --git a/challenges/medium/72_stream_compaction/starter/starter.mojo b/challenges/medium/72_stream_compaction/starter/starter.mojo index 43aca583..d36fbcf2 100644 --- a/challenges/medium/72_stream_compaction/starter/starter.mojo +++ b/challenges/medium/72_stream_compaction/starter/starter.mojo @@ -1,7 +1,12 @@ from std.gpu.host import DeviceContext from std.memory import UnsafePointer + # A, out are device pointers @export -fn solve(A: UnsafePointer[Float32, MutExternalOrigin], N: Int32, out: UnsafePointer[Float32, MutExternalOrigin]) raises: +fn solve( + A: UnsafePointer[Float32, MutExternalOrigin], + N: Int32, + out: UnsafePointer[Float32, MutExternalOrigin], +) raises: pass diff --git a/challenges/medium/75_sparse_matrix_dense_matrix_multiplication/starter/starter.mojo b/challenges/medium/75_sparse_matrix_dense_matrix_multiplication/starter/starter.mojo index cfaa9c74..11b99724 100644 --- a/challenges/medium/75_sparse_matrix_dense_matrix_multiplication/starter/starter.mojo +++ b/challenges/medium/75_sparse_matrix_dense_matrix_multiplication/starter/starter.mojo @@ -3,7 +3,16 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # A, B, C are device pointers @export -fn solve(A: UnsafePointer[Float32, MutExternalOrigin], B: UnsafePointer[Float32, MutExternalOrigin], C: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32, K: Int32, nnz: Int32) raises: +fn solve( + A: UnsafePointer[Float32, MutExternalOrigin], + B: UnsafePointer[Float32, MutExternalOrigin], + C: UnsafePointer[Float32, MutExternalOrigin], + M: Int32, + N: Int32, + K: Int32, + nnz: Int32, +) raises: pass diff --git a/challenges/medium/78_2d_fft/starter/starter.mojo b/challenges/medium/78_2d_fft/starter/starter.mojo index 5046919f..6a79cd1b 100644 --- a/challenges/medium/78_2d_fft/starter/starter.mojo +++ b/challenges/medium/78_2d_fft/starter/starter.mojo @@ -3,7 +3,13 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # signal, spectrum are device pointers @export -fn solve(signal: UnsafePointer[Float32, MutExternalOrigin], spectrum: UnsafePointer[Float32, MutExternalOrigin], M: Int32, N: Int32) raises: +fn solve( + signal: UnsafePointer[Float32, MutExternalOrigin], + spectrum: UnsafePointer[Float32, MutExternalOrigin], + M: Int32, + N: Int32, +) raises: pass diff --git a/challenges/medium/80_grouped_query_attention/starter/starter.mojo b/challenges/medium/80_grouped_query_attention/starter/starter.mojo index dad131dc..565957a1 100644 --- a/challenges/medium/80_grouped_query_attention/starter/starter.mojo +++ b/challenges/medium/80_grouped_query_attention/starter/starter.mojo @@ -1,6 +1,7 @@ from std.gpu.host import DeviceContext from std.memory import UnsafePointer + # Q, K, V, output are device pointers @export fn solve( diff --git a/challenges/medium/81_int4_matmul/starter/starter.mojo b/challenges/medium/81_int4_matmul/starter/starter.mojo index 4aa9fc85..116c4278 100644 --- a/challenges/medium/81_int4_matmul/starter/starter.mojo +++ b/challenges/medium/81_int4_matmul/starter/starter.mojo @@ -1,6 +1,7 @@ from std.gpu.host import DeviceContext from std.memory import UnsafePointer + # x, w_q, scales, y are device pointers @export fn solve( diff --git a/challenges/medium/82_linear_recurrence/starter/starter.mojo b/challenges/medium/82_linear_recurrence/starter/starter.mojo index 2a75e0c6..4ab5ceaf 100644 --- a/challenges/medium/82_linear_recurrence/starter/starter.mojo +++ b/challenges/medium/82_linear_recurrence/starter/starter.mojo @@ -3,7 +3,14 @@ from std.gpu import block_dim, block_idx, thread_idx from std.memory import UnsafePointer from std.math import ceildiv + # a, x, h are device pointers @export -fn solve(a: UnsafePointer[Float32, MutExternalOrigin], x: UnsafePointer[Float32, MutExternalOrigin], h: UnsafePointer[Float32, MutExternalOrigin], B: Int32, L: Int32) raises: +fn solve( + a: UnsafePointer[Float32, MutExternalOrigin], + x: UnsafePointer[Float32, MutExternalOrigin], + h: UnsafePointer[Float32, MutExternalOrigin], + B: Int32, + L: Int32, +) raises: pass diff --git a/challenges/medium/85_lora_linear/starter/starter.mojo b/challenges/medium/85_lora_linear/starter/starter.mojo index 8632a375..4b1ffb1a 100644 --- a/challenges/medium/85_lora_linear/starter/starter.mojo +++ b/challenges/medium/85_lora_linear/starter/starter.mojo @@ -1,6 +1,7 @@ from std.gpu.host import DeviceContext from std.memory import UnsafePointer + # x, W, A, B, output are device pointers @export fn solve( diff --git a/pyproject.toml b/pyproject.toml index 96d54bc4..11b87ab5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,5 @@ [tool.black] line-length = 100 -target-version = ['py312'] [tool.isort] profile = "black" diff --git a/scripts/update_challenges.py b/scripts/update_challenges.py index 27a7a61b..17aa60a5 100644 --- a/scripts/update_challenges.py +++ b/scripts/update_challenges.py @@ -13,6 +13,7 @@ import os import re import sys +from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path from typing import Dict, Optional @@ -154,18 +155,28 @@ def main(): base = Path(__file__).parent.parent / "challenges" dirs = [d for diff in base.iterdir() if diff.is_dir() for d in diff.iterdir() if d.is_dir()] - success = fail = 0 + # Load sequentially: load_challenge mutates sys.path/sys.modules and is not thread-safe. + payloads = [] + fail = 0 for d in sorted(dirs): try: - payload = load_challenge(d) - if update_challenge(SERVICE_URL, payload, LEETGPU_API_KEY): - success += 1 - else: - fail += 1 + payloads.append(load_challenge(d)) except Exception as e: logger.error("Failed %s: %s", d, e) fail += 1 + # Upload in parallel: pure HTTP I/O, safe to run concurrently. + success = 0 + with ThreadPoolExecutor(max_workers=16) as executor: + futures = { + executor.submit(update_challenge, SERVICE_URL, p, LEETGPU_API_KEY): p for p in payloads + } + for future in as_completed(futures): + if future.result(): + success += 1 + else: + fail += 1 + logger.info("Summary: %d succeeded, %d failed", success, fail) return 0 if fail == 0 else 1