Commit

small formatting fix before merge
karpathy committed May 24, 2024
1 parent dbacaf8 commit 25f17e6
Showing 1 changed file with 4 additions and 4 deletions.
train_gpt2.cu: 8 changes (4 additions, 4 deletions)
@@ -986,10 +986,10 @@ __global__ void reduce_add_sum_kernel(floatX* dst, const float* src, size_t n, s
 }
 
 __global__ void __launch_bounds__(512, 2) // todo - any warnings on Turing with only 1024 threads?
-layernorm_backward_kernel9(floatX* dinp, floatX* dweight, floatX* dbias, float* scratch,
-                           const floatX* dout, const floatX* inp, const floatX* weight,
-                           const floatX* mean, const floatX* rstd,
-                           int B, int T, int C) {
+layernorm_backward_kernel9(floatX* dinp, floatX* dweight, floatX* dbias, float* scratch,
+                           const floatX* dout, const floatX* inp, const floatX* weight,
+                           const floatX* mean, const floatX* rstd,
+                           int B, int T, int C) {
 extern __shared__ float shared[]; // size = 2*C + 2*block_size + 1
 int warpsInBlock = blockDim.x / WARP_SIZE; //number of warps in block
 int warpId = threadIdx.x / WARP_SIZE; // warp index within a block
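For context, the sketch below shows how a kernel with this signature is typically launched from the host: the block size matches the __launch_bounds__(512, 2) hint, and the dynamic shared memory size follows the kernel's own comment (2*C + 2*block_size + 1 floats). The wrapper function, the num_SMs-based grid sizing, and the error check are illustrative assumptions, not the actual call site in train_gpt2.cu.

// Hypothetical host-side launch sketch, NOT the actual call site in train_gpt2.cu.
// Assumes floatX and layernorm_backward_kernel9 are declared as in the file above.
void layernorm_backward_launch_sketch(floatX* dinp, floatX* dweight, floatX* dbias,
                                      float* scratch,
                                      const floatX* dout, const floatX* inp,
                                      const floatX* weight, const floatX* mean,
                                      const floatX* rstd,
                                      int B, int T, int C, int num_SMs) {
    const int block_size = 512;          // matches the __launch_bounds__(512, 2) hint
    const int grid_size = num_SMs;       // placeholder: one resident block per SM
    // dynamic shared memory, sized per the kernel's comment: 2*C + 2*block_size + 1 floats
    size_t shared_mem_size = (2 * C + 2 * block_size + 1) * sizeof(float);
    layernorm_backward_kernel9<<<grid_size, block_size, shared_mem_size>>>(
        dinp, dweight, dbias, scratch, dout, inp, weight, mean, rstd, B, T, C);
    cudaGetLastError();                  // real code would check this, e.g. via a cudaCheck macro
}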
