Skip to content

Commit 25f17e6

Browse files
committed
small formatting fix before merge
1 parent dbacaf8 commit 25f17e6

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

train_gpt2.cu

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -986,10 +986,10 @@ __global__ void reduce_add_sum_kernel(floatX* dst, const float* src, size_t n, s
986 986
}
987 987

988 988
__global__ void __launch_bounds__(512, 2) // todo - any warnings on Turing with only 1024 threads?
989-
layernorm_backward_kernel9(floatX* dinp, floatX* dweight, floatX* dbias, float* scratch,
990-
const floatX* dout, const floatX* inp, const floatX* weight,
991-
const floatX* mean, const floatX* rstd,
992-
int B, int T, int C) {
989+
layernorm_backward_kernel9(floatX* dinp, floatX* dweight, floatX* dbias, float* scratch,
990+
const floatX* dout, const floatX* inp, const floatX* weight,
991+
const floatX* mean, const floatX* rstd,
992+
int B, int T, int C) {
993 993
extern __shared__ float shared[]; // size = 2*C + 2*block_size + 1
994 994
int warpsInBlock = blockDim.x / WARP_SIZE; //number of warps in block
995 995
int warpId = threadIdx.x / WARP_SIZE; // warp index within a block

0 commit comments

Comments (0)