diff --git a/train_gpt2.cu b/train_gpt2.cu index 899293f75..16f8a4216 100644 --- a/train_gpt2.cu +++ b/train_gpt2.cu @@ -39,6 +39,7 @@ This reads & runs in fp32, B=4, T=64, LR=1e-4, val/sample never (200), #include #include #include +#include #include #include // GPU / CUDA related