Skip to content

Commit

Permalink
Merge branch 'feature/rand' of https://github.com/azret/llm.c into fe…
Browse files Browse the repository at this point in the history
…ature/random_init
  • Loading branch information
karpathy committed May 23, 2024
2 parents e6a7d1d + 3c3c965 commit bc1ebc1
Showing 1 changed file with 141 additions and 0 deletions.
141 changes: 141 additions & 0 deletions rand.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#ifndef RAND_H
#define RAND_H

#include <math.h>

#define MERSENNE_STATE_M 397u
#define MERSENNE_STATE_N 624u

#define LMASK 0x7ffffffful
#define UMASK 0x80000000ul

// Copyright(c) Makoto Matsumoto and Takuji Nishimura

// This implementation follows PyTorch so that we are numerically identical when running verification tests.

typedef struct {
unsigned long long seed_;
int left_;
unsigned int next_;
unsigned int state_[MERSENNE_STATE_N];
unsigned int MATRIX_A[2];
} mt19937_state;

void manual_seed(mt19937_state* state, unsigned int seed) {
state->MATRIX_A[0] = 0x0u;
state->MATRIX_A[1] = 0x9908b0df;
state->state_[0] = seed & 0xffffffff;
for (unsigned int j = 1; j < MERSENNE_STATE_N; j++) {
state->state_[j] = 1812433253 * (state->state_[j - 1] ^ (state->state_[j - 1] >> 30)) + j;
state->state_[j] &= 0xffffffff;
}
state->left_ = 1;
state->next_ = 0;
}

void next_state(mt19937_state* state) {
state->left_ = MERSENNE_STATE_N;
state->next_ = 0;
unsigned int y, j;
for (j = 0; j < MERSENNE_STATE_N - MERSENNE_STATE_M; j++) {
y = (state->state_[j] & UMASK) | (state->state_[j + 1] & LMASK);
state->state_[j] = state->state_[j + MERSENNE_STATE_M] ^ (y >> 1) ^ state->MATRIX_A[y & 0x1];
}
for (; j < MERSENNE_STATE_N - 1; j++) {
y = (state->state_[j] & UMASK) | (state->state_[j + 1] & LMASK);
state->state_[j] = state->state_[j + (MERSENNE_STATE_M - MERSENNE_STATE_N)] ^ (y >> 1) ^ state->MATRIX_A[y & 0x1];
}
y = (state->state_[MERSENNE_STATE_N - 1] & UMASK) | (state->state_[0] & LMASK);
state->state_[MERSENNE_STATE_N - 1] = state->state_[MERSENNE_STATE_M - 1] ^ (y >> 1) ^ state->MATRIX_A[y & 0x1];
}

unsigned int randint32(mt19937_state* state) {
if (!state) return 0;
if (state->MATRIX_A[0] != 0 || state->MATRIX_A[1] != 0x9908b0df) manual_seed(state, 5489); // auto-initialize
if (--state->left_ <= 0) {
next_state(state);
}
unsigned int y = state->state_[state->next_++];
y ^= y >> 11;
y ^= (y << 7) & 0x9d2c5680;
y ^= (y << 15) & 0xefc60000;
y ^= y >> 18;
return y;
}

inline unsigned long long randint64(mt19937_state* state) {
return (((unsigned long long)(randint32(state)) << 32) | randint32(state));
}

inline float randfloat32(mt19937_state* state) {
return (randint32(state) & ((1ull << 24) - 1)) * (1.0f / (1ull << 24));
}

inline double randfloat64(mt19937_state* state) {
return (randint64(state) & ((1ull << 53) - 1)) * (1.0 / (1ull << 53));
}

void uniform_(float* data, unsigned int numel, float from, float to, mt19937_state* state) {
for (unsigned int t = 0; t < numel; t++) {
data[t] = randfloat32(state) * (to - from) + from;
}
}

// Box–Muller transform

void normal_fill_16(float* data, float mean, float std, mt19937_state* state) {
#define EPSILONE 1e-12
for (unsigned int t = 0; t < 8; t++) {
float u1 = 1 - data[t];
float u2 = data[t + 8];
float radius = sqrtf(-2 * logf(u1 + EPSILONE));
float theta = 2.0 * M_PI * u2;
data[t] = (radius * cosf(theta) * std + mean);
data[t + 8] = (radius * sinf(theta) * std + mean);
}
}

void normal_fill(float* data, unsigned int numel, float mean, float std, mt19937_state* state) {
for (unsigned int t = 0; t < numel; t++) {
data[t] = randfloat32(state);
}
for (unsigned int i = 0; i < numel - 15; i += 16) {
normal_fill_16(data + i, mean, std, state);
}
if (numel % 16 != 0) {
// recompute the last 16 values
data = data + numel - 16;
for (unsigned int i = 0; i < 16; i++) {
data[i] = randfloat32(state);
}
normal_fill_16(data, mean, std, state);
}
}

void normal_(float* data, unsigned int numel, float mean, float std, mt19937_state* state) {
#define EPSILONE 1e-12
if (numel >= 16) {
normal_fill(data, numel, mean, std, state);
}
else {
double next_double_normal_sample;
int has_next_double_normal_sample = 0;
for (unsigned int t = 0; t < numel; t++) {
if (has_next_double_normal_sample) {
data[t] = (float)(next_double_normal_sample * std + mean);
has_next_double_normal_sample = 0;
continue;
}
// for numel < 16 we draw a double (float64)
float u1 = randfloat64(state);
float u2 = randfloat64(state);
float radius = sqrtf(-2 * logf(1 - u2 + EPSILONE));
float theta = 2.0 * M_PI * u1;
next_double_normal_sample = radius * sinf(theta);
has_next_double_normal_sample = 1;
data[t] = (radius * cosf(theta) * std + mean);
}
}
}

#endif

0 comments on commit bc1ebc1

Please sign in to comment.