// mm_naive.cpp - naive OpenMP matrix multiplication benchmark.
#include <cstdio>
#include <omp.h>
#include "utils.hpp"
// Byte alignment requested for the three matrix buffers below.
// NOTE(review): 64 presumably matches the target's cache-line size — confirm.
constexpr int ALIGN_SIZE = 64;

// Problem dimensions: A is M x K, B is K x N, C is M x N, all stored as flat
// row-major float arrays. Each dimension may be overridden independently at
// compile time (e.g. -DM_SIZE=2048); any dimension left undefined falls back
// to the default here. Guarding each macro separately avoids redefining (and
// silently overriding) a value the user supplied for only some of them.
#ifndef M_SIZE
#define M_SIZE 1536
#endif
#ifndef N_SIZE
#define N_SIZE 1152
#endif
#ifndef K_SIZE
#define K_SIZE 960
#endif

// Statically allocated, zero-initialized operand and result buffers.
// C must start at zero because matmul() accumulates into it.
alignas(ALIGN_SIZE) float A[M_SIZE * K_SIZE] = {};
alignas(ALIGN_SIZE) float B[K_SIZE * N_SIZE] = {};
alignas(ALIGN_SIZE) float C[M_SIZE * N_SIZE] = {};
// Naive row-major matrix multiply-accumulate: C += A * B.
//
//   A : M x K input, element (m, k) at A[m * K + k]
//   B : K x N input, element (k, n) at B[k * N + n]
//   C : M x N output, element (m, n) at C[m * N + n]
//
// The products are ADDED to whatever C already holds, so the caller must
// zero C beforehand to obtain a plain product. Non-positive dimensions make
// the loops run zero times and leave C untouched.
//
// The (m, n) iteration space is collapsed and split statically across OpenMP
// threads; each output element is written by exactly one iteration.
void matmul(const float *A, const float *B, float *C,
            const int M, const int N, const int K)
{
    // Row strides widened once so that every flat index below is computed in
    // std::size_t; the int products m * N, m * K and k * N would overflow
    // (undefined behaviour) for matrices with more than INT_MAX elements.
    const std::size_t stride_n = static_cast<std::size_t>(N);
    const std::size_t stride_k = static_cast<std::size_t>(K);

    // collapse(2) needs the m and n loops perfectly nested, so per-element
    // setup lives inside the n loop body rather than between the two loops.
#pragma omp parallel for collapse(2) schedule(static)
    for (int m = 0; m < M; ++m)
    {
        for (int n = 0; n < N; ++n)
        {
            const std::size_t col = static_cast<std::size_t>(n);
            const std::size_t a_row = static_cast<std::size_t>(m) * stride_k;
            const std::size_t c_idx = static_cast<std::size_t>(m) * stride_n + col;

            for (int k = 0; k < K; ++k)
            {
                const std::size_t kk = static_cast<std::size_t>(k);
                C[c_idx] += A[a_row + kk] * B[kk * stride_n + col];
            }
        }
    }
}
// Benchmark driver: loads A and B from ../data/arr1.txt and ../data/arr2.txt,
// times a single matmul() call, and prints the elapsed time, the achieved
// GFLOPS and a checksum of C.
// Returns 0 on success, or 1 if either input array fails to load.
int main(int argc, char *argv[])
{
    // Each factor is widened before multiplying so the count is formed in size_t.
    constexpr size_t FLOPs = 2 * static_cast<size_t>(M_SIZE) * static_cast<size_t>(N_SIZE) * static_cast<size_t>(K_SIZE);
    std::printf("M = %d, N = %d, K = %d, FLOPs = %zu\n", M_SIZE, N_SIZE, K_SIZE, FLOPs);

    // Load both operands; C is never loaded and keeps its static initial contents.
    if (!LoadArray("../data/arr1.txt", A, M_SIZE * K_SIZE) ||
        !LoadArray("../data/arr2.txt", B, K_SIZE * N_SIZE))
    {
        std::printf("Failed to load data\n");
        return 1;
    }

    // Time only the multiplication itself; omp_get_wtime() reports wall-clock seconds.
    const double start_time = omp_get_wtime();
    matmul(A, B, C, M_SIZE, N_SIZE, K_SIZE);
    const double elapsed_time = (omp_get_wtime() - start_time) * 1000.0; // milliseconds

    // FLOPs / (ms * 1e6) == FLOPs / (s * 1e9), i.e. GFLOPS.
    std::printf("Elapsed time: %.2f ms | ", elapsed_time);
    std::printf("GFLOPS: %.4f\n", static_cast<double>(FLOPs) / (elapsed_time * 1000000.0));

    // Element count is widened BEFORE the multiply (the cast used to wrap the
    // int product, which would already have overflowed for large dimensions).
    std::printf("Checksum: %.4f\n", Checksum(C, static_cast<size_t>(M_SIZE) * static_cast<size_t>(N_SIZE)));
    // NOTE(review): presumably prints a sample of C; the meaning of 100 and 5
    // is defined by ShowResult in utils.hpp — confirm there.
    ShowResult(C, 100, 5);
    return 0;
}