Skip to content

Commit 45f14e4

Browse files
committed
update
1 parent 80223d3 commit 45f14e4

File tree

2 files changed

+39
-45
lines changed

2 files changed

+39
-45
lines changed

cpu_spmv.cpp

+30-23
Original file line numberDiff line numberDiff line change
@@ -604,19 +604,23 @@ void RunTests(
604604
printf("\t%d timing iterations\n", timing_iterations);
605605
}
606606

607-
// Allocate input and output vectors (use NUMA allocation if available to get consistent perf results)
607+
// Allocate input and output vectors (if available, use NUMA allocation to pin storage to
608+
// NUMA node 0 for performance consistency)
608609
ValueT *vector_x, *vector_y_in, *reference_vector_y_out, *vector_y_out;
609-
/*
610-
vector_x = new ValueT[csr_matrix.num_cols];
611-
vector_y_in = new ValueT[csr_matrix.num_rows];
612-
reference_vector_y_out = new ValueT[csr_matrix.num_rows];
613-
vector_y_out = new ValueT[csr_matrix.num_rows];
614-
*/
615-
616-
vector_x = (ValueT*) mkl_malloc(sizeof(ValueT) * csr_matrix.num_cols, 4096);
617-
vector_y_in = (ValueT*) mkl_malloc(sizeof(ValueT) * csr_matrix.num_rows, 4096);
618-
reference_vector_y_out = (ValueT*) mkl_malloc(sizeof(ValueT) * csr_matrix.num_rows, 4096);
619-
vector_y_out = (ValueT*) mkl_malloc(sizeof(ValueT) * csr_matrix.num_rows, 4096);
610+
if (csr_matrix.IsNumaMalloc())
611+
{
612+
vector_x = (ValueT*) numa_alloc_onnode(sizeof(ValueT) * csr_matrix.num_cols, 0);
613+
vector_y_in = (ValueT*) numa_alloc_onnode(sizeof(ValueT) * csr_matrix.num_rows, 0);
614+
reference_vector_y_out = (ValueT*) numa_alloc_onnode(sizeof(ValueT) * csr_matrix.num_rows, 0);
615+
vector_y_out = (ValueT*) numa_alloc_onnode(sizeof(ValueT) * csr_matrix.num_rows, 0);
616+
}
617+
else
618+
{
619+
vector_x = (ValueT*) mkl_malloc(sizeof(ValueT) * csr_matrix.num_cols, 4096);
620+
vector_y_in = (ValueT*) mkl_malloc(sizeof(ValueT) * csr_matrix.num_rows, 4096);
621+
reference_vector_y_out = (ValueT*) mkl_malloc(sizeof(ValueT) * csr_matrix.num_rows, 4096);
622+
vector_y_out = (ValueT*) mkl_malloc(sizeof(ValueT) * csr_matrix.num_rows, 4096);
623+
}
620624

621625
for (int col = 0; col < csr_matrix.num_cols; ++col)
622626
vector_x[col] = 1.0;
@@ -644,17 +648,20 @@ void RunTests(
644648
DisplayPerf(avg_millis, csr_matrix);
645649

646650
// Cleanup
647-
/*
648-
if (vector_x) delete[] vector_x;
649-
if (vector_y_in) delete[] vector_y_in;
650-
if (reference_vector_y_out) delete[] reference_vector_y_out;
651-
if (vector_y_out) delete[] vector_y_out;
652-
*/
653-
654-
if (vector_x) mkl_free(vector_x);
655-
if (vector_y_in) mkl_free(vector_y_in);
656-
if (reference_vector_y_out) mkl_free(reference_vector_y_out);
657-
if (vector_y_out) mkl_free(vector_y_out);
651+
if (csr_matrix.IsNumaMalloc())
652+
{
653+
if (vector_x) numa_free(vector_x, sizeof(ValueT) * csr_matrix.num_cols);
654+
if (vector_y_in) numa_free(vector_y_in, sizeof(ValueT) * csr_matrix.num_rows);
655+
if (reference_vector_y_out) numa_free(reference_vector_y_out, sizeof(ValueT) * csr_matrix.num_rows);
656+
if (vector_y_out) numa_free(vector_y_out, sizeof(ValueT) * csr_matrix.num_rows);
657+
}
658+
else
659+
{
660+
if (vector_x) mkl_free(vector_x);
661+
if (vector_y_in) mkl_free(vector_y_in);
662+
if (reference_vector_y_out) mkl_free(reference_vector_y_out);
663+
if (vector_y_out) mkl_free(vector_y_out);
664+
}
658665

659666
}
660667

sparse_matrix.h

+9-22
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,7 @@ struct GraphStats
6262
int num_cols;
6363
int num_nonzeros;
6464

65-
double diag_dist_mean; // mean
66-
double diag_dist_std_dev; // sample std dev
67-
double pearson_r; // coefficient of variation
65+
double pearson_r; // Pearson correlation coefficient of x vs y (how linear the sparsity plot is)
6866

6967
double row_length_mean; // mean
7068
double row_length_std_dev; // sample std_dev
@@ -78,19 +76,13 @@ struct GraphStats
7876
"\t num_rows: %d\n"
7977
"\t num_cols: %d\n"
8078
"\t num_nonzeros: %d\n"
81-
"\t diag_dist_mean: %.2f\n"
82-
"\t diag_dist_std_dev: %.2f\n"
83-
"\t pearson_r: %f\n"
8479
"\t row_length_mean: %.5f\n"
8580
"\t row_length_std_dev: %.5f\n"
8681
"\t row_length_variation: %.5f\n"
8782
"\t row_length_skewness: %.5f\n",
8883
num_rows,
8984
num_cols,
9085
num_nonzeros,
91-
diag_dist_mean,
92-
diag_dist_std_dev,
93-
pearson_r,
9486
row_length_mean,
9587
row_length_std_dev,
9688
row_length_variation,
@@ -100,19 +92,13 @@ struct GraphStats
10092
"%d, "
10193
"%d, "
10294
"%d, "
103-
"%.2f, "
104-
"%.2f, "
105-
"%f, "
10695
"%.5f, "
10796
"%.5f, "
10897
"%.5f, "
10998
"%.5f, ",
11099
num_rows,
111100
num_cols,
112101
num_nonzeros,
113-
diag_dist_mean,
114-
diag_dist_std_dev,
115-
pearson_r,
116102
row_length_mean,
117103
row_length_std_dev,
118104
row_length_variation,
@@ -664,11 +650,11 @@ struct CsrMatrix
664650
ValueT* values;
665651

666652

667-
// Which allocation method to use
653+
// Whether to use NUMA malloc to always put storage on the same sockets (for perf repeatability)
668654
bool IsNumaMalloc()
669655
{
670656
#ifdef CUB_MKL
671-
return ((numa_available() >= 0) && (numa_num_task_nodes() > 1));
657+
return (numa_available() >= 0);
672658
#else
673659
return false;
674660
#endif
@@ -698,7 +684,11 @@ struct CsrMatrix
698684

699685
row_offsets = (OffsetT*) numa_alloc_onnode(sizeof(OffsetT) * (num_rows + 1), 0);
700686
column_indices = (OffsetT*) numa_alloc_onnode(sizeof(OffsetT) * num_nonzeros, 0);
701-
values = (ValueT*) numa_alloc_onnode(sizeof(ValueT) * num_nonzeros, 1);
687+
688+
if (numa_num_task_nodes() > 1)
689+
values = (ValueT*) numa_alloc_onnode(sizeof(ValueT) * num_nonzeros, 1); // put on different socket than column_indices
690+
else
691+
values = (ValueT*) numa_alloc_onnode(sizeof(ValueT) * num_nonzeros, 0);
702692
}
703693
else
704694
{
@@ -824,9 +814,6 @@ struct CsrMatrix
824814
ss_tot += delta * (x - mean);
825815
}
826816
}
827-
stats.diag_dist_mean = mean;
828-
double variance = ss_tot / samples;
829-
stats.diag_dist_std_dev = sqrt(variance);
830817

831818
//
832819
// Compute Deming regression statistics
@@ -908,7 +895,7 @@ struct CsrMatrix
908895

909896
// Sample mean
910897
stats.row_length_mean = double(num_nonzeros) / num_rows;
911-
variance = 0.0;
898+
double variance = 0.0;
912899
stats.row_length_skewness = 0.0;
913900
for (OffsetT row = 0; row < num_rows; ++row)
914901
{

0 commit comments

Comments
 (0)