@@ -604,19 +604,23 @@ void RunTests(
604
604
printf (" \t %d timing iterations\n " , timing_iterations);
605
605
}
606
606
607
- // Allocate input and output vectors (Use NUMA force if availabe to get consistent perf results)
607
+ // Allocate input and output vectors (if available, use NUMA allocation to force storage on the
608
+ // sockets for performance consistency)
608
609
ValueT *vector_x, *vector_y_in, *reference_vector_y_out, *vector_y_out;
609
- /*
610
- vector_x = new ValueT[csr_matrix.num_cols];
611
- vector_y_in = new ValueT[csr_matrix.num_rows];
612
- reference_vector_y_out = new ValueT[csr_matrix.num_rows];
613
- vector_y_out = new ValueT[csr_matrix.num_rows];
614
- */
615
-
616
- vector_x = (ValueT*) mkl_malloc (sizeof (ValueT) * csr_matrix.num_cols , 4096 );
617
- vector_y_in = (ValueT*) mkl_malloc (sizeof (ValueT) * csr_matrix.num_rows , 4096 );
618
- reference_vector_y_out = (ValueT*) mkl_malloc (sizeof (ValueT) * csr_matrix.num_rows , 4096 );
619
- vector_y_out = (ValueT*) mkl_malloc (sizeof (ValueT) * csr_matrix.num_rows , 4096 );
610
+ if (csr_matrix.IsNumaMalloc ())
611
+ {
612
+ vector_x = (ValueT*) numa_alloc_onnode (sizeof (ValueT) * csr_matrix.num_cols , 0 );
613
+ vector_y_in = (ValueT*) numa_alloc_onnode (sizeof (ValueT) * csr_matrix.num_rows , 0 );
614
+ reference_vector_y_out = (ValueT*) numa_alloc_onnode (sizeof (ValueT) * csr_matrix.num_rows , 0 );
615
+ vector_y_out = (ValueT*) numa_alloc_onnode (sizeof (ValueT) * csr_matrix.num_rows , 0 );
616
+ }
617
+ else
618
+ {
619
+ vector_x = (ValueT*) mkl_malloc (sizeof (ValueT) * csr_matrix.num_cols , 4096 );
620
+ vector_y_in = (ValueT*) mkl_malloc (sizeof (ValueT) * csr_matrix.num_rows , 4096 );
621
+ reference_vector_y_out = (ValueT*) mkl_malloc (sizeof (ValueT) * csr_matrix.num_rows , 4096 );
622
+ vector_y_out = (ValueT*) mkl_malloc (sizeof (ValueT) * csr_matrix.num_rows , 4096 );
623
+ }
620
624
621
625
for (int col = 0 ; col < csr_matrix.num_cols ; ++col)
622
626
vector_x[col] = 1.0 ;
@@ -644,17 +648,20 @@ void RunTests(
644
648
DisplayPerf (avg_millis, csr_matrix);
645
649
646
650
// Cleanup
647
- /*
648
- if (vector_x) delete[] vector_x;
649
- if (vector_y_in) delete[] vector_y_in;
650
- if (reference_vector_y_out) delete[] reference_vector_y_out;
651
- if (vector_y_out) delete[] vector_y_out;
652
- */
653
-
654
- if (vector_x) mkl_free (vector_x);
655
- if (vector_y_in) mkl_free (vector_y_in);
656
- if (reference_vector_y_out) mkl_free (reference_vector_y_out);
657
- if (vector_y_out) mkl_free (vector_y_out);
651
+ if (csr_matrix.IsNumaMalloc ())
652
+ {
653
+ if (vector_x) numa_free (vector_x, sizeof (ValueT) * csr_matrix.num_cols );
654
+ if (vector_y_in) numa_free (vector_y_in, sizeof (ValueT) * csr_matrix.num_rows );
655
+ if (reference_vector_y_out) numa_free (reference_vector_y_out, sizeof (ValueT) * csr_matrix.num_rows );
656
+ if (vector_y_out) numa_free (vector_y_out, sizeof (ValueT) * csr_matrix.num_rows );
657
+ }
658
+ else
659
+ {
660
+ if (vector_x) mkl_free (vector_x);
661
+ if (vector_y_in) mkl_free (vector_y_in);
662
+ if (reference_vector_y_out) mkl_free (reference_vector_y_out);
663
+ if (vector_y_out) mkl_free (vector_y_out);
664
+ }
658
665
659
666
}
660
667
0 commit comments