Skip to content

Commit ddeb557

Browse files
peleshkswirydo
and
kswirydo
authored
Unit tests for Gram-Schmidt methods on CPU backend (#132)
* GS unit tests for low-synch cpu version * Add HIP tests for Gram-Schmidt methods. --------- Co-authored-by: kswirydo <[email protected]>
1 parent afefdd9 commit ddeb557

File tree

7 files changed

+70
-31
lines changed

7 files changed

+70
-31
lines changed

resolve/GramSchmidt.cpp

+11-5
Original file line numberDiff line numberDiff line change
@@ -197,13 +197,14 @@ namespace ReSolve
197197
vec_w_->setData(V->getVectorData(i + 1, memspace), memspace);
198198
vec_rv_->setCurrentSize(i + 1);
199199

200-
vector_handler_->massDot2Vec(n, V, i, vec_v_, vec_rv_, memspace);
200+
// vector_handler_->massDot2Vec(n, V, i + 1, vec_v_, vec_rv_, memspace);
201+
vector_handler_->massDot2Vec(n, V, i + 1, vec_v_, vec_rv_, memspace);
201202
vec_rv_->setDataUpdated(memspace);
202203
vec_rv_->copyData(memspace, memory::HOST);
203204

204205
vec_rv_->deepCopyVectorData(&h_L_[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST);
205206
h_rv_ = vec_rv_->getVectorData(1, memory::HOST);
206-
207+
207208
for(int j=0; j<=i; ++j) {
208209
H[ idxmap(i, j, num_vecs_ + 1) ] = 0.0;
209210
}
@@ -218,7 +219,7 @@ namespace ReSolve
218219
} // for j
219220
vec_Hcolumn_->setCurrentSize(i + 1);
220221
vec_Hcolumn_->update(&H[ idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memspace);
221-
vector_handler_->massAxpy(n, vec_Hcolumn_, i, V, vec_w_, memspace);
222+
vector_handler_->massAxpy(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace);
222223

223224
// normalize (second synch)
224225
t = vector_handler_->dot(vec_w_, vec_w_, memspace);
@@ -228,6 +229,11 @@ namespace ReSolve
228229
if(fabs(t) > EPSILON) {
229230
t = 1.0 / t;
230231
vector_handler_->scal(&t, vec_w_, memspace);
232+
for (int ii=0; ii<=i; ++ii)
233+
{
234+
vec_v_->setData(V->getVectorData(ii, memspace), memspace);
235+
vec_w_->setData(V->getVectorData(i + 1, memspace), memspace);
236+
}
231237
} else {
232238
assert(0 && "Iterative refinement failed, Krylov vector with ZERO norm\n");
233239
return -1;
@@ -240,7 +246,7 @@ namespace ReSolve
240246
vec_w_->setData(V->getVectorData(i + 1, memspace), memspace);
241247
vec_rv_->setCurrentSize(i + 1);
242248

243-
vector_handler_->massDot2Vec(n, V, i, vec_v_, vec_rv_, memspace);
249+
vector_handler_->massDot2Vec(n, V, i + 1, vec_v_, vec_rv_, memspace);
244250
vec_rv_->setDataUpdated(memspace);
245251
vec_rv_->copyData(memspace, memory::HOST);
246252

@@ -290,7 +296,7 @@ namespace ReSolve
290296
vec_Hcolumn_->setCurrentSize(i + 1);
291297
vec_Hcolumn_->update(&H[ idxmap(i, 0, num_vecs_ + 1)], memory::HOST, memspace);
292298

293-
vector_handler_->massAxpy(n, vec_Hcolumn_, i, V, vec_w_, memspace);
299+
vector_handler_->massAxpy(n, vec_Hcolumn_, i + 1, V, vec_w_, memspace);
294300
// normalize (second synch)
295301
t = vector_handler_->dot(vec_w_, vec_w_, memspace);
296302
//set the last entry in Hessenberg matrix

resolve/cuda/cudaKernels.cu

+2-2
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ namespace ReSolve {
407407
const real_type* mvec,
408408
real_type* result)
409409
{
410-
kernels::MassIPTwoVec<<<i + 1, 1024>>>(vec1, vec2, mvec, result, i + 1, n);
410+
kernels::MassIPTwoVec<<<i, 1024>>>(vec1, vec2, mvec, result, i, n);
411411
}
412412

413413
/**
@@ -421,7 +421,7 @@ namespace ReSolve {
421421
*/
422422
void mass_axpy(index_type n, index_type i, const real_type* x, real_type* y, const real_type* alpha)
423423
{
424-
kernels::massAxpy3<<<(n + 384 - 1) / 384, 384>>>(n, i + 1, x, y, alpha);
424+
kernels::massAxpy3<<<(n + 384 - 1) / 384, 384>>>(n, i, x, y, alpha);
425425
}
426426

427427
/**

resolve/hip/hipKernels.hip

+2-2
Original file line numberDiff line numberDiff line change
@@ -524,15 +524,15 @@ namespace ReSolve {
524524
real_type* result)
525525
{
526526
hipLaunchKernelGGL(kernels::MassIPTwoVec_kernel,
527-
dim3(i + 1),
527+
dim3(i),
528528
dim3(1024),
529529
0,
530530
0,
531531
vec1,
532532
vec2,
533533
mvec,
534534
result,
535-
i + 1,
535+
i,
536536
n);
537537
}
538538

resolve/vector/VectorHandlerCpu.cpp

+20-9
Original file line numberDiff line numberDiff line change
@@ -220,21 +220,32 @@ namespace ReSolve {
220220
*/
221221
void VectorHandlerCpu::massDot2Vec(index_type size,
222222
vector::Vector* V,
223-
index_type k,
223+
index_type q,
224224
vector::Vector* x,
225225
vector::Vector* res)
226226
{
227227
real_type* res_data = res->getData(memory::HOST);
228-
real_type* x_data = x->getData(memory::HOST);
229-
real_type* V_data = V->getData(memory::HOST);
230-
index_type i, j;
228+
real_type* x_data = x->getData(memory::HOST);
229+
real_type* V_data = V->getData(memory::HOST);
231230

232-
for (i = 0; i < k; ++i) {
231+
real_type c0 = 0.0;
232+
real_type cq = 0.0;
233+
234+
for (index_type i = 0; i < q; ++i) {
233235
res_data[i] = 0.0;
234-
res_data[i + k] = 0.0;
235-
for (j = 0; j < size; ++j) {
236-
res_data[i] += V_data[i * size + j] * x_data[j];
237-
res_data[i + k] += V_data[i * size + j] * x_data[j + size];
236+
res_data[i + q] = 0.0;
237+
238+
// Make sure we don't accumulate round-off errors
239+
for (index_type j = 0; j < size; ++j) {
240+
real_type y0 = (V_data[i * size + j] * x_data[j]) - c0;
241+
real_type yq = (V_data[i * size + j] * x_data[j + size]) - cq;
242+
real_type t0 = res_data[i] + y0;
243+
real_type tq = res_data[i + q] + yq;
244+
c0 = (t0 - res_data[i] ) - y0;
245+
cq = (tq - res_data[i + q]) - yq;
246+
247+
res_data[i] = t0;
248+
res_data[i + q] = tq;
238249
}
239250
}
240251
}

resolve/vector/VectorHandlerCuda.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -206,12 +206,12 @@ namespace ReSolve {
206206
CUBLAS_OP_N,
207207
size, // m
208208
1, // n
209-
k + 1, // k
209+
k, // k
210210
&MINUSONE, // alpha
211211
x->getData(memory::DEVICE), // A
212212
size, // lda
213213
alpha->getData(memory::DEVICE), // B
214-
k + 1, // ldb
214+
k, // ldb
215215
&ONE,
216216
y->getData(memory::DEVICE), // c
217217
size); // ldc
@@ -243,7 +243,7 @@ namespace ReSolve {
243243
cublasDgemm(handle_cublas,
244244
CUBLAS_OP_T,
245245
CUBLAS_OP_N,
246-
k + 1, //m
246+
k, //m
247247
2, //n
248248
size, //k
249249
&ONE, //alpha
@@ -253,7 +253,7 @@ namespace ReSolve {
253253
size, //ldb
254254
&ZERO,
255255
res->getData(memory::DEVICE), //c
256-
k + 1); //ldc
256+
k); //ldc
257257
}
258258
}
259259

tests/unit/vector/GramSchmidtTests.hpp

+6-9
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,10 @@ namespace ReSolve {
2626
TestOutcome GramSchmidtConstructor()
2727
{
2828
TestStatus status;
29-
// status.skipTest();
30-
31-
// GramSchmidt gs1;
32-
// status *= (gs1.getVariant() == GramSchmidt::mgs);
33-
// status *= (gs1.getL() == nullptr);
34-
// status *= !gs1.isSetupComplete();
3529

3630
VectorHandler vh;
3731
GramSchmidt gs2(&vh, GramSchmidt::mgs_pm);
3832
status *= (gs2.getVariant() == GramSchmidt::mgs_pm);
39-
// status *= (gs1.getL() == nullptr);
40-
// status *= !gs1.isSetupComplete();
4133

4234
return status.report(__func__);
4335
}
@@ -140,6 +132,12 @@ namespace ReSolve {
140132
LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA();
141133
workspace->initializeHandles();
142134
return new VectorHandler(workspace);
135+
#endif
136+
#ifdef RESOLVE_USE_HIP
137+
} else if (memspace_ == "hip") {
138+
LinAlgWorkspaceHIP* workspace = new LinAlgWorkspaceHIP();
139+
workspace->initializeHandles();
140+
return new VectorHandler(workspace);
143141
#endif
144142
} else {
145143
std::cout << "ReSolve not built with support for memory space " << memspace_ << "\n";
@@ -167,7 +165,6 @@ namespace ReSolve {
167165
a->update(x->getVectorData(i, ms), ms, memory::HOST);
168166
b->update(x->getVectorData(j, ms), ms, memory::HOST);
169167
ip = handler->dot(a, b, memory::HOST);
170-
171168
if ( (i != j) && (abs(ip) > 1e-14)) {
172169
status = false;
173170
std::cout << "Vectors " << i << " and " << j << " are not orthogonal!"

tests/unit/vector/runGramSchmidtTests.cpp

+25
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,30 @@ int main(int, char**)
2121
}
2222
#endif
2323

24+
#ifdef RESOLVE_USE_HIP
25+
{
26+
std::cout << "Running tests with HIP backend:\n";
27+
ReSolve::tests::GramSchmidtTests test("hip");
28+
29+
result += test.GramSchmidtConstructor();
30+
result += test.orthogonalize(5000, ReSolve::GramSchmidt::mgs);
31+
result += test.orthogonalize(5000, ReSolve::GramSchmidt::cgs2);
32+
result += test.orthogonalize(5000, ReSolve::GramSchmidt::mgs_two_synch);
33+
result += test.orthogonalize(5000, ReSolve::GramSchmidt::mgs_pm);
34+
std::cout << "\n";
35+
}
36+
#endif
37+
38+
{
39+
std::cout << "Running tests on the CPU:\n";
40+
ReSolve::tests::GramSchmidtTests test("cpu");
41+
42+
result += test.GramSchmidtConstructor();
43+
result += test.orthogonalize(5000, ReSolve::GramSchmidt::mgs);
44+
result += test.orthogonalize(5000, ReSolve::GramSchmidt::cgs2);
45+
result += test.orthogonalize(5000, ReSolve::GramSchmidt::mgs_two_synch);
46+
result += test.orthogonalize(5000, ReSolve::GramSchmidt::mgs_pm);
47+
std::cout << "\n";
48+
}
2449
return result.summary();
2550
}

0 commit comments

Comments
 (0)