|
| 1 | + |
| 2 | +//@HEADER |
| 3 | +// ************************************************************************ |
| 4 | +// |
| 5 | +// HPCCG: Simple Conjugate Gradient Benchmark Code |
| 6 | +// Copyright (2006) Sandia Corporation |
| 7 | +// |
| 8 | +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive |
| 9 | +// license for use of this work by or on behalf of the U.S. Government. |
| 10 | +// |
| 11 | +// This library is free software; you can redistribute it and/or modify |
| 12 | +// it under the terms of the GNU Lesser General Public License as |
| 13 | +// published by the Free Software Foundation; either version 2.1 of the |
| 14 | +// License, or (at your option) any later version. |
| 15 | +// |
| 16 | +// This library is distributed in the hope that it will be useful, but |
| 17 | +// WITHOUT ANY WARRANTY; without even the implied warranty of |
| 18 | +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 19 | +// Lesser General Public License for more details. |
| 20 | +// |
| 21 | +// You should have received a copy of the GNU Lesser General Public |
| 22 | +// License along with this library; if not, write to the Free Software |
| 23 | +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |
| 24 | +// USA |
| 25 | +// Questions? Contact Michael A. Heroux ([email protected]) |
| 26 | +// |
| 27 | +// ************************************************************************ |
| 28 | +//@HEADER |
| 29 | + |
| 30 | +///////////////////////////////////////////////////////////////////////// |
| 31 | + |
| 32 | +// Routine to compute an approximate solution to Ax = b where: |
| 33 | + |
| 34 | +// A - known matrix stored as an HPC_Sparse_Matrix struct |
| 35 | + |
| 36 | +// b - known right hand side vector |
| 37 | + |
| 38 | +// x - On entry is initial guess, on exit new approximate solution |
| 39 | + |
| 40 | +// max_iter - Maximum number of iterations to perform, even if |
| 41 | +// tolerance is not met. |
| 42 | + |
| 43 | +// tolerance - Stop and assert convergence if norm of residual is <= |
| 44 | +// to tolerance. |
| 45 | + |
| 46 | +// niters - On output, the number of iterations actually performed. |
| 47 | + |
| 48 | +///////////////////////////////////////////////////////////////////////// |
| 49 | + |
| 50 | +#include <iostream> |
| 51 | +#include <cstdio> |
| 52 | +#include <cmath> |
| 53 | +#include "mytimer.hpp" |
| 54 | +#include "HPCCG.hpp" |
| 55 | + |
| 56 | +#define TICK() t0 = mytimer() // Use TICK and TOCK to time a code section |
| 57 | +#define TOCK(t) t += mytimer() - t0 |
| 58 | +int HPCCG(HPC_Sparse_Matrix * A, |
| 59 | + const double * const b, double * const x, |
| 60 | + const int max_iter, const double tolerance, int &niters, double & normr, |
| 61 | + double * times) |
| 62 | + |
| 63 | +{ |
| 64 | + double t_begin = mytimer(); // Start timing right away |
| 65 | + |
| 66 | + double t0 = 0.0, t1 = 0.0, t2 = 0.0, t3 = 0.0, t4 = 0.0; |
| 67 | +#ifdef USING_MPI |
| 68 | + double t5 = 0.0; |
| 69 | +#endif |
| 70 | + int nrow = A->local_nrow; |
| 71 | + int ncol = A->local_ncol; |
| 72 | + |
| 73 | + double * r = new double [nrow]; |
| 74 | + double * p = new double [ncol]; // In parallel case, A is rectangular |
| 75 | + double * Ap = new double [nrow]; |
| 76 | + |
| 77 | + normr = 0.0; |
| 78 | + double rtrans = 0.0; |
| 79 | + double oldrtrans = 0.0; |
| 80 | + |
| 81 | +#ifdef USING_MPI |
| 82 | + int size, rank; // Number of MPI processes, My process ID |
| 83 | + MPI_Comm_size(MPI_COMM_WORLD, &size); |
| 84 | + MPI_Comm_rank(MPI_COMM_WORLD, &rank); |
| 85 | +#else |
| 86 | + //int size = 1; // Serial case (not using MPI) |
| 87 | + int rank = 0; |
| 88 | +#endif |
| 89 | + |
| 90 | + int print_freq = max_iter/10; |
| 91 | + if (print_freq>50) print_freq=50; |
| 92 | + if (print_freq<1) print_freq=1; |
| 93 | + |
| 94 | + // p is of length ncols, copy x to p for sparse MV operation |
| 95 | + TICK(); waxpby(nrow, 1.0, x, 0.0, x, p); TOCK(t2); |
| 96 | +#ifdef USING_MPI |
| 97 | + TICK(); exchange_externals(A,p); TOCK(t5); |
| 98 | +#endif |
| 99 | + TICK(); HPC_sparsemv(A, p, Ap); TOCK(t3); |
| 100 | + TICK(); waxpby(nrow, 1.0, b, -1.0, Ap, r); TOCK(t2); |
| 101 | + TICK(); ddot(nrow, r, r, &rtrans, t4); TOCK(t1); |
| 102 | + normr = sqrt(rtrans); |
| 103 | + |
| 104 | + if (rank==0) printf("Initial Residual = %g\n", normr); |
| 105 | + |
| 106 | + for(int k=1; k<max_iter && normr > tolerance; k++ ) |
| 107 | + { |
| 108 | + if (k == 1) |
| 109 | + { |
| 110 | + TICK(); waxpby(nrow, 1.0, r, 0.0, r, p); TOCK(t2); |
| 111 | + } |
| 112 | + else |
| 113 | + { |
| 114 | + oldrtrans = rtrans; |
| 115 | + TICK(); ddot (nrow, r, r, &rtrans, t4); TOCK(t1);// 2*nrow ops |
| 116 | + double beta = rtrans/oldrtrans; |
| 117 | + TICK(); waxpby (nrow, 1.0, r, beta, p, p); TOCK(t2);// 2*nrow ops |
| 118 | + } |
| 119 | + normr = sqrt(rtrans); |
| 120 | + if (rank==0 && (k%print_freq == 0 || k+1 == max_iter)) |
| 121 | + printf("Iteration = %i Residual = %g\n", k, normr); |
| 122 | + |
| 123 | + |
| 124 | +#ifdef USING_MPI |
| 125 | + TICK(); exchange_externals(A,p); TOCK(t5); |
| 126 | +#endif |
| 127 | + TICK(); HPC_sparsemv(A, p, Ap); TOCK(t3); // 2*nnz ops |
| 128 | + double alpha = 0.0; |
| 129 | + TICK(); ddot(nrow, p, Ap, &alpha, t4); TOCK(t1); // 2*nrow ops |
| 130 | + alpha = rtrans/alpha; |
| 131 | + TICK(); waxpby(nrow, 1.0, x, alpha, p, x);// 2*nrow ops |
| 132 | + waxpby(nrow, 1.0, r, -alpha, Ap, r); TOCK(t2);// 2*nrow ops |
| 133 | + niters = k; |
| 134 | + } |
| 135 | + |
| 136 | + // Store times |
| 137 | + times[1] = t1; // ddot time |
| 138 | + times[2] = t2; // waxpby time |
| 139 | + times[3] = t3; // sparsemv time |
| 140 | + times[4] = t4; // AllReduce time |
| 141 | +#ifdef USING_MPI |
| 142 | + times[5] = t5; // exchange boundary time |
| 143 | +#endif |
| 144 | + delete [] p; |
| 145 | + delete [] Ap; |
| 146 | + delete [] r; |
| 147 | + times[0] = mytimer() - t_begin; // Total time. All done... |
| 148 | + return(0); |
| 149 | +} |
0 commit comments