Skip to content

Commit f308faa

Browse files
author
kbwheel
committed
qthread-ized hpccg
git-svn-id: https://qthreads.googlecode.com/svn/trunk@3626 9dbd018f-d1da-d342-4c11-58c73dfe904b
1 parent ef2d3f0 commit f308faa

30 files changed

+2853
-8
lines changed

COPYING

+6-4
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,17 @@ certain rights in this software.
2727
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
2828
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929

30-
The qthreads library contains parts of an earlier library (libtask, by Russ
31-
Cox) that requires the propagation of its copyright information. This software
32-
is in the fastcontext directory, namely the files:
30+
The qthreads library contains heavily modified parts of an earlier library
31+
(libtask, by Russ Cox) that requires the propagation of its copyright
32+
information. This software is in the fastcontext directory, namely the files:
3333

3434
asm.S
3535
power-ucontext.h
3636
386-ucontext.h
3737
context.c
3838
taskimpl.h
3939

40-
However, those files have been extensively modified.
40+
However, as noted above, those files have been extensively modified, and do not resemble the original very much.
4141

4242
The rest of the qthread library, which will function on any Unix system with
4343
the makecontext()-family of functions, can stand alone. Those files are simply
@@ -50,6 +50,8 @@ used only by the UTS implementation in test/benchmarks/uts. The license for the
5050
random number generator code is in the LICENSE file in the
5151
test/benchmarks/uts/rng directory.
5252

53+
The benchmark suite ALSO contains some code from the Sandia Mantevo project, contained in the test/benchmarks/mantevo directory. The licenses for those codes reside in their respective directories. For example, the HPCCG code license is in test/benchmarks/mantevo/hpccg/README.
54+
5355
~~~~~~~~~~~~~
5456

5557
The libtask copyright is as follows:

configure.in

+11-4
Original file line numberDiff line numberDiff line change
@@ -1042,10 +1042,15 @@ AS_IF([test "x$enable_multinode" = "xyes"],
10421042
esac],
10431043
[enable_multinode="no"])
10441044

1045-
AS_IF([test "x$enable_lf_febs" == "xyes"],
1046-
[enable_lf_febs=yes
1047-
AC_DEFINE([LOCK_FREE_FEBS], [1], [Define to use a lock-free hash table for FEB metadata.])],
1045+
AS_IF([test "x$enable_lf_febs" = "xyes"],
1046+
[AC_DEFINE([LOCK_FREE_FEBS], [1], [Define to use a lock-free hash table for FEB metadata.])],
10481047
[enable_lf_febs=no])
1048+
dnl AS_IF([test "x$enable_lf_febs" != "xno"],
1049+
dnl [AS_IF([test "x$enable_hardware_atomics" = "xyes" -a "x$qthread_cv_atomic_CASptr" = "xyes"],
1050+
dnl [enable_lf_febs=yes
1051+
dnl AC_DEFINE([LOCK_FREE_FEBS], [1], [Define to use a lock-free hash table for FEB metadata.])],
1052+
dnl [enable_lf_febs=no])],
1053+
dnl [enable_lf_febs=no])
10491054

10501055
## --------------- ##
10511056
## Output and done ##
@@ -1097,9 +1102,11 @@ AC_CONFIG_FILES([Makefile
10971102
test/features/Makefile
10981103
test/stress/Makefile
10991104
test/benchmarks/Makefile
1105+
test/benchmarks/mantevo/Makefile
1106+
test/benchmarks/mantevo/hpccg/Makefile
11001107
test/benchmarks/rose_bots/Makefile])
11011108
dnl Generate the multinode test Makefile only when multinode support was
dnl requested.  The shell variable must be written with its leading "$";
dnl without it the test compares two fixed strings and can never succeed.
dnl The "x" prefix matches the form of the enable_multinode check made
dnl earlier in this file.
AS_IF([test "x$enable_multinode" = "xyes"],
      [AC_CONFIG_FILES([test/multinode/Makefile])])
11031110
AC_OUTPUT
11041111

11051112
case "$qthread_cv_stack_size" in

test/benchmarks/Makefile.am

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
SUBDIRS = mantevo
12

23
.PHONY: buildall buildtests buildextra benchmarks
34

@@ -135,9 +136,11 @@ buildall: $(benchmarks)
135136
$(MAKE) -C rose_bots buildall
136137
buildextra: $(benchmarks)
137138
$(MAKE) -C rose_bots buildextra
139+
$(MAKE) -C mantevo buildextra
138140
else
139141
buildall: $(benchmarks)
140142
buildextra: $(benchmarks)
143+
$(MAKE) -C mantevo buildextra
141144
endif
142145

143146
benchmarks: buildextra

test/benchmarks/mantevo/Makefile.am

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Automake input for the Mantevo benchmark directory.
# SUBDIRS lists the subdirectories the standard recursive targets descend into.
SUBDIRS = hpccg
2+
3+
.PHONY: buildall buildextra benchmarks
4+
5+
# "benchmarks" and "buildall" both simply depend on "buildextra".
benchmarks: buildextra
6+
buildall: buildextra
7+
8+
# NOTE(review): this variable is not referenced anywhere else in this file.
mantevo_benchmarks = hpccg
9+
10+
# Build the "hpccg" target inside the hpccg subdirectory.
buildextra:
11+
$(MAKE) -C hpccg hpccg
12+
13+
# Header search path and the libtool library that is placed in LDADD below.
INCLUDES = -I$(top_srcdir)/include
14+
qthreadlib = $(top_builddir)/src/libqthread.la
15+
16+
LDADD = $(qthreadlib)
17+
18+
# Rule for the library itself: run make for libqthread.la in the top-level
# src directory.
$(qthreadlib):
19+
$(MAKE) -C $(top_builddir)/src libqthread.la
+149
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
2+
//@HEADER
3+
// ************************************************************************
4+
//
5+
// HPCCG: Simple Conjugate Gradient Benchmark Code
6+
// Copyright (2006) Sandia Corporation
7+
//
8+
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
9+
// license for use of this work by or on behalf of the U.S. Government.
10+
//
11+
// This library is free software; you can redistribute it and/or modify
12+
// it under the terms of the GNU Lesser General Public License as
13+
// published by the Free Software Foundation; either version 2.1 of the
14+
// License, or (at your option) any later version.
15+
//
16+
// This library is distributed in the hope that it will be useful, but
17+
// WITHOUT ANY WARRANTY; without even the implied warranty of
18+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19+
// Lesser General Public License for more details.
20+
//
21+
// You should have received a copy of the GNU Lesser General Public
22+
// License along with this library; if not, write to the Free Software
23+
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
24+
// USA
25+
// Questions? Contact Michael A. Heroux ([email protected])
26+
//
27+
// ************************************************************************
28+
//@HEADER
29+
30+
/////////////////////////////////////////////////////////////////////////
31+
32+
// Routine to compute an approximate solution to Ax = b where:
33+
34+
// A - known matrix stored as an HPC_Sparse_Matrix struct
35+
36+
// b - known right hand side vector
37+
38+
// x - On entry is initial guess, on exit new approximate solution
39+
40+
// max_iter - Maximum number of iterations to perform, even if
41+
// tolerance is not met.
42+
43+
// tolerance - Stop and assert convergence if norm of residual is
44+
// <= tolerance.
45+
46+
// niters - On output, the number of iterations actually performed.
47+
48+
/////////////////////////////////////////////////////////////////////////
49+
50+
#include <iostream>
51+
#include <cstdio>
52+
#include <cmath>
53+
#include "mytimer.hpp"
54+
#include "HPCCG.hpp"
55+
56+
#define TICK() t0 = mytimer() // Use TICK and TOCK to time a code section
57+
#define TOCK(t) t += mytimer() - t0
58+
// Compute an approximate solution to A x = b with the conjugate gradient
// iteration.
//
//   A         - known matrix; A->local_nrow and A->local_ncol size the work
//               vectors allocated below.
//   b         - known right hand side vector.
//   x         - initial guess on entry, new approximate solution on exit.
//   max_iter  - bound on the iteration counter: the loop starts at k = 1 and
//               runs while k < max_iter.
//   tolerance - the loop stops once the residual norm is <= tolerance.
//   niters    - on exit, the number of iterations actually performed; 0 when
//               the loop body never ran.
//   normr     - on exit, the residual norm as last evaluated (the initial
//               residual, or the value computed at the top of the final
//               iteration).
//   times     - output array: [0] total time, [1] ddot, [2] waxpby,
//               [3] sparsemv, [4] AllReduce, [5] boundary exchange.  Slot 5
//               is written only when USING_MPI is defined.
//
// Always returns 0.
int HPCCG(HPC_Sparse_Matrix * A,
          const double * const b, double * const x,
          const int max_iter, const double tolerance, int &niters, double & normr,
          double * times)
{
  double t_begin = mytimer();  // Start timing right away

  // t0 is the scratch start time used by TICK/TOCK; the remaining counters
  // accumulate the time spent in each kind of kernel.
  double t0 = 0.0, t1 = 0.0, t2 = 0.0, t3 = 0.0, t4 = 0.0;
#ifdef USING_MPI
  double t5 = 0.0;
#endif
  int nrow = A->local_nrow;
  int ncol = A->local_ncol;

  double * r = new double [nrow];
  double * p = new double [ncol]; // In parallel case, A is rectangular
  double * Ap = new double [nrow];

  normr = 0.0;
  // The loop below is the only other place niters is assigned.  Give it a
  // defined value up front so the caller never reads an unset count when the
  // loop does not execute (max_iter <= 1, or the initial residual already
  // meets the tolerance).
  niters = 0;
  double rtrans = 0.0;
  double oldrtrans = 0.0;

#ifdef USING_MPI
  int size, rank; // Number of MPI processes, My process ID
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#else
  //int size = 1; // Serial case (not using MPI)
  int rank = 0;
#endif

  // Print roughly ten progress lines, clamped to an interval of 1..50.
  int print_freq = max_iter/10;
  if (print_freq>50) print_freq=50;
  if (print_freq<1)  print_freq=1;

  // p is of length ncols, copy x to p for sparse MV operation
  TICK(); waxpby(nrow, 1.0, x, 0.0, x, p); TOCK(t2);
#ifdef USING_MPI
  TICK(); exchange_externals(A,p); TOCK(t5);
#endif
  // Initial residual: r = b - A*p, where p currently holds a copy of x.
  TICK(); HPC_sparsemv(A, p, Ap); TOCK(t3);
  TICK(); waxpby(nrow, 1.0, b, -1.0, Ap, r); TOCK(t2);
  TICK(); ddot(nrow, r, r, &rtrans, t4); TOCK(t1);
  normr = sqrt(rtrans);

  if (rank==0) printf("Initial Residual = %g\n", normr);

  for(int k=1; k<max_iter && normr > tolerance; k++ )
    {
      if (k == 1)
        {
          // First search direction is the residual itself: p = r.
          TICK(); waxpby(nrow, 1.0, r, 0.0, r, p); TOCK(t2);
        }
      else
        {
          oldrtrans = rtrans;
          TICK(); ddot (nrow, r, r, &rtrans, t4); TOCK(t1);// 2*nrow ops
          double beta = rtrans/oldrtrans;
          TICK(); waxpby (nrow, 1.0, r, beta, p, p);  TOCK(t2);// 2*nrow ops
        }
      normr = sqrt(rtrans);
      if (rank==0 && (k%print_freq == 0 || k+1 == max_iter))
        printf("Iteration = %i Residual = %g\n", k, normr);

#ifdef USING_MPI
      TICK(); exchange_externals(A,p); TOCK(t5);
#endif
      TICK(); HPC_sparsemv(A, p, Ap); TOCK(t3); // 2*nnz ops
      double alpha = 0.0;
      TICK(); ddot(nrow, p, Ap, &alpha, t4); TOCK(t1); // 2*nrow ops
      alpha = rtrans/alpha;
      // One TICK/TOCK pair times both updates: x += alpha*p, r -= alpha*Ap.
      TICK(); waxpby(nrow, 1.0, x, alpha, p, x);// 2*nrow ops
      waxpby(nrow, 1.0, r, -alpha, Ap, r);  TOCK(t2);// 2*nrow ops
      niters = k;
    }

  // Store times
  times[1] = t1; // ddot time
  times[2] = t2; // waxpby time
  times[3] = t3; // sparsemv time
  times[4] = t4; // AllReduce time
#ifdef USING_MPI
  times[5] = t5; // exchange boundary time
#endif
  delete [] p;
  delete [] Ap;
  delete [] r;
  times[0] = mytimer() - t_begin;  // Total time. All done...
  return(0);
}
+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
2+
//@HEADER
3+
// ************************************************************************
4+
//
5+
// HPCCG: Simple Conjugate Gradient Benchmark Code
6+
// Copyright (2006) Sandia Corporation
7+
//
8+
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
9+
// license for use of this work by or on behalf of the U.S. Government.
10+
//
11+
// This library is free software; you can redistribute it and/or modify
12+
// it under the terms of the GNU Lesser General Public License as
13+
// published by the Free Software Foundation; either version 2.1 of the
14+
// License, or (at your option) any later version.
15+
//
16+
// This library is distributed in the hope that it will be useful, but
17+
// WITHOUT ANY WARRANTY; without even the implied warranty of
18+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19+
// Lesser General Public License for more details.
20+
//
21+
// You should have received a copy of the GNU Lesser General Public
22+
// License along with this library; if not, write to the Free Software
23+
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
24+
// USA
25+
// Questions? Contact Michael A. Heroux ([email protected])
26+
//
27+
// ************************************************************************
28+
//@HEADER
29+
30+
#ifndef HPCCG_H
31+
#define HPCCG_H
32+
#include "HPC_sparsemv.hpp"
33+
#include "ddot.hpp"
34+
#include "waxpby.hpp"
35+
#include "HPC_Sparse_Matrix.hpp"
36+
37+
#ifdef USING_MPI
38+
#include "exchange_externals.hpp"
39+
#include <mpi.h> // If this routine is compiled with -DUSING_MPI
40+
// then include mpi.h
41+
#endif
42+
// Declaration of the conjugate gradient solver for A x = b.
//   A         - known matrix
//   b         - known right hand side vector
//   x         - initial guess on entry, approximate solution on exit
//   max_iter  - maximum number of iterations to perform
//   tolerance - residual norm at or below which iteration stops
//   niters    - output: number of iterations actually performed
//   normr     - output: residual norm
//   times     - output: array that receives the timing results
int HPCCG(HPC_Sparse_Matrix * A,
43+
const double * const b, double * const x,
44+
const int max_iter, const double tolerance, int & niters, double & normr, double * times);
45+
46+
// this function will compute the Conjugate Gradient...
47+
// A <=> Matrix
48+
// b <=> constant
49+
// xnot <=> initial guess
50+
// max_iter <=> how many times we iterate
51+
// tolerance <=> specifies how "good" of a value we would like
52+
// x <=> used for return value
53+
54+
// A is known
55+
// x is unknown vector
56+
// b is known vector
57+
// xnot = 0
58+
// niters is the number of iterations
59+
#endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
2+
//@HEADER
3+
// ************************************************************************
4+
//
5+
// HPCCG: Simple Conjugate Gradient Benchmark Code
6+
// Copyright (2006) Sandia Corporation
7+
//
8+
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
9+
// license for use of this work by or on behalf of the U.S. Government.
10+
//
11+
// This library is free software; you can redistribute it and/or modify
12+
// it under the terms of the GNU Lesser General Public License as
13+
// published by the Free Software Foundation; either version 2.1 of the
14+
// License, or (at your option) any later version.
15+
//
16+
// This library is distributed in the hope that it will be useful, but
17+
// WITHOUT ANY WARRANTY; without even the implied warranty of
18+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19+
// Lesser General Public License for more details.
20+
//
21+
// You should have received a copy of the GNU Lesser General Public
22+
// License along with this library; if not, write to the Free Software
23+
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
24+
// USA
25+
// Questions? Contact Michael A. Heroux ([email protected])
26+
//
27+
// ************************************************************************
28+
//@HEADER
29+
30+
#ifndef HPC_SPARSE_MATRIX_H
31+
#define HPC_SPARSE_MATRIX_H
32+
33+
// These constants are upper bounds that might need to be changed for
34+
// pathological matrices, e.g., those with nearly dense rows/columns.
35+
36+
// Fixed size limits.  max_num_neighbors is defined to equal
// max_num_messages.
const int max_external = 100000;
const int max_num_messages = 500;
const int max_num_neighbors = max_num_messages;


// Sparse matrix record shared by the HPCCG routines.  The struct tag and the
// HPC_Sparse_Matrix typedef name the same type.
// NOTE(review): the per-member descriptions below are read off the member
// names only; confirm against the code that fills the structure in.
typedef struct HPC_Sparse_Matrix_STRUCT {
  char *title;
  int start_row, stop_row;       // presumably the row range owned locally
  int total_nrow;
  long long total_nnz;           // wider type than the local count below
  int local_nrow, local_ncol;    // local_ncol must be defined in make_local_matrix
  int local_nnz;
  int *nnz_in_row;               // presumably one entry per local row
  double **ptr_to_vals_in_row;
  int **ptr_to_inds_in_row;
  double **ptr_to_diags;

#ifdef USING_MPI
  // Members that exist only in MPI builds.
  int num_external, num_send_neighbors;
  int *external_index;
  int *external_local_index;
  int total_to_be_sent;
  int *elements_to_send;
  int *neighbors;
  int *recv_length;
  int *send_length;
  double *send_buffer;

#endif
} HPC_Sparse_Matrix;
70+
#endif

0 commit comments

Comments
 (0)