Skip to content

Commit 104ce02

Browse files
committed
merge in 2.16 changes from devel repo
1 parent 2e3a399 commit 104ce02

File tree

79 files changed

+1429
-934
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+1429
-934
lines changed

CHARM++/Stencil/Makefile

+25-5
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,27 @@
11
include ../../common/CHARM++.defs
22
##### User configurable options #####
3+
#uncomment any of the following flags (and change values) to change defaults
34

45
OPTFLAGS = $(DEFAULT_OPT_FLAGS)
56
#description: change above into something that is a decent optimization on your system
67

7-
#uncomment any of the following flags (and change values) to change defaults
8-
98
#RESTRICTFLAG = -DRESTRICT_KEYWORD
109
#description: the "restrict" keyword can be used on IA platforms to disambiguate
1110
# data accessed through pointers
1211

13-
RADIUSFLAG = -DRADIUS=2
12+
ifndef RADIUS
13+
RADIUS=2
14+
endif
1415
#description: default radius of filter to be applied is 2
1516

1617
DOUBLEFLAG = -DDOUBLE
1718
#description: default data type is single precision
1819

20+
ifndef LOOPGEN
21+
LOOPGEN=0
22+
endif
23+
#description: default is compact (non-expanded) form of loop body
24+
1925
STARFLAG = -DSTAR
2026
#description: default stencil is compact (dense, square)
2127

@@ -32,11 +38,25 @@ LIBPATHS =
3238
INCLUDEPATHS =
3339

3440
### End User configurable options ###
41+
LOOPGENFLAG= -DLOOPGEN=$(LOOPGEN)
42+
RADIUSFLAG = -DRADIUS=$(RADIUS)
3543

36-
TUNEFLAGS = $(RESTRICTFLAG) $(DEBUGFLAG) $(USERFLAGS) \
37-
$(DOUBLEFLAG) $(RADIUSFLAG) $(STARFLAG)
44+
TUNEFLAGS = $(RESTRICTFLAG) $(DEBUGFLAG) $(USERFLAGS) \
45+
$(DOUBLEFLAG) $(RADIUSFLAG) $(LOOPGENFLAG) \
46+
$(STARFLAG)
3847
PROGRAM = stencil
3948
OBJS = $(PROGRAM).o $(COMOBJS)
4049

4150
include ../../common/make.common
4251

52+
$(PROGRAM).o: loop_body_star.incl
53+
54+
loop_body_star.incl:
55+
@echo "#########################################################################"
56+
@echo "##### No file loop_body_star.incl -- invoking loop_gen to create it #####"
57+
@echo "#########################################################################"
58+
../../common/Stencil/loop_gen $(RADIUS) 1
59+
60+
veryclean:
61+
@rm -f loop_body_star.incl
62+
make clean

CHARM++/Stencil/stencil.C

+59-55
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
#define EPSILON 1.e-8
55
#define COEFX 1.0
66
#define COEFY 1.0
7-
#define TINDEX(i,j) (i+RADIUS+(width+2*RADIUS)*(j+RADIUS))
8-
#define IN(i,j) in[TINDEX(i,j)]
9-
#define TNINDEX(i,j) (i+width*(j))
10-
#define OUT(i,j) out[TNINDEX(i,j)]
7+
#define INDEXIN(i,j) (i+RADIUS+(width+2*RADIUS)*(j+RADIUS))
8+
#define IN(i,j) in[INDEXIN(i-istart,j-jstart)]
9+
#define INDEXOUT(i,j) (i+width*(j))
10+
#define OUT(i,j) out[INDEXOUT(i-istart,j-jstart)]
1111
#define WEIGHT(i,j) weight[i+RADIUS+(j+RADIUS)*(2*RADIUS+1)]
1212
#define LEFT 1111
1313
#define RIGHT 2222
@@ -45,6 +45,9 @@ public:
4545
int num_chares, min_size;
4646
long nsquare;
4747

48+
CkPrintf("Parallel Research Kernels Version %s\n", PRKVERSION);
49+
CkPrintf("Charm++ stencil execution on 2D grid\n");
50+
4851
if (m->argc != 4) {
4952
CkPrintf("%s <maxiterations> <grid_size> <overdecomposition factor>\n", m->argv[0]);
5053
CkExit();
@@ -99,8 +102,6 @@ public:
99102
}
100103

101104
// print info
102-
CkPrintf("Parallel Research Kernels Version %s\n", PRKVERSION);
103-
CkPrintf("Charm++ stencil execution on 2D grid\n");
104105
CkPrintf("Number of Charm++ PEs = %d\n", CkNumPes());
105106
CkPrintf("Overdecomposition = %d\n", overdecomposition);
106107
CkPrintf("Grid size = %d\n", n);
@@ -112,6 +113,11 @@ public:
112113
CkPrintf("Type of stencil = compact\n");
113114
CkPrintf("ERROR: Compact stencil not (yet) supported\n");
114115
CkExit();
116+
#endif
117+
#if LOOPGEN
118+
CkPrintf("Script used to expand stencil loop body\n");
119+
#else
120+
CkPrintf("Compact representation of stencil loop body\n");
115121
#endif
116122
CkPrintf("Number of iterations = %d\n", maxiterations);
117123

@@ -220,10 +226,10 @@ public:
220226
if (thisIndex.y == num_chare_rows-1) {max_messages_due--; }
221227
messages_due = max_messages_due;
222228

223-
for(j=jstart,jloc=0;j<=jend;j++,jloc++){
224-
for(i=istart,iloc=0;i<=iend;i++,iloc++){
225-
IN(iloc,jloc) = COEFX*i+COEFY*j;
226-
OUT(iloc,jloc) = 0.0;
229+
for(j=jstart;j<=jend;j++){
230+
for(i=istart;i<=iend;i++){
231+
IN(i,j) = COEFX*i+COEFY*j;
232+
OUT(i,j) = 0.0;
227233
}
228234
}
229235
}
@@ -240,85 +246,85 @@ public:
240246
// Perform one iteration of work
241247
// The first step is to send the local state to the neighbors
242248
void begin_iteration(void) {
243-
int k;
249+
int kk;
244250

245-
// Send my left edge
246-
if (thisIndex.x > 0) {
247-
ghostMsg *msg = new (height*RADIUS) ghostMsg(LEFT, height);
251+
// Send my top edge
252+
if (thisIndex.y < num_chare_rows-1) {
253+
ghostMsg *msg = new (width*RADIUS) ghostMsg(TOP, width);
248254
if (!msg) {
249255
CkPrintf("Could not allocate space for message\n");
250256
CkExit();
251257
}
252258
CkSetRefNum(msg, iterations);
253-
for(int j=0, k=0;j<height;++j) for (int i=0; i<RADIUS; i++)
254-
msg->edge[k++] = IN(i,j);
255-
thisProxy(thisIndex.x-1, thisIndex.y).receiveGhosts(msg);
259+
for (int j=jend-RADIUS+1, kk=0; j<=jend; j++) for(int i=istart;i<=iend;i++)
260+
msg->edge[kk++] = IN(i,j);
261+
thisProxy(thisIndex.x, thisIndex.y+1).receiveGhosts(msg);
256262
}
257263

258-
// Send my right edge
259-
if (thisIndex.x < num_chare_cols-1) {
260-
ghostMsg *msg = new (height*RADIUS) ghostMsg(RIGHT, height);
264+
// Send my bottom edge
265+
if (thisIndex.y > 0) {
266+
ghostMsg *msg = new (width*RADIUS) ghostMsg(BOTTOM, width);
261267
if (!msg) {
262268
CkPrintf("Could not allocate space for message\n");
263269
CkExit();
264270
}
265271
CkSetRefNum(msg, iterations);
266-
for(int j=0, k=0;j<height;++j) for (int i=0; i<RADIUS; i++)
267-
msg->edge[k++] = IN(width-RADIUS+i,j);
268-
thisProxy(thisIndex.x+1, thisIndex.y).receiveGhosts(msg);
272+
for (int j=jstart, kk=0; j<=jstart+RADIUS-1; j++) for(int i=istart;i<=iend;i++)
273+
msg->edge[kk++] = IN(i,j);
274+
thisProxy(thisIndex.x, thisIndex.y-1).receiveGhosts(msg);
269275
}
270276

271-
// Send my bottom edge
272-
if (thisIndex.y > 0) {
273-
ghostMsg *msg = new (width*RADIUS) ghostMsg(BOTTOM, width);
277+
// Send my right edge
278+
if (thisIndex.x < num_chare_cols-1) {
279+
ghostMsg *msg = new (height*RADIUS) ghostMsg(RIGHT, height);
274280
if (!msg) {
275281
CkPrintf("Could not allocate space for message\n");
276282
CkExit();
277283
}
278284
CkSetRefNum(msg, iterations);
279-
for (int j=0, k=0; j<RADIUS; j++) for(int i=0;i<width;i++)
280-
msg->edge[k++] = IN(i,j);
281-
thisProxy(thisIndex.x, thisIndex.y-1).receiveGhosts(msg);
285+
for(int j=jstart, kk=0;j<=jend;j++) for (int i=iend-RADIUS+1; i<=iend; i++)
286+
msg->edge[kk++] = IN(i,j);
287+
thisProxy(thisIndex.x+1, thisIndex.y).receiveGhosts(msg);
282288
}
283289

284-
// Send my top edge
285-
if (thisIndex.y < num_chare_rows-1) {
286-
ghostMsg *msg = new (width*RADIUS) ghostMsg(TOP, width);
290+
// Send my left edge
291+
if (thisIndex.x > 0) {
292+
ghostMsg *msg = new (height*RADIUS) ghostMsg(LEFT, height);
287293
if (!msg) {
288294
CkPrintf("Could not allocate space for message\n");
289295
CkExit();
290296
}
291297
CkSetRefNum(msg, iterations);
292-
for (int j=0, k=0; j<RADIUS; j++) for(int i=0;i<width;i++)
293-
msg->edge[k++] = IN(i,height-RADIUS+j);
294-
thisProxy(thisIndex.x, thisIndex.y+1).receiveGhosts(msg);
298+
for(int j=jstart, kk=0;j<=jend;j++) for (int i=istart; i<=istart+RADIUS-1; i++)
299+
msg->edge[kk++] = IN(i,j);
300+
thisProxy(thisIndex.x-1, thisIndex.y).receiveGhosts(msg);
295301
}
296302
}
297303

298304
void processGhosts(ghostMsg *msg) {
299-
int k; k=0;
305+
int kk=0;
300306
int size = msg->size;
301307

302308
switch(msg->dir) {
303309
case LEFT:
304-
for(int j=0;j<size;++j) for (int i=0; i<RADIUS; i++)
305-
IN(width+i,j) = msg->edge[k++];
310+
for(int j=jstart;j<=jend;j++) for (int i=iend+1; i<=iend+RADIUS; i++)
311+
IN(i,j) = msg->edge[kk++];
306312
break;
307313

308314
case RIGHT:
309-
for(int j=0;j<size;++j) for (int i=0; i<RADIUS; i++)
310-
IN(-RADIUS+i,j) = msg->edge[k++];
315+
for(int j=jstart;j<=jend;j++) for (int i=istart-RADIUS; i<=istart-1; i++)
316+
IN(i,j) = msg->edge[kk++];
311317
break;
312318

313319
case BOTTOM:
314-
for (int j=0; j<RADIUS; j++) for(int i=0;i<size;++i){
315-
IN(i,height+j) = msg->edge[k++];
320+
for (int j=jend+1; j<=jend+RADIUS; j++) for(int i=istart;i<=iend;i++){
321+
IN(i,j) = msg->edge[kk++];
316322
}
317323
break;
318324

319325
case TOP:
320-
for (int j=0; j<RADIUS; j++) for(int i=0;i<size;++i)
321-
IN(i,-RADIUS+j) = msg->edge[k++];
326+
for (int j=jstart-RADIUS; j<=jstart-1; j++) for(int i=istart;i<=iend;i++)
327+
IN(i,j) = msg->edge[kk++];
322328
break;
323329

324330
default: CkPrintf("ERROR: invalid direction\n");
@@ -330,19 +336,17 @@ public:
330336
void compute() {
331337
double * RESTRICT in = this->in;
332338
double * RESTRICT out = this->out;
339+
int ii, jj;
333340

334341
for (int j=MAX(jstart,RADIUS); j<=MIN(n-1-RADIUS,jend); j++) {
335342
for (int i=MAX(istart,RADIUS); i<=MIN(n-1-RADIUS,iend); i++) {
336-
337-
for (int jj=-RADIUS; jj<=RADIUS; jj++) {
338-
OUT(i-istart,j-jstart) += WEIGHT(0,jj)*IN(i-istart,j-jstart+jj);
339-
}
340-
for (int ii=-RADIUS; ii<0; ii++) {
341-
OUT(i-istart,j-jstart) += WEIGHT(ii,0)*IN(i-istart+ii,j-jstart);
342-
}
343-
for (int ii=1; ii<=RADIUS; ii++) {
344-
OUT(i-istart,j-jstart) += WEIGHT(ii,0)*IN(i-istart+ii,j-jstart);
345-
}
343+
#if LOOPGEN
344+
#include "loop_body_star.incl"
345+
#else
346+
for (jj=-RADIUS; jj<=RADIUS; jj++) OUT(i,j) += WEIGHT(0,jj)*IN(i,j+jj);
347+
for (ii=-RADIUS; ii<0; ii++) OUT(i,j) += WEIGHT(ii,0)*IN(i+ii,j);
348+
for (ii=1; ii<=RADIUS; ii++) OUT(i,j) += WEIGHT(ii,0)*IN(i+ii,j);
349+
#endif
346350
}
347351
}
348352
}
@@ -352,7 +356,7 @@ public:
352356
local_norm = 0.0;
353357
for (int j=MAX(jstart,RADIUS); j<=MIN(n-1-RADIUS,jend); j++) {
354358
for (int i=MAX(istart,RADIUS); i<=MIN(n-1-RADIUS,iend); i++) {
355-
local_norm += OUT(i-istart,j-jstart);
359+
local_norm += OUT(i,j);
356360
}
357361
}
358362
}

CHARM++/Synch_p2p/p2p.C

+6-5
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ public:
3636
CProxy_P2p array;
3737

3838
Main(CkArgMsg* cmdlinearg) {
39+
40+
CkPrintf("Parallel Research Kernels Version %s\n", PRKVERSION);
41+
CkPrintf("Charm++ pipeline execution on 2D grid\n");
42+
3943
if (cmdlinearg->argc != 5 && cmdlinearg->argc != 6 ) {
4044
CkPrintf("%s <#iterations> <grid_size x> <grid_size y> <overdecomposition factor> ",
4145
cmdlinearg->argv[0]);
@@ -85,8 +89,6 @@ public:
8589
}
8690

8791
// print info
88-
CkPrintf("Parallel Research Kernels Version %s\n", PRKVERSION);
89-
CkPrintf("Charm++ pipeline execution on 2D grid\n");
9092
CkPrintf("Number of Charm++ PEs = %d\n", CkNumPes());
9193
CkPrintf("Overdecomposition = %d\n", overdecomposition);
9294
CkPrintf("Grid sizes = %ld,%ld\n", m, n);
@@ -131,12 +133,12 @@ public:
131133
int iterations;
132134
double result;
133135
int offset, istart, iend, j; // global grid indices of strip
134-
int width;
136+
long width;
135137
double *vector;
136138

137139
// Constructor, initialize values
138140
P2p() {
139-
int i, iloc, leftover;
141+
long i, iloc, leftover;
140142

141143
/* compute amount of space required for input and solution arrays */
142144
width = m/num_chares;
@@ -203,7 +205,6 @@ public:
203205
CkSetRefNum(msg, j+iterations*(n-1));
204206
for (jj=0; jj<jjsize; jj++) {
205207
msg->gp[jj] = ARRAY(iend-istart,j+jj);
206-
// CkPrintf("Chare %d, send_msg->[%d]=%lf\n", thisIndex, jj, msg->gp[jj]);
207208
}
208209
thisProxy(thisIndex+1).receiveGhost(msg);
209210
}

0 commit comments

Comments
 (0)