4
4
#define EPSILON 1.e-8
5
5
#define COEFX 1.0
6
6
#define COEFY 1.0
7
- #define TINDEX (i ,j ) (i+RADIUS+(width+2*RADIUS)*(j+RADIUS))
8
- #define IN (i ,j ) in[TINDEX(i,j )]
9
- #define TNINDEX (i ,j ) (i+width*(j))
10
- #define OUT (i ,j ) out[TNINDEX(i,j )]
7
+ #define INDEXIN (i ,j ) (i+RADIUS+(width+2*RADIUS)*(j+RADIUS))
8
+ #define IN (i ,j ) in[INDEXIN(i-istart,j-jstart )]
9
+ #define INDEXOUT (i ,j ) (i+width*(j))
10
+ #define OUT (i ,j ) out[INDEXOUT(i-istart,j-jstart )]
11
11
#define WEIGHT (i ,j ) weight[i+RADIUS+(j+RADIUS)*(2*RADIUS+1)]
12
12
#define LEFT 1111
13
13
#define RIGHT 2222
@@ -45,6 +45,9 @@ public:
45
45
int num_chares , min_size ;
46
46
long nsquare ;
47
47
48
+ CkPrintf ("Parallel Research Kernels Version %s\n" , PRKVERSION );
49
+ CkPrintf ("Charm++ stencil execution on 2D grid\n" );
50
+
48
51
if (m -> argc != 4 ) {
49
52
CkPrintf ("%s <maxiterations> <grid_size> <overdecomposition factor>\n" , m -> argv [0 ]);
50
53
CkExit ();
@@ -99,8 +102,6 @@ public:
99
102
}
100
103
101
104
// print info
102
- CkPrintf ("Parallel Research Kernels Version %s\n" , PRKVERSION );
103
- CkPrintf ("Charm++ stencil execution on 2D grid\n" );
104
105
CkPrintf ("Number of Charm++ PEs = %d\n" , CkNumPes ());
105
106
CkPrintf ("Overdecomposition = %d\n" , overdecomposition );
106
107
CkPrintf ("Grid size = %d\n" , n );
@@ -112,6 +113,11 @@ public:
112
113
CkPrintf ("Type of stencil = compact\n" );
113
114
CkPrintf ("ERROR: Compact stencil not (yet) supported\n" );
114
115
CkExit ();
116
+ #endif
117
+ #if LOOPGEN
118
+ CkPrintf ("Script used to expand stencil loop body\n" );
119
+ #else
120
+ CkPrintf ("Compact representation of stencil loop body\n" );
115
121
#endif
116
122
CkPrintf ("Number of iterations = %d\n" , maxiterations );
117
123
@@ -220,10 +226,10 @@ public:
220
226
if (thisIndex .y == num_chare_rows - 1 ) {max_messages_due -- ; }
221
227
messages_due = max_messages_due ;
222
228
223
- for (j = jstart , jloc = 0 ;j <=jend ;j ++ , jloc ++ ){
224
- for (i = istart , iloc = 0 ;i <=iend ;i ++ , iloc ++ ){
225
- IN (iloc , jloc ) = COEFX * i + COEFY * j ;
226
- OUT (iloc , jloc ) = 0.0 ;
229
+ for (j = jstart ;j <=jend ;j ++ ){
230
+ for (i = istart ;i <=iend ;i ++ ){
231
+ IN (i , j ) = COEFX * i + COEFY * j ;
232
+ OUT (i , j ) = 0.0 ;
227
233
}
228
234
}
229
235
}
@@ -240,85 +246,85 @@ public:
240
246
// Perform one iteration of work
241
247
// The first step is to send the local state to the neighbors
242
248
void begin_iteration (void ) {
243
- int k ;
249
+ int kk ;
244
250
245
- // Send my left edge
246
- if (thisIndex .x > 0 ) {
247
- ghostMsg * msg = new (height * RADIUS ) ghostMsg (LEFT , height );
251
+ // Send my top edge
252
+ if (thisIndex .y < num_chare_rows - 1 ) {
253
+ ghostMsg * msg = new (width * RADIUS ) ghostMsg (TOP , width );
248
254
if (!msg ) {
249
255
CkPrintf ("Could not allocate space for message\n" );
250
256
CkExit ();
251
257
}
252
258
CkSetRefNum (msg , iterations );
253
- for (int j = 0 , k = 0 ;j < height ; ++ j ) for (int i = 0 ; i < RADIUS ; i ++ )
254
- msg -> edge [k ++ ] = IN (i ,j );
255
- thisProxy (thisIndex .x - 1 , thisIndex .y ).receiveGhosts (msg );
259
+ for (int j = jend - RADIUS + 1 , kk = 0 ; j <= jend ; j ++ ) for (int i = istart ; i <= iend ; i ++ )
260
+ msg -> edge [kk ++ ] = IN (i ,j );
261
+ thisProxy (thisIndex .x , thisIndex .y + 1 ).receiveGhosts (msg );
256
262
}
257
263
258
- // Send my right edge
259
- if (thisIndex .x < num_chare_cols - 1 ) {
260
- ghostMsg * msg = new (height * RADIUS ) ghostMsg (RIGHT , height );
264
+ // Send my bottom edge
265
+ if (thisIndex .y > 0 ) {
266
+ ghostMsg * msg = new (width * RADIUS ) ghostMsg (BOTTOM , width );
261
267
if (!msg ) {
262
268
CkPrintf ("Could not allocate space for message\n" );
263
269
CkExit ();
264
270
}
265
271
CkSetRefNum (msg , iterations );
266
- for (int j = 0 , k = 0 ;j < height ; ++ j ) for (int i = 0 ; i < RADIUS ; i ++ )
267
- msg -> edge [k ++ ] = IN (width - RADIUS + i ,j );
268
- thisProxy (thisIndex .x + 1 , thisIndex .y ).receiveGhosts (msg );
272
+ for (int j = jstart , kk = 0 ; j <= jstart + RADIUS - 1 ; j ++ ) for (int i = istart ; i <= iend ; i ++ )
273
+ msg -> edge [kk ++ ] = IN (i ,j );
274
+ thisProxy (thisIndex .x , thisIndex .y - 1 ).receiveGhosts (msg );
269
275
}
270
276
271
- // Send my bottom edge
272
- if (thisIndex .y > 0 ) {
273
- ghostMsg * msg = new (width * RADIUS ) ghostMsg (BOTTOM , width );
277
+ // Send my right edge
278
+ if (thisIndex .x < num_chare_cols - 1 ) {
279
+ ghostMsg * msg = new (height * RADIUS ) ghostMsg (RIGHT , height );
274
280
if (!msg ) {
275
281
CkPrintf ("Could not allocate space for message\n" );
276
282
CkExit ();
277
283
}
278
284
CkSetRefNum (msg , iterations );
279
- for (int j = 0 , k = 0 ; j < RADIUS ; j ++ ) for (int i = 0 ; i < width ; i ++ )
280
- msg -> edge [k ++ ] = IN (i ,j );
281
- thisProxy (thisIndex .x , thisIndex .y - 1 ).receiveGhosts (msg );
285
+ for (int j = jstart , kk = 0 ;j <= jend ; j ++ ) for (int i = iend - RADIUS + 1 ; i <= iend ; i ++ )
286
+ msg -> edge [kk ++ ] = IN (i ,j );
287
+ thisProxy (thisIndex .x + 1 , thisIndex .y ).receiveGhosts (msg );
282
288
}
283
289
284
- // Send my top edge
285
- if (thisIndex .y < num_chare_rows - 1 ) {
286
- ghostMsg * msg = new (width * RADIUS ) ghostMsg (TOP , width );
290
+ // Send my left edge
291
+ if (thisIndex .x > 0 ) {
292
+ ghostMsg * msg = new (height * RADIUS ) ghostMsg (LEFT , height );
287
293
if (!msg ) {
288
294
CkPrintf ("Could not allocate space for message\n" );
289
295
CkExit ();
290
296
}
291
297
CkSetRefNum (msg , iterations );
292
- for (int j = 0 , k = 0 ; j < RADIUS ; j ++ ) for (int i = 0 ; i < width ; i ++ )
293
- msg -> edge [k ++ ] = IN (i ,height - RADIUS + j );
294
- thisProxy (thisIndex .x , thisIndex .y + 1 ).receiveGhosts (msg );
298
+ for (int j = jstart , kk = 0 ;j <= jend ; j ++ ) for (int i = istart ; i <= istart + RADIUS - 1 ; i ++ )
299
+ msg -> edge [kk ++ ] = IN (i ,j );
300
+ thisProxy (thisIndex .x - 1 , thisIndex .y ).receiveGhosts (msg );
295
301
}
296
302
}
297
303
298
304
void processGhosts (ghostMsg * msg ) {
299
- int k ; k = 0 ;
305
+ int kk = 0 ;
300
306
int size = msg -> size ;
301
307
302
308
switch (msg -> dir ) {
303
309
case LEFT :
304
- for (int j = 0 ;j < size ; ++ j ) for (int i = 0 ; i < RADIUS ; i ++ )
305
- IN (width + i ,j ) = msg -> edge [k ++ ];
310
+ for (int j = jstart ;j <= jend ; j ++ ) for (int i = iend + 1 ; i <= iend + RADIUS ; i ++ )
311
+ IN (i ,j ) = msg -> edge [kk ++ ];
306
312
break ;
307
313
308
314
case RIGHT :
309
- for (int j = 0 ;j < size ; ++ j ) for (int i = 0 ; i < RADIUS ; i ++ )
310
- IN (- RADIUS + i ,j ) = msg -> edge [k ++ ];
315
+ for (int j = jstart ;j <= jend ; j ++ ) for (int i = istart - RADIUS ; i <= istart - 1 ; i ++ )
316
+ IN (i ,j ) = msg -> edge [kk ++ ];
311
317
break ;
312
318
313
319
case BOTTOM :
314
- for (int j = 0 ; j < RADIUS ; j ++ ) for (int i = 0 ;i < size ; ++ i ){
315
- IN (i ,height + j ) = msg -> edge [k ++ ];
320
+ for (int j = jend + 1 ; j <= jend + RADIUS ; j ++ ) for (int i = istart ;i <= iend ; i ++ ){
321
+ IN (i ,j ) = msg -> edge [kk ++ ];
316
322
}
317
323
break ;
318
324
319
325
case TOP :
320
- for (int j = 0 ; j < RADIUS ; j ++ ) for (int i = 0 ;i < size ; ++ i )
321
- IN (i ,- RADIUS + j ) = msg -> edge [k ++ ];
326
+ for (int j = jstart - RADIUS ; j <= jstart - 1 ; j ++ ) for (int i = istart ;i <= iend ; i ++ )
327
+ IN (i ,j ) = msg -> edge [kk ++ ];
322
328
break ;
323
329
324
330
default : CkPrintf ("ERROR: invalid direction\n" );
@@ -330,19 +336,17 @@ public:
330
336
void compute () {
331
337
double * RESTRICT in = this -> in ;
332
338
double * RESTRICT out = this -> out ;
339
+ int ii , jj ;
333
340
334
341
for (int j = MAX (jstart ,RADIUS ); j <=MIN (n - 1 - RADIUS ,jend ); j ++ ) {
335
342
for (int i = MAX (istart ,RADIUS ); i <=MIN (n - 1 - RADIUS ,iend ); i ++ ) {
336
-
337
- for (int jj = - RADIUS ; jj <=RADIUS ; jj ++ ) {
338
- OUT (i - istart ,j - jstart ) += WEIGHT (0 ,jj )* IN (i - istart ,j - jstart + jj );
339
- }
340
- for (int ii = - RADIUS ; ii < 0 ; ii ++ ) {
341
- OUT (i - istart ,j - jstart ) += WEIGHT (ii ,0 )* IN (i - istart + ii ,j - jstart );
342
- }
343
- for (int ii = 1 ; ii <=RADIUS ; ii ++ ) {
344
- OUT (i - istart ,j - jstart ) += WEIGHT (ii ,0 )* IN (i - istart + ii ,j - jstart );
345
- }
343
+ #if LOOPGEN
344
+ #include "loop_body_star.incl"
345
+ #else
346
+ for (jj = - RADIUS ; jj <=RADIUS ; jj ++ ) OUT (i ,j ) += WEIGHT (0 ,jj )* IN (i ,j + jj );
347
+ for (ii = - RADIUS ; ii < 0 ; ii ++ ) OUT (i ,j ) += WEIGHT (ii ,0 )* IN (i + ii ,j );
348
+ for (ii = 1 ; ii <=RADIUS ; ii ++ ) OUT (i ,j ) += WEIGHT (ii ,0 )* IN (i + ii ,j );
349
+ #endif
346
350
}
347
351
}
348
352
}
@@ -352,7 +356,7 @@ public:
352
356
local_norm = 0.0 ;
353
357
for (int j = MAX (jstart ,RADIUS ); j <=MIN (n - 1 - RADIUS ,jend ); j ++ ) {
354
358
for (int i = MAX (istart ,RADIUS ); i <=MIN (n - 1 - RADIUS ,iend ); i ++ ) {
355
- local_norm += OUT (i - istart , j - jstart );
359
+ local_norm += OUT (i , j );
356
360
}
357
361
}
358
362
}
0 commit comments