@@ -191,6 +191,7 @@ void ActionWithVector::runAllTasks() {
   unsigned nt=OpenMP::getNumThreads();
   if( nt*stride*10>nactive_tasks ) nt=nactive_tasks/stride/10;
   if( nt==0 ) nt=1;
+  if( myvals.size()!=nt ) myvals.resize(nt);
 
   // Get the total number of streamed quantities that we need
   // Get size for buffer
@@ -210,28 +211,31 @@ void ActionWithVector::runAllTasks() {
 
   // Recover the number of derivatives we require
   if( !doNotCalculateDerivatives() && !gridsInStream ) {
+    unsigned nargs = getNumberOfArguments(); int nmasks=getNumberOfMasks();
+    if( nargs>=nmasks && nmasks>0 ) nargs = nargs - nmasks;
     if( getNumberOfAtoms()>0 ) nderivatives += 3*getNumberOfAtoms() + 9;
     for(unsigned i=0; i<getNumberOfArguments(); ++i) nderivatives += getPntrToArgument(i)->getNumberOfValues();
   }
 
   #pragma omp parallel num_threads(nt)
   {
     std::vector<double> omp_buffer;
+    const unsigned t=OpenMP::getThreadNum();
     if( nt>1 ) omp_buffer.resize( bufsize, 0.0 );
-    MultiValue myvals( getNumberOfComponents(), nderivatives, 0 );
-    myvals.clearAll();
+    if( myvals[t].getNumberOfValues()!=getNumberOfComponents() || myvals[t].getNumberOfDerivatives()!=nderivatives ) myvals[t].resize( getNumberOfComponents(), nderivatives );
+    myvals[t].clearAll();
 
     #pragma omp for nowait
     for(unsigned i=rank; i<nactive_tasks; i+=stride) {
       // Calculate the stuff in the loop for this action
-      runTask( partialTaskList[i], myvals );
+      runTask( partialTaskList[i], myvals[t] );
 
       // Now transfer the data to the actions that accumulate values from the calculated quantities
-      if( nt>1 ) gatherAccumulators( partialTaskList[i], myvals, omp_buffer );
-      else gatherAccumulators( partialTaskList[i], myvals, buffer );
+      if( nt>1 ) gatherAccumulators( partialTaskList[i], myvals[t], omp_buffer );
+      else gatherAccumulators( partialTaskList[i], myvals[t], buffer );
 
       // Clear the value
-      myvals.clearAll();
+      myvals[t].clearAll();
     }
     #pragma omp critical
     if( nt>1 ) for(unsigned i=0; i<bufsize; ++i) buffer[i]+=omp_buffer[i];
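
The runAllTasks() hunks above stop constructing a fresh MultiValue inside every parallel region: myvals now looks like a per-thread vector of MultiValue objects (presumably a class member), sized to the number of OpenMP threads before the region, indexed by each thread with OpenMP::getThreadNum(), and resized only when the number of components or derivatives actually changes, so the allocation is reused from one call to the next. Below is a minimal, self-contained sketch of that reuse pattern under stated assumptions: Workspace, Driver, runAllTasks and the task bodies are hypothetical stand-ins, not PLUMED code, and plain OpenMP calls replace the PLUMED OpenMP wrappers.

#include <omp.h>
#include <algorithm>
#include <cstdio>
#include <vector>

// Stand-in for MultiValue: a scratch workspace that is costly to allocate.
struct Workspace {
  std::vector<double> deriv;
  void resize( unsigned nderiv ) { deriv.assign( nderiv, 0.0 ); }
  void clearAll() { std::fill( deriv.begin(), deriv.end(), 0.0 ); }
};

struct Driver {
  std::vector<Workspace> myvals;   // one workspace per thread, kept as a member between calls

  void runAllTasks( unsigned ntasks, unsigned nderiv, std::vector<double>& buffer ) {
    unsigned nt = omp_get_max_threads();
    if( myvals.size()!=nt ) myvals.resize( nt );      // allocated on the first call, reused afterwards

    #pragma omp parallel num_threads(nt)
    {
      const unsigned t = omp_get_thread_num();
      std::vector<double> omp_buffer;
      if( nt>1 ) omp_buffer.resize( buffer.size(), 0.0 );
      // Reallocate a thread's workspace only if the required dimensions changed
      if( myvals[t].deriv.size()!=nderiv ) myvals[t].resize( nderiv );
      myvals[t].clearAll();

      #pragma omp for nowait
      for(unsigned i=0; i<ntasks; ++i) {
        myvals[t].deriv[i%nderiv] = 1.0;              // "runTask" stand-in
        double v = myvals[t].deriv[i%nderiv];         // "gatherAccumulators" stand-in
        if( nt>1 ) omp_buffer[i%buffer.size()] += v;
        else buffer[i%buffer.size()] += v;
        myvals[t].clearAll();                         // wipe the contents, keep the allocation
      }
      #pragma omp critical
      if( nt>1 ) for(unsigned i=0; i<buffer.size(); ++i) buffer[i] += omp_buffer[i];
    }
  }
};

int main() {
  Driver d;
  std::vector<double> buffer( 8, 0.0 );
  d.runAllTasks( 100, 8, buffer );   // first call sizes the per-thread workspaces
  d.runAllTasks( 100, 8, buffer );   // second call reuses them without reallocating
  std::printf( "buffer[0]=%g\n", buffer[0] );
  return 0;
}
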
@@ -349,18 +353,14 @@ bool ActionWithVector::checkForForces() {
   unsigned nt=OpenMP::getNumThreads();
   if( nt*stride*10>nf_tasks ) nt=nf_tasks/stride/10;
   if( nt==0 ) nt=1;
+  if( myvals.size()!=nt ) myvals.resize(nt);
+  if( omp_forces.size()!=nt ) omp_forces.resize(nt);
 
-  // Now determine how big the multivalue needs to be
-  unsigned nmatrices=0; ActionWithMatrix* am=dynamic_cast<ActionWithMatrix*>(this);
-  if(am) {
-    for(unsigned i=0; i<getNumberOfComponents(); ++i) {
-      if( getConstPntrToComponent(i)->getRank()==2 && !getConstPntrToComponent(i)->hasDerivatives() ) { nmatrices=getNumberOfComponents(); }
-    }
-  }
   // Recover the number of derivatives we require (this should be equal to the number of forces)
-  unsigned nderiv=0;
+  unsigned nderiv=0, nargs = getNumberOfArguments(); int nmasks = getNumberOfMasks();
+  if( nargs>=nmasks && nmasks>0 ) nargs = nargs - nmasks;
   if( getNumberOfAtoms()>0 ) nderiv += 3*getNumberOfAtoms() + 9;
-  for(unsigned i=0; i<getNumberOfArguments(); ++i) {
+  for(unsigned i=0; i<nargs; ++i) {
     nderiv += getPntrToArgument(i)->getNumberOfStoredValues();
   }
   if( forcesForApply.size()!=nderiv ) forcesForApply.resize( nderiv );
@@ -369,23 +369,26 @@ bool ActionWithVector::checkForForces() {
 
   #pragma omp parallel num_threads(nt)
   {
-    std::vector<double> omp_forces;
-    if( nt>1 ) omp_forces.resize( forcesForApply.size(), 0.0 );
-    MultiValue myvals( getNumberOfComponents(), nderiv, nmatrices );
-    myvals.clearAll();
+    const unsigned t=OpenMP::getThreadNum();
+    if( nt>1 ) {
+      if( omp_forces[t].size()!=forcesForApply.size() ) omp_forces[t].resize( forcesForApply.size(), 0.0 );
+      else omp_forces[t].assign( forcesForApply.size(), 0.0 );
+    }
+    if( myvals[t].getNumberOfValues()!=getNumberOfComponents() || myvals[t].getNumberOfDerivatives()!=nderiv ) myvals[t].resize( getNumberOfComponents(), nderiv );
+    myvals[t].clearAll();
 
     #pragma omp for nowait
    for(unsigned i=rank; i<nf_tasks; i+=stride) {
-      runTask( force_tasks[i], myvals );
+      runTask( force_tasks[i], myvals[t] );
 
       // Now get the forces
-      if( nt>1 ) gatherForces( force_tasks[i], myvals, omp_forces );
-      else gatherForces( force_tasks[i], myvals, forcesForApply );
+      if( nt>1 ) gatherForces( force_tasks[i], myvals[t], omp_forces[t] );
+      else gatherForces( force_tasks[i], myvals[t], forcesForApply );
 
-      myvals.clearAll();
+      myvals[t].clearAll();
     }
     #pragma omp critical
-    if(nt>1) for(unsigned i=0; i<forcesForApply.size(); ++i) forcesForApply[i]+=omp_forces[i];
+    if(nt>1) for(unsigned i=0; i<forcesForApply.size(); ++i) forcesForApply[i]+=omp_forces[t][i];
   }
   // MPI Gather on forces
   if( !serial ) comm.Sum( forcesForApply );
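
The checkForForces() hunks make the same move for the force buffers: the per-region std::vector<double> omp_forces becomes what is presumably a member vector of per-thread buffers, and the nmatrices bookkeeping that the old MultiValue constructor needed is dropped. Because a recycled buffer still holds the forces from the previous call, the diff zeroes it explicitly: resize(n, 0.0) the first time the size is wrong, assign(n, 0.0) otherwise, before each thread's contribution is folded into forcesForApply in the critical section. A short sketch of that buffer-recycling logic follows, with hypothetical names and plain OpenMP in place of the PLUMED wrappers.

#include <omp.h>
#include <cstdio>
#include <vector>

// Per-thread force buffers kept in a member vector.  On first use a buffer is
// sized with resize(n, 0.0); on later calls the allocation is kept and
// assign(n, 0.0) merely re-zeroes the stale values from the previous call.
struct ForceDriver {
  std::vector< std::vector<double> > omp_forces;

  void checkForForces( unsigned nf_tasks, std::vector<double>& forcesForApply ) {
    unsigned nt = omp_get_max_threads();
    if( omp_forces.size()!=nt ) omp_forces.resize( nt );

    #pragma omp parallel num_threads(nt)
    {
      const unsigned t = omp_get_thread_num();
      if( nt>1 ) {
        if( omp_forces[t].size()!=forcesForApply.size() ) omp_forces[t].resize( forcesForApply.size(), 0.0 );
        else omp_forces[t].assign( forcesForApply.size(), 0.0 );
      }

      #pragma omp for nowait
      for(unsigned i=0; i<nf_tasks; ++i) {
        double f = 1.0;                                 // "gatherForces" stand-in
        if( nt>1 ) omp_forces[t][i%forcesForApply.size()] += f;
        else forcesForApply[i%forcesForApply.size()] += f;
      }
      // Fold each thread's private buffer into the shared force array
      #pragma omp critical
      if( nt>1 ) for(unsigned i=0; i<forcesForApply.size(); ++i) forcesForApply[i] += omp_forces[t][i];
    }
  }
};

int main() {
  ForceDriver d;
  std::vector<double> forces( 4, 0.0 );
  d.checkForForces( 40, forces );
  d.checkForForces( 40, forces );   // buffers are reused; assign() resets them to zero
  std::printf( "forces[0]=%g\n", forces[0] );
  return 0;
}
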