diff --git a/lib/FFT.h b/lib/FFT.h index cccd1da5..a16e775b 100644 --- a/lib/FFT.h +++ b/lib/FFT.h @@ -226,12 +226,18 @@ namespace Grid { std::vector lcoor(Nd), gcoor(Nd); result = source; for(int p=0;plSites();idx++) { - sgrid->LocalIndexToLocalCoor(idx,lcoor); + PARALLEL_REGION + { + std::vector cbuf(Nd); sobj s; - peekLocalSite(s,result,lcoor); - lcoor[dim]+=p*L; - pokeLocalSite(s,pgbuf,lcoor); + + PARALLEL_FOR_LOOP_INTERN + for(int idx=0;idxlSites();idx++) { + sgrid->LocalIndexToLocalCoor(idx,cbuf); + peekLocalSite(s,result,cbuf); + cbuf[dim]+=p*L; + pokeLocalSite(s,pgbuf,cbuf); + } } result = Cshift(result,dim,L); } @@ -240,14 +246,18 @@ namespace Grid { int NN=pencil_g.lSites(); GridStopWatch timer; timer.Start(); - //PARALLEL_FOR_LOOP - for(int idx=0;idx cbuf(Nd); - if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0 - FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx]; - FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx]; - FFTW::fftw_execute_dft(p,in,out); + PARALLEL_FOR_LOOP_INTERN + for(int idx=0;idx::fftw_execute_dft(p,in,out); + } } } timer.Stop(); @@ -261,14 +271,20 @@ namespace Grid { // writing out result int pc = processor_coor[dim]; - for(int idx=0;idxlSites();idx++) { - sgrid->LocalIndexToLocalCoor(idx,lcoor); - gcoor = lcoor; + PARALLEL_REGION + { + std::vector clbuf(Nd), cgbuf(Nd); sobj s; - gcoor[dim] = lcoor[dim]+L*pc; - peekLocalSite(s,pgbuf,gcoor); - s = s * div; - pokeLocalSite(s,result,lcoor); + + PARALLEL_FOR_LOOP_INTERN + for(int idx=0;idxlSites();idx++) { + sgrid->LocalIndexToLocalCoor(idx,clbuf); + cgbuf = clbuf; + cgbuf[dim] = clbuf[dim]+L*pc; + peekLocalSite(s,pgbuf,cgbuf); + s = s * div; + pokeLocalSite(s,result,clbuf); + } } // destroying plan diff --git a/lib/Threads.h b/lib/Threads.h index 08e5d545..2f270b73 100644 --- a/lib/Threads.h +++ b/lib/Threads.h @@ -38,14 +38,19 @@ Author: paboyle #ifdef GRID_OMP #include #ifdef GRID_NUMA -#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)") +#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)") +#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)") #else -#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)") +#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)") +#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)") #endif #define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") +#define PARALLEL_REGION _Pragma("omp parallel") #else -#define PARALLEL_FOR_LOOP +#define PARALLEL_FOR_LOOP +#define PARALLEL_FOR_LOOP_INTERN #define PARALLEL_NESTED_LOOP2 +#define PARALLEL_REGION #endif namespace Grid {