1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-12 20:27:06 +01:00

Merge branch 'feature/fft-opt' into develop

This commit is contained in:
2016-11-03 14:34:46 +00:00
2 changed files with 43 additions and 22 deletions

View File

@ -226,12 +226,18 @@ namespace Grid {
std::vector<int> lcoor(Nd), gcoor(Nd); std::vector<int> lcoor(Nd), gcoor(Nd);
result = source; result = source;
for(int p=0;p<processors[dim];p++) { for(int p=0;p<processors[dim];p++) {
for(int idx=0;idx<sgrid->lSites();idx++) { PARALLEL_REGION
sgrid->LocalIndexToLocalCoor(idx,lcoor); {
std::vector<int> cbuf(Nd);
sobj s; sobj s;
peekLocalSite(s,result,lcoor);
lcoor[dim]+=p*L; PARALLEL_FOR_LOOP_INTERN
pokeLocalSite(s,pgbuf,lcoor); for(int idx=0;idx<sgrid->lSites();idx++) {
sgrid->LocalIndexToLocalCoor(idx,cbuf);
peekLocalSite(s,result,cbuf);
cbuf[dim]+=p*L;
pokeLocalSite(s,pgbuf,cbuf);
}
} }
result = Cshift(result,dim,L); result = Cshift(result,dim,L);
} }
@ -240,14 +246,18 @@ namespace Grid {
int NN=pencil_g.lSites(); int NN=pencil_g.lSites();
GridStopWatch timer; GridStopWatch timer;
timer.Start(); timer.Start();
//PARALLEL_FOR_LOOP PARALLEL_REGION
for(int idx=0;idx<NN;idx++) { {
pencil_g.LocalIndexToLocalCoor(idx,lcoor); std::vector<int> cbuf(Nd);
if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0 PARALLEL_FOR_LOOP_INTERN
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx]; for(int idx=0;idx<NN;idx++) {
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx]; pencil_g.LocalIndexToLocalCoor(idx, cbuf);
FFTW<scalar>::fftw_execute_dft(p,in,out); if ( cbuf[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx];
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx];
FFTW<scalar>::fftw_execute_dft(p,in,out);
}
} }
} }
timer.Stop(); timer.Stop();
@ -261,14 +271,20 @@ namespace Grid {
// writing out result // writing out result
int pc = processor_coor[dim]; int pc = processor_coor[dim];
for(int idx=0;idx<sgrid->lSites();idx++) { PARALLEL_REGION
sgrid->LocalIndexToLocalCoor(idx,lcoor); {
gcoor = lcoor; std::vector<int> clbuf(Nd), cgbuf(Nd);
sobj s; sobj s;
gcoor[dim] = lcoor[dim]+L*pc;
peekLocalSite(s,pgbuf,gcoor); PARALLEL_FOR_LOOP_INTERN
s = s * div; for(int idx=0;idx<sgrid->lSites();idx++) {
pokeLocalSite(s,result,lcoor); sgrid->LocalIndexToLocalCoor(idx,clbuf);
cgbuf = clbuf;
cgbuf[dim] = clbuf[dim]+L*pc;
peekLocalSite(s,pgbuf,cgbuf);
s = s * div;
pokeLocalSite(s,result,clbuf);
}
} }
// destroying plan // destroying plan

View File

@ -38,14 +38,19 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifdef GRID_OMP #ifdef GRID_OMP
#include <omp.h> #include <omp.h>
#ifdef GRID_NUMA #ifdef GRID_NUMA
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)") #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
#else #else
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)") #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
#endif #endif
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") #define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
#define PARALLEL_REGION _Pragma("omp parallel")
#else #else
#define PARALLEL_FOR_LOOP #define PARALLEL_FOR_LOOP
#define PARALLEL_FOR_LOOP_INTERN
#define PARALLEL_NESTED_LOOP2 #define PARALLEL_NESTED_LOOP2
#define PARALLEL_REGION
#endif #endif
namespace Grid { namespace Grid {