1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-25 13:15:55 +01:00

threaded FFT

This commit is contained in:
Antonin Portelli 2016-10-26 19:46:36 +01:00
parent 434af6aeaa
commit b4d2af8c89
2 changed files with 42 additions and 21 deletions

View File

@ -200,12 +200,18 @@ namespace Grid {
std::vector<int> lcoor(Nd), gcoor(Nd); std::vector<int> lcoor(Nd), gcoor(Nd);
result = source; result = source;
for(int p=0;p<processors[dim];p++) { for(int p=0;p<processors[dim];p++) {
for(int idx=0;idx<sgrid->lSites();idx++) { PARALLEL_REGION
sgrid->LocalIndexToLocalCoor(idx,lcoor); {
std::vector<int> cbuf(Nd);
sobj s; sobj s;
peekLocalSite(s,result,lcoor);
lcoor[dim]+=p*L; PARALLEL_FOR_LOOP_INTERN
pokeLocalSite(s,pgbuf,lcoor); for(int idx=0;idx<sgrid->lSites();idx++) {
sgrid->LocalIndexToLocalCoor(idx,cbuf);
peekLocalSite(s,result,cbuf);
cbuf[dim]+=p*L;
pokeLocalSite(s,pgbuf,cbuf);
}
} }
result = Cshift(result,dim,L); result = Cshift(result,dim,L);
} }
@ -214,14 +220,18 @@ namespace Grid {
int NN=pencil_g.lSites(); int NN=pencil_g.lSites();
GridStopWatch timer; GridStopWatch timer;
timer.Start(); timer.Start();
//PARALLEL_FOR_LOOP PARALLEL_REGION
for(int idx=0;idx<NN;idx++) { {
pencil_g.LocalIndexToLocalCoor(idx,lcoor); std::vector<int> cbuf(Nd);
if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0 PARALLEL_FOR_LOOP_INTERN
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx]; for(int idx=0;idx<NN;idx++) {
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx]; pencil_g.LocalIndexToLocalCoor(idx, cbuf);
FFTW<scalar>::fftw_execute_dft(p,in,out); if ( cbuf[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx];
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx];
FFTW<scalar>::fftw_execute_dft(p,in,out);
}
} }
} }
timer.Stop(); timer.Stop();
@ -235,13 +245,19 @@ namespace Grid {
// writing out result // writing out result
int pc = processor_coor[dim]; int pc = processor_coor[dim];
for(int idx=0;idx<sgrid->lSites();idx++) { PARALLEL_REGION
sgrid->LocalIndexToLocalCoor(idx,lcoor); {
gcoor = lcoor; std::vector<int> clbuf(Nd), cgbuf(Nd);
sobj s; sobj s;
gcoor[dim] = lcoor[dim]+L*pc;
peekLocalSite(s,pgbuf,gcoor); PARALLEL_FOR_LOOP_INTERN
pokeLocalSite(s,result,lcoor); for(int idx=0;idx<sgrid->lSites();idx++) {
sgrid->LocalIndexToLocalCoor(idx,clbuf);
cgbuf = clbuf;
cgbuf[dim] = clbuf[dim]+L*pc;
peekLocalSite(s,pgbuf,cgbuf);
pokeLocalSite(s,result,clbuf);
}
} }
// destroying plan // destroying plan

View File

@ -38,14 +38,19 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifdef GRID_OMP #ifdef GRID_OMP
#include <omp.h> #include <omp.h>
#ifdef GRID_NUMA #ifdef GRID_NUMA
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)") #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
#else #else
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)") #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
#endif #endif
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") #define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
#define PARALLEL_REGION _Pragma("omp parallel")
#else #else
#define PARALLEL_FOR_LOOP #define PARALLEL_FOR_LOOP
#define PARALLEL_FOR_LOOP_INTERN
#define PARALLEL_NESTED_LOOP2 #define PARALLEL_NESTED_LOOP2
#define PARALLEL_REGION
#endif #endif
namespace Grid { namespace Grid {