mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-25 13:15:55 +01:00
threaded FFT
This commit is contained in:
parent
434af6aeaa
commit
b4d2af8c89
52
lib/FFT.h
52
lib/FFT.h
@ -200,12 +200,18 @@ namespace Grid {
|
|||||||
std::vector<int> lcoor(Nd), gcoor(Nd);
|
std::vector<int> lcoor(Nd), gcoor(Nd);
|
||||||
result = source;
|
result = source;
|
||||||
for(int p=0;p<processors[dim];p++) {
|
for(int p=0;p<processors[dim];p++) {
|
||||||
for(int idx=0;idx<sgrid->lSites();idx++) {
|
PARALLEL_REGION
|
||||||
sgrid->LocalIndexToLocalCoor(idx,lcoor);
|
{
|
||||||
|
std::vector<int> cbuf(Nd);
|
||||||
sobj s;
|
sobj s;
|
||||||
peekLocalSite(s,result,lcoor);
|
|
||||||
lcoor[dim]+=p*L;
|
PARALLEL_FOR_LOOP_INTERN
|
||||||
pokeLocalSite(s,pgbuf,lcoor);
|
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||||
|
sgrid->LocalIndexToLocalCoor(idx,cbuf);
|
||||||
|
peekLocalSite(s,result,cbuf);
|
||||||
|
cbuf[dim]+=p*L;
|
||||||
|
pokeLocalSite(s,pgbuf,cbuf);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
result = Cshift(result,dim,L);
|
result = Cshift(result,dim,L);
|
||||||
}
|
}
|
||||||
@ -214,14 +220,18 @@ namespace Grid {
|
|||||||
int NN=pencil_g.lSites();
|
int NN=pencil_g.lSites();
|
||||||
GridStopWatch timer;
|
GridStopWatch timer;
|
||||||
timer.Start();
|
timer.Start();
|
||||||
//PARALLEL_FOR_LOOP
|
PARALLEL_REGION
|
||||||
for(int idx=0;idx<NN;idx++) {
|
{
|
||||||
pencil_g.LocalIndexToLocalCoor(idx,lcoor);
|
std::vector<int> cbuf(Nd);
|
||||||
|
|
||||||
if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
|
PARALLEL_FOR_LOOP_INTERN
|
||||||
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx];
|
for(int idx=0;idx<NN;idx++) {
|
||||||
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx];
|
pencil_g.LocalIndexToLocalCoor(idx, cbuf);
|
||||||
FFTW<scalar>::fftw_execute_dft(p,in,out);
|
if ( cbuf[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
|
||||||
|
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx];
|
||||||
|
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx];
|
||||||
|
FFTW<scalar>::fftw_execute_dft(p,in,out);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
@ -235,13 +245,19 @@ namespace Grid {
|
|||||||
|
|
||||||
// writing out result
|
// writing out result
|
||||||
int pc = processor_coor[dim];
|
int pc = processor_coor[dim];
|
||||||
for(int idx=0;idx<sgrid->lSites();idx++) {
|
PARALLEL_REGION
|
||||||
sgrid->LocalIndexToLocalCoor(idx,lcoor);
|
{
|
||||||
gcoor = lcoor;
|
std::vector<int> clbuf(Nd), cgbuf(Nd);
|
||||||
sobj s;
|
sobj s;
|
||||||
gcoor[dim] = lcoor[dim]+L*pc;
|
|
||||||
peekLocalSite(s,pgbuf,gcoor);
|
PARALLEL_FOR_LOOP_INTERN
|
||||||
pokeLocalSite(s,result,lcoor);
|
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||||
|
sgrid->LocalIndexToLocalCoor(idx,clbuf);
|
||||||
|
cgbuf = clbuf;
|
||||||
|
cgbuf[dim] = clbuf[dim]+L*pc;
|
||||||
|
peekLocalSite(s,pgbuf,cgbuf);
|
||||||
|
pokeLocalSite(s,result,clbuf);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// destroying plan
|
// destroying plan
|
||||||
|
@ -38,14 +38,19 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifdef GRID_OMP
|
#ifdef GRID_OMP
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#ifdef GRID_NUMA
|
#ifdef GRID_NUMA
|
||||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
|
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
|
||||||
|
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
|
||||||
#else
|
#else
|
||||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
|
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
|
||||||
|
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
|
||||||
#endif
|
#endif
|
||||||
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
|
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
|
||||||
|
#define PARALLEL_REGION _Pragma("omp parallel")
|
||||||
#else
|
#else
|
||||||
#define PARALLEL_FOR_LOOP
|
#define PARALLEL_FOR_LOOP
|
||||||
|
#define PARALLEL_FOR_LOOP_INTERN
|
||||||
#define PARALLEL_NESTED_LOOP2
|
#define PARALLEL_NESTED_LOOP2
|
||||||
|
#define PARALLEL_REGION
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user