mirror of
https://github.com/paboyle/Grid.git
synced 2024-12-23 19:35:26 +00:00
threaded FFT
This commit is contained in:
parent
434af6aeaa
commit
b4d2af8c89
52
lib/FFT.h
52
lib/FFT.h
@ -200,12 +200,18 @@ namespace Grid {
|
||||
std::vector<int> lcoor(Nd), gcoor(Nd);
|
||||
result = source;
|
||||
for(int p=0;p<processors[dim];p++) {
|
||||
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||
sgrid->LocalIndexToLocalCoor(idx,lcoor);
|
||||
PARALLEL_REGION
|
||||
{
|
||||
std::vector<int> cbuf(Nd);
|
||||
sobj s;
|
||||
peekLocalSite(s,result,lcoor);
|
||||
lcoor[dim]+=p*L;
|
||||
pokeLocalSite(s,pgbuf,lcoor);
|
||||
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||
sgrid->LocalIndexToLocalCoor(idx,cbuf);
|
||||
peekLocalSite(s,result,cbuf);
|
||||
cbuf[dim]+=p*L;
|
||||
pokeLocalSite(s,pgbuf,cbuf);
|
||||
}
|
||||
}
|
||||
result = Cshift(result,dim,L);
|
||||
}
|
||||
@ -214,14 +220,18 @@ namespace Grid {
|
||||
int NN=pencil_g.lSites();
|
||||
GridStopWatch timer;
|
||||
timer.Start();
|
||||
//PARALLEL_FOR_LOOP
|
||||
for(int idx=0;idx<NN;idx++) {
|
||||
pencil_g.LocalIndexToLocalCoor(idx,lcoor);
|
||||
PARALLEL_REGION
|
||||
{
|
||||
std::vector<int> cbuf(Nd);
|
||||
|
||||
if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
|
||||
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx];
|
||||
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx];
|
||||
FFTW<scalar>::fftw_execute_dft(p,in,out);
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for(int idx=0;idx<NN;idx++) {
|
||||
pencil_g.LocalIndexToLocalCoor(idx, cbuf);
|
||||
if ( cbuf[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
|
||||
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx];
|
||||
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx];
|
||||
FFTW<scalar>::fftw_execute_dft(p,in,out);
|
||||
}
|
||||
}
|
||||
}
|
||||
timer.Stop();
|
||||
@ -235,13 +245,19 @@ namespace Grid {
|
||||
|
||||
// writing out result
|
||||
int pc = processor_coor[dim];
|
||||
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||
sgrid->LocalIndexToLocalCoor(idx,lcoor);
|
||||
gcoor = lcoor;
|
||||
PARALLEL_REGION
|
||||
{
|
||||
std::vector<int> clbuf(Nd), cgbuf(Nd);
|
||||
sobj s;
|
||||
gcoor[dim] = lcoor[dim]+L*pc;
|
||||
peekLocalSite(s,pgbuf,gcoor);
|
||||
pokeLocalSite(s,result,lcoor);
|
||||
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||
sgrid->LocalIndexToLocalCoor(idx,clbuf);
|
||||
cgbuf = clbuf;
|
||||
cgbuf[dim] = clbuf[dim]+L*pc;
|
||||
peekLocalSite(s,pgbuf,cgbuf);
|
||||
pokeLocalSite(s,result,clbuf);
|
||||
}
|
||||
}
|
||||
|
||||
// destroying plan
|
||||
|
@ -38,14 +38,19 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
#ifdef GRID_OMP
|
||||
#include <omp.h>
|
||||
#ifdef GRID_NUMA
|
||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
|
||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
|
||||
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
|
||||
#else
|
||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
|
||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
|
||||
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
|
||||
#endif
|
||||
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
|
||||
#define PARALLEL_REGION _Pragma("omp parallel")
|
||||
#else
|
||||
#define PARALLEL_FOR_LOOP
|
||||
#define PARALLEL_FOR_LOOP
|
||||
#define PARALLEL_FOR_LOOP_INTERN
|
||||
#define PARALLEL_NESTED_LOOP2
|
||||
#define PARALLEL_REGION
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
Loading…
Reference in New Issue
Block a user