1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

FFT optimisation

This commit is contained in:
Antonin Portelli 2016-10-24 19:25:40 +01:00
parent a795b5705e
commit 13bf0482e3

View File

@ -200,18 +200,14 @@ namespace Grid {
sign,FFTW_ESTIMATE); sign,FFTW_ESTIMATE);
} }
double add,mul,fma; std::vector<int> lcoor(Nd), gcoor(Nd);
FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
flops_call = add+mul+2.0*fma;
GridStopWatch timer;
// Barrel shift and collect global pencil // Barrel shift and collect global pencil
for(int p=0;p<processors[dim];p++) { for(int p=0;p<processors[dim];p++) {
for(int idx=0;idx<sgrid->lSites();idx++) { for(int idx=0;idx<sgrid->lSites();idx++) {
std::vector<int> lcoor(Nd);
sgrid->LocalIndexToLocalCoor(idx,lcoor); sgrid->LocalIndexToLocalCoor(idx,lcoor);
sobj s; sobj s;
@ -228,14 +224,11 @@ namespace Grid {
// Loop over orthog coords // Loop over orthog coords
int NN=pencil_g.lSites(); int NN=pencil_g.lSites();
GridStopWatch timer;
GridStopWatch Timer; timer.Start();
Timer.Start();
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int idx=0;idx<NN;idx++) { for(int idx=0;idx<NN;idx++) {
std::vector<int> lcoor(Nd);
pencil_g.LocalIndexToLocalCoor(idx,lcoor); pencil_g.LocalIndexToLocalCoor(idx,lcoor);
if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0 if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
@ -245,15 +238,17 @@ PARALLEL_FOR_LOOP
} }
} }
Timer.Stop(); timer.Stop();
usec += Timer.useconds();
flops+= flops_call*NN;
double add,mul,fma;
FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
flops_call = add+mul+2.0*fma;
usec += timer.useconds();
flops+= flops_call*NN;
int pc = processor_coor[dim]; int pc = processor_coor[dim];
for(int idx=0;idx<sgrid->lSites();idx++) { for(int idx=0;idx<sgrid->lSites();idx++) {
std::vector<int> lcoor(Nd);
sgrid->LocalIndexToLocalCoor(idx,lcoor); sgrid->LocalIndexToLocalCoor(idx,lcoor);
std::vector<int> gcoor = lcoor; gcoor = lcoor;
// extract the result // extract the result
sobj s; sobj s;
gcoor[dim] = lcoor[dim]+L*pc; gcoor[dim] = lcoor[dim]+L*pc;