diff --git a/lib/FFT.h b/lib/FFT.h index 4cda6483..17060dc3 100644 --- a/lib/FFT.h +++ b/lib/FFT.h @@ -200,18 +200,14 @@ namespace Grid { sign,FFTW_ESTIMATE); } - double add,mul,fma; - FFTW::fftw_flops(p,&add,&mul,&fma); - flops_call = add+mul+2.0*fma; - - GridStopWatch timer; + std::vector lcoor(Nd), gcoor(Nd); // Barrel shift and collect global pencil for(int p=0;plSites();idx++) { - std::vector lcoor(Nd); + sgrid->LocalIndexToLocalCoor(idx,lcoor); sobj s; @@ -228,14 +224,11 @@ namespace Grid { // Loop over orthog coords int NN=pencil_g.lSites(); - - GridStopWatch Timer; - Timer.Start(); + GridStopWatch timer; + timer.Start(); PARALLEL_FOR_LOOP - for(int idx=0;idx lcoor(Nd); + for(int idx=0;idx::fftw_flops(p,&add,&mul,&fma); + flops_call = add+mul+2.0*fma; + usec += timer.useconds(); + flops+= flops_call*NN; int pc = processor_coor[dim]; - for(int idx=0;idxlSites();idx++) { - std::vector lcoor(Nd); + for(int idx=0;idxlSites();idx++) { sgrid->LocalIndexToLocalCoor(idx,lcoor); - std::vector gcoor = lcoor; + gcoor = lcoor; // extract the result sobj s; gcoor[dim] = lcoor[dim]+L*pc;