1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-07-29 02:37:07 +01:00

FFT in both double and single precision now gives good performance in multithreaded code.

This commit is contained in:
paboyle
2016-08-24 15:05:00 +01:00
parent 88be3b39bb
commit ff6da364e8
6 changed files with 298 additions and 492 deletions

View File

@@ -35,6 +35,9 @@ int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout( { vComplexD::Nsimd(),1,1,1});
std::vector<int> mpi_layout = GridDefaultMpi();
@@ -75,10 +78,10 @@ int main (int argc, char ** argv)
FFT theFFT(&Fine);
theFFT.FFT_dim(Ctilde,C,0,FFT::forward); C=Ctilde;
theFFT.FFT_dim(Ctilde,C,1,FFT::forward); C=Ctilde;
theFFT.FFT_dim(Ctilde,C,2,FFT::forward); C=Ctilde;
theFFT.FFT_dim(Ctilde,C,3,FFT::forward);
theFFT.FFT_dim(Ctilde,C,0,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Ctilde,C,1,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Ctilde,C,2,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Ctilde,C,3,FFT::forward); std::cout << theFFT.MFlops()<<std::endl;
// C=zero;
// Ctilde = where(abs(Ctilde)<1.0e-10,C,Ctilde);
@@ -90,10 +93,10 @@ int main (int argc, char ** argv)
C=C-Ctilde;
std::cout << "diff scalar "<<norm2(C) << std::endl;
theFFT.FFT_dim(Stilde,S,0,FFT::forward); S=Stilde;
theFFT.FFT_dim(Stilde,S,1,FFT::forward); S=Stilde;
theFFT.FFT_dim(Stilde,S,2,FFT::forward); S=Stilde;
theFFT.FFT_dim(Stilde,S,3,FFT::forward);
theFFT.FFT_dim(Stilde,S,0,FFT::forward); S=Stilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,1,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,2,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,3,FFT::forward);std::cout << theFFT.MFlops()<<std::endl;
SpinMatrixD Sp;
Sp = zero; Sp = Sp+cVol;