FFT in both double and single precision now gives good performance in multithreaded code.

2026-05-20 09:04:30 +01:00 · 2016-08-24 15:05:00 +01:00
parent 88be3b39bb
commit ff6da364e8
6 changed files with 298 additions and 492 deletions
@@ -35,6 +35,9 @@ int main (int argc, char ** argv)
 {
  Grid_init(&argc,&argv);

+  int threads = GridThread::GetThreads();
+  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
+
  std::vector<int> latt_size   = GridDefaultLatt();
  std::vector<int> simd_layout( { vComplexD::Nsimd(),1,1,1});
  std::vector<int> mpi_layout  = GridDefaultMpi();
@@ -75,10 +78,10 @@ int main (int argc, char ** argv)

  FFT theFFT(&Fine);

-  theFFT.FFT_dim(Ctilde,C,0,FFT::forward);  C=Ctilde;
-  theFFT.FFT_dim(Ctilde,C,1,FFT::forward);  C=Ctilde;
-  theFFT.FFT_dim(Ctilde,C,2,FFT::forward);  C=Ctilde;
-  theFFT.FFT_dim(Ctilde,C,3,FFT::forward);
+  theFFT.FFT_dim(Ctilde,C,0,FFT::forward);  C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
+  theFFT.FFT_dim(Ctilde,C,1,FFT::forward);  C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
+  theFFT.FFT_dim(Ctilde,C,2,FFT::forward);  C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
+  theFFT.FFT_dim(Ctilde,C,3,FFT::forward);  std::cout << theFFT.MFlops()<<std::endl;

  //  C=zero;
  //  Ctilde = where(abs(Ctilde)<1.0e-10,C,Ctilde);
@@ -90,10 +93,10 @@ int main (int argc, char ** argv)
  C=C-Ctilde;
  std::cout << "diff scalar "<<norm2(C) << std::endl;

-  theFFT.FFT_dim(Stilde,S,0,FFT::forward);  S=Stilde;
-  theFFT.FFT_dim(Stilde,S,1,FFT::forward);  S=Stilde;
-  theFFT.FFT_dim(Stilde,S,2,FFT::forward);  S=Stilde;
-  theFFT.FFT_dim(Stilde,S,3,FFT::forward);
+  theFFT.FFT_dim(Stilde,S,0,FFT::forward);  S=Stilde; std::cout << theFFT.MFlops()<<std::endl;
+  theFFT.FFT_dim(Stilde,S,1,FFT::forward);  S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
+  theFFT.FFT_dim(Stilde,S,2,FFT::forward);  S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
+  theFFT.FFT_dim(Stilde,S,3,FFT::forward);std::cout << theFFT.MFlops()<<std::endl;

  SpinMatrixD Sp; 
  Sp = zero; Sp = Sp+cVol;