mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

Hadrons: meson fields indentation fix

Antonin Portelli 2018-08-06 12:42:25 +01:00
parent 3f0f92cda6
commit 4eac4e575e


@@ -21,9 +21,9 @@ class A2AMesonFieldPar : Serializable
{
public:
    GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMesonFieldPar,
                                    int, cacheBlock,
                                    int, schurBlock,
                                    int, Nmom,
                                    std::string, A2A,
                                    std::string, output);
};
@@ -52,15 +52,15 @@ class TA2AMesonField : public Module<A2AMesonFieldPar>
    // Arithmetic help. Move to Grid??
    virtual void MesonField(Eigen::Tensor<ComplexD,5> &mat,
                            const LatticeFermion *lhs,
                            const LatticeFermion *rhs,
                            std::vector<Gamma::Algebra> gammas,
                            const std::vector<LatticeComplex> &mom,
                            int orthogdim,
                            double &t0,
                            double &t1,
                            double &t2,
                            double &t3);
};

MODULE_REGISTER(A2AMesonField, ARG(TA2AMesonField<FIMPL>), MContraction);
@@ -160,7 +160,8 @@ void TA2AMesonField<FImpl>::MesonField(Eigen::Tensor<ComplexD,5> &mat,
    int MFlvol = ld*Lblock*Rblock*Nmom;

    Vector<SpinMatrix_v> lvSum(MFrvol);
    parallel_for (int r = 0; r < MFrvol; r++)
    {
        lvSum[r] = zero;
    }
@@ -176,110 +177,113 @@ void TA2AMesonField<FImpl>::MesonField(Eigen::Tensor<ComplexD,5> &mat,
    t0-=usecond();
    // Nested parallelism would be ok
    // Wasting cores here. Test case r
    parallel_for(int r=0;r<rd;r++)
    {
        int so=r*grid->_ostride[orthogdim]; // base offset for start of plane

        for(int n=0;n<e1;n++)
        for(int b=0;b<e2;b++)
        {
            int ss= so+n*stride+b;

            for(int i=0;i<Lblock;i++)
            {
                auto left = conjugate(lhs_wi[i]._odata[ss]);

                for(int j=0;j<Rblock;j++)
                {
                    SpinMatrix_v vv;
                    auto right = rhs_vj[j]._odata[ss];

                    for(int s1=0;s1<Ns;s1++)
                    for(int s2=0;s2<Ns;s2++)
                    {
                        vv()(s1,s2)() = left()(s2)(0) * right()(s1)(0)
                                      + left()(s2)(1) * right()(s1)(1)
                                      + left()(s2)(2) * right()(s1)(2);
                    }

                    // After getting the sitewise product do the mom phase loop
                    int base = Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*r;

                    for ( int m=0;m<Nmom;m++)
                    {
                        int idx = m+base;
                        auto phase = mom[m]._odata[ss];
                        mac(&lvSum[idx],&vv,&phase);
                    }
                }
            }
        }
    }
    t0+=usecond();

    // Sum across simd lanes in the plane, breaking out orthog dir.
    t1-=usecond();
    parallel_for(int rt=0;rt<rd;rt++)
    {
        std::vector<int> icoor(Nd);
        std::vector<SpinMatrix_s> extracted(Nsimd);

        for(int i=0;i<Lblock;i++)
        for(int j=0;j<Rblock;j++)
        for(int m=0;m<Nmom;m++)
        {
            int ij_rdx = m+Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*rt;

            extract(lvSum[ij_rdx],extracted);
            for(int idx=0;idx<Nsimd;idx++)
            {
                grid->iCoorFromIindex(icoor,idx);

                int ldx    = rt+icoor[orthogdim]*rd;
                int ij_ldx = m+Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*ldx;

                lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx];
            }
        }
    }
    t1+=usecond();

    assert(mat.dimension(0) == Nmom);
    assert(mat.dimension(1) == Ngamma);
    assert(mat.dimension(2) == Nt);
    t2-=usecond();

    // ld loop and local only??
    int pd = grid->_processors[orthogdim];
    int pc = grid->_processor_coor[orthogdim];
    parallel_for_nest2(int lt=0;lt<ld;lt++)
    {
        for(int pt=0;pt<pd;pt++)
        {
            int t = lt + pt*ld;
            if (pt == pc)
            {
                for(int i=0;i<Lblock;i++)
                for(int j=0;j<Rblock;j++)
                for(int m=0;m<Nmom;m++)
                {
                    int ij_dx = m+Nmom*i + Nmom*Lblock * j + Nmom*Lblock * Rblock * lt;

                    for(int mu=0;mu<Ngamma;mu++)
                    {
                        // this is a bit slow
                        mat(m,mu,t,i,j) = trace(lsSum[ij_dx]*Gamma(gammas[mu]));
                    }
                }
            }
            else
            {
                const scalar_type zz(0.0);

                for(int i=0;i<Lblock;i++)
                for(int j=0;j<Rblock;j++)
                for(int mu=0;mu<Ngamma;mu++)
                for(int m=0;m<Nmom;m++)
                {
                    mat(m,mu,t,i,j) = zz;
                }
            }
        }
    }
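Note on the indexing above: lvSum and lsSum are addressed with a flat mixed-radix index, idx = m + Nmom*i + Nmom*Lblock*j + Nmom*Lblock*Rblock*r, i.e. momentum fastest, then left block index, right block index, and plane. A minimal standalone sketch of that convention follows; the block sizes and the flatten helper are hypothetical values chosen for illustration only, not taken from the module.

    // index_sketch.cpp -- illustrative only; sizes below are hypothetical.
    #include <cassert>
    #include <cstdio>

    int main(void)
    {
        const int Nmom = 2, Lblock = 3, Rblock = 4, rd = 5;

        // Flatten (m,i,j,r) the same way the kernel does:
        //   idx = m + Nmom*i + Nmom*Lblock*j + Nmom*Lblock*Rblock*r
        auto flatten = [&](int m, int i, int j, int r) {
            return m + Nmom*i + Nmom*Lblock*j + Nmom*Lblock*Rblock*r;
        };

        // Recover (m,i,j,r) by peeling off each radix in turn.
        int idx = flatten(1, 2, 3, 4);
        int m = idx % Nmom;   idx /= Nmom;
        int i = idx % Lblock; idx /= Lblock;
        int j = idx % Rblock; idx /= Rblock;
        int r = idx;          // remaining factor indexes the rd planes

        assert(m == 1 && i == 2 && j == 3 && r == 4 && r < rd);
        std::printf("m=%d i=%d j=%d r=%d\n", m, i, j, r);
        return 0;
    }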
@@ -298,173 +302,173 @@ void TA2AMesonField<FImpl>::MesonField(Eigen::Tensor<ComplexD,5> &mat,
template <typename FImpl>
void TA2AMesonField<FImpl>::execute(void)
{
    LOG(Message) << "Computing A2A meson field" << std::endl;

    auto &a2a = envGet(A2ABase, par().A2A);

    // 2+6+4+4 = 16 gammas
    // Ordering defined here
    std::vector<Gamma::Algebra> gammas ( {
        Gamma::Algebra::Gamma5,
        Gamma::Algebra::Identity,
        Gamma::Algebra::GammaX,
        Gamma::Algebra::GammaY,
        Gamma::Algebra::GammaZ,
        Gamma::Algebra::GammaT,
        Gamma::Algebra::GammaXGamma5,
        Gamma::Algebra::GammaYGamma5,
        Gamma::Algebra::GammaZGamma5,
        Gamma::Algebra::GammaTGamma5,
        Gamma::Algebra::SigmaXY,
        Gamma::Algebra::SigmaXZ,
        Gamma::Algebra::SigmaXT,
        Gamma::Algebra::SigmaYZ,
        Gamma::Algebra::SigmaYT,
        Gamma::Algebra::SigmaZT
    });

    ///////////////////////////////////////////////
    // Square assumption for now Nl = Nr = N
    ///////////////////////////////////////////////
    int nt = env().getDim(Tp);
    int nx = env().getDim(Xp);
    int ny = env().getDim(Yp);
    int nz = env().getDim(Zp);
    int Nl = a2a.get_Nl();
    int N  = Nl + a2a.get_Nh();
    int ngamma = gammas.size();

    int schurBlock = par().schurBlock;
    int cacheBlock = par().cacheBlock;
    int nmom       = par().Nmom;

    ///////////////////////////////////////////////
    // Momentum setup
    ///////////////////////////////////////////////
    GridBase *grid = env().getGrid(1);
    std::vector<LatticeComplex> phases(nmom,grid);
    for(int m=0;m<nmom;m++)
    {
        phases[m] = Complex(1.0); // All zero momentum for now
    }

    Eigen::Tensor<ComplexD,5> mesonField(nmom,ngamma,nt,N,N);
    LOG(Message) << "N = Nh+Nl for A2A MesonField is " << N << std::endl;

    envGetTmp(std::vector<FermionField>, w);
    envGetTmp(std::vector<FermionField>, v);
    envGetTmp(FermionField, tmp_5d);

    LOG(Message) << "Finding v and w vectors for N = " << N << std::endl;

    //////////////////////////////////////////////////////////////////////////
    // i,j is first loop over SchurBlock factors reusing 5D matrices
    // ii,jj is second loop over cacheBlock factors for high perf contractoin
    // iii,jjj are loops within cacheBlock
    // Total index is sum of these i+ii+iii etc...
    //////////////////////////////////////////////////////////////////////////

    double flops = 0.0;
    double bytes = 0.0;
    double vol   = nx*ny*nz*nt;
    double t_schur=0;
    double t_contr=0;
    double t_int_0=0;
    double t_int_1=0;
    double t_int_2=0;
    double t_int_3=0;

    double t0 = usecond();
    int N_i = N;
    int N_j = N;

    for(int i=0;i<N_i;i+=schurBlock) //loop over SchurBlocking to suppress 5D matrix overhead
    for(int j=0;j<N_j;j+=schurBlock)
    {
        ///////////////////////////////////////////////////////////////
        // Get the W and V vectors for this schurBlock^2 set of terms
        ///////////////////////////////////////////////////////////////
        int N_ii = MIN(N_i-i,schurBlock);
        int N_jj = MIN(N_j-j,schurBlock);

        t_schur-=usecond();
        for(int ii =0;ii < N_ii;ii++) a2a.return_w(i+ii, tmp_5d, w[ii]);
        for(int jj =0;jj < N_jj;jj++) a2a.return_v(j+jj, tmp_5d, v[jj]);
        t_schur+=usecond();

        LOG(Message) << "Found w vectors " << i <<" .. " << i+N_ii-1 << std::endl;
        LOG(Message) << "Found v vectors " << j <<" .. " << j+N_jj-1 << std::endl;

        ///////////////////////////////////////////////////////////////
        // Series of cache blocked chunks of the contractions within this SchurBlock
        ///////////////////////////////////////////////////////////////
        for(int ii=0;ii<N_ii;ii+=cacheBlock)
        for(int jj=0;jj<N_jj;jj+=cacheBlock)
        {
            int N_iii = MIN(N_ii-ii,cacheBlock);
            int N_jjj = MIN(N_jj-jj,cacheBlock);

            Eigen::Tensor<ComplexD,5> mesonFieldBlocked(nmom,ngamma,nt,N_iii,N_jjj);

            t_contr-=usecond();
            MesonField(mesonFieldBlocked, &w[ii], &v[jj], gammas, phases,Tp,
                       t_int_0,t_int_1,t_int_2,t_int_3);
            t_contr+=usecond();

            flops += vol * ( 2 * 8.0 + 6.0 + 8.0*nmom) * N_iii*N_jjj*ngamma;

            bytes += vol * (12.0 * sizeof(Complex) ) * N_iii*N_jjj
                  +  vol * ( 2.0 * sizeof(Complex) *nmom ) * N_iii*N_jjj* ngamma;

            ///////////////////////////////////////////////////////////////
            // Copy back to full meson field tensor
            ///////////////////////////////////////////////////////////////
            parallel_for_nest2(int iii=0;iii< N_iii;iii++)
            for(int jjj=0;jjj< N_jjj;jjj++)
            for(int m =0;m< nmom;m++)
            for(int g =0;g< ngamma;g++)
            for(int t =0;t< nt;t++)
            {
                mesonField(m,g,t,i+ii+iii,j+jj+jjj) = mesonFieldBlocked(m,g,t,iii,jjj);
            }
        }
    }

    double nodes = grid->NodeCount();
    double t1 = usecond();
    LOG(Message) << " Contraction of MesonFields took "<<(t1-t0)/1.0e6<< " seconds " << std::endl;
    LOG(Message) << " Schur   "<<(t_schur)/1.0e6<< " seconds " << std::endl;
    LOG(Message) << " Contr   "<<(t_contr)/1.0e6<< " seconds " << std::endl;
    LOG(Message) << " Intern0 "<<(t_int_0)/1.0e6<< " seconds " << std::endl;
    LOG(Message) << " Intern1 "<<(t_int_1)/1.0e6<< " seconds " << std::endl;
    LOG(Message) << " Intern2 "<<(t_int_2)/1.0e6<< " seconds " << std::endl;
    LOG(Message) << " Intern3 "<<(t_int_3)/1.0e6<< " seconds " << std::endl;

    double t_kernel = t_int_0 + t_int_1;
    LOG(Message) << " Arith "<<flops/(t_kernel)/1.0e3/nodes<< " Gflop/s / node " << std::endl;
    LOG(Message) << " Arith "<<bytes/(t_kernel)/1.0e3/nodes<< " GB/s /node " << std::endl;

    /////////////////////////////////////////////////////////////////////////
    // Test: Build the pion correlator (two end)
    // < PI_ij(t0) PI_ji (t0+t) >
    /////////////////////////////////////////////////////////////////////////
    std::vector<ComplexD> corr(nt,ComplexD(0.0));

    for(int i=0;i<N;i++)
    for(int j=0;j<N;j++)
    {
        int m=0; // first momentum
        int g=0; // first gamma in above ordering is gamma5 for pion

        for(int t0=0;t0<nt;t0++)
        for(int t=0;t<nt;t++)
        {
            int tt = (t0+t)%nt;
            corr[t] += mesonField(m,g,t0,i,j)* mesonField(m,g,tt,j,i);
        }
    }

    for(int t=0;t<nt;t++) corr[t] = corr[t]/ (double)nt;
    for(int t=0;t<nt;t++) LOG(Message) << " " << t << " " << corr[t]<<std::endl;

    // saveResult(par().output, "meson", result);
}

END_MODULE_NAMESPACE
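The two-end test at the end of execute() sums C(t) = (1/nt) * sum over t0, i, j of M_ij(t0) * M_ji(t0+t), using the first momentum and the Gamma5 slot of the meson field. A hedged, self-contained sketch of that contraction on a toy rank-3 array is shown below; the array M, its dummy contents, and the sizes nt and N are made-up stand-ins for the (m=0, g=Gamma5) slice of mesonField, not code from the commit.

    // two_end_sketch.cpp -- illustrative only; M is a made-up stand-in for the
    // (m = 0, g = Gamma5) slice of the mesonField tensor.
    #include <complex>
    #include <cstdio>
    #include <vector>

    int main(void)
    {
        const int nt = 8, N = 4;
        using Cplx = std::complex<double>;

        // M[t][i][j] plays the role of mesonField(0,0,t,i,j); dummy values here.
        std::vector<std::vector<std::vector<Cplx>>> M(
            nt, std::vector<std::vector<Cplx>>(N, std::vector<Cplx>(N)));
        for (int t = 0; t < nt; t++)
        for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++)
            M[t][i][j] = Cplx(0.1 * (t + 1), 0.01 * (i - j));

        // Two-end contraction: C(t) = (1/nt) * sum_{t0,i,j} M_ij(t0) M_ji(t0+t)
        std::vector<Cplx> corr(nt, Cplx(0.0));
        for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++)
        for (int t0 = 0; t0 < nt; t0++)
        for (int t = 0; t < nt; t++)
        {
            int tt = (t0 + t) % nt; // periodic in time
            corr[t] += M[t0][i][j] * M[tt][j][i];
        }
        for (int t = 0; t < nt; t++) corr[t] /= (double)nt;

        for (int t = 0; t < nt; t++)
            std::printf("t=%d  C=(%g, %g)\n", t, corr[t].real(), corr[t].imag());
        return 0;
    }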