diff --git a/Grid/allocator/MemoryManagerCache.cc b/Grid/allocator/MemoryManagerCache.cc index c610fb9c..e5cc0c42 100644 --- a/Grid/allocator/MemoryManagerCache.cc +++ b/Grid/allocator/MemoryManagerCache.cc @@ -1,7 +1,6 @@ #include #ifndef GRID_UVM -#warning "Using explicit device memory copies" NAMESPACE_BEGIN(Grid); #define MAXLINE 512 diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 837e3bea..53a592d1 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -376,9 +376,9 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt coalescedWrite(z_v[ss],tmp); }); bool ok; +#ifdef GRID_SYCL uint64_t csum=0; uint64_t csum2=0; -#ifdef GRID_SYCL if ( FlightRecorder::LoggingMode != FlightRecorder::LoggingModeNone) { // z_v @@ -522,14 +522,11 @@ template inline void sliceSum(const Lattice &Data, int ostride=grid->_ostride[orthogdim]; //Reduce Data down to lvSum - RealD t_sum =-usecond(); sliceSumReduction(Data,lvSum,rd, e1,e2,stride,ostride,Nsimd); - t_sum +=usecond(); // Sum across simd lanes in the plane, breaking out orthog dir. Coordinate icoor(Nd); - RealD t_rest =-usecond(); for(int rt=0;rt inline void sliceSum(const Lattice &Data, scalar_type * ptr = (scalar_type *) &result[0]; int words = fd*sizeof(sobj)/sizeof(scalar_type); grid->GlobalSumVector(ptr, words); - t_rest +=usecond(); - std::cout << GridLogMessage << " sliceSum local"< inline diff --git a/Grid/qcd/action/ActionBase.h b/Grid/qcd/action/ActionBase.h index 8acae81b..c3a46729 100644 --- a/Grid/qcd/action/ActionBase.h +++ b/Grid/qcd/action/ActionBase.h @@ -98,7 +98,7 @@ public: virtual RealD S(const GaugeField& U) = 0; // evaluate the action virtual RealD Sinitial(const GaugeField& U) { return this->S(U); } ; // if the refresh computes the action, can cache it. Alternately refreshAndAction() ? 
virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0; // evaluate the action derivative - + ///////////////////////////////////////////////////////////// // virtual smeared interface through configuration container ///////////////////////////////////////////////////////////// @@ -132,6 +132,10 @@ public: template class EmptyAction : public Action { + using Action::refresh; + using Action::Sinitial; + using Action::deriv; + virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) { assert(0);}; // refresh pseudofermions virtual RealD S(const GaugeField& U) { return 0.0;}; // evaluate the action virtual void deriv(const GaugeField& U, GaugeField& dSdU) { assert(0); }; // evaluate the action derivative diff --git a/Grid/qcd/action/gauge/WilsonGaugeAction.h b/Grid/qcd/action/gauge/WilsonGaugeAction.h index f535b54f..22c792cc 100644 --- a/Grid/qcd/action/gauge/WilsonGaugeAction.h +++ b/Grid/qcd/action/gauge/WilsonGaugeAction.h @@ -43,6 +43,11 @@ class WilsonGaugeAction : public Action { public: INHERIT_GIMPL_TYPES(Gimpl); + using Action::S; + using Action::Sinitial; + using Action::deriv; + using Action::refresh; + /////////////////////////// constructors explicit WilsonGaugeAction(RealD beta_):beta(beta_){}; diff --git a/Grid/qcd/utils/A2Autils.h b/Grid/qcd/utils/A2Autils.h index 1aeacbf2..7089fd1b 100644 --- a/Grid/qcd/utils/A2Autils.h +++ b/Grid/qcd/utils/A2Autils.h @@ -64,40 +64,6 @@ public: const std::vector &mom, int orthogdim, double *t_kernel = nullptr, double *t_gsum = nullptr); - template - static void MesonFieldGPU(TensorType &mat, - const FermionField *lhs_wi, - const FermionField *rhs_vj, - std::vector gammas, - const std::vector &mom, - int orthogdim, double *t_kernel = nullptr, double *t_gsum = nullptr); - /* - static void PionFieldWVmom(Eigen::Tensor &mat, - const FermionField *wi, - const FermionField *vj, - const std::vector &mom, - int orthogdim); - - static void PionFieldXX(Eigen::Tensor &mat, - const FermionField *wi, - const FermionField *vj, - int orthogdim, - int g5); - - static void PionFieldWV(Eigen::Tensor &mat, - const FermionField *wi, - const FermionField *vj, - int orthogdim); - static void PionFieldWW(Eigen::Tensor &mat, - const FermionField *wi, - const FermionField *wj, - int orthogdim); - static void PionFieldVV(Eigen::Tensor &mat, - const FermionField *vi, - const FermionField *vj, - int orthogdim); - */ - template // output: rank 5 tensor, e.g. 
Eigen::Tensor static void AslashField(TensorType &mat, const FermionField *lhs_wi, @@ -157,6 +123,211 @@ private: const int Ns, const int ss); }; +const int A2Ablocking=8; + +template using iVecSpinMatrix = iVector, Ns>, A2Ablocking>; +typedef iVecSpinMatrix VecSpinMatrix; +typedef iVecSpinMatrix vVecSpinMatrix; +typedef Lattice LatticeVecSpinMatrix; + +template using iVecComplex = iVector >, A2Ablocking>; +typedef iVecComplex VecComplex; +typedef iVecComplex vVecComplex; +typedef Lattice LatticeVecComplex; + +#define A2A_GPU_KERNELS +#ifdef A2A_GPU_KERNELS +template +template +void A2Autils::MesonField(TensorType &mat, + const FermionField *lhs_wi, + const FermionField *rhs_vj, + std::vector gammas, + const std::vector &mom, + int orthogdim, double *t_kernel, double *t_gsum) +{ + const int block=A2Ablocking; + typedef typename FImpl::SiteSpinor vobj; + + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + int Lblock = mat.dimension(3); + int Rblock = mat.dimension(4); + + // assert(Lblock % block==0); + // assert(Rblock % block==0); + + GridBase *grid = lhs_wi[0].Grid(); + + // const int Nd = grid->_ndimension; + const int Nsimd = grid->Nsimd(); + + int Nt = grid->GlobalDimensions()[orthogdim]; + int Ngamma = gammas.size(); + int Nmom = mom.size(); + + LatticeVecSpinMatrix SpinMat(grid); + LatticeVecSpinMatrix MomSpinMat(grid); + + std::vector sliced; + for(int i=0;ioSites(),(size_t)Nsimd,{ + auto left = conjugate(lhs_v(ss)); + auto right = rhs_v(ss); + auto vv = SpinMat_v(ss); + for(int s1=0;s1(sliced[t],jj); + auto trSG = trace(tmp*Gamma(gammas[mu])); + mat(m,mu,t,i,j) = trSG()(); + } + } + } + } + }//jo + } +} + +// "A-slash" field w_i(x)^dag * i * A_mu * gamma_mu * v_j(x) +// +// With: +// +// B_0 = A_0 + i A_1 +// B_1 = A_2 + i A_3 +// +// then in spin space +// +// ( 0 0 -conj(B_1) -B_0 ) +// i * A_mu g_mu = ( 0 0 -conj(B_0) B_1 ) +// ( B_1 B_0 0 0 ) +// ( conj(B_0) -conj(B_1) 0 0 ) + +template +template +void A2Autils::AslashField(TensorType &mat, + const FermionField *lhs_wi, + const FermionField *rhs_vj, + const std::vector &emB0, + const std::vector &emB1, + int orthogdim, double *t_kernel, double *t_gsum) +{ + const int block=A2Ablocking; + typedef typename FImpl::SiteSpinor vobj; + + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + int Lblock = mat.dimension(3); + int Rblock = mat.dimension(4); + + int Nem = emB0.size(); + assert(emB1.size() == Nem); + + // assert(Lblock % block==0); + // assert(Rblock % block==0); + + GridBase *grid = lhs_wi[0].Grid(); + + const int Nd = grid->_ndimension; + const int Nsimd = grid->Nsimd(); + + int Nt = grid->GlobalDimensions()[orthogdim]; + + LatticeVecSpinMatrix SpinMat(grid); + LatticeVecComplex Aslash(grid); + std::vector sliced; + + for(int i=0;ioSites(),(size_t)Nsimd,{ + auto left = conjugate(lhs_v(ss)); + auto right = rhs_v(ss); + auto vv = SpinMat_v(ss); + for(int s1=0;s1oSites(),(size_t)Nsimd,{ + auto vv = SpinMat_v(ss); + auto b0 = emB0_v(ss); + auto b1 = emB1_v(ss); + auto cb0 = conjugate(b0); + auto cb1 = conjugate(b1); + auto asl = Aslash_v(ss); + for(int j=jo;j template void A2Autils::MesonField(TensorType &mat, @@ -329,488 +500,41 @@ void A2Autils::MesonField(TensorType &mat, if (t_gsum) *t_gsum += usecond(); } -const int A2Ablocking=8; -template using iVecSpinMatrix = iVector, Ns>, A2Ablocking>; -typedef iVecSpinMatrix VecSpinMatrix; 
-typedef iVecSpinMatrix vVecSpinMatrix; -typedef Lattice LatticeVecSpinMatrix; - template template -void A2Autils::MesonFieldGPU(TensorType &mat, - const FermionField *lhs_wi, - const FermionField *rhs_vj, - std::vector gammas, - const std::vector &mom, - int orthogdim, double *t_kernel, double *t_gsum) +void A2Autils::AslashField(TensorType &mat, + const FermionField *lhs_wi, + const FermionField *rhs_vj, + const std::vector &emB0, + const std::vector &emB1, + int orthogdim, double *t_kernel, double *t_gsum) { - const int block=A2Ablocking; - typedef typename FImpl::SiteSpinor vobj; - - typedef typename vobj::scalar_object sobj; - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; + typedef typename FermionField::vector_object vobj; + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + typedef iSpinMatrix SpinMatrix_v; + typedef iSpinMatrix SpinMatrix_s; + typedef iSinglet Singlet_v; + typedef iSinglet Singlet_s; + int Lblock = mat.dimension(3); int Rblock = mat.dimension(4); - - // assert(Lblock % block==0); - // assert(Rblock % block==0); GridBase *grid = lhs_wi[0].Grid(); const int Nd = grid->_ndimension; const int Nsimd = grid->Nsimd(); - int Nt = grid->GlobalDimensions()[orthogdim]; - int Ngamma = gammas.size(); - int Nmom = mom.size(); - - - LatticeVecSpinMatrix SpinMat(grid); - LatticeVecSpinMatrix MomSpinMat(grid); - - RealD t_afor = 0.0; - RealD t_sum = 0.0; - RealD t_pha = 0.0; - RealD t_trace= 0.0; - uint64_t ncall=0; - - std::vector sliced; - for(int i=0;ioSites(),(size_t)Nsimd,{ - auto left = conjugate(lhs_v(ss)); - auto right = rhs_v(ss); - auto vv = SpinMat_v(ss); - for(int s1=0;s1 -void A2Autils::PionFieldXX(Eigen::Tensor &mat, - const FermionField *wi, - const FermionField *vj, - int orthogdim, - int g5) -{ - int Lblock = mat.dimension(1); - int Rblock = mat.dimension(2); - - GridBase *grid = wi[0].Grid(); - - const int nd = grid->_ndimension; - const int Nsimd = grid->Nsimd(); - - int Nt = grid->GlobalDimensions()[orthogdim]; + int Nt = grid->GlobalDimensions()[orthogdim]; + int Nem = emB0.size(); + assert(emB1.size() == Nem); int fd=grid->_fdimensions[orthogdim]; int ld=grid->_ldimensions[orthogdim]; int rd=grid->_rdimensions[orthogdim]; - - // will locally sum vectors first - // sum across these down to scalars - // splitting the SIMD - int MFrvol = rd*Lblock*Rblock; - int MFlvol = ld*Lblock*Rblock; - - std::vector lvSum(MFrvol); - thread_for(r,MFrvol,{ - lvSum[r] = Zero(); - }); - - std::vector lsSum(MFlvol); - thread_for(r,MFlvol,{ - lsSum[r]=scalar_type(0.0); - }); - - int e1= grid->_slice_nblock[orthogdim]; - int e2= grid->_slice_block [orthogdim]; - int stride=grid->_slice_stride[orthogdim]; - - thread_for(r,rd,{ - - int so=r*grid->_ostride[orthogdim]; // base offset for start of plane - - for(int n=0;n temp; - ExtractBuffer > extracted(Nsimd); - - for(int i=0;iiCoorFromIindex(icoor,idx); - - int ldx = rt+icoor[orthogdim]*rd; - - int ij_ldx =i+Lblock*j+Lblock*Rblock*ldx; - - lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx]._internal; - - } - }} - }); - - assert(mat.dimension(0) == Nt); - // ld loop and local only?? 
- int pd = grid->_processors[orthogdim]; - int pc = grid->_processor_coor[orthogdim]; - thread_for_collapse(2,lt,ld,{ - for(int pt=0;ptGlobalSumVector(&mat(0,0,0),Nt*Lblock*Rblock); -} - -template -void A2Autils::PionFieldWVmom(Eigen::Tensor &mat, - const FermionField *wi, - const FermionField *vj, - const std::vector &mom, - int orthogdim) -{ - int Lblock = mat.dimension(2); - int Rblock = mat.dimension(3); - - GridBase *grid = wi[0].Grid(); - const int nd = grid->_ndimension; - const int Nsimd = grid->Nsimd(); - - int Nt = grid->GlobalDimensions()[orthogdim]; - int Nmom = mom.size(); - - int fd=grid->_fdimensions[orthogdim]; - int ld=grid->_ldimensions[orthogdim]; - int rd=grid->_rdimensions[orthogdim]; - - // will locally sum vectors first - // sum across these down to scalars - // splitting the SIMD - int MFrvol = rd*Lblock*Rblock*Nmom; - int MFlvol = ld*Lblock*Rblock*Nmom; - - std::vector lvSum(MFrvol); - thread_for(r,MFrvol,{ - lvSum[r] = Zero(); - }); - - std::vector lsSum(MFlvol); - thread_for(r,MFlvol,{ - lsSum[r]=scalar_type(0.0); - }); - - int e1= grid->_slice_nblock[orthogdim]; - int e2= grid->_slice_block [orthogdim]; - int stride=grid->_slice_stride[orthogdim]; - - thread_for(r,rd,{ - - int so=r*grid->_ostride[orthogdim]; // base offset for start of plane - - for(int n=0;n temp; - ExtractBuffer > extracted(Nsimd); - - for(int i=0;iiCoorFromIindex(icoor,idx); - - int ldx = rt+icoor[orthogdim]*rd; - - int ij_ldx = m+Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*ldx; - - lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx]._internal; - - } - }}} - }); - - assert(mat.dimension(0) == Nmom); - assert(mat.dimension(1) == Nt); - - int pd = grid->_processors[orthogdim]; - int pc = grid->_processor_coor[orthogdim]; - thread_for_collapse(2,lt,ld,{ - for(int pt=0;ptGlobalSumVector(&mat(0,0,0,0),Nmom*Nt*Lblock*Rblock); -} - -template -void A2Autils::PionFieldWV(Eigen::Tensor &mat, - const FermionField *wi, - const FermionField *vj, - int orthogdim) -{ - const int g5=1; - PionFieldXX(mat,wi,vj,orthogdim,g5); -} -template -void A2Autils::PionFieldWW(Eigen::Tensor &mat, - const FermionField *wi, - const FermionField *wj, - int orthogdim) -{ - const int nog5=0; - PionFieldXX(mat,wi,wj,orthogdim,nog5); -} -template -void A2Autils::PionFieldVV(Eigen::Tensor &mat, - const FermionField *vi, - const FermionField *vj, - int orthogdim) -{ - const int nog5=0; - PionFieldXX(mat,vi,vj,orthogdim,nog5); -} -*/ - -// "A-slash" field w_i(x)^dag * i * A_mu * gamma_mu * v_j(x) -// -// With: -// -// B_0 = A_0 + i A_1 -// B_1 = A_2 + i A_3 -// -// then in spin space -// -// ( 0 0 -conj(B_1) -B_0 ) -// i * A_mu g_mu = ( 0 0 -conj(B_0) B_1 ) -// ( B_1 B_0 0 0 ) -// ( conj(B_0) -conj(B_1) 0 0 ) -template -template -void A2Autils::AslashField(TensorType &mat, - const FermionField *lhs_wi, - const FermionField *rhs_vj, - const std::vector &emB0, - const std::vector &emB1, - int orthogdim, double *t_kernel, double *t_gsum) -{ - typedef typename FermionField::vector_object vobj; - typedef typename vobj::scalar_object sobj; - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; - - typedef iSpinMatrix SpinMatrix_v; - typedef iSpinMatrix SpinMatrix_s; - typedef iSinglet Singlet_v; - typedef iSinglet Singlet_s; - - int Lblock = mat.dimension(3); - int Rblock = mat.dimension(4); - - GridBase *grid = lhs_wi[0].Grid(); - - const int Nd = grid->_ndimension; - const int Nsimd = grid->Nsimd(); - - int Nt = grid->GlobalDimensions()[orthogdim]; - int Nem = emB0.size(); - assert(emB1.size() == Nem); 
- - int fd=grid->_fdimensions[orthogdim]; - int ld=grid->_ldimensions[orthogdim]; - int rd=grid->_rdimensions[orthogdim]; - // will locally sum vectors first // sum across these down to scalars // splitting the SIMD @@ -836,7 +560,7 @@ void A2Autils::AslashField(TensorType &mat, // Nested parallelism would be ok // Wasting cores here. Test case r if (t_kernel) *t_kernel = -usecond(); - thread_for(r,rd, + for(int r=0;r_ostride[orthogdim]; // base offset for start of plane @@ -863,8 +587,8 @@ void A2Autils::AslashField(TensorType &mat, + left()(s2)(1) * right()(s1)(1) + left()(s2)(2) * right()(s1)(2); } - - // After getting the sitewise product do the mom phase loop + + // After getting the sitewise product do the mom phase loop int base = Nem*i+Nem*Lblock*j+Nem*Lblock*Rblock*r; for ( int m=0;m::AslashField(TensorType &mat, } } } - }); + } // Sum across simd lanes in the plane, breaking out orthog dir. thread_for(rt,rd, @@ -950,7 +674,7 @@ void A2Autils::AslashField(TensorType &mat, grid->GlobalSumVector(&mat(0,0,0,0,0),Nem*Nt*Lblock*Rblock); if (t_gsum) *t_gsum += usecond(); } - +#endif //////////////////////////////////////////// // Schematic thoughts about more generalised four quark insertion // @@ -1361,6 +1085,8 @@ Bag [8,4] fig8 (-227.58,3.58808e-17) trtr (-32.5776,1.83286e-17) // - 1602 }); } + + #ifdef DELTA_F_EQ_2 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Perhaps this should move out of the utils and into Hadrons module @@ -1592,5 +1318,534 @@ void A2Autils::DeltaFeq2(int dt_min,int dt_max, } #endif + /* + static void PionFieldWVmom(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *vj, + const std::vector &mom, + int orthogdim); + + static void PionFieldXX(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *vj, + int orthogdim, + int g5); + + static void PionFieldWV(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *vj, + int orthogdim); + static void PionFieldWW(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *wj, + int orthogdim); + static void PionFieldVV(Eigen::Tensor &mat, + const FermionField *vi, + const FermionField *vj, + int orthogdim); + */ + +/* + +template +template +void A2Autils::MesonField(TensorType &mat, + const FermionField *lhs_wi, + const FermionField *rhs_vj, + std::vector gammas, + const std::vector &mom, + int orthogdim, double *t_kernel, double *t_gsum) +{ + typedef typename FImpl::SiteSpinor vobj; + + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + typedef iSpinMatrix SpinMatrix_v; + typedef iSpinMatrix SpinMatrix_s; + + int Lblock = mat.dimension(3); + int Rblock = mat.dimension(4); + + GridBase *grid = lhs_wi[0].Grid(); + + const int Nd = grid->_ndimension; + const int Nsimd = grid->Nsimd(); + + int Nt = grid->GlobalDimensions()[orthogdim]; + int Ngamma = gammas.size(); + int Nmom = mom.size(); + + int fd=grid->_fdimensions[orthogdim]; + int ld=grid->_ldimensions[orthogdim]; + int rd=grid->_rdimensions[orthogdim]; + + // will locally sum vectors first + // sum across these down to scalars + // splitting the SIMD + int MFrvol = rd*Lblock*Rblock*Nmom; + int MFlvol = ld*Lblock*Rblock*Nmom; + + std::vector lvSum(MFrvol); + for(int r=0;r lsSum(MFlvol); + for(int r=0;r_slice_nblock[orthogdim]; + int e2= grid->_slice_block [orthogdim]; + int stride=grid->_slice_stride[orthogdim]; + + // potentially wasting cores here 
if local time extent too small + if (t_kernel) *t_kernel = -usecond(); + for(int r=0;r_ostride[orthogdim]; // base offset for start of plane + + for(int n=0;n extracted(Nsimd); + + for(int i=0;iiCoorFromIindex(icoor,idx); + + int ldx = rt+icoor[orthogdim]*rd; + + int ij_ldx = m+Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*ldx; + + lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx]; + + } + }}} + } + if (t_kernel) *t_kernel += usecond(); + assert(mat.dimension(0) == Nmom); + assert(mat.dimension(1) == Ngamma); + assert(mat.dimension(2) == Nt); + + // ld loop and local only?? + int pd = grid->_processors[orthogdim]; + int pc = grid->_processor_coor[orthogdim]; + thread_for_collapse(2,lt,ld,{ + for(int pt=0;ptGlobalSumVector(&mat(0,0,0,0,0),Nmom*Ngamma*Nt*Lblock*Rblock); + if (t_gsum) *t_gsum += usecond(); +} + +template +void A2Autils::PionFieldXX(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *vj, + int orthogdim, + int g5) +{ + int Lblock = mat.dimension(1); + int Rblock = mat.dimension(2); + + GridBase *grid = wi[0].Grid(); + + const int nd = grid->_ndimension; + const int Nsimd = grid->Nsimd(); + + int Nt = grid->GlobalDimensions()[orthogdim]; + + int fd=grid->_fdimensions[orthogdim]; + int ld=grid->_ldimensions[orthogdim]; + int rd=grid->_rdimensions[orthogdim]; + + // will locally sum vectors first + // sum across these down to scalars + // splitting the SIMD + int MFrvol = rd*Lblock*Rblock; + int MFlvol = ld*Lblock*Rblock; + + std::vector lvSum(MFrvol); + thread_for(r,MFrvol,{ + lvSum[r] = Zero(); + }); + + std::vector lsSum(MFlvol); + thread_for(r,MFlvol,{ + lsSum[r]=scalar_type(0.0); + }); + + int e1= grid->_slice_nblock[orthogdim]; + int e2= grid->_slice_block [orthogdim]; + int stride=grid->_slice_stride[orthogdim]; + + thread_for(r,rd,{ + + int so=r*grid->_ostride[orthogdim]; // base offset for start of plane + + for(int n=0;n temp; + ExtractBuffer > extracted(Nsimd); + + for(int i=0;iiCoorFromIindex(icoor,idx); + + int ldx = rt+icoor[orthogdim]*rd; + + int ij_ldx =i+Lblock*j+Lblock*Rblock*ldx; + + lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx]._internal; + + } + }} + }); + + assert(mat.dimension(0) == Nt); + // ld loop and local only?? 
+ int pd = grid->_processors[orthogdim]; + int pc = grid->_processor_coor[orthogdim]; + thread_for_collapse(2,lt,ld,{ + for(int pt=0;ptGlobalSumVector(&mat(0,0,0),Nt*Lblock*Rblock); +} + +template +void A2Autils::PionFieldWVmom(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *vj, + const std::vector &mom, + int orthogdim) +{ + int Lblock = mat.dimension(2); + int Rblock = mat.dimension(3); + + GridBase *grid = wi[0].Grid(); + + const int nd = grid->_ndimension; + const int Nsimd = grid->Nsimd(); + + int Nt = grid->GlobalDimensions()[orthogdim]; + int Nmom = mom.size(); + + int fd=grid->_fdimensions[orthogdim]; + int ld=grid->_ldimensions[orthogdim]; + int rd=grid->_rdimensions[orthogdim]; + + // will locally sum vectors first + // sum across these down to scalars + // splitting the SIMD + int MFrvol = rd*Lblock*Rblock*Nmom; + int MFlvol = ld*Lblock*Rblock*Nmom; + + std::vector lvSum(MFrvol); + thread_for(r,MFrvol,{ + lvSum[r] = Zero(); + }); + + std::vector lsSum(MFlvol); + thread_for(r,MFlvol,{ + lsSum[r]=scalar_type(0.0); + }); + + int e1= grid->_slice_nblock[orthogdim]; + int e2= grid->_slice_block [orthogdim]; + int stride=grid->_slice_stride[orthogdim]; + + thread_for(r,rd,{ + + int so=r*grid->_ostride[orthogdim]; // base offset for start of plane + + for(int n=0;n temp; + ExtractBuffer > extracted(Nsimd); + + for(int i=0;iiCoorFromIindex(icoor,idx); + + int ldx = rt+icoor[orthogdim]*rd; + + int ij_ldx = m+Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*ldx; + + lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx]._internal; + + } + }}} + }); + + assert(mat.dimension(0) == Nmom); + assert(mat.dimension(1) == Nt); + + int pd = grid->_processors[orthogdim]; + int pc = grid->_processor_coor[orthogdim]; + thread_for_collapse(2,lt,ld,{ + for(int pt=0;ptGlobalSumVector(&mat(0,0,0,0),Nmom*Nt*Lblock*Rblock); +} + +template +void A2Autils::PionFieldWV(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *vj, + int orthogdim) +{ + const int g5=1; + PionFieldXX(mat,wi,vj,orthogdim,g5); +} +template +void A2Autils::PionFieldWW(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *wj, + int orthogdim) +{ + const int nog5=0; + PionFieldXX(mat,wi,wj,orthogdim,nog5); +} +template +void A2Autils::PionFieldVV(Eigen::Tensor &mat, + const FermionField *vi, + const FermionField *vj, + int orthogdim) +{ + const int nog5=0; + PionFieldXX(mat,vi,vj,orthogdim,nog5); +} +*/ + NAMESPACE_END(Grid); diff --git a/Grid/util/FlightRecorder.cc b/Grid/util/FlightRecorder.cc index c19d3dbb..139e7957 100644 --- a/Grid/util/FlightRecorder.cc +++ b/Grid/util/FlightRecorder.cc @@ -280,10 +280,11 @@ void FlightRecorder::xmitLog(void *buf,uint64_t bytes) if(LoggingMode == LoggingModeNone) return; if ( ChecksumCommsSend ){ - uint64_t *ubuf = (uint64_t *)buf; - if(LoggingMode == LoggingModeNone) return; + + if(LoggingMode == LoggingModeNone) return; #ifdef GRID_SYCL + uint64_t *ubuf = (uint64_t *)buf; uint64_t _xor = svm_xor(ubuf,bytes/sizeof(uint64_t)); if(LoggingMode == LoggingModePrint) { std::cerr<<"FlightRecorder::xmitLog : "<< XmitLoggingCounter <<" "<< std::hex << _xor < L_list({8,12,16,24,32}); + std::vector L_list({8,12,16,24}); int selm1=sel-1; std::vector clover; diff --git a/examples/Example_taku.cc b/examples/Example_taku.cc deleted file mode 100644 index b9ad272e..00000000 --- a/examples/Example_taku.cc +++ /dev/null @@ -1,383 +0,0 @@ -/* - * Warning: This code illustrative only: not well tested, and not meant for production use - * without regression / tests being applied - */ - 
-#include - -using namespace std; -using namespace Grid; - -RealD LLscale =1.0; -RealD LCscale =1.0; - -template class CovariantLaplacianCshift : public SparseMatrixBase -{ -public: - INHERIT_GIMPL_TYPES(Gimpl); - - GridBase *grid; - GaugeField U; - - CovariantLaplacianCshift(GaugeField &_U) : - grid(_U.Grid()), - U(_U) { }; - - virtual GridBase *Grid(void) { return grid; }; - - virtual void M (const Field &in, Field &out) - { - out=Zero(); - for(int mu=0;mu(U, mu); // NB: Inefficent - out = out - Gimpl::CovShiftForward(Umu,mu,in); - out = out - Gimpl::CovShiftBackward(Umu,mu,in); - out = out + 2.0*in; - } - }; - virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian - virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid - virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid - virtual void MdirAll (const Field &in, std::vector &out) {assert(0);}; // Unimplemented need only for multigrid -}; - -void MakePhase(Coordinate mom,LatticeComplex &phase) -{ - GridBase *grid = phase.Grid(); - auto latt_size = grid->GlobalDimensions(); - ComplexD ci(0.0,1.0); - phase=Zero(); - - LatticeComplex coor(phase.Grid()); - for(int mu=0;mu -void GaussianSmear(LatticeGaugeField &U,Field &unsmeared,Field &smeared) -{ - typedef CovariantLaplacianCshift Laplacian_t; - Laplacian_t Laplacian(U); - - Integer Iterations = 40; - Real width = 2.0; - Real coeff = (width*width) / Real(4*Iterations); - - Field tmp(U.Grid()); - smeared=unsmeared; - // chi = (1-p^2/2N)^N kronecker - for(int n = 0; n < Iterations; ++n) { - Laplacian.M(smeared,tmp); - smeared = smeared - coeff*tmp; - std::cout << " smear iter " << n<<" " < -void Solve(Action &D,LatticePropagator &source,LatticePropagator &propagator) -{ - GridBase *UGrid = D.GaugeGrid(); - GridBase *FGrid = D.FermionGrid(); - - LatticeFermion src4 (UGrid); - LatticeFermion src5 (FGrid); - LatticeFermion result5(FGrid); - LatticeFermion result4(UGrid); - LatticePropagator prop5(FGrid); - - ConjugateGradient CG(1.0e-8,100000); - SchurRedBlackDiagMooeeSolve schur(CG); - ZeroGuesser ZG; // Could be a DeflatedGuesser if have eigenvectors - for(int s=0;s(src4,source,s,c); - - D.ImportPhysicalFermionSource(src4,src5); - - result5=Zero(); - schur(D,src5,result5,ZG); - std::cout<(prop5,result5,s,c); - FermToProp(propagator,result4,s,c); - } - } - LatticePropagator Axial_mu(UGrid); - LatticePropagator Vector_mu(UGrid); - - LatticeComplex PA (UGrid); - LatticeComplex VV (UGrid); - LatticeComplex PJ5q(UGrid); - LatticeComplex PP (UGrid); - - std::vector sumPA; - std::vector sumVV; - std::vector sumPP; - std::vector sumPJ5q; - - Gamma g5(Gamma::Algebra::Gamma5); - D.ContractConservedCurrent(prop5,prop5,Axial_mu,source,Current::Axial,Tdir); - PA = trace(g5*Axial_mu); // Pseudoscalar-Axial conserved current - sliceSum(PA,sumPA,Tdir); - - int Nt{static_cast(sumPA.size())}; - - for(int t=0;t >, data); -}; - -void MesonTrace(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase) -{ - const int nchannel=3; - Gamma::Algebra Gammas[nchannel][2] = { - {Gamma::Algebra::GammaX,Gamma::Algebra::GammaX}, - {Gamma::Algebra::GammaY,Gamma::Algebra::GammaY}, - {Gamma::Algebra::GammaZ,Gamma::Algebra::GammaZ} - }; - - Gamma G5(Gamma::Algebra::Gamma5); - - LatticeComplex meson_CF(q1.Grid()); - MesonFile MF; - - for(int ch=0;ch meson_T; - sliceSum(meson_CF,meson_T, Tdir); - - int nt=meson_T.size(); - - std::vector corr(nt); - 
for(int t=0;t seeds4({1,2,3,4}); - GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - - LatticeGaugeField Umu(UGrid); - std::string config; - RealD M5=1.8; - if( argc > 1 && argv[1][0] != '-' ) - { - std::cout<::ColdConfiguration(Umu); - config="ColdConfig"; - // RealD P=1.0; // Don't scale - RealD P=0.5871119; // 48I - // RealD P=0.6153342; // 64I - // RealD P=0.6388238 // 32Ifine - RealD u0 = sqrt(sqrt(P)); - RealD M5mf = M5 - 4.0*(1.0-u0); - RealD w0 = 1.0 - M5mf; -#if 0 - // M5=1.8 with U=u0 - Umu = Umu * u0; - LLscale = 1.0; - LCscale = 1.0; - std::cout< PointProps(nmass,UGrid); - // std::vector GaussProps(nmass,UGrid); - // std::vector Z2Props (nmass,UGrid); - - for(int m=0;m - -using namespace std; -using namespace Grid; - -RealD LLscale =1.0; -RealD LCscale =1.0; - -template class CovariantLaplacianCshift : public SparseMatrixBase -{ -public: - INHERIT_GIMPL_TYPES(Gimpl); - - GridBase *grid; - GaugeField U; - - CovariantLaplacianCshift(GaugeField &_U) : - grid(_U.Grid()), - U(_U) { }; - - virtual GridBase *Grid(void) { return grid; }; - - virtual void M (const Field &in, Field &out) - { - out=Zero(); - for(int mu=0;mu(U, mu); // NB: Inefficent - out = out - Gimpl::CovShiftForward(Umu,mu,in); - out = out - Gimpl::CovShiftBackward(Umu,mu,in); - out = out + 2.0*in; - } - }; - virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian - virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid - virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid - virtual void MdirAll (const Field &in, std::vector &out) {assert(0);}; // Unimplemented need only for multigrid -}; - -void MakePhase(Coordinate mom,LatticeComplex &phase) -{ - GridBase *grid = phase.Grid(); - auto latt_size = grid->GlobalDimensions(); - ComplexD ci(0.0,1.0); - phase=Zero(); - - LatticeComplex coor(phase.Grid()); - for(int mu=0;mu -void GaussianSmear(LatticeGaugeField &U,Field &unsmeared,Field &smeared) -{ - typedef CovariantLaplacianCshift Laplacian_t; - Laplacian_t Laplacian(U); - - Integer Iterations = 40; - Real width = 2.0; - Real coeff = (width*width) / Real(4*Iterations); - - Field tmp(U.Grid()); - smeared=unsmeared; - // chi = (1-p^2/2N)^N kronecker - for(int n = 0; n < Iterations; ++n) { - Laplacian.M(smeared,tmp); - smeared = smeared - coeff*tmp; - std::cout << " smear iter " << n<<" " < -void MasslessFreePropagator(Action &D,LatticePropagator &source,LatticePropagator &propagator) -{ - GridBase *UGrid = source.Grid(); - GridBase *FGrid = D.FermionGrid(); - bool fiveD = true; //calculate 5d free propagator - RealD mass = D.Mass(); - LatticeFermion src4 (UGrid); - LatticeFermion result4 (UGrid); - LatticeFermion result5(FGrid); - LatticeFermion src5(FGrid); - LatticePropagator prop5(FGrid); - for(int s=0;s(src4,source,s,c); - - D.ImportPhysicalFermionSource(src4,src5); - D.FreePropagator(src5,result5,mass,true); - std::cout<(prop5,result5,s,c); - FermToProp(propagator,result4,s,c); - } - } - - LatticePropagator Vector_mu(UGrid); - LatticeComplex VV (UGrid); - std::vector sumVV; - Gamma::Algebra GammaV[3] = { - Gamma::Algebra::GammaX, - Gamma::Algebra::GammaY, - Gamma::Algebra::GammaZ - }; - for( int mu=0;mu<3;mu++ ) { - Gamma gV(GammaV[mu]); - D.ContractConservedCurrent(prop5,prop5,Vector_mu,source,Current::Vector,mu); - VV = trace(gV*Vector_mu); // (local) Vector-Vector conserved current - sliceSum(VV,sumVV,Tdir); - int Nt = sumVV.size(); - for(int t=0;t -void 
MasslessFreePropagator1(Action &D,LatticePropagator &source,LatticePropagator &propagator) -{ - bool fiveD = false; //calculate 4d free propagator - RealD mass = D.Mass(); - GridBase *UGrid = source.Grid(); - LatticeFermion src4 (UGrid); - LatticeFermion result4 (UGrid); - for(int s=0;s(src4,source,s,c); - D.FreePropagator(src4,result4,mass,false); - FermToProp(propagator,result4,s,c); - } - } -} - -template -void Solve(Action &D,LatticePropagator &source,LatticePropagator &propagator) -{ - GridBase *UGrid = D.GaugeGrid(); - GridBase *FGrid = D.FermionGrid(); - - LatticeFermion src4 (UGrid); - LatticeFermion src5 (FGrid); - LatticeFermion result5(FGrid); - LatticeFermion result4(UGrid); - LatticePropagator prop5(FGrid); - - ConjugateGradient CG(1.0e-10,100000); - SchurRedBlackDiagMooeeSolve schur(CG); - ZeroGuesser ZG; // Could be a DeflatedGuesser if have eigenvectors - for(int s=0;s(src4,source,s,c); - - D.ImportPhysicalFermionSource(src4,src5); - - result5=Zero(); - schur(D,src5,result5,ZG); - std::cout<(prop5,result5,s,c); - FermToProp(propagator,result4,s,c); - } - } - LatticePropagator Axial_mu(UGrid); - LatticePropagator Vector_mu(UGrid); - - LatticeComplex PA (UGrid); - LatticeComplex VV (UGrid); - LatticeComplex PJ5q(UGrid); - LatticeComplex PP (UGrid); - - std::vector sumPA; - std::vector sumVV; - std::vector sumPP; - std::vector sumPJ5q; - - Gamma g5(Gamma::Algebra::Gamma5); - D.ContractConservedCurrent(prop5,prop5,Axial_mu,source,Current::Axial,Tdir); - PA = trace(g5*Axial_mu); // Pseudoscalar-Axial conserved current - sliceSum(PA,sumPA,Tdir); - - int Nt{static_cast(sumPA.size())}; - - for(int t=0;t >, data); -}; - -void MesonTrace(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase) -{ - const int nchannel=4; - Gamma::Algebra Gammas[nchannel][2] = { - {Gamma::Algebra::GammaXGamma5,Gamma::Algebra::GammaXGamma5}, - {Gamma::Algebra::GammaYGamma5,Gamma::Algebra::GammaYGamma5}, - {Gamma::Algebra::GammaZGamma5,Gamma::Algebra::GammaZGamma5}, - {Gamma::Algebra::Identity,Gamma::Algebra::Identity} - }; - - LatticeComplex meson_CF(q1.Grid()); - MesonFile MF; - - for(int ch=0;ch meson_T; - sliceSum(meson_CF,meson_T, Tdir); - - int nt=meson_T.size(); - - std::vector corr(nt); - for(int t=0;t seeds4({1,2,3,4}); - // GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - - LatticeGaugeField Umu(UGrid); - std::string config; - RealD M5=atof(getenv("M5")); - RealD mq = atof(getenv("mass")); - int tadpole = atof(getenv("tadpole")); - std::vector masses({ mq} ); // u/d, s, c ?? 
- if( argc > 1 && argv[1][0] != '-' ) - { - std::cout<::ColdConfiguration(Umu); - config="ColdConfig"; - // RealD P=1.0; // Don't scale - // RealD P=0.6388238 // 32Ifine - // RealD P=0.6153342; // 64I - RealD P=0.5871119; // 48I - RealD u0 = sqrt(sqrt(P)); - RealD w0 = 1 - M5; - std::cout< boundary = {1,1,1,-1}; - FermionActionD::ImplParams Params(boundary); - RealD b=1.5; - RealD c=0.5; - std::cout< PointProps(nmass,UGrid); - // std::vector FreeProps(nmass,UGrid); - // LatticePropagator delta(UGrid); - - for(int m=0;m - -using namespace std; -using namespace Grid; - -RealD LLscale =1.0; -RealD LCscale =1.0; - -template class CovariantLaplacianCshift : public SparseMatrixBase -{ -public: - INHERIT_GIMPL_TYPES(Gimpl); - - GridBase *grid; - GaugeField U; - - CovariantLaplacianCshift(GaugeField &_U) : - grid(_U.Grid()), - U(_U) { }; - - virtual GridBase *Grid(void) { return grid; }; - - virtual void M (const Field &in, Field &out) - { - out=Zero(); - for(int mu=0;mu(U, mu); // NB: Inefficent - out = out - Gimpl::CovShiftForward(Umu,mu,in); - out = out - Gimpl::CovShiftBackward(Umu,mu,in); - out = out + 2.0*in; - } - }; - virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian - virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid - virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid - virtual void MdirAll (const Field &in, std::vector &out) {assert(0);}; // Unimplemented need only for multigrid -}; - -void MakePhase(Coordinate mom,LatticeComplex &phase) -{ - GridBase *grid = phase.Grid(); - auto latt_size = grid->GlobalDimensions(); - ComplexD ci(0.0,1.0); - phase=Zero(); - - LatticeComplex coor(phase.Grid()); - for(int mu=0;mu -void GaussianSmear(LatticeGaugeField &U,Field &unsmeared,Field &smeared) -{ - typedef CovariantLaplacianCshift Laplacian_t; - Laplacian_t Laplacian(U); - - Integer Iterations = 40; - Real width = 2.0; - Real coeff = (width*width) / Real(4*Iterations); - - Field tmp(U.Grid()); - smeared=unsmeared; - // chi = (1-p^2/2N)^N kronecker - for(int n = 0; n < Iterations; ++n) { - Laplacian.M(smeared,tmp); - smeared = smeared - coeff*tmp; - std::cout << " smear iter " << n<<" " < -void MasslessFreePropagator(Action &D,LatticePropagator &source,LatticePropagator &propagator) -{ - GridBase *UGrid = source.Grid(); - GridBase *FGrid = D.FermionGrid(); - bool fiveD = true; //calculate 4d free propagator - RealD mass = D.Mass(); - LatticeFermion src4 (UGrid); - LatticeFermion result4 (UGrid); - LatticeFermion result5(FGrid); - LatticeFermion src5(FGrid); - LatticePropagator prop5(FGrid); - for(int s=0;s(src4,source,s,c); - - D.ImportPhysicalFermionSource(src4,src5); - D.FreePropagator(src5,result5,mass,true); - std::cout<(prop5,result5,s,c); - FermToProp(propagator,result4,s,c); - } - } - - LatticePropagator Vector_mu(UGrid); - LatticeComplex VV (UGrid); - std::vector sumVV; - Gamma::Algebra GammaV[3] = { - Gamma::Algebra::GammaX, - Gamma::Algebra::GammaY, - Gamma::Algebra::GammaZ - }; - for( int mu=0;mu<3;mu++ ) { - Gamma gV(GammaV[mu]); - D.ContractConservedCurrent(prop5,prop5,Vector_mu,source,Current::Vector,mu); - VV = trace(gV*Vector_mu); // (local) Vector-Vector conserved current - sliceSum(VV,sumVV,Tdir); - int Nt = sumVV.size(); - for(int t=0;t -void Solve(Action &D,LatticePropagator &source,LatticePropagator &propagator) -{ - GridBase *UGrid = D.GaugeGrid(); - GridBase *FGrid = D.FermionGrid(); - - LatticeFermion src4 
(UGrid); - LatticeFermion src5 (FGrid); - LatticeFermion result5(FGrid); - LatticeFermion result4(UGrid); - LatticePropagator prop5(FGrid); - - ConjugateGradient CG(1.0e-6,100000); - SchurRedBlackDiagMooeeSolve schur(CG); - ZeroGuesser ZG; // Could be a DeflatedGuesser if have eigenvectors - for(int s=0;s(src4,source,s,c); - - D.ImportPhysicalFermionSource(src4,src5); - - result5=Zero(); - schur(D,src5,result5,ZG); - std::cout<(prop5,result5,s,c); - FermToProp(propagator,result4,s,c); - } - } - LatticePropagator Axial_mu(UGrid); - LatticePropagator Vector_mu(UGrid); - - LatticeComplex PA (UGrid); - LatticeComplex VV (UGrid); - LatticeComplex PJ5q(UGrid); - LatticeComplex PP (UGrid); - - std::vector sumPA; - std::vector sumVV; - std::vector sumPP; - std::vector sumPJ5q; - - Gamma g5(Gamma::Algebra::Gamma5); - D.ContractConservedCurrent(prop5,prop5,Axial_mu,source,Current::Axial,Tdir); - PA = trace(g5*Axial_mu); // Pseudoscalar-Axial conserved current - sliceSum(PA,sumPA,Tdir); - - int Nt{static_cast(sumPA.size())}; - - for(int t=0;t >, data); -}; - -void MesonTrace(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase) -{ - const int nchannel=3; - Gamma::Algebra Gammas[nchannel][2] = { - {Gamma::Algebra::GammaX,Gamma::Algebra::GammaX}, - {Gamma::Algebra::GammaY,Gamma::Algebra::GammaY}, - // {Gamma::Algebra::GammaZ,Gamma::Algebra::GammaZ} - {Gamma::Algebra::Gamma5,Gamma::Algebra::Gamma5} - }; - - Gamma G5(Gamma::Algebra::Gamma5); - - LatticeComplex meson_CF(q1.Grid()); - MesonFile MF; - - for(int ch=0;ch meson_T; - sliceSum(meson_CF,meson_T, Tdir); - - int nt=meson_T.size(); - - std::vector corr(nt); - for(int t=0;t seeds4({1,2,3,4}); - // GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - - LatticeGaugeField Umu(UGrid); - std::string config; - RealD M5=atof(getenv("M5")); - RealD mq = atof(getenv("mass")); - std::vector masses({ mq} ); // u/d, s, c ?? - if( argc > 1 && argv[1][0] != '-' ) - { - std::cout<::ColdConfiguration(Umu); - config="ColdConfig"; - // RealD P=1.0; // Don't scale - // RealD P=0.6153342; // 64I - // RealD P=0.6388238 // 32Ifine - // RealD P=0.5871119; // 48I - // RealD u0 = sqrt(sqrt(P)); - // Umu = Umu * u0; - RealD w0 = 1 - M5; - LLscale = 1.0/(1-w0*w0)/(1-w0*w0); - LCscale = 1.0/(1-w0*w0)/(1-w0*w0); - std::cout< PointProps(nmass,UGrid); - std::vector FreeProps(nmass,UGrid); - LatticePropagator delta(UGrid); - - for(int m=0;m phi(VDIM,&grid); + std::vector B0(Nem,&grid); + std::vector B1(Nem,&grid); std::cout << GridLogMessage << "Initialising random meson fields" << std::endl; for (unsigned int i = 0; i < VDIM; ++i){ random(pRNG,phi[i]); } + for (unsigned int i = 0; i < Nem; ++i){ + random(pRNG,B0[i]); + random(pRNG,B1[i]); + } std::cout << GridLogMessage << "Meson fields initialised, rho non-zero only for t = " << TSRC << std::endl; // Gamma matrices used in the contraction std::vector Gmu = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT, Gamma::Algebra::GammaX, Gamma::Algebra::GammaY, Gamma::Algebra::GammaZ, @@ -74,11 +85,15 @@ int main(int argc, char *argv[]) std::vector> momenta = { {0.,0.,0.}, {1.,0.,0.}, + {-1.,0.,0.}, + {0,1.,0.}, + {0,-1.,0.}, + {0,0,1.}, + {0,0,-1.}, {1.,1.,0.}, {1.,1.,1.}, {2.,0.,0.} }; - // 5 momenta x VDIMxVDIM = 125 calls (x 16 spins) 1.4s => 1400/125 ~10ms per call std::cout << GridLogMessage << "Meson fields will be created for " << Gmu.size() << " Gamma matrices and " << momenta.size() << " momenta." 
<< std::endl; std::cout << GridLogMessage << "Computing complex phases" << std::endl; @@ -98,46 +113,28 @@ int main(int argc, char *argv[]) std::cout << GridLogMessage << "Computing complex phases done." << std::endl; Eigen::Tensor Mpp(momenta.size(),Gmu.size(),Nt,VDIM,VDIM); - Eigen::Tensor Mpp_gpu(momenta.size(),Gmu.size(),Nt,VDIM,VDIM); + Eigen::Tensor App(B0.size(),1,Nt,VDIM,VDIM); // timer double start,stop; + ///////////////////////////////////////////////////////////////////////// //execute meson field routine - std::cout << GridLogMessage << "Meson Field Warmup Begin" << std::endl; + ///////////////////////////////////////////////////////////////////////// A2Autils::MesonField(Mpp,&phi[0],&phi[0],Gmu,phases,Tp); - std::cout << GridLogMessage << "Meson Field Timing Begin" << std::endl; start = usecond(); A2Autils::MesonField(Mpp,&phi[0],&phi[0],Gmu,phases,Tp); stop = usecond(); std::cout << GridLogMessage << "M(phi,phi) created, execution time " << stop-start << " us" << std::endl; - std::cout << GridLogMessage << "Meson Field GPU Warmup Begin" << std::endl; - A2Autils::MesonFieldGPU(Mpp_gpu,&phi[0],&phi[0],Gmu,phases,Tp); - std::cout << GridLogMessage << "Meson Field GPU Timing Begin" << std::endl; + ///////////////////////////////////////////////////////////////////////// + //execute aslash field routine + ///////////////////////////////////////////////////////////////////////// + A2Autils::AslashField(App,&phi[0],&phi[0],B0,B1,Tp); start = usecond(); - A2Autils::MesonFieldGPU(Mpp_gpu,&phi[0],&phi[0],Gmu,phases,Tp); + A2Autils::AslashField(App,&phi[0],&phi[0],B0,B1,Tp); stop = usecond(); - std::cout << GridLogMessage << "M_gpu(phi,phi) created, execution time " << stop-start << " us" << std::endl; - - for(int mom=0;mom
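
Note on the `using Action<GaugeField>::S;` / `::Sinitial;` / `::deriv;` / `::refresh;` declarations added to EmptyAction and WilsonGaugeAction: these address C++ name hiding. When a derived class overrides one virtual overload, every other base-class overload with the same name is hidden unless re-exposed with a using-declaration. A minimal standalone sketch of the pattern — the class names here are illustrative, not Grid types:

    #include <iostream>

    struct Base {
      virtual ~Base() = default;
      virtual double S(int u)            { return 1.0; } // overload the derived class overrides
      virtual double S(int u, int extra) { return 2.0; } // overload that would otherwise be hidden
    };

    struct Derived : Base {
      using Base::S;                     // re-expose Base::S overloads hidden by the override below
      double S(int u) override { return 3.0; }
    };

    int main() {
      Derived d;
      std::cout << d.S(0) << " " << d.S(0, 1) << "\n";   // prints "3 2"; without the
                                                         // using-declaration, d.S(0,1) fails to compile
    }

This also silences -Woverloaded-virtual warnings, which is the usual motivation for adding the declarations wholesale as the patch does.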
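
The rewritten A2A_GPU_KERNELS MesonField walks the right-hand index j in chunks of A2Ablocking=8: for each (i, jo) pair it fills a lattice-wide iVecSpinMatrix buffer on the device, reduces it once with sliceSum, then scatters the traced results into mat. The control flow, stripped down to a scalar stand-in (the names and the flat buffers here are illustrative, not Grid's accelerator types):

    #include <algorithm>
    #include <vector>

    constexpr int block = 8; // mirrors A2Ablocking in the patch

    // Blocked contraction pattern: accumulate per-site products into a
    // block-sized buffer, reduce once per block, then scatter to the output.
    void blockedContraction(int Lblock, int Rblock, int sites,
                            const std::vector<double>& lhs,  // [i * sites + ss]
                            const std::vector<double>& rhs,  // [j * sites + ss]
                            std::vector<double>& mat)        // [i * Rblock + j]
    {
      std::vector<double> buf(block);
      for (int i = 0; i < Lblock; i++) {
        for (int jo = 0; jo < Rblock; jo += block) {         // outer block loop ("jo" in the patch)
          std::fill(buf.begin(), buf.end(), 0.0);
          for (int ss = 0; ss < sites; ss++)                 // site loop (accelerator_for in the patch)
            for (int j = jo; j < jo + block && j < Rblock; j++)
              buf[j - jo] += lhs[i * sites + ss] * rhs[j * sites + ss];
          for (int j = jo; j < jo + block && j < Rblock; j++)
            mat[i * Rblock + j] = buf[j - jo];               // sliceSum + trace scatter in the patch
        }
      }
    }

The design choice is to cap the working set of the per-block reduction at a fixed size rather than reducing one enormous (Lblock x Rblock) object, at the cost of re-reading lhs once per jo chunk.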
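
The spin-space block structure documented above the new AslashField kernel (i * A_mu gamma_mu written in terms of B_0 = A_0 + i A_1 and B_1 = A_2 + i A_3) can be written out directly. A minimal sketch using std::complex instead of Grid's tensor types, assuming real photon-field components and the same chiral gamma basis as the comment, filling the 4x4 spin matrix exactly as stated there:

    #include <array>
    #include <complex>

    using Cplx    = std::complex<double>;
    using SpinMat = std::array<std::array<Cplx, 4>, 4>;

    // Build i * A_mu gamma_mu at one site from the four photon components,
    // via the packing B_0 = A_0 + i A_1, B_1 = A_2 + i A_3.
    SpinMat iAslash(const std::array<double, 4>& A)
    {
      const Cplx I(0.0, 1.0);
      const Cplx B0 = A[0] + I * A[1];
      const Cplx B1 = A[2] + I * A[3];

      SpinMat M{};  // zero-initialised; only the off-diagonal 2x2 blocks are filled
      M[0][2] = -std::conj(B1);  M[0][3] = -B0;
      M[1][2] = -std::conj(B0);  M[1][3] =  B1;
      M[2][0] =  B1;             M[2][1] =  B0;
      M[3][0] =  std::conj(B0);  M[3][1] = -std::conj(B1);
      return M;
    }

This is the same matrix the device loop over (s1, s2) assembles per site before contracting with w_i^dag ... v_j, and it explains why the kernel only needs the two complex fields emB0 and emB1 rather than all four A_mu.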