From 44f4f5c8e2f5b64329cbd8bf4648476cf7b18d8b Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 27 Jul 2018 23:00:16 +0100 Subject: [PATCH] Momentum loop --- benchmarks/Benchmark_meson_field.cc | 183 ++++++++++++++++++++++++++-- 1 file changed, 176 insertions(+), 7 deletions(-) diff --git a/benchmarks/Benchmark_meson_field.cc b/benchmarks/Benchmark_meson_field.cc index aa2dbd0a..10a69525 100644 --- a/benchmarks/Benchmark_meson_field.cc +++ b/benchmarks/Benchmark_meson_field.cc @@ -180,7 +180,6 @@ void sliceInnerProductMesonFieldGamma(std::vector< std::vector > &mat, const int Nsimd = grid->Nsimd(); int Nt = grid->GlobalDimensions()[orthogdim]; int Ngamma = gammas.size(); - // int Nmom = mom.size(); assert(mat.size()==Lblock*Rblock*Ngamma); for(int t=0;t > &mat, // splitting the SIMD int MFrvol = rd*Lblock*Rblock*Ngamma; int MFlvol = ld*Lblock*Rblock*Ngamma; - int MFfvol = fd*Lblock*Rblock*Ngamma; std::vector > lvSum(MFrvol); parallel_for (int r = 0; r < MFrvol; r++){ @@ -330,8 +328,6 @@ void sliceInnerProductMesonFieldGamma1(std::vector< std::vector > &mat int Nt = grid->GlobalDimensions()[orthogdim]; int Ngamma = gammas.size(); - // int Nmom = mom.size(); - assert(mat.size()==Lblock*Rblock*Ngamma); for(int t=0;t > &mat int MFrvol = rd*Lblock*Rblock; int MFlvol = ld*Lblock*Rblock; - int MFfvol = fd*Lblock*Rblock*Ngamma; // Do to dirac matrices here - Vector lvSum(MFrvol); parallel_for (int r = 0; r < MFrvol; r++){ lvSum[r] = zero; @@ -450,6 +444,161 @@ void sliceInnerProductMesonFieldGamma1(std::vector< std::vector > &mat return; } +template +void sliceInnerProductMesonFieldGammaMom(std::vector< std::vector > &mat, + const std::vector > &lhs, + const std::vector > &rhs, + int orthogdim, + std::vector gammas, + const std::vector &mom) +{ + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + typedef iSpinMatrix SpinMatrix_v; + typedef iSpinMatrix SpinMatrix_s; + + int Lblock = lhs.size(); + int Rblock = rhs.size(); + + GridBase *grid = lhs[0]._grid; + + const int Nd = grid->_ndimension; + const int Nsimd = grid->Nsimd(); + int Nt = grid->GlobalDimensions()[orthogdim]; + int Ngamma = gammas.size(); + int Nmom = mom.size(); + + assert(mat.size()==Lblock*Rblock*Ngamma*Nmom); + for(int t=0;t_fdimensions[orthogdim]; + int ld=grid->_ldimensions[orthogdim]; + int rd=grid->_rdimensions[orthogdim]; + + // will locally sum vectors first + // sum across these down to scalars + // splitting the SIMD + int MFrvol = rd*Lblock*Rblock*Nmom; + int MFlvol = ld*Lblock*Rblock*Nmom; + + Vector lvSum(MFrvol); + parallel_for (int r = 0; r < MFrvol; r++){ + lvSum[r] = zero; + } + + Vector lsSum(MFlvol); + parallel_for (int r = 0; r < MFlvol; r++){ + lsSum[r]=scalar_type(0.0); + } + + int e1= grid->_slice_nblock[orthogdim]; + int e2= grid->_slice_block [orthogdim]; + int stride=grid->_slice_stride[orthogdim]; + + std::cout << GridLogMessage << " Entering first parallel loop "<_ostride[orthogdim]; // base offset for start of plane + + for(int n=0;n > phase(Nmom); + for(int m=0;m icoor(Nd); + std::vector extracted(Nsimd); + + + for(int i=0;iiCoorFromIindex(icoor,idx); + + int ldx = rt+icoor[orthogdim]*rd; + + int ij_ldx = m+Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*ldx; + + lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx]; + + } + }}} + } + + std::cout << GridLogMessage << " Entering third parallel loop "<_processor_coor[orthogdim]){ + for(int m=0;m(std::vector< std::vector > &mat, @@ -491,7 +640,8 @@ int main (int argc, char ** argv) std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); GridCartesian Grid(latt_size,simd_layout,mpi_layout); - + + int Nmom=9; int nt = latt_size[Tp]; uint64_t vol = 1; for(int d=0;d v(Nm,&Grid); std::vector w(Nm,&Grid); + std::vector phases(Nmom,&Grid); for(int i=0;i > MesonFields (Nm*Nm); std::vector > MesonFields4 (Nm*Nm*4); std::vector > MesonFields16 (Nm*Nm*16); + std::vector > MesonFields16mom (Nm*Nm*16*Nmom); std::vector > MesonFieldsRef(Nm*Nm); for(int i=0;i