From bb024dd1143c543b456f46a0df7483bd7ae56312 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 15 Jun 2019 08:30:05 +0100 Subject: [PATCH] Loop construct changed --- Grid/qcd/utils/A2Autils.h | 56 ++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/Grid/qcd/utils/A2Autils.h b/Grid/qcd/utils/A2Autils.h index 273bf9c3..c4f106f2 100644 --- a/Grid/qcd/utils/A2Autils.h +++ b/Grid/qcd/utils/A2Autils.h @@ -2,8 +2,7 @@ //#include #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); #undef DELTA_F_EQ_2 @@ -142,12 +141,12 @@ void A2Autils::MesonField(TensorType &mat, int MFlvol = ld*Lblock*Rblock*Nmom; Vector lvSum(MFrvol); - thread_loop( (int r = 0; r < MFrvol; r++),{ + thread_for( r, MFrvol,{ lvSum[r] = Zero(); }); Vector lsSum(MFlvol); - thread_loop( (int r = 0; r < MFlvol; r++),{ + thread_for(r,MFlvol,{ lsSum[r]=scalar_type(0.0); }); @@ -157,7 +156,7 @@ void A2Autils::MesonField(TensorType &mat, // potentially wasting cores here if local time extent too small if (t_kernel) *t_kernel = -usecond(); - thread_loop( (int r=0;r_ostride[orthogdim]; // base offset for start of plane @@ -198,9 +197,8 @@ void A2Autils::MesonField(TensorType &mat, } }); - // Sum across simd lanes in the plane, breaking out orthog dir. - thread_loop( (int rt=0;rt extracted(Nsimd); @@ -234,8 +232,7 @@ void A2Autils::MesonField(TensorType &mat, // ld loop and local only?? int pd = grid->_processors[orthogdim]; int pc = grid->_processor_coor[orthogdim]; - thread_loop_collapse(2, (int lt=0;lt::PionFieldXX(Eigen::Tensor &mat, int MFlvol = ld*Lblock*Rblock; Vector lvSum(MFrvol); - thread_loop( (int r = 0; r < MFrvol; r++),{ + thread_for(r,MFrvol,{ lvSum[r] = Zero(); }); Vector lsSum(MFlvol); - thread_loop( (int r = 0; r < MFlvol; r++),{ + thread_for(r,MFlvol,{ lsSum[r]=scalar_type(0.0); }); @@ -345,7 +342,7 @@ void A2Autils::PionFieldXX(Eigen::Tensor &mat, int e2= grid->_slice_block [orthogdim]; int stride=grid->_slice_stride[orthogdim]; - thread_loop( (int r=0;r_ostride[orthogdim]; // base offset for start of plane @@ -394,7 +391,7 @@ void A2Autils::PionFieldXX(Eigen::Tensor &mat, }); // Sum across simd lanes in the plane, breaking out orthog dir. - thread_loop( (int rt=0;rt temp; @@ -426,8 +423,7 @@ void A2Autils::PionFieldXX(Eigen::Tensor &mat, // ld loop and local only?? int pd = grid->_processors[orthogdim]; int pc = grid->_processor_coor[orthogdim]; - thread_loop_collapse(2,(int lt=0;lt::PionFieldWVmom(Eigen::Tensor &mat, int MFlvol = ld*Lblock*Rblock*Nmom; Vector lvSum(MFrvol); - thread_loop( (int r = 0; r < MFrvol; r++),{ + thread_for(r,MFrvol,{ lvSum[r] = Zero(); }); Vector lsSum(MFlvol); - thread_loop( (int r = 0; r < MFlvol; r++),{ + thread_for(r,MFlvol,{ lsSum[r]=scalar_type(0.0); }); @@ -493,7 +489,7 @@ void A2Autils::PionFieldWVmom(Eigen::Tensor &mat, int e2= grid->_slice_block [orthogdim]; int stride=grid->_slice_stride[orthogdim]; - thread_loop( (int r=0;r_ostride[orthogdim]; // base offset for start of plane @@ -542,7 +538,7 @@ void A2Autils::PionFieldWVmom(Eigen::Tensor &mat, // Sum across simd lanes in the plane, breaking out orthog dir. - thread_loop( (int rt=0;rt temp; @@ -573,10 +569,10 @@ void A2Autils::PionFieldWVmom(Eigen::Tensor &mat, assert(mat.dimension(0) == Nmom); assert(mat.dimension(1) == Nt); - + int pd = grid->_processors[orthogdim]; int pc = grid->_processor_coor[orthogdim]; - thread_loop_collapse(2,(int lt=0;lt::AslashField(TensorType &mat, int MFlvol = ld*Lblock*Rblock*Nem; Vector lvSum(MFrvol); - thread_loop( (int r = 0; r < MFrvol; r++), + thread_for(r,MFrvol, { lvSum[r] = Zero(); }); Vector lsSum(MFlvol); - thread_loop( (int r = 0; r < MFlvol; r++), + thread_for(r,MFlvol, { lsSum[r] = scalar_type(0.0); }); @@ -705,7 +701,7 @@ void A2Autils::AslashField(TensorType &mat, // Nested parallelism would be ok // Wasting cores here. Test case r if (t_kernel) *t_kernel = -usecond(); - thread_loop( (int r=0;r_ostride[orthogdim]; // base offset for start of plane @@ -757,7 +753,7 @@ void A2Autils::AslashField(TensorType &mat, }); // Sum across simd lanes in the plane, breaking out orthog dir. - thread_loop( (int rt=0;rt extracted(Nsimd); @@ -786,7 +782,7 @@ void A2Autils::AslashField(TensorType &mat, // ld loop and local only?? int pd = grid->_processors[orthogdim]; int pc = grid->_processor_coor[orthogdim]; - thread_loop_collapse(2,(int lt=0;lt::ContractWWVV(std::vector &WWVV, WWVV[t] = Zero(); } - thread_loop( (int ss=0;ssoSites();ss++),{ + thread_for(ss,grid->oSites(),{ for(int d_o=0;d_o::ContractFourQuarkColourDiagonal(const PropagatorField &WWV auto WWVV1_v = WWVV1.View(); auto O_trtr_v= O_trtr.View(); auto O_fig8_v= O_fig8.View(); - thread_loop( (int ss=0;ssoSites();ss++),{ + thread_for(ss,grid->oSites(),{ typedef typename ComplexField::vector_object vobj; @@ -1093,7 +1089,7 @@ void A2Autils::ContractFourQuarkColourMix(const PropagatorField &WWVV0, auto O_trtr_v= O_trtr.View(); auto O_fig8_v= O_fig8.View(); - thread_loop( (int ss=0;ssoSites();ss++),{ + thread_for(ss,grid->oSites(),{ typedef typename ComplexField::vector_object vobj; @@ -1399,5 +1395,5 @@ void A2Autils::DeltaFeq2(int dt_min,int dt_max, } #endif -}} +NAMESPACE_END(Grid);