1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

Loop construct changed

This commit is contained in:
Peter Boyle 2019-06-15 08:30:05 +01:00
parent 52456b9ec7
commit bb024dd114

View File

@ -2,8 +2,7 @@
//#include <Grid/Hadrons/Global.hpp> //#include <Grid/Hadrons/Global.hpp>
#include <Grid/Eigen/unsupported/CXX11/Tensor> #include <Grid/Eigen/unsupported/CXX11/Tensor>
namespace Grid { NAMESPACE_BEGIN(Grid);
namespace QCD {
#undef DELTA_F_EQ_2 #undef DELTA_F_EQ_2
@ -142,12 +141,12 @@ void A2Autils<FImpl>::MesonField(TensorType &mat,
int MFlvol = ld*Lblock*Rblock*Nmom; int MFlvol = ld*Lblock*Rblock*Nmom;
Vector<SpinMatrix_v > lvSum(MFrvol); Vector<SpinMatrix_v > lvSum(MFrvol);
thread_loop( (int r = 0; r < MFrvol; r++),{ thread_for( r, MFrvol,{
lvSum[r] = Zero(); lvSum[r] = Zero();
}); });
Vector<SpinMatrix_s > lsSum(MFlvol); Vector<SpinMatrix_s > lsSum(MFlvol);
thread_loop( (int r = 0; r < MFlvol; r++),{ thread_for(r,MFlvol,{
lsSum[r]=scalar_type(0.0); lsSum[r]=scalar_type(0.0);
}); });
@ -157,7 +156,7 @@ void A2Autils<FImpl>::MesonField(TensorType &mat,
// potentially wasting cores here if local time extent too small // potentially wasting cores here if local time extent too small
if (t_kernel) *t_kernel = -usecond(); if (t_kernel) *t_kernel = -usecond();
thread_loop( (int r=0;r<rd;r++),{ thread_for(r,rd,{
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
@ -198,9 +197,8 @@ void A2Autils<FImpl>::MesonField(TensorType &mat,
} }
}); });
// Sum across simd lanes in the plane, breaking out orthog dir. // Sum across simd lanes in the plane, breaking out orthog dir.
thread_loop( (int rt=0;rt<rd;rt++),{ thread_for(rt,rd,{
Coordinate icoor(Nd); Coordinate icoor(Nd);
ExtractBuffer<SpinMatrix_s> extracted(Nsimd); ExtractBuffer<SpinMatrix_s> extracted(Nsimd);
@ -234,8 +232,7 @@ void A2Autils<FImpl>::MesonField(TensorType &mat,
// ld loop and local only?? // ld loop and local only??
int pd = grid->_processors[orthogdim]; int pd = grid->_processors[orthogdim];
int pc = grid->_processor_coor[orthogdim]; int pc = grid->_processor_coor[orthogdim];
thread_loop_collapse(2, (int lt=0;lt<ld;lt++), thread_for_collapse(2,lt,ld,{
{
for(int pt=0;pt<pd;pt++){ for(int pt=0;pt<pd;pt++){
int t = lt + pt*ld; int t = lt + pt*ld;
if (pt == pc){ if (pt == pc){
@ -332,12 +329,12 @@ void A2Autils<FImpl>::PionFieldXX(Eigen::Tensor<ComplexD,3> &mat,
int MFlvol = ld*Lblock*Rblock; int MFlvol = ld*Lblock*Rblock;
Vector<vector_type > lvSum(MFrvol); Vector<vector_type > lvSum(MFrvol);
thread_loop( (int r = 0; r < MFrvol; r++),{ thread_for(r,MFrvol,{
lvSum[r] = Zero(); lvSum[r] = Zero();
}); });
Vector<scalar_type > lsSum(MFlvol); Vector<scalar_type > lsSum(MFlvol);
thread_loop( (int r = 0; r < MFlvol; r++),{ thread_for(r,MFlvol,{
lsSum[r]=scalar_type(0.0); lsSum[r]=scalar_type(0.0);
}); });
@ -345,7 +342,7 @@ void A2Autils<FImpl>::PionFieldXX(Eigen::Tensor<ComplexD,3> &mat,
int e2= grid->_slice_block [orthogdim]; int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim]; int stride=grid->_slice_stride[orthogdim];
thread_loop( (int r=0;r<rd;r++),{ thread_for(r,rd,{
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
@ -394,7 +391,7 @@ void A2Autils<FImpl>::PionFieldXX(Eigen::Tensor<ComplexD,3> &mat,
}); });
// Sum across simd lanes in the plane, breaking out orthog dir. // Sum across simd lanes in the plane, breaking out orthog dir.
thread_loop( (int rt=0;rt<rd;rt++),{ thread_for(rt,rd,{
Coordinate icoor(nd); Coordinate icoor(nd);
iScalar<vector_type> temp; iScalar<vector_type> temp;
@ -426,8 +423,7 @@ void A2Autils<FImpl>::PionFieldXX(Eigen::Tensor<ComplexD,3> &mat,
// ld loop and local only?? // ld loop and local only??
int pd = grid->_processors[orthogdim]; int pd = grid->_processors[orthogdim];
int pc = grid->_processor_coor[orthogdim]; int pc = grid->_processor_coor[orthogdim];
thread_loop_collapse(2,(int lt=0;lt<ld;lt++), thread_for_collapse(2,lt,ld,{
{
for(int pt=0;pt<pd;pt++){ for(int pt=0;pt<pd;pt++){
int t = lt + pt*ld; int t = lt + pt*ld;
if (pt == pc){ if (pt == pc){
@ -480,12 +476,12 @@ void A2Autils<FImpl>::PionFieldWVmom(Eigen::Tensor<ComplexD,4> &mat,
int MFlvol = ld*Lblock*Rblock*Nmom; int MFlvol = ld*Lblock*Rblock*Nmom;
Vector<vector_type > lvSum(MFrvol); Vector<vector_type > lvSum(MFrvol);
thread_loop( (int r = 0; r < MFrvol; r++),{ thread_for(r,MFrvol,{
lvSum[r] = Zero(); lvSum[r] = Zero();
}); });
Vector<scalar_type > lsSum(MFlvol); Vector<scalar_type > lsSum(MFlvol);
thread_loop( (int r = 0; r < MFlvol; r++),{ thread_for(r,MFlvol,{
lsSum[r]=scalar_type(0.0); lsSum[r]=scalar_type(0.0);
}); });
@ -493,7 +489,7 @@ void A2Autils<FImpl>::PionFieldWVmom(Eigen::Tensor<ComplexD,4> &mat,
int e2= grid->_slice_block [orthogdim]; int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim]; int stride=grid->_slice_stride[orthogdim];
thread_loop( (int r=0;r<rd;r++),{ thread_for(r,rd,{
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
@ -542,7 +538,7 @@ void A2Autils<FImpl>::PionFieldWVmom(Eigen::Tensor<ComplexD,4> &mat,
// Sum across simd lanes in the plane, breaking out orthog dir. // Sum across simd lanes in the plane, breaking out orthog dir.
thread_loop( (int rt=0;rt<rd;rt++),{ thread_for(rt,rd,{
Coordinate icoor(nd); Coordinate icoor(nd);
iScalar<vector_type> temp; iScalar<vector_type> temp;
@ -573,10 +569,10 @@ void A2Autils<FImpl>::PionFieldWVmom(Eigen::Tensor<ComplexD,4> &mat,
assert(mat.dimension(0) == Nmom); assert(mat.dimension(0) == Nmom);
assert(mat.dimension(1) == Nt); assert(mat.dimension(1) == Nt);
int pd = grid->_processors[orthogdim]; int pd = grid->_processors[orthogdim];
int pc = grid->_processor_coor[orthogdim]; int pc = grid->_processor_coor[orthogdim];
thread_loop_collapse(2,(int lt=0;lt<ld;lt++), { thread_for_collapse(2,lt,ld,{
for(int pt=0;pt<pd;pt++){ for(int pt=0;pt<pd;pt++){
int t = lt + pt*ld; int t = lt + pt*ld;
if (pt == pc){ if (pt == pc){
@ -687,13 +683,13 @@ void A2Autils<FImpl>::AslashField(TensorType &mat,
int MFlvol = ld*Lblock*Rblock*Nem; int MFlvol = ld*Lblock*Rblock*Nem;
Vector<vector_type> lvSum(MFrvol); Vector<vector_type> lvSum(MFrvol);
thread_loop( (int r = 0; r < MFrvol; r++), thread_for(r,MFrvol,
{ {
lvSum[r] = Zero(); lvSum[r] = Zero();
}); });
Vector<scalar_type> lsSum(MFlvol); Vector<scalar_type> lsSum(MFlvol);
thread_loop( (int r = 0; r < MFlvol; r++), thread_for(r,MFlvol,
{ {
lsSum[r] = scalar_type(0.0); lsSum[r] = scalar_type(0.0);
}); });
@ -705,7 +701,7 @@ void A2Autils<FImpl>::AslashField(TensorType &mat,
// Nested parallelism would be ok // Nested parallelism would be ok
// Wasting cores here. Test case r // Wasting cores here. Test case r
if (t_kernel) *t_kernel = -usecond(); if (t_kernel) *t_kernel = -usecond();
thread_loop( (int r=0;r<rd;r++), thread_for(r,rd,
{ {
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
@ -757,7 +753,7 @@ void A2Autils<FImpl>::AslashField(TensorType &mat,
}); });
// Sum across simd lanes in the plane, breaking out orthog dir. // Sum across simd lanes in the plane, breaking out orthog dir.
thread_loop( (int rt=0;rt<rd;rt++), thread_for(rt,rd,
{ {
Coordinate icoor(Nd); Coordinate icoor(Nd);
ExtractBuffer<scalar_type> extracted(Nsimd); ExtractBuffer<scalar_type> extracted(Nsimd);
@ -786,7 +782,7 @@ void A2Autils<FImpl>::AslashField(TensorType &mat,
// ld loop and local only?? // ld loop and local only??
int pd = grid->_processors[orthogdim]; int pd = grid->_processors[orthogdim];
int pc = grid->_processor_coor[orthogdim]; int pc = grid->_processor_coor[orthogdim];
thread_loop_collapse(2,(int lt=0;lt<ld;lt++), thread_for_collapse(2,lt,ld,
{ {
for(int pt=0;pt<pd;pt++) for(int pt=0;pt<pd;pt++)
{ {
@ -992,7 +988,7 @@ void A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
WWVV[t] = Zero(); WWVV[t] = Zero();
} }
thread_loop( (int ss=0;ss<grid->oSites();ss++),{ thread_for(ss,grid->oSites(),{
for(int d_o=0;d_o<N_d;d_o+=d_unroll){ for(int d_o=0;d_o<N_d;d_o+=d_unroll){
for(int t=0;t<N_t;t++){ for(int t=0;t<N_t;t++){
for(int s=0;s<N_s;s++){ for(int s=0;s<N_s;s++){
@ -1048,7 +1044,7 @@ void A2Autils<FImpl>::ContractFourQuarkColourDiagonal(const PropagatorField &WWV
auto WWVV1_v = WWVV1.View(); auto WWVV1_v = WWVV1.View();
auto O_trtr_v= O_trtr.View(); auto O_trtr_v= O_trtr.View();
auto O_fig8_v= O_fig8.View(); auto O_fig8_v= O_fig8.View();
thread_loop( (int ss=0;ss<grid->oSites();ss++),{ thread_for(ss,grid->oSites(),{
typedef typename ComplexField::vector_object vobj; typedef typename ComplexField::vector_object vobj;
@ -1093,7 +1089,7 @@ void A2Autils<FImpl>::ContractFourQuarkColourMix(const PropagatorField &WWVV0,
auto O_trtr_v= O_trtr.View(); auto O_trtr_v= O_trtr.View();
auto O_fig8_v= O_fig8.View(); auto O_fig8_v= O_fig8.View();
thread_loop( (int ss=0;ss<grid->oSites();ss++),{ thread_for(ss,grid->oSites(),{
typedef typename ComplexField::vector_object vobj; typedef typename ComplexField::vector_object vobj;
@ -1399,5 +1395,5 @@ void A2Autils<FImpl>::DeltaFeq2(int dt_min,int dt_max,
} }
#endif #endif
}} NAMESPACE_END(Grid);