mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Changed if and accelerator_for: the operator check now wraps the accelerator_for loop instead of sitting inside it - no more runtime errors
This commit is contained in:
		@@ -513,19 +513,18 @@ void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  GridBase *grid = q1_left.Grid();
 | 
					  GridBase *grid = q1_left.Grid();
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  autoView( vbaryon_corr , baryon_corr , AcceleratorWrite);
 | 
					  autoView(vbaryon_corr , baryon_corr , AcceleratorWrite);
 | 
				
			||||||
  autoView( vcorr_read   , baryon_corr , AcceleratorRead);
 | 
					  autoView( v1          , q1_left     , AcceleratorRead);
 | 
				
			||||||
  autoView( v1           , q1_left     , AcceleratorRead);
 | 
					  autoView( v2          , q2_left     , AcceleratorRead);
 | 
				
			||||||
  autoView( v2           , q2_left     , AcceleratorRead);
 | 
					  autoView( v3          , q3_left     , AcceleratorRead);
 | 
				
			||||||
  autoView( v3           , q3_left     , AcceleratorRead);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Real bytes =0.;
 | 
					  Real bytes =0.;
 | 
				
			||||||
  bytes += grid->oSites() * (432.*sizeof(vComplex) + 126.*sizeof(int) + 36.*sizeof(Real));
 | 
					  bytes += grid->oSites() * (432.*sizeof(vComplex) + 126.*sizeof(int) + 36.*sizeof(Real));
 | 
				
			||||||
  for (int ie=0; ie < 6 ; ie++){
 | 
					  for (int ie=0; ie < 6 ; ie++){
 | 
				
			||||||
    if(ie==0 or ie==3){
 | 
					    if(ie==0 or ie==3){
 | 
				
			||||||
       //bytes += grid->oSites() * (4.*sizeof(int) + 4752.*sizeof(vComplex)) * wick_contractions[ie];
 | 
					       bytes += ( wick_contractions & (1 << ie) ) ? grid->oSites() * (4.*sizeof(int) + 4752.*sizeof(vComplex)) : 0.;
 | 
				
			||||||
    } else{
 | 
					    } else{
 | 
				
			||||||
       //bytes += grid->oSites() * (64.*sizeof(int) + 5184.*sizeof(vComplex)) * wick_contractions[ie];
 | 
					       bytes += ( wick_contractions & (1 << ie) ) ? grid->oSites() * (64.*sizeof(int) + 5184.*sizeof(vComplex)) : 0.;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  Real t=0.;
 | 
					  Real t=0.;
 | 
				
			||||||
@@ -535,8 +534,7 @@ void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
 | 
				
			|||||||
    auto D1 = v1(ss);
 | 
					    auto D1 = v1(ss);
 | 
				
			||||||
    auto D2 = v2(ss);
 | 
					    auto D2 = v2(ss);
 | 
				
			||||||
    auto D3 = v3(ss);
 | 
					    auto D3 = v3(ss);
 | 
				
			||||||
    //typedef decltype(coalescedRead(vbaryon_corr[0])) cVec;
 | 
					    typedef decltype(coalescedRead(vbaryon_corr[0])) cVec;
 | 
				
			||||||
    typedef decltype(coalescedRead(vcorr_read[0])) cVec;
 | 
					 | 
				
			||||||
    cVec result=Zero();
 | 
					    cVec result=Zero();
 | 
				
			||||||
    BaryonSite(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contractions,result);
 | 
					    BaryonSite(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contractions,result);
 | 
				
			||||||
    coalescedWrite(vbaryon_corr[ss],result);
 | 
					    coalescedWrite(vbaryon_corr[ss],result);
 | 
				
			||||||
@@ -564,18 +562,16 @@ void BaryonUtils<FImpl>::ContractBaryonsMatrix(const PropagatorField &q1_left,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  GridBase *grid = q1_left.Grid();
 | 
					  GridBase *grid = q1_left.Grid();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  autoView( vbaryon_corr , baryon_corr , AcceleratorWrite);
 | 
					  autoView(vbaryon_corr , baryon_corr , AcceleratorWrite);
 | 
				
			||||||
  autoView( vcorr_read   , baryon_corr , AcceleratorRead);
 | 
					  autoView( v1          , q1_left     , AcceleratorRead);
 | 
				
			||||||
  autoView( v1           , q1_left     , AcceleratorRead);
 | 
					  autoView( v2          , q2_left     , AcceleratorRead);
 | 
				
			||||||
  autoView( v2           , q2_left     , AcceleratorRead);
 | 
					  autoView( v3          , q3_left     , AcceleratorRead);
 | 
				
			||||||
  autoView( v3           , q3_left     , AcceleratorRead);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
 | 
					  accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
 | 
				
			||||||
    auto D1 = v1(ss);
 | 
					    auto D1 = v1(ss);
 | 
				
			||||||
    auto D2 = v2(ss);
 | 
					    auto D2 = v2(ss);
 | 
				
			||||||
    auto D3 = v3(ss);
 | 
					    auto D3 = v3(ss);
 | 
				
			||||||
    //typedef decltype(coalescedRead(vbaryon_corr[0])) spinor;
 | 
					    typedef decltype(coalescedRead(vbaryon_corr[0])) spinor;
 | 
				
			||||||
    typedef decltype(coalescedRead(vcorr_read[0])) spinor;
 | 
					 | 
				
			||||||
    spinor result=Zero();
 | 
					    spinor result=Zero();
 | 
				
			||||||
    BaryonSiteMatrix(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,wick_contractions,result);
 | 
					    BaryonSiteMatrix(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,wick_contractions,result);
 | 
				
			||||||
    coalescedWrite(vbaryon_corr[ss],result);
 | 
					    coalescedWrite(vbaryon_corr[ss],result);
 | 
				
			||||||
@@ -941,10 +937,9 @@ void BaryonUtils<FImpl>::BaryonGamma3pt(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  GridBase *grid = q_tf.Grid();
 | 
					  GridBase *grid = q_tf.Grid();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  autoView( vcorr      , stn_corr , AcceleratorWrite);
 | 
					  autoView( vcorr , stn_corr , AcceleratorWrite);
 | 
				
			||||||
  autoView( vcorr_read , stn_corr , AcceleratorRead);
 | 
					  autoView( vq_ti , q_ti     , AcceleratorRead);
 | 
				
			||||||
  autoView( vq_ti      , q_ti     , AcceleratorRead);
 | 
					  autoView( vq_tf , q_tf     , AcceleratorRead);
 | 
				
			||||||
  autoView( vq_tf      , q_tf     , AcceleratorRead);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Vector<mobj> my_Dq_spec{Dq_spec1,Dq_spec2};
 | 
					  Vector<mobj> my_Dq_spec{Dq_spec1,Dq_spec2};
 | 
				
			||||||
  mobj * Dq_spec_p = &my_Dq_spec[0];
 | 
					  mobj * Dq_spec_p = &my_Dq_spec[0];
 | 
				
			||||||
@@ -953,29 +948,28 @@ void BaryonUtils<FImpl>::BaryonGamma3pt(
 | 
				
			|||||||
    accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
 | 
					    accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
 | 
				
			||||||
      auto Dq_ti = vq_ti(ss);
 | 
					      auto Dq_ti = vq_ti(ss);
 | 
				
			||||||
      auto Dq_tf = vq_tf(ss);
 | 
					      auto Dq_tf = vq_tf(ss);
 | 
				
			||||||
      typedef decltype(coalescedRead(vcorr_read[0])) spinor;
 | 
					      typedef decltype(coalescedRead(vcorr[0])) spinor;
 | 
				
			||||||
      spinor result=Zero();
 | 
					      spinor result=Zero();
 | 
				
			||||||
      BaryonGamma3ptGroup1Site(Dq_ti,Dq_spec_p[0],Dq_spec_p[1],Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
 | 
					      BaryonGamma3ptGroup1Site(Dq_ti,Dq_spec_p[0],Dq_spec_p[1],Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
 | 
				
			||||||
      coalescedWrite(vcorr[ss],coalescedRead(vcorr_read[ss])+result); 
 | 
					      coalescedWrite(vcorr[ss],coalescedRead(vcorr[ss])+result); 
 | 
				
			||||||
    });//end loop over lattice sites
 | 
					    });//end loop over lattice sites
 | 
				
			||||||
 | 
					 | 
				
			||||||
  } else if (group == 2) {
 | 
					  } else if (group == 2) {
 | 
				
			||||||
    accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
 | 
					    accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
 | 
				
			||||||
      auto Dq_ti = vq_ti(ss);
 | 
					      auto Dq_ti = vq_ti(ss);
 | 
				
			||||||
      auto Dq_tf = vq_tf(ss);
 | 
					      auto Dq_tf = vq_tf(ss);
 | 
				
			||||||
      typedef decltype(coalescedRead(vcorr_read[0])) spinor;
 | 
					      typedef decltype(coalescedRead(vcorr[0])) spinor;
 | 
				
			||||||
      spinor result=Zero();
 | 
					      spinor result=Zero();
 | 
				
			||||||
      BaryonGamma3ptGroup2Site(Dq_spec_p[0],Dq_ti,Dq_spec_p[1],Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); 
 | 
					      BaryonGamma3ptGroup2Site(Dq_spec_p[0],Dq_ti,Dq_spec_p[1],Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); 
 | 
				
			||||||
      coalescedWrite(vcorr[ss],coalescedRead(vcorr_read[ss])+result); 
 | 
					      coalescedWrite(vcorr[ss],coalescedRead(vcorr[ss])+result); 
 | 
				
			||||||
    });//end loop over lattice sites
 | 
					    });//end loop over lattice sites
 | 
				
			||||||
  } else if (group == 3) {
 | 
					  } else if (group == 3) {
 | 
				
			||||||
    accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
 | 
					    accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
 | 
				
			||||||
      auto Dq_ti = vq_ti(ss);
 | 
					      auto Dq_ti = vq_ti(ss);
 | 
				
			||||||
      auto Dq_tf = vq_tf(ss);
 | 
					      auto Dq_tf = vq_tf(ss);
 | 
				
			||||||
      typedef decltype(coalescedRead(vcorr_read[0])) spinor;
 | 
					      typedef decltype(coalescedRead(vcorr[0])) spinor;
 | 
				
			||||||
      spinor result=Zero();
 | 
					      spinor result=Zero();
 | 
				
			||||||
      BaryonGamma3ptGroup3Site(Dq_spec_p[0],Dq_spec_p[1],Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); 
 | 
					      BaryonGamma3ptGroup3Site(Dq_spec_p[0],Dq_spec_p[1],Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); 
 | 
				
			||||||
      coalescedWrite(vcorr[ss],coalescedRead(vcorr_read[ss])+result); 
 | 
					      coalescedWrite(vcorr[ss],coalescedRead(vcorr[ss])+result); 
 | 
				
			||||||
    });//end loop over lattice sites
 | 
					    });//end loop over lattice sites
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -1206,6 +1200,7 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ2NonEyeSite(const mobj &Du_ti,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  Real ee;
 | 
					  Real ee;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  for (int ie_n=0; ie_n < 6 ; ie_n++){
 | 
					  for (int ie_n=0; ie_n < 6 ; ie_n++){
 | 
				
			||||||
    int a_n    = (ie_n < 3 ? ie_n       : (6-ie_n)%3 ); //epsilon[ie_n][0]; //a
 | 
					    int a_n    = (ie_n < 3 ? ie_n       : (6-ie_n)%3 ); //epsilon[ie_n][0]; //a
 | 
				
			||||||
    int b_n    = (ie_n < 3 ? (ie_n+1)%3 : (8-ie_n)%3 ); //epsilon[ie_n][1]; //b
 | 
					    int b_n    = (ie_n < 3 ? (ie_n+1)%3 : (8-ie_n)%3 ); //epsilon[ie_n][1]; //b
 | 
				
			||||||
@@ -1250,6 +1245,7 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ2NonEyeSite(const mobj &Du_ti,
 | 
				
			|||||||
      }}
 | 
					      }}
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class FImpl>
 | 
					template<class FImpl>
 | 
				
			||||||
@@ -1275,27 +1271,32 @@ void BaryonUtils<FImpl>::SigmaToNucleonEye(const PropagatorField &qq_loop,
 | 
				
			|||||||
  autoView( vd_tf   , qd_tf    , AcceleratorRead);
 | 
					  autoView( vd_tf   , qd_tf    , AcceleratorRead);
 | 
				
			||||||
  autoView( vs_ti   , qs_ti    , AcceleratorRead);
 | 
					  autoView( vs_ti   , qs_ti    , AcceleratorRead);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  bool doQ1 = (op == "Q1");
 | 
					 | 
				
			||||||
  bool doQ2 = (op == "Q2");
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  Vector<mobj> my_Dq_spec{Du_spec};
 | 
					  Vector<mobj> my_Dq_spec{Du_spec};
 | 
				
			||||||
  mobj * Dq_spec_p = &my_Dq_spec[0];
 | 
					  mobj * Dq_spec_p = &my_Dq_spec[0];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
 | 
					  if(op == "Q1"){
 | 
				
			||||||
    auto Dq_loop = vq_loop(ss);
 | 
					    accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
 | 
				
			||||||
    auto Dd_tf = vd_tf(ss);
 | 
					      auto Dq_loop = vq_loop(ss);
 | 
				
			||||||
    auto Ds_ti = vs_ti(ss);
 | 
					      auto Dd_tf = vd_tf(ss);
 | 
				
			||||||
    typedef decltype(coalescedRead(vcorr[0])) spinor;
 | 
					      auto Ds_ti = vs_ti(ss);
 | 
				
			||||||
    spinor result=Zero();
 | 
					      typedef decltype(coalescedRead(vcorr[0])) spinor;
 | 
				
			||||||
    if(doQ1){
 | 
					      spinor result=Zero();
 | 
				
			||||||
      SigmaToNucleonQ1EyeSite(Dq_loop,Dq_spec_p[0],Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
 | 
					      SigmaToNucleonQ1EyeSite(Dq_loop,Dq_spec_p[0],Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
 | 
				
			||||||
    } else if(doQ2){
 | 
					      coalescedWrite(vcorr[ss],result);
 | 
				
			||||||
 | 
					    });//end loop over lattice sites
 | 
				
			||||||
 | 
					  } else if(op == "Q2"){
 | 
				
			||||||
 | 
					    accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
 | 
				
			||||||
 | 
					      auto Dq_loop = vq_loop(ss);
 | 
				
			||||||
 | 
					      auto Dd_tf = vd_tf(ss);
 | 
				
			||||||
 | 
					      auto Ds_ti = vs_ti(ss);
 | 
				
			||||||
 | 
					      typedef decltype(coalescedRead(vcorr[0])) spinor;
 | 
				
			||||||
 | 
					      spinor result=Zero();
 | 
				
			||||||
      SigmaToNucleonQ2EyeSite(Dq_loop,Dq_spec_p[0],Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
 | 
					      SigmaToNucleonQ2EyeSite(Dq_loop,Dq_spec_p[0],Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
 | 
				
			||||||
    } else {
 | 
					      coalescedWrite(vcorr[ss],result);
 | 
				
			||||||
      assert(0 && "Weak Operator not correctly specified");
 | 
					    });//end loop over lattice sites
 | 
				
			||||||
    }
 | 
					  } else {
 | 
				
			||||||
    coalescedWrite(vcorr[ss],result);
 | 
					    assert(0 && "Weak Operator not correctly specified");
 | 
				
			||||||
  });//end loop over lattice sites
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class FImpl>
 | 
					template<class FImpl>
 | 
				
			||||||
@@ -1323,28 +1324,34 @@ void BaryonUtils<FImpl>::SigmaToNucleonNonEye(const PropagatorField &qq_ti,
 | 
				
			|||||||
  autoView( vd_tf , qd_tf    , AcceleratorRead  );
 | 
					  autoView( vd_tf , qd_tf    , AcceleratorRead  );
 | 
				
			||||||
  autoView( vs_ti , qs_ti    , AcceleratorRead  );
 | 
					  autoView( vs_ti , qs_ti    , AcceleratorRead  );
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  bool doQ1 = (op == "Q1");
 | 
					 | 
				
			||||||
  bool doQ2 = (op == "Q2");
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  Vector<mobj> my_Dq_spec{Du_spec};
 | 
					  Vector<mobj> my_Dq_spec{Du_spec};
 | 
				
			||||||
  mobj * Dq_spec_p = &my_Dq_spec[0];
 | 
					  mobj * Dq_spec_p = &my_Dq_spec[0];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
 | 
					  if(op == "Q1"){
 | 
				
			||||||
    auto Dq_ti = vq_ti(ss);
 | 
					    accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
 | 
				
			||||||
    auto Dq_tf = vq_tf(ss);
 | 
					      auto Dq_ti = vq_ti(ss);
 | 
				
			||||||
    auto Dd_tf = vd_tf(ss);
 | 
					      auto Dq_tf = vq_tf(ss);
 | 
				
			||||||
    auto Ds_ti = vs_ti(ss);
 | 
					      auto Dd_tf = vd_tf(ss);
 | 
				
			||||||
    typedef decltype(coalescedRead(vcorr[0])) spinor;
 | 
					      auto Ds_ti = vs_ti(ss);
 | 
				
			||||||
    spinor result=Zero();
 | 
					      typedef decltype(coalescedRead(vcorr[0])) spinor;
 | 
				
			||||||
    if(doQ1){
 | 
					      spinor result=Zero();
 | 
				
			||||||
      SigmaToNucleonQ1NonEyeSite(Dq_ti,Dq_tf,Dq_spec_p[0],Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
 | 
					      SigmaToNucleonQ1NonEyeSite(Dq_ti,Dq_tf,Dq_spec_p[0],Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
 | 
				
			||||||
    } else if(doQ2){
 | 
					      coalescedWrite(vcorr[ss],result);
 | 
				
			||||||
 | 
					    });//end loop over lattice sites
 | 
				
			||||||
 | 
					  } else if(op == "Q2"){
 | 
				
			||||||
 | 
					    accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
 | 
				
			||||||
 | 
					      auto Dq_ti = vq_ti(ss);
 | 
				
			||||||
 | 
					      auto Dq_tf = vq_tf(ss);
 | 
				
			||||||
 | 
					      auto Dd_tf = vd_tf(ss);
 | 
				
			||||||
 | 
					      auto Ds_ti = vs_ti(ss);
 | 
				
			||||||
 | 
					      typedef decltype(coalescedRead(vcorr[0])) spinor;
 | 
				
			||||||
 | 
					      spinor result=Zero();
 | 
				
			||||||
      SigmaToNucleonQ2NonEyeSite(Dq_ti,Dq_tf,Dq_spec_p[0],Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
 | 
					      SigmaToNucleonQ2NonEyeSite(Dq_ti,Dq_tf,Dq_spec_p[0],Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
 | 
				
			||||||
    } else {
 | 
					      coalescedWrite(vcorr[ss],result);
 | 
				
			||||||
      assert(0 && "Weak Operator not correctly specified");
 | 
					    });//end loop over lattice sites
 | 
				
			||||||
    }
 | 
					  } else {
 | 
				
			||||||
    coalescedWrite(vcorr[ss],result);
 | 
					    assert(0 && "Weak Operator not correctly specified");
 | 
				
			||||||
  });//end loop over lattice sites
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user