mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Vectorise the XYZT face gathering better.
Hard coded for simd_layout <= 2 in any given spread out direction; full generality is inconsistent with efficiency.
This commit is contained in:
		@@ -113,8 +113,6 @@ public:
 | 
			
		||||
//  outerproduct, 
 | 
			
		||||
//  zeroit
 | 
			
		||||
//  permute
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class funcReduce {
 | 
			
		||||
public:
 | 
			
		||||
  funcReduce() {};
 | 
			
		||||
@@ -168,7 +166,7 @@ void Tester(const functor &func)
 | 
			
		||||
 | 
			
		||||
  int ok=0;
 | 
			
		||||
  for(int i=0;i<Nsimd;i++){
 | 
			
		||||
    if ( abs(reference[i]-result[i])>1.0e-7){
 | 
			
		||||
    if ( abs(reference[i]-result[i])>1.0e-6){
 | 
			
		||||
      std::cout<<GridLogMessage<< "*****" << std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage<< "["<<i<<"] "<< abs(reference[i]-result[i]) << " " <<reference[i]<< " " << result[i]<<std::endl;
 | 
			
		||||
      ok++;
 | 
			
		||||
@@ -245,6 +243,28 @@ public:
 | 
			
		||||
  }
 | 
			
		||||
  std::string name(void) const { return std::string("Permute"); }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Functor that pairs the vector exchange() primitive with a scalar reference
// implementation of the same data movement, for use by ExchangeTester.
class funcExchange {
public:
  // Exchange level. The scalar reference selects lanes on the single bit
  // (size >> (n+1)) of the lane index.
  int n;

  funcExchange(int _n) : n(_n) {}

  // Vector path: forwards to exchange(), which is declared elsewhere and found
  // by argument-dependent lookup on the vector type.
  template<class vec>
  void operator()(vec &r1,vec &r2,vec &i1,vec &i2) const { exchange(r1,r2,i1,i2,n); }

  // Scalar reference path.
  //
  //   r1  receives, in order, the elements of in1 and then of in2 whose index
  //       has the selector bit clear.
  //   r2  receives, in order, the elements of in1 and then of in2 whose index
  //       has the selector bit set.
  //
  // Preconditions (asserted): in1 and in2 have the same power-of-two length,
  // n selects a non-zero bit below that length, and r1/r2 can hold at least
  // in1.size() elements. Without them the loops below would write past the end
  // of r1 or r2. Empty input is a no-op.
  template<class scal>
  void apply(std::vector<scal> &r1,std::vector<scal> &r2,std::vector<scal> &in1,std::vector<scal> &in2) const {
    const int sz = static_cast<int>(in1.size());
    if ( sz == 0 ) return;

    // Shifting by a negative or too-large count is undefined behaviour.
    assert(n >= 0 && n+1 < static_cast<int>(sizeof(int)*8));
    const int msk = sz>>(n+1);

    assert(msk != 0);               // otherwise every element lands in r1
    assert((sz & (sz-1)) == 0);     // power of two => exactly sz/2 lanes match
    assert(in2.size() == in1.size());
    assert(r1.size() >= in1.size());
    assert(r2.size() >= in1.size());

    int j1=0;
    int j2=0;
    // in1 contributes first, then in2; each pass routes by the selector bit.
    for(int i=0;i<sz;i++){
      if ( (i&msk) == 0 ) r1[j1++] = in1[ i ];
      else                r2[j2++] = in1[ i ];
    }
    for(int i=0;i<sz;i++){
      if ( (i&msk) == 0 ) r1[j1++] = in2[ i ];
      else                r2[j2++] = in2[ i ];
    }
  }

  std::string name(void) const { return std::string("Exchange"); }
};
 | 
			
		||||
 | 
			
		||||
class funcRotate {
 | 
			
		||||
public:
 | 
			
		||||
  int n;
 | 
			
		||||
@@ -325,6 +345,87 @@ void PermTester(const functor &func)
 | 
			
		||||
  assert(ok==0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class scal, class vec,class functor > 
 | 
			
		||||
void ExchangeTester(const functor &func)
 | 
			
		||||
{
 | 
			
		||||
  GridSerialRNG          sRNG;
 | 
			
		||||
  sRNG.SeedRandomDevice();
 | 
			
		||||
  
 | 
			
		||||
  int Nsimd = vec::Nsimd();
 | 
			
		||||
 | 
			
		||||
  std::vector<scal> input1(Nsimd);
 | 
			
		||||
  std::vector<scal> input2(Nsimd);
 | 
			
		||||
  std::vector<scal> result1(Nsimd);
 | 
			
		||||
  std::vector<scal> result2(Nsimd);
 | 
			
		||||
  std::vector<scal> reference1(Nsimd);
 | 
			
		||||
  std::vector<scal> reference2(Nsimd);
 | 
			
		||||
  std::vector<scal> test1(Nsimd);
 | 
			
		||||
  std::vector<scal> test2(Nsimd);
 | 
			
		||||
 | 
			
		||||
  std::vector<vec,alignedAllocator<vec> > buf(6);
 | 
			
		||||
  vec & v_input1 = buf[0];
 | 
			
		||||
  vec & v_input2 = buf[1];
 | 
			
		||||
  vec & v_result1 = buf[2];
 | 
			
		||||
  vec & v_result2 = buf[3];
 | 
			
		||||
  vec & v_test1 = buf[4];
 | 
			
		||||
  vec & v_test2 = buf[5];
 | 
			
		||||
 | 
			
		||||
  for(int i=0;i<Nsimd;i++){
 | 
			
		||||
    random(sRNG,input1[i]);
 | 
			
		||||
    random(sRNG,input2[i]);
 | 
			
		||||
    random(sRNG,result1[i]);
 | 
			
		||||
    random(sRNG,result2[i]);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  merge<vec,scal>(v_input1,input1);
 | 
			
		||||
  merge<vec,scal>(v_input2,input2);
 | 
			
		||||
  merge<vec,scal>(v_result1,result1);
 | 
			
		||||
  merge<vec,scal>(v_result2,result1);
 | 
			
		||||
 | 
			
		||||
  func(v_result1,v_result2,v_input1,v_input2);
 | 
			
		||||
  func.apply(reference1,reference2,input1,input2);
 | 
			
		||||
 | 
			
		||||
  func(v_test1,v_test2,v_result1,v_result2);
 | 
			
		||||
 | 
			
		||||
  extract<vec,scal>(v_result1,result1);
 | 
			
		||||
  extract<vec,scal>(v_result2,result2);
 | 
			
		||||
  extract<vec,scal>(v_test1,test1);
 | 
			
		||||
  extract<vec,scal>(v_test2,test2);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << " " << func.name() << " " <<func.n <<std::endl;
 | 
			
		||||
 | 
			
		||||
  //  for(int i=0;i<Nsimd;i++) std::cout << " i "<<i<<" "<<reference1[i]<<" "<<result1[i]<<std::endl;
 | 
			
		||||
  //  for(int i=0;i<Nsimd;i++) std::cout << " i "<<i<<" "<<reference2[i]<<" "<<result2[i]<<std::endl;
 | 
			
		||||
 | 
			
		||||
  for(int i=0;i<Nsimd;i++){
 | 
			
		||||
    int found=0;
 | 
			
		||||
    for(int j=0;j<Nsimd;j++){
 | 
			
		||||
      if(reference1[j]==result1[i]) {
 | 
			
		||||
	found=1;
 | 
			
		||||
	//	std::cout << " i "<<i<<" j "<<j<<" "<<reference1[j]<<" "<<result1[i]<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    assert(found==1);
 | 
			
		||||
  }
 | 
			
		||||
  for(int i=0;i<Nsimd;i++){
 | 
			
		||||
    int found=0;
 | 
			
		||||
    for(int j=0;j<Nsimd;j++){
 | 
			
		||||
      if(reference2[j]==result2[i]) {
 | 
			
		||||
	found=1;
 | 
			
		||||
	//	std::cout << " i "<<i<<" j "<<j<<" "<<reference2[j]<<" "<<result2[i]<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    assert(found==1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //  for(int i=0;i<Nsimd;i++){
 | 
			
		||||
    //    std::cout << " i "<< i<<" test1"<<test1[i]<<" "<<input1[i]<<std::endl;
 | 
			
		||||
    //    std::cout << " i "<< i<<" test2"<<test2[i]<<" "<<input2[i]<<std::endl;
 | 
			
		||||
  //  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
@@ -363,6 +464,15 @@ int main (int argc, char ** argv)
 | 
			
		||||
    PermTester<RealF,vRealF>(funcPermute(i));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Testing vRealF exchanges "<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
 | 
			
		||||
  // Log2 iteration
 | 
			
		||||
  for(int i=0;(1<<i)< vRealF::Nsimd();i++){
 | 
			
		||||
    ExchangeTester<RealF,vRealF>(funcExchange(i));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Testing vRealF rotate "<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
@@ -394,6 +504,14 @@ int main (int argc, char ** argv)
 | 
			
		||||
    PermTester<RealD,vRealD>(funcPermute(i));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Testing vRealD exchanges "<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
  // Log2 iteration
 | 
			
		||||
  for(int i=0;(1<<i)< vRealD::Nsimd();i++){
 | 
			
		||||
    ExchangeTester<RealD,vRealD>(funcExchange(i));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Testing vRealD rotate "<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
@@ -429,6 +547,16 @@ int main (int argc, char ** argv)
 | 
			
		||||
    PermTester<ComplexF,vComplexF>(funcPermute(i));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Testing vComplexF exchanges "<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
  // Log2 iteration
 | 
			
		||||
  for(int i=0;(1<<i)< vComplexF::Nsimd();i++){
 | 
			
		||||
    ExchangeTester<ComplexF,vComplexF>(funcExchange(i));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Testing vComplexF rotate "<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
@@ -466,6 +594,15 @@ int main (int argc, char ** argv)
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Testing vComplexD exchanges "<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
  // Log2 iteration
 | 
			
		||||
  for(int i=0;(1<<i)< vComplexD::Nsimd();i++){
 | 
			
		||||
    ExchangeTester<ComplexD,vComplexD>(funcExchange(i));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Testing vComplexD rotate "<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "==================================="<<  std::endl;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user