1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-17 07:17:06 +01:00

Vectorise the XYZT face gathering better.

Hard-coded for simd_layout <= 2 in any given spread-out direction; full generality is inconsistent
with efficiency.
This commit is contained in:
paboyle
2017-02-15 11:11:04 +00:00
parent aca7a3ef0a
commit bd600702cf
10 changed files with 510 additions and 34 deletions

View File

@ -113,8 +113,6 @@ public:
// outerproduct,
// zeroit
// permute
class funcReduce {
public:
funcReduce() {};
@ -168,7 +166,7 @@ void Tester(const functor &func)
int ok=0;
for(int i=0;i<Nsimd;i++){
if ( abs(reference[i]-result[i])>1.0e-7){
if ( abs(reference[i]-result[i])>1.0e-6){
std::cout<<GridLogMessage<< "*****" << std::endl;
std::cout<<GridLogMessage<< "["<<i<<"] "<< abs(reference[i]-result[i]) << " " <<reference[i]<< " " << result[i]<<std::endl;
ok++;
@ -245,6 +243,28 @@ public:
}
std::string name(void) const { return std::string("Permute"); }
};
// Functor that pairs the vectorised exchange() call with a scalar reference
// model of the same operation, so that a tester can compare the two.
class funcExchange {
public:
  // Exchange level: passed straight to exchange() and used by apply() to
  // derive the index mask.
  int n;

  funcExchange(int _n) : n(_n) {}

  // Vector path: forwards to the free function exchange(), which is looked up
  // when the template is instantiated for a concrete vec type.
  template<class vec>
  void operator()(vec &r1, vec &r2, vec &i1, vec &i2) const
  {
    exchange(r1, r2, i1, i2, n);
  }

  // Scalar reference path.
  // With mask = in1.size() >> (n+1):
  //   r1 <- entries of in1 whose index has the mask bit clear, then those of in2
  //   r2 <- entries of in1 whose index has the mask bit set,   then those of in2
  // Entries keep their relative order within each source.
  template<class scal>
  void apply(std::vector<scal> &r1, std::vector<scal> &r2,
             std::vector<scal> &in1, std::vector<scal> &in2) const
  {
    const int count = in1.size();
    const int mask  = count >> (n + 1);

    // Sources are visited in this fixed order for both outputs.
    std::vector<scal> *sources[2] = { &in1, &in2 };

    // First fill r1 completely (mask bit clear) ...
    int w1 = 0;
    for (int s = 0; s < 2; ++s) {
      for (int idx = 0; idx < count; ++idx) {
        if (!(idx & mask)) r1[w1++] = (*sources[s])[idx];
      }
    }

    // ... then r2 (mask bit set).
    int w2 = 0;
    for (int s = 0; s < 2; ++s) {
      for (int idx = 0; idx < count; ++idx) {
        if (idx & mask) r2[w2++] = (*sources[s])[idx];
      }
    }
  }

  // Label used in the test log.
  std::string name(void) const { return "Exchange"; }
};
class funcRotate {
public:
int n;
@ -325,6 +345,87 @@ void PermTester(const functor &func)
assert(ok==0);
}
// Exercises an exchange-style functor on randomly filled data and checks the
// vector result against the functor's own scalar reference model.
//
// Template parameters:
//   scal    - scalar element type
//   vec     - vector type; vec::Nsimd() gives the number of scalar entries
//   functor - must provide operator()(vec&,vec&,vec&,vec&) const,
//             apply(std::vector<scal>& x4) const, name() and a member n
//
// The check is a membership test only: every entry of each vector result must
// occur somewhere in the corresponding reference array. It does not verify the
// position of the entries. Failures are reported through assert().
template<class scal, class vec,class functor >
void ExchangeTester(const functor &func)
{
  GridSerialRNG sRNG;
  sRNG.SeedRandomDevice();

  int Nsimd = vec::Nsimd();

  // Scalar-side buffers, one entry per SIMD lane.
  std::vector<scal> input1(Nsimd);
  std::vector<scal> input2(Nsimd);
  std::vector<scal> result1(Nsimd);
  std::vector<scal> result2(Nsimd);
  std::vector<scal> reference1(Nsimd);
  std::vector<scal> reference2(Nsimd);
  std::vector<scal> test1(Nsimd);
  std::vector<scal> test2(Nsimd);

  // Vector-side objects are kept in a vector using alignedAllocator.
  std::vector<vec,alignedAllocator<vec> > buf(6);
  vec & v_input1  = buf[0];
  vec & v_input2  = buf[1];
  vec & v_result1 = buf[2];
  vec & v_result2 = buf[3];
  vec & v_test1   = buf[4];
  vec & v_test2   = buf[5];

  // Random fill of the inputs; the result arrays are randomised as well so the
  // outputs start from arbitrary data before func overwrites them.
  for(int i=0;i<Nsimd;i++){
    random(sRNG,input1[i]);
    random(sRNG,input2[i]);
    random(sRNG,result1[i]);
    random(sRNG,result2[i]);
  }

  merge<vec,scal>(v_input1,input1);
  merge<vec,scal>(v_input2,input2);
  merge<vec,scal>(v_result1,result1);
  // Fixed: this previously loaded result1 a second time (copy-paste slip), so
  // v_result2 never received its own random fill.
  merge<vec,scal>(v_result2,result2);

  // Vector path and scalar reference path on the same inputs.
  func(v_result1,v_result2,v_input1,v_input2);
  func.apply(reference1,reference2,input1,input2);

  // Second application, fed with the outputs of the first.
  // NOTE(review): test1/test2 are extracted below but never asserted on; they
  // only feed the commented-out debug print at the end of this function.
  func(v_test1,v_test2,v_result1,v_result2);

  extract<vec,scal>(v_result1,result1);
  extract<vec,scal>(v_result2,result2);
  extract<vec,scal>(v_test1,test1);
  extract<vec,scal>(v_test2,test2);

  std::cout<<GridLogMessage << " " << func.name() << " " <<func.n <<std::endl;

  // for(int i=0;i<Nsimd;i++) std::cout << " i "<<i<<" "<<reference1[i]<<" "<<result1[i]<<std::endl;
  // for(int i=0;i<Nsimd;i++) std::cout << " i "<<i<<" "<<reference2[i]<<" "<<result2[i]<<std::endl;

  // Every entry of the first vector result must appear in reference1.
  for(int i=0;i<Nsimd;i++){
    int found=0;
    for(int j=0;j<Nsimd;j++){
      if(reference1[j]==result1[i]) {
        found=1;
        // std::cout << " i "<<i<<" j "<<j<<" "<<reference1[j]<<" "<<result1[i]<<std::endl;
      }
    }
    assert(found==1);
  }

  // Every entry of the second vector result must appear in reference2.
  for(int i=0;i<Nsimd;i++){
    int found=0;
    for(int j=0;j<Nsimd;j++){
      if(reference2[j]==result2[i]) {
        found=1;
        // std::cout << " i "<<i<<" j "<<j<<" "<<reference2[j]<<" "<<result2[i]<<std::endl;
      }
    }
    assert(found==1);
  }

  // for(int i=0;i<Nsimd;i++){
  // std::cout << " i "<< i<<" test1"<<test1[i]<<" "<<input1[i]<<std::endl;
  // std::cout << " i "<< i<<" test2"<<test2[i]<<" "<<input2[i]<<std::endl;
  // }
}
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
@ -363,6 +464,15 @@ int main (int argc, char ** argv)
PermTester<RealF,vRealF>(funcPermute(i));
}
std::cout<<GridLogMessage << "==================================="<< std::endl;
std::cout<<GridLogMessage << "Testing vRealF exchanges "<<std::endl;
std::cout<<GridLogMessage << "==================================="<< std::endl;
// Log2 iteration
for(int i=0;(1<<i)< vRealF::Nsimd();i++){
ExchangeTester<RealF,vRealF>(funcExchange(i));
}
std::cout<<GridLogMessage << "==================================="<< std::endl;
std::cout<<GridLogMessage << "Testing vRealF rotate "<<std::endl;
std::cout<<GridLogMessage << "==================================="<< std::endl;
@ -394,6 +504,14 @@ int main (int argc, char ** argv)
PermTester<RealD,vRealD>(funcPermute(i));
}
std::cout<<GridLogMessage << "==================================="<< std::endl;
std::cout<<GridLogMessage << "Testing vRealD exchanges "<<std::endl;
std::cout<<GridLogMessage << "==================================="<< std::endl;
// Log2 iteration
for(int i=0;(1<<i)< vRealD::Nsimd();i++){
ExchangeTester<RealD,vRealD>(funcExchange(i));
}
std::cout<<GridLogMessage << "==================================="<< std::endl;
std::cout<<GridLogMessage << "Testing vRealD rotate "<<std::endl;
std::cout<<GridLogMessage << "==================================="<< std::endl;
@ -429,6 +547,16 @@ int main (int argc, char ** argv)
PermTester<ComplexF,vComplexF>(funcPermute(i));
}
std::cout<<GridLogMessage << "==================================="<< std::endl;
std::cout<<GridLogMessage << "Testing vComplexF exchanges "<<std::endl;
std::cout<<GridLogMessage << "==================================="<< std::endl;
// Log2 iteration
for(int i=0;(1<<i)< vComplexF::Nsimd();i++){
ExchangeTester<ComplexF,vComplexF>(funcExchange(i));
}
std::cout<<GridLogMessage << "==================================="<< std::endl;
std::cout<<GridLogMessage << "Testing vComplexF rotate "<<std::endl;
std::cout<<GridLogMessage << "==================================="<< std::endl;
@ -466,6 +594,15 @@ int main (int argc, char ** argv)
}
std::cout<<GridLogMessage << "==================================="<< std::endl;
std::cout<<GridLogMessage << "Testing vComplexD exchanges "<<std::endl;
std::cout<<GridLogMessage << "==================================="<< std::endl;
// Log2 iteration
for(int i=0;(1<<i)< vComplexD::Nsimd();i++){
ExchangeTester<ComplexD,vComplexD>(funcExchange(i));
}
std::cout<<GridLogMessage << "==================================="<< std::endl;
std::cout<<GridLogMessage << "Testing vComplexD rotate "<<std::endl;
std::cout<<GridLogMessage << "==================================="<< std::endl;