1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

introduce AddTimesI and SubTimesI; slight benefit in operators, but < 1%; breaks all other impls

This commit is contained in:
nmeyer-ur 2020-06-03 15:20:13 +02:00
parent 5ee3ea2144
commit 9872c76825
4 changed files with 256 additions and 78 deletions

View File

@ -1,6 +1,6 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/spin/TwoSpinor.h
@ -33,7 +33,7 @@ NAMESPACE_BEGIN(Grid);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Normalisation alert; the g5 project is 1/2(1+-G5)
// Normalisation alert; the g5 project is 1/2(1+-G5)
// the xyzt projects are (1+-Gxyzt)
//
// * xyzt project
@ -59,7 +59,7 @@ NAMESPACE_BEGIN(Grid);
//
// Both four spinor and two spinor result variants are provided.
//
// The four spinor project will be recursively provided to Lattice wide routines, and likely used in
// The four spinor project will be recursively provided to Lattice wide routines, and likely used in
// the domain wall and mobius implementations.
//
//////////////////////////////////////////////////////////////////////////////////////////////////////
@ -74,13 +74,17 @@ NAMESPACE_BEGIN(Grid);
// To fail is not to err (Cryptic clue: suggest to Google SFINAE ;) )
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjXp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
hspin(0)=fspin(0)+timesI(fspin(3));
hspin(1)=fspin(1)+timesI(fspin(2));
//hspin(0)=fspin(0)+timesI(fspin(3));
//hspin(1)=fspin(1)+timesI(fspin(2));
hspin(0)=addTimesI(fspin(0), fspin(3));
hspin(1)=addTimesI(fspin(1), fspin(2));
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjXm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
hspin(0)=fspin(0)-timesI(fspin(3));
hspin(1)=fspin(1)-timesI(fspin(2));
//hspin(0)=fspin(0)-timesI(fspin(3));
//hspin(1)=fspin(1)-timesI(fspin(2));
hspin(0)=subTimesI(fspin(0), fspin(3));
hspin(1)=subTimesI(fspin(1), fspin(2));
}
// 0 0 0 -1 [0] -+ [3]
@ -105,14 +109,18 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
*/
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjZp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
hspin(0)=fspin(0)+timesI(fspin(2));
hspin(1)=fspin(1)-timesI(fspin(3));
//hspin(0)=fspin(0)+timesI(fspin(2));
//hspin(1)=fspin(1)-timesI(fspin(3));
hspin(0)=addTimesI(fspin(0), fspin(2));
hspin(1)=subTimesI(fspin(1), fspin(3));
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjZm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
hspin(0)=fspin(0)-timesI(fspin(2));
hspin(1)=fspin(1)+timesI(fspin(3));
//hspin(0)=fspin(0)-timesI(fspin(2));
//hspin(1)=fspin(1)+timesI(fspin(3));
hspin(0)=subTimesI(fspin(0), fspin(2));
hspin(1)=addTimesI(fspin(1), fspin(3));
}
/*Gt
* 0 0 1 0 [0]+-[2]
@ -133,8 +141,8 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
hspin(1)=fspin(1)-fspin(3);
}
/*G5
* 1 0 0 0
* 0 1 0 0
* 1 0 0 0
* 0 1 0 0
* 0 0 -1 0
* 0 0 0 -1
*/
@ -152,7 +160,7 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
hspin(0)=fspin(2);
hspin(1)=fspin(3);
}
// template<class vtype> accelerator_inline void fspProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
{
@ -202,16 +210,20 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)-=timesI(hspin(1));
fspin(3)-=timesI(hspin(0));
//fspin(2)-=timesI(hspin(1));
//fspin(3)-=timesI(hspin(0));
fspin(2)=subTimesI(fspin(2), hspin(1));
fspin(3)=subTimesI(fspin(3), hspin(0));
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)+=timesI(hspin(1));
fspin(3)+=timesI(hspin(0));
//fspin(2)+=timesI(hspin(1));
//fspin(3)+=timesI(hspin(0));
fspin(2)=addTimesI(fspin(2), hspin(1));
fspin(3)=addTimesI(fspin(3), hspin(0));
}
// 0 0 0 -1 [0] -+ [3]
@ -279,16 +291,20 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)-=timesI(hspin(0));
fspin(3)+=timesI(hspin(1));
//fspin(2)-=timesI(hspin(0));
//fspin(3)+=timesI(hspin(1));
fspin(2)=subTimesI(fspin(2), hspin(0));
fspin(3)=addTimesI(fspin(3), hspin(1));
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
fspin(0)+=hspin(0);
fspin(1)+=hspin(1);
fspin(2)+=timesI(hspin(0));
fspin(3)-=timesI(hspin(1));
//fspin(2)+=timesI(hspin(0));
//fspin(3)-=timesI(hspin(1));
fspin(2)=addTimesI(fspin(2), hspin(0));
fspin(3)=subTimesI(fspin(3), hspin(1));
}
/*Gt
* 0 0 1 0 [0]+-[2]
@ -329,8 +345,8 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
fspin(3)-=hspin(1);
}
/*G5
* 1 0 0 0
* 0 1 0 0
* 1 0 0 0
* 0 1 0 0
* 0 0 -1 0
* 0 0 0 -1
*/
@ -383,7 +399,7 @@ template<class rtype,class vtype> accelerator_inline void spProjXp (iScalar<rtyp
}
template<class rtype,class vtype,int N> accelerator_inline void spProjXp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjXp(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -402,7 +418,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
}
template<class rtype,class vtype,int N> accelerator_inline void spReconXp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconXp(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -420,7 +436,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconXp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconXp(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -446,7 +462,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjXm(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -468,7 +484,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconXm(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -489,7 +505,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconXm(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -515,7 +531,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjYp(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -537,7 +553,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconYp(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -558,7 +574,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconYp(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -583,7 +599,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProjYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjYm(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -605,7 +621,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconYm(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -626,7 +642,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconYm(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -651,7 +667,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProjZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjZp(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -673,7 +689,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconZp(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -694,7 +710,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconZp(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -719,7 +735,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjZm(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -741,7 +757,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconZm(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -762,7 +778,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconZm(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -787,7 +803,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProjTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjTp(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -809,7 +825,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconTp(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -830,7 +846,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumReconTp (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconTp(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -855,7 +871,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProjTm(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -877,7 +893,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spReconTm(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -898,7 +914,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumReconTm(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -923,7 +939,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -944,7 +960,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -965,7 +981,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -990,7 +1006,7 @@ template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inlin
template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -1013,7 +1029,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0> accel
}
template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProj5m(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -1034,7 +1050,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -1055,7 +1071,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
accumRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
}}
@ -1081,7 +1097,7 @@ template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inlin
template<class vtype,int N> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
//typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
for(int i=0;i<N;i++){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
spProj5m(hspin._internal[i][j],fspin._internal[i][j]);
}}

View File

@ -443,8 +443,8 @@ struct TimesMinusI{
};
// alternative implementation using fcadd
// this is not optimal because we have op1 = op2 + TimesMinusI(op3) etc
// ideally we have AddTimesMinusI(op1,op2,op3)
// this is not optimal because we have op1 = op2 + TimesMinusI(op3) = op2 - TimesI(op3) etc
// but ideally we have op1 = SubTimesI(op2,op3)
//
// makes performance worse in Benchmark_wilson using MPI
// increases halogtime and gathertime
@ -467,6 +467,34 @@ struct TimesMinusI{
};
*/
// SVE only, fcadd returns a +- i*b
// a + i * b
struct AddTimesI{
// Complex float
inline vecf operator()(vecf a, vecf b){
pred pg1 = acle<float>::pg1();
return svcadd_x(pg1, a, b, 90);
}
// Complex double
inline vecd operator()(vecd a, vecd b){
pred pg1 = acle<double>::pg1();
return svcadd_x(pg1, a, b, 90);
}
};
// a - i * b
struct SubTimesI{
// Complex float
inline vecf operator()(vecf a, vecf b){
pred pg1 = acle<float>::pg1();
return svcadd_x(pg1, a, b, 270);
}
// Complex double
inline vecd operator()(vecd a, vecd b){
pred pg1 = acle<double>::pg1();
return svcadd_x(pg1, a, b, 270);
}
};
struct TimesI{
// Complex float
inline vecf operator()(vecf a, vecf b){
@ -493,7 +521,7 @@ struct TimesI{
// alternative implementation using fcadd
// this is not optimal because we have op1 = op2 + TimesI(op3) etc
// ideally we have AddTimesI(op1,op2,op3)
// ideally we have op1 = AddTimesI(op2,op3)
//
// makes performance worse in Benchmark_wilson using MPI
// increases halogtime and gathertime
@ -800,7 +828,7 @@ typedef veci SIMD_Itype; // Integer type
// prefetch utilities
inline void v_prefetch0(int size, const char *ptr){};
/* PF 256 worse than PF 64
/* PF 256
inline void prefetch_HINT_T0(const char *ptr){
static int64_t last_ptr;
int64_t vptr = reinterpret_cast<std::intptr_t>(ptr) & 0x7fffffffffffff00ll;
@ -812,7 +840,7 @@ inline void prefetch_HINT_T0(const char *ptr){
}
};
*/
/* beneficial for operators?
/* PF 64
inline void prefetch_HINT_T0(const char *ptr){
pred pg1 = Optimization::acle<double>::pg1();
svprfd(pg1, ptr, SV_PLDL1STRM);
@ -839,5 +867,8 @@ typedef Optimization::MaddRealPart MaddRealPartSIMD;
typedef Optimization::Conj ConjSIMD;
typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD;
typedef Optimization::AddTimesI AddTimesISIMD;
typedef Optimization::SubTimesI SubTimesISIMD;
NAMESPACE_END(Grid);

View File

@ -298,7 +298,7 @@ public:
// FIXME -- alias this to an accelerator_inline MAC struct.
// FIXME VLA build error
// specialize mac for A64FX
#if defined(A64FX) || defined(A64FXFIXEDSIZE)
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ a,
@ -894,6 +894,47 @@ accelerator_inline Grid_simd<S, V> timesI(const Grid_simd<S, V> &in) {
return in;
}
// -----------------------------------------------------------------------------
// SVE only
///////////////////////
// AddTimesI
///////////////////////
template <class S, class V, IfComplex<S> = 0>
accelerator_inline void addTimesI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
ret.v = binary<V>(in1.v, in2.v, AddTimesISIMD());
}
template <class S, class V, IfComplex<S> = 0>
accelerator_inline Grid_simd<S, V> addTimesI(const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
Grid_simd<S, V> ret;
ret = addTimesI(in1, in2);
return ret;
}
template <class S, class V, IfNotComplex<S> = 0>
accelerator_inline Grid_simd<S, V> addTimesI(const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
return in1;
}
///////////////////////
// SubTimesI
///////////////////////
template <class S, class V, IfComplex<S> = 0>
accelerator_inline void subTimesI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
ret.v = binary<V>(in1.v, in2.v, SubTimesISIMD());
}
template <class S, class V, IfComplex<S> = 0>
accelerator_inline Grid_simd<S, V> subTimesI(const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
Grid_simd<S, V> ret;
ret = subTimesI(in1, in2);
return ret;
}
template <class S, class V, IfNotComplex<S> = 0>
accelerator_inline Grid_simd<S, V> subTimesI(const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
return in1;
}
// end SVE
// -----------------------------------------------------------------------------
/////////////////////
// Inner, outer
/////////////////////

View File

@ -1,6 +1,6 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/tensors/Tensor_reality.h
@ -31,16 +31,16 @@ Author: neo <cossu@post.kek.jp>
NAMESPACE_BEGIN(Grid);
///////////////////////////////////////////////
///////////////////////////////////////////////
// multiply by I; make recursive.
///////////////////////////////////////////////
template<class vtype> accelerator_inline iScalar<vtype> timesI(const iScalar<vtype>&r)
///////////////////////////////////////////////
template<class vtype> accelerator_inline iScalar<vtype> timesI(const iScalar<vtype>&r)
{
iScalar<vtype> ret;
timesI(ret._internal,r._internal);
return ret;
}
template<class vtype,int N> accelerator_inline iVector<vtype,N> timesI(const iVector<vtype,N>&r)
template<class vtype,int N> accelerator_inline iVector<vtype,N> timesI(const iVector<vtype,N>&r)
{
iVector<vtype,N> ret;
for(int i=0;i<N;i++){
@ -58,11 +58,11 @@ template<class vtype,int N> accelerator_inline iMatrix<vtype,N> timesI(const iMa
return ret;
}
template<class vtype> accelerator_inline void timesI(iScalar<vtype> &ret,const iScalar<vtype>&r)
template<class vtype> accelerator_inline void timesI(iScalar<vtype> &ret,const iScalar<vtype>&r)
{
timesI(ret._internal,r._internal);
}
template<class vtype,int N> accelerator_inline void timesI(iVector<vtype,N> &ret,const iVector<vtype,N>&r)
template<class vtype,int N> accelerator_inline void timesI(iVector<vtype,N> &ret,const iVector<vtype,N>&r)
{
for(int i=0;i<N;i++){
timesI(ret._internal[i],r._internal[i]);
@ -77,13 +77,13 @@ template<class vtype,int N> accelerator_inline void timesI(iMatrix<vtype,N> &re
}
template<class vtype> accelerator_inline iScalar<vtype> timesMinusI(const iScalar<vtype>&r)
template<class vtype> accelerator_inline iScalar<vtype> timesMinusI(const iScalar<vtype>&r)
{
iScalar<vtype> ret;
timesMinusI(ret._internal,r._internal);
return ret;
}
template<class vtype,int N> accelerator_inline iVector<vtype,N> timesMinusI(const iVector<vtype,N>&r)
template<class vtype,int N> accelerator_inline iVector<vtype,N> timesMinusI(const iVector<vtype,N>&r)
{
iVector<vtype,N> ret;
for(int i=0;i<N;i++){
@ -101,11 +101,11 @@ template<class vtype,int N> accelerator_inline iMatrix<vtype,N> timesMinusI(cons
return ret;
}
template<class vtype> accelerator_inline void timesMinusI(iScalar<vtype> &ret,const iScalar<vtype>&r)
template<class vtype> accelerator_inline void timesMinusI(iScalar<vtype> &ret,const iScalar<vtype>&r)
{
timesMinusI(ret._internal,r._internal);
}
template<class vtype,int N> accelerator_inline void timesMinusI(iVector<vtype,N> &ret,const iVector<vtype,N>&r)
template<class vtype,int N> accelerator_inline void timesMinusI(iVector<vtype,N> &ret,const iVector<vtype,N>&r)
{
for(int i=0;i<N;i++){
timesMinusI(ret._internal[i],r._internal[i]);
@ -120,9 +120,99 @@ template<class vtype,int N> accelerator_inline void timesMinusI(iMatrix<vtype,N
}
///////////////////////////////////////////////
// -----------------------------------------------------------------------------
// SVE
template<class vtype> accelerator_inline iScalar<vtype> addTimesI(const iScalar<vtype>&r1, const iScalar<vtype>&r2)
{
iScalar<vtype> ret;
addTimesI(ret._internal,r1._internal,r2._internal);
return ret;
}
template<class vtype,int N> accelerator_inline iVector<vtype,N> addTimesI(const iVector<vtype,N>&r1, const iVector<vtype,N>&r2)
{
iVector<vtype,N> ret;
for(int i=0;i<N;i++){
addTimesI(ret._internal[i],r1._internal[i],r2._internal[i]);
}
return ret;
}
template<class vtype,int N> accelerator_inline iMatrix<vtype,N> addTimesI(const iMatrix<vtype,N>&r1, const iMatrix<vtype,N>&r2)
{
iMatrix<vtype,N> ret;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
addTimesI(ret._internal[i][j],r1._internal[i][j],r2._internal[i][j]);
}}
return ret;
}
template<class vtype> accelerator_inline void addTimesI(iScalar<vtype> &ret,const iScalar<vtype>&r1,const iScalar<vtype>&r2)
{
addTimesI(ret._internal,r1._internal,r2._internal);
}
template<class vtype,int N> accelerator_inline void addTimesI(iVector<vtype,N> &ret,const iVector<vtype,N>&r1,const iVector<vtype,N>&r2)
{
for(int i=0;i<N;i++){
addTimesI(ret._internal[i],r1._internal[i],r2._internal[i]);
}
}
template<class vtype,int N> accelerator_inline void addTimesI(iMatrix<vtype,N> &ret,const iMatrix<vtype,N>&r1,const iMatrix<vtype,N>&r2)
{
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
addTimesI(ret._internal[i][j],r1._internal[i][j],r2._internal[i][j]);
}}
}
template<class vtype> accelerator_inline iScalar<vtype> subTimesI(const iScalar<vtype>&r1, const iScalar<vtype>&r2)
{
iScalar<vtype> ret;
subTimesI(ret._internal,r1._internal,r2._internal);
return ret;
}
template<class vtype,int N> accelerator_inline iVector<vtype,N> subTimesI(const iVector<vtype,N>&r1, const iVector<vtype,N>&r2)
{
iVector<vtype,N> ret;
for(int i=0;i<N;i++){
subTimesI(ret._internal[i],r1._internal[i],r2._internal[i]);
}
return ret;
}
template<class vtype,int N> accelerator_inline iMatrix<vtype,N> subTimesI(const iMatrix<vtype,N>&r1, const iMatrix<vtype,N>&r2)
{
iMatrix<vtype,N> ret;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
subTimesI(ret._internal[i][j],r1._internal[i][j],r2._internal[i][j]);
}}
return ret;
}
template<class vtype> accelerator_inline void subTimesI(iScalar<vtype> &ret,const iScalar<vtype>&r1,const iScalar<vtype>&r2)
{
subTimesI(ret._internal,r1._internal,r2._internal);
}
template<class vtype,int N> accelerator_inline void subTimesI(iVector<vtype,N> &ret,const iVector<vtype,N>&r1,const iVector<vtype,N>&r2)
{
for(int i=0;i<N;i++){
subTimesI(ret._internal[i],r1._internal[i],r2._internal[i]);
}
}
template<class vtype,int N> accelerator_inline void subTimesI(iMatrix<vtype,N> &ret,const iMatrix<vtype,N>&r1,const iMatrix<vtype,N>&r2)
{
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
subTimesI(ret._internal[i][j],r1._internal[i][j],r2._internal[i][j]);
}}
}
// -----------------------------------------------------------------------------
// end SVE
///////////////////////////////////////////////
// Conj function for scalar, vector, matrix
///////////////////////////////////////////////
///////////////////////////////////////////////
template<class vtype> accelerator_inline iScalar<vtype> conjugate(const iScalar<vtype>&r)
{
iScalar<vtype> ret;
@ -147,9 +237,9 @@ template<class vtype,int N> accelerator_inline iMatrix<vtype,N> conjugate(const
return ret;
}
///////////////////////////////////////////////
///////////////////////////////////////////////
// Adj function for scalar, vector, matrix
///////////////////////////////////////////////
///////////////////////////////////////////////
template<class vtype> accelerator_inline iScalar<vtype> adj(const iScalar<vtype>&r)
{
iScalar<vtype> ret;
@ -206,7 +296,7 @@ template<class itype,int N> accelerator_inline auto real(const iVector<itype,N>
}
return ret;
}
template<class itype> accelerator_inline auto imag(const iScalar<itype> &z) -> iScalar<decltype(imag(z._internal))>
{
iScalar<decltype(imag(z._internal))> ret;