Mirror of https://github.com/paboyle/Grid.git (synced 2024-11-10 07:55:35 +00:00)
revert Add/SubTimesI and prefetching in stencil

This reverts commit 9b2699226c.

parent 93a37c8f68
commit 433766ac62
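For orientation before the hunks: the revert drops the fused SVE helpers addTimesI/subTimesI in the spin projectors (and the prefetching added to the stencil gather) and restores the generic add/subtract-of-timesI form. A minimal scalar sketch of the two variants, with types simplified to std::complex for illustration; the real code operates on Grid_simd vectors exactly as the hunks below show.

#include <complex>
using Cplx = std::complex<double>;

// Fused helpers removed by this revert: a + i*b and a - i*b in one call
// (on A64FX these map onto a single SVE fcadd instruction).
inline Cplx addTimesI(const Cplx &a, const Cplx &b) { return a + Cplx(0,1)*b; }
inline Cplx subTimesI(const Cplx &a, const Cplx &b) { return a - Cplx(0,1)*b; }

// Generic form restored by this revert.
inline Cplx timesI(const Cplx &a) { return Cplx(0,1)*a; }

// spProjXp from TwoSpinor.h reduced to scalars: the revert removes the
// addTimesI variant and switches the plain timesI form back on.
inline void spProjXp(Cplx (&hspin)[2], const Cplx (&fspin)[4]) {
  // hspin[0] = addTimesI(fspin[0], fspin[3]);   // removed form
  hspin[0] = fspin[0] + timesI(fspin[3]);        // restored form
  hspin[1] = fspin[1] + timesI(fspin[2]);
}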
@@ -164,7 +164,12 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
  if((!local)&&(!st.same_node[Dir]) ) { \
    LOAD_CHI(base); \
    MULT_2SPIN_1(Dir); \
    PREFETCH_CHIMU(base); \
    /* PREFETCH_GAUGE_L1(NxtDir); */ \
    MULT_2SPIN_2; \
    if (s == 0) { \
      if ((Dir == 0) || (Dir == 4)) { PREFETCH_GAUGE_L2(Dir); } \
    } \
    RECON; \
    nmu++; \
  }
@@ -175,7 +180,12 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
  if((!local)&&(!st.same_node[Dir]) ) { \
    LOAD_CHI(base); \
    MULT_2SPIN_1(Dir); \
    PREFETCH_CHIMU(base); \
    /* PREFETCH_GAUGE_L1(NxtDir); */ \
    MULT_2SPIN_2; \
    if (s == 0) { \
      if ((Dir == 0) || (Dir == 4)) { PREFETCH_GAUGE_L2(Dir); } \
    } \
    RECON; \
    nmu++; \
  }
@@ -445,21 +445,18 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
#ifndef GRID_NVCC
    if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
    if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); /* printf("."); */ return;}
    //if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); printf("."); return;}
#endif
  } else if( interior ) {
    if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteInt); return;}
#ifndef GRID_NVCC
    if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteInt); return;}
    if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); /* printf("-"); */ return;}
    //if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); printf("-"); return;}
#endif
  } else if( exterior ) {
    if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
#ifndef GRID_NVCC
    if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
    if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); /* printf("+"); */ return;}
    //if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); printf("+"); return;}
#endif
  }
  assert(0 && " Kernel optimisation case not covered ");
@@ -1,6 +1,6 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/qcd/spin/TwoSpinor.h

@@ -33,7 +33,7 @@ NAMESPACE_BEGIN(Grid);


//////////////////////////////////////////////////////////////////////////////////////////////////////
// Normalisation alert; the g5 project is 1/2(1+-G5)
// the xyzt projects are (1+-Gxyzt)
//
// * xyzt project
@@ -59,7 +59,7 @@ NAMESPACE_BEGIN(Grid);
//
// Both four spinor and two spinor result variants are provided.
//
// The four spinor project will be recursively provided to Lattice wide routines, and likely used in
// the domain wall and mobius implementations.
//
//////////////////////////////////////////////////////////////////////////////////////////////////////
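For reference (explanatory, not part of the committed change): with the gamma-matrix convention implied by the code below, the +x projector and its accumulated reconstruction work out to

  h_0 = \psi_0 + i\,\psi_3 , \qquad h_1 = \psi_1 + i\,\psi_2
  \psi_2 \to \psi_2 - i\,h_1 , \qquad \psi_3 \to \psi_3 - i\,h_0

which is exactly what spProjXp and accumReconXp compute in the hunks that follow, whether through the fused addTimesI/subTimesI helpers being removed or through the restored fspin + timesI(...) form.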
@@ -74,17 +74,13 @@ NAMESPACE_BEGIN(Grid);
// To fail is not to err (Cryptic clue: suggest to Google SFINAE ;) )
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjXp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
  //hspin(0)=fspin(0)+timesI(fspin(3));
  //hspin(1)=fspin(1)+timesI(fspin(2));
  hspin(0)=addTimesI(fspin(0), fspin(3));
  hspin(1)=addTimesI(fspin(1), fspin(2));
  hspin(0)=fspin(0)+timesI(fspin(3));
  hspin(1)=fspin(1)+timesI(fspin(2));
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjXm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
  //hspin(0)=fspin(0)-timesI(fspin(3));
  //hspin(1)=fspin(1)-timesI(fspin(2));
  hspin(0)=subTimesI(fspin(0), fspin(3));
  hspin(1)=subTimesI(fspin(1), fspin(2));
  hspin(0)=fspin(0)-timesI(fspin(3));
  hspin(1)=fspin(1)-timesI(fspin(2));
}

// 0 0 0 -1 [0] -+ [3]
@@ -109,18 +105,14 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 */
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjZp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
  //hspin(0)=fspin(0)+timesI(fspin(2));
  //hspin(1)=fspin(1)-timesI(fspin(3));
  hspin(0)=addTimesI(fspin(0), fspin(2));
  hspin(1)=subTimesI(fspin(1), fspin(3));
  hspin(0)=fspin(0)+timesI(fspin(2));
  hspin(1)=fspin(1)-timesI(fspin(3));
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjZm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  //hspin(0)=fspin(0)-timesI(fspin(2));
  //hspin(1)=fspin(1)+timesI(fspin(3));
  hspin(0)=subTimesI(fspin(0), fspin(2));
  hspin(1)=addTimesI(fspin(1), fspin(3));
  hspin(0)=fspin(0)-timesI(fspin(2));
  hspin(1)=fspin(1)+timesI(fspin(3));
}
/*Gt
 * 0 0 1 0 [0]+-[2]
@@ -141,8 +133,8 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
  hspin(1)=fspin(1)-fspin(3);
}
/*G5
 * 1 0 0 0
 * 0 1 0 0
 * 0 0 -1 0
 * 0 0 0 -1
 */
@@ -160,7 +152,7 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
  hspin(0)=fspin(2);
  hspin(1)=fspin(3);
}


// template<class vtype> accelerator_inline void fspProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
{
@@ -210,20 +202,16 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)+=hspin(0);
  fspin(1)+=hspin(1);
  //fspin(2)-=timesI(hspin(1));
  //fspin(3)-=timesI(hspin(0));
  fspin(2)=subTimesI(fspin(2), hspin(1));
  fspin(3)=subTimesI(fspin(3), hspin(0));
  fspin(2)-=timesI(hspin(1));
  fspin(3)-=timesI(hspin(0));
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)+=hspin(0);
  fspin(1)+=hspin(1);
  //fspin(2)+=timesI(hspin(1));
  //fspin(3)+=timesI(hspin(0));
  fspin(2)=addTimesI(fspin(2), hspin(1));
  fspin(3)=addTimesI(fspin(3), hspin(0));
  fspin(2)+=timesI(hspin(1));
  fspin(3)+=timesI(hspin(0));
}

// 0 0 0 -1 [0] -+ [3]
@@ -291,20 +279,16 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)+=hspin(0);
  fspin(1)+=hspin(1);
  //fspin(2)-=timesI(hspin(0));
  //fspin(3)+=timesI(hspin(1));
  fspin(2)=subTimesI(fspin(2), hspin(0));
  fspin(3)=addTimesI(fspin(3), hspin(1));
  fspin(2)-=timesI(hspin(0));
  fspin(3)+=timesI(hspin(1));
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
{
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  fspin(0)+=hspin(0);
  fspin(1)+=hspin(1);
  //fspin(2)+=timesI(hspin(0));
  //fspin(3)-=timesI(hspin(1));
  fspin(2)=addTimesI(fspin(2), hspin(0));
  fspin(3)=subTimesI(fspin(3), hspin(1));
  fspin(2)+=timesI(hspin(0));
  fspin(3)-=timesI(hspin(1));
}
/*Gt
 * 0 0 1 0 [0]+-[2]
@@ -345,8 +329,8 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
  fspin(3)-=hspin(1);
}
/*G5
 * 1 0 0 0
 * 0 1 0 0
 * 0 0 -1 0
 * 0 0 0 -1
 */
@@ -399,7 +383,7 @@ template<class rtype,class vtype> accelerator_inline void spProjXp (iScalar<rtyp
}
template<class rtype,class vtype,int N> accelerator_inline void spProjXp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjXp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -418,7 +402,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
}
template<class rtype,class vtype,int N> accelerator_inline void spReconXp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconXp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -436,7 +420,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconXp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconXp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -462,7 +446,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjXm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -484,7 +468,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconXm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -505,7 +489,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconXm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -531,7 +515,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjYp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -553,7 +537,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconYp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -574,7 +558,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconYp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -599,7 +583,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProjYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjYm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -621,7 +605,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconYm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -642,7 +626,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconYm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -667,7 +651,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProjZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjZp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -689,7 +673,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconZp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -710,7 +694,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconZp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -735,7 +719,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjZm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -757,7 +741,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconZm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -778,7 +762,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconZm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -803,7 +787,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProjTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjTp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -825,7 +809,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconTp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -846,7 +830,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumReconTp (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconTp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -871,7 +855,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjTm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -893,7 +877,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconTm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -914,7 +898,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconTm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -939,7 +923,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -960,7 +944,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -981,7 +965,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -1006,7 +990,7 @@ template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inlin
template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -1029,7 +1013,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0> accel
}
template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProj5m(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -1050,7 +1034,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void spRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -1071,7 +1055,7 @@ template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accel
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -1097,7 +1081,7 @@ template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inlin
template<class vtype,int N> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProj5m(hspin._internal[i][j],fspin._internal[i][j]);
    }}
@@ -442,59 +442,6 @@ struct TimesMinusI{
  }
};

// alternative implementation using fcadd
// this is not optimal because we have op1 = op2 + TimesMinusI(op3) = op2 - TimesI(op3) etc
// but ideally we have op1 = SubTimesI(op2,op3)
//
// makes performance worse in Benchmark_wilson using MPI
// increases halogtime and gathertime
/*
struct TimesMinusI{
  // Complex float
  inline vecf operator()(vecf a, vecf b){
    pred pg1 = acle<float>::pg1();
    vecf z_v = acle<float>::zero();

    return svcadd_x(pg1, z_v, a, 270);
  }
  // Complex double
  inline vecd operator()(vecd a, vecd b){
    pred pg1 = acle<double>::pg1();
    vecd z_v = acle<double>::zero();

    return svcadd_x(pg1, z_v, a, 270);
  }
};
*/

// SVE only, fcadd returns a +- i*b
// a + i * b
struct AddTimesI{
  // Complex float
  inline vecf operator()(vecf a, vecf b){
    pred pg1 = acle<float>::pg1();
    return svcadd_x(pg1, a, b, 90);
  }
  // Complex double
  inline vecd operator()(vecd a, vecd b){
    pred pg1 = acle<double>::pg1();
    return svcadd_x(pg1, a, b, 90);
  }
};
// a - i * b
struct SubTimesI{
  // Complex float
  inline vecf operator()(vecf a, vecf b){
    pred pg1 = acle<float>::pg1();
    return svcadd_x(pg1, a, b, 270);
  }
  // Complex double
  inline vecd operator()(vecd a, vecd b){
    pred pg1 = acle<double>::pg1();
    return svcadd_x(pg1, a, b, 270);
  }
};

struct TimesI{
  // Complex float
  inline vecf operator()(vecf a, vecf b){
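For reference (explanatory, not part of the committed change): the SVE fcadd rotation argument selects a ± i*b per interleaved complex lane, which is why AddTimesI/SubTimesI above need a single svcadd_x, while TimesI/TimesMinusI built from fcadd need a zeroed accumulator plus a separate add afterwards, as the comments in the removed block note. A simplified scalar model of the two rotations (illustrative only; the actual structs operate on whole vecf/vecd registers):

// One complex number modelled as an interleaved (re, im) pair.
struct C { double re, im; };

// Per-lane effect of fcadd for the two rotations used above.
static C fcadd_rot90 (C a, C b) { return { a.re - b.im, a.im + b.re }; }  // a + i*b  -> AddTimesI
static C fcadd_rot270(C a, C b) { return { a.re + b.im, a.im - b.re }; }  // a - i*b  -> SubTimesI

// Fused: one operation per element.
static C addTimesI(C a, C b) { return fcadd_rot90(a, b); }

// Unfused: timesI via fcadd needs a zero accumulator, then a separate add.
static C timesI(C b)   { C z{0.0, 0.0}; return fcadd_rot90(z, b); }
static C add(C a, C b) { return { a.re + b.re, a.im + b.im }; }
// a + timesI(b) == add(a, timesI(b))  -> two operations instead of one.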
@@ -518,33 +465,6 @@ struct TimesI{
  }
};


// alternative implementation using fcadd
// this is not optimal because we have op1 = op2 + TimesI(op3) etc
// ideally we have op1 = AddTimesI(op2,op3)
//
// makes performance worse in Benchmark_wilson using MPI
// increases halogtime and gathertime
/*
struct TimesI{
  // Complex float
  inline vecf operator()(vecf a, vecf b){
    pred pg1 = acle<float>::pg1();
    vecf z_v = acle<float>::zero();

    return svcadd_x(pg1, z_v, a, 90);
  }
  // Complex double
  inline vecd operator()(vecd a, vecd b){
    pred pg1 = acle<double>::pg1();
    vecd z_v = acle<double>::zero();

    return svcadd_x(pg1, z_v, a, 90);
  }
};
*/


struct PrecisionChange {
  static inline vech StoH (vecf sa, vecf sb) {
    pred pg1s = acle<float>::pg1();
@@ -827,25 +747,6 @@ typedef veci SIMD_Itype; // Integer type

// prefetch utilities
inline void v_prefetch0(int size, const char *ptr){};

/* PF 256
inline void prefetch_HINT_T0(const char *ptr){
  static int64_t last_ptr;
  int64_t vptr = reinterpret_cast<std::intptr_t>(ptr) & 0x7fffffffffffff00ll;
  if (last_ptr != vptr) {
    last_ptr = vptr;
    pred pg1 = Optimization::acle<double>::pg1();
    svprfd(pg1, reinterpret_cast<int64_t*>(ptr), SV_PLDL1STRM);
    svprfd(pg1, ptr, SV_PLDL1STRM);
  }
};
*/
/* PF 64
inline void prefetch_HINT_T0(const char *ptr){
  pred pg1 = Optimization::acle<double>::pg1();
  svprfd(pg1, ptr, SV_PLDL1STRM);
};
*/
inline void prefetch_HINT_T0(const char *ptr){};

// Function name aliases
@@ -867,8 +768,5 @@ typedef Optimization::MaddRealPart MaddRealPartSIMD;
typedef Optimization::Conj ConjSIMD;
typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD;
typedef Optimization::AddTimesI AddTimesISIMD;
typedef Optimization::SubTimesI SubTimesISIMD;


NAMESPACE_END(Grid);
@@ -298,7 +298,7 @@ public:

  // FIXME -- alias this to an accelerator_inline MAC struct.

  // specialize mac for A64FX
  // FIXME VLA build error
#if defined(A64FX) || defined(A64FXFIXEDSIZE)
  friend accelerator_inline void mac(Grid_simd *__restrict__ y,
                                     const Grid_simd *__restrict__ a,
@@ -894,47 +894,6 @@ accelerator_inline Grid_simd<S, V> timesI(const Grid_simd<S, V> &in) {
  return in;
}

// -----------------------------------------------------------------------------

// SVE only
///////////////////////
// AddTimesI
///////////////////////
template <class S, class V, IfComplex<S> = 0>
accelerator_inline void addTimesI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
  ret.v = binary<V>(in1.v, in2.v, AddTimesISIMD());
}
template <class S, class V, IfComplex<S> = 0>
accelerator_inline Grid_simd<S, V> addTimesI(const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
  Grid_simd<S, V> ret;
  ret = addTimesI(in1, in2);
  return ret;
}
template <class S, class V, IfNotComplex<S> = 0>
accelerator_inline Grid_simd<S, V> addTimesI(const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
  return in1;
}
///////////////////////
// SubTimesI
///////////////////////
template <class S, class V, IfComplex<S> = 0>
accelerator_inline void subTimesI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
  ret.v = binary<V>(in1.v, in2.v, SubTimesISIMD());
}
template <class S, class V, IfComplex<S> = 0>
accelerator_inline Grid_simd<S, V> subTimesI(const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
  Grid_simd<S, V> ret;
  ret = subTimesI(in1, in2);
  return ret;
}
template <class S, class V, IfNotComplex<S> = 0>
accelerator_inline Grid_simd<S, V> subTimesI(const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
  return in1;
}

// end SVE
// -----------------------------------------------------------------------------

/////////////////////
// Inner, outer
/////////////////////
@@ -68,27 +68,8 @@ void Gather_plane_simple_table (Vector<std::pair<int,int> >& table,const Lattice
  int num=table.size();
  std::pair<int,int> *table_v = & table[0];
  auto rhs_v = rhs.View();

  // main loop
  accelerator_forNB( i,num, vobj::Nsimd(), {
    typedef decltype(coalescedRead(buffer[0])) compressed_t;
    // prefetching:
    // +1% performance for Wilson on 32**4
    // -2% performance for DW on 24**4 x 12

    const int dist = 7;
    if (i+dist < num){
      svbool_t pg1 = svptrue_b64();

      // prefetch input
      auto in = rhs_v(so+table_v[i+dist].second);
      svprfd(pg1, (char*)&in, SV_PLDL2STRM);

      // prefetch store buffer
      uint64_t o = table_v[i+dist].first;
      svprfd(pg1, (char*)&buffer[off+o], SV_PSTL2STRM);
    }

    compressed_t tmp_c;
    uint64_t o = table_v[i].first;
    compress.Compress(&tmp_c,0,rhs_v(so+table_v[i].second));
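For reference (explanatory, not part of the committed change): the block above removes a software prefetch that runs a fixed distance ahead of the gather loop; the comments record the measured effect, roughly +1% for Wilson on 32^4 and -2% for domain wall on 24^4 x 12. The same prefetch-ahead pattern in a portable, simplified form, with __builtin_prefetch standing in for the SVE svprfd calls and hypothetical flat types replacing the Grid views:

// Gather with a prefetch distance of `dist` iterations (illustrative sketch).
template <class T>
void gather_with_prefetch(T *buffer, const T *src, const int *index, int num) {
  const int dist = 7;                      // tuning knob: how far ahead to prefetch
  for (int i = 0; i < num; i++) {
    if (i + dist < num) {
      __builtin_prefetch(&src[index[i + dist]], 0, 1);  // future load, low temporal locality
      __builtin_prefetch(&buffer[i + dist],     1, 1);  // future store
    }
    buffer[i] = src[index[i]];             // the actual gather step
  }
}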
@@ -1,6 +1,6 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/tensors/Tensor_reality.h

@@ -31,16 +31,16 @@ Author: neo <cossu@post.kek.jp>

NAMESPACE_BEGIN(Grid);

///////////////////////////////////////////////
// multiply by I; make recursive.
///////////////////////////////////////////////
template<class vtype> accelerator_inline iScalar<vtype> timesI(const iScalar<vtype>&r)
{
  iScalar<vtype> ret;
  timesI(ret._internal,r._internal);
  return ret;
}
template<class vtype,int N> accelerator_inline iVector<vtype,N> timesI(const iVector<vtype,N>&r)
{
  iVector<vtype,N> ret;
  for(int i=0;i<N;i++){
@@ -58,11 +58,11 @@ template<class vtype,int N> accelerator_inline iMatrix<vtype,N> timesI(const iMa
  return ret;
}

template<class vtype> accelerator_inline void timesI(iScalar<vtype> &ret,const iScalar<vtype>&r)
{
  timesI(ret._internal,r._internal);
}
template<class vtype,int N> accelerator_inline void timesI(iVector<vtype,N> &ret,const iVector<vtype,N>&r)
{
  for(int i=0;i<N;i++){
    timesI(ret._internal[i],r._internal[i]);
@@ -77,13 +77,13 @@ template<class vtype,int N> accelerator_inline void timesI(iMatrix<vtype,N> &re
}


template<class vtype> accelerator_inline iScalar<vtype> timesMinusI(const iScalar<vtype>&r)
{
  iScalar<vtype> ret;
  timesMinusI(ret._internal,r._internal);
  return ret;
}
template<class vtype,int N> accelerator_inline iVector<vtype,N> timesMinusI(const iVector<vtype,N>&r)
{
  iVector<vtype,N> ret;
  for(int i=0;i<N;i++){
@@ -101,11 +101,11 @@ template<class vtype,int N> accelerator_inline iMatrix<vtype,N> timesMinusI(cons
  return ret;
}

template<class vtype> accelerator_inline void timesMinusI(iScalar<vtype> &ret,const iScalar<vtype>&r)
{
  timesMinusI(ret._internal,r._internal);
}
template<class vtype,int N> accelerator_inline void timesMinusI(iVector<vtype,N> &ret,const iVector<vtype,N>&r)
{
  for(int i=0;i<N;i++){
    timesMinusI(ret._internal[i],r._internal[i]);
@@ -120,99 +120,9 @@ template<class vtype,int N> accelerator_inline void timesMinusI(iMatrix<vtype,N
}


// -----------------------------------------------------------------------------
// SVE

template<class vtype> accelerator_inline iScalar<vtype> addTimesI(const iScalar<vtype>&r1, const iScalar<vtype>&r2)
{
  iScalar<vtype> ret;
  addTimesI(ret._internal,r1._internal,r2._internal);
  return ret;
}
template<class vtype,int N> accelerator_inline iVector<vtype,N> addTimesI(const iVector<vtype,N>&r1, const iVector<vtype,N>&r2)
{
  iVector<vtype,N> ret;
  for(int i=0;i<N;i++){
    addTimesI(ret._internal[i],r1._internal[i],r2._internal[i]);
  }
  return ret;
}
template<class vtype,int N> accelerator_inline iMatrix<vtype,N> addTimesI(const iMatrix<vtype,N>&r1, const iMatrix<vtype,N>&r2)
{
  iMatrix<vtype,N> ret;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      addTimesI(ret._internal[i][j],r1._internal[i][j],r2._internal[i][j]);
  }}
  return ret;
}

template<class vtype> accelerator_inline void addTimesI(iScalar<vtype> &ret,const iScalar<vtype>&r1,const iScalar<vtype>&r2)
{
  addTimesI(ret._internal,r1._internal,r2._internal);
}
template<class vtype,int N> accelerator_inline void addTimesI(iVector<vtype,N> &ret,const iVector<vtype,N>&r1,const iVector<vtype,N>&r2)
{
  for(int i=0;i<N;i++){
    addTimesI(ret._internal[i],r1._internal[i],r2._internal[i]);
  }
}
template<class vtype,int N> accelerator_inline void addTimesI(iMatrix<vtype,N> &ret,const iMatrix<vtype,N>&r1,const iMatrix<vtype,N>&r2)
{
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      addTimesI(ret._internal[i][j],r1._internal[i][j],r2._internal[i][j]);
  }}
}

template<class vtype> accelerator_inline iScalar<vtype> subTimesI(const iScalar<vtype>&r1, const iScalar<vtype>&r2)
{
  iScalar<vtype> ret;
  subTimesI(ret._internal,r1._internal,r2._internal);
  return ret;
}
template<class vtype,int N> accelerator_inline iVector<vtype,N> subTimesI(const iVector<vtype,N>&r1, const iVector<vtype,N>&r2)
{
  iVector<vtype,N> ret;
  for(int i=0;i<N;i++){
    subTimesI(ret._internal[i],r1._internal[i],r2._internal[i]);
  }
  return ret;
}
template<class vtype,int N> accelerator_inline iMatrix<vtype,N> subTimesI(const iMatrix<vtype,N>&r1, const iMatrix<vtype,N>&r2)
{
  iMatrix<vtype,N> ret;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      subTimesI(ret._internal[i][j],r1._internal[i][j],r2._internal[i][j]);
  }}
  return ret;
}

template<class vtype> accelerator_inline void subTimesI(iScalar<vtype> &ret,const iScalar<vtype>&r1,const iScalar<vtype>&r2)
{
  subTimesI(ret._internal,r1._internal,r2._internal);
}
template<class vtype,int N> accelerator_inline void subTimesI(iVector<vtype,N> &ret,const iVector<vtype,N>&r1,const iVector<vtype,N>&r2)
{
  for(int i=0;i<N;i++){
    subTimesI(ret._internal[i],r1._internal[i],r2._internal[i]);
  }
}
template<class vtype,int N> accelerator_inline void subTimesI(iMatrix<vtype,N> &ret,const iMatrix<vtype,N>&r1,const iMatrix<vtype,N>&r2)
{
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      subTimesI(ret._internal[i][j],r1._internal[i][j],r2._internal[i][j]);
  }}
}
// -----------------------------------------------------------------------------
// end SVE


///////////////////////////////////////////////
// Conj function for scalar, vector, matrix
///////////////////////////////////////////////
template<class vtype> accelerator_inline iScalar<vtype> conjugate(const iScalar<vtype>&r)
{
  iScalar<vtype> ret;
@@ -237,9 +147,9 @@ template<class vtype,int N> accelerator_inline iMatrix<vtype,N> conjugate(const
  return ret;
}

///////////////////////////////////////////////
// Adj function for scalar, vector, matrix
///////////////////////////////////////////////
template<class vtype> accelerator_inline iScalar<vtype> adj(const iScalar<vtype>&r)
{
  iScalar<vtype> ret;
@@ -296,7 +206,7 @@ template<class itype,int N> accelerator_inline auto real(const iVector<itype,N>
  }
  return ret;
}


template<class itype> accelerator_inline auto imag(const iScalar<itype> &z) -> iScalar<decltype(imag(z._internal))>
{
  iScalar<decltype(imag(z._internal))> ret;