mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-18 09:45:55 +01:00
introduce AddTimesI and SubTimesI; slight benefit in operators, but < 1%; breaks all other impls
This commit is contained in:
parent
5ee3ea2144
commit
9872c76825
@ -74,13 +74,17 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
// To fail is not to err (Cryptic clue: suggest to Google SFINAE ;) )
|
// To fail is not to err (Cryptic clue: suggest to Google SFINAE ;) )
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjXp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjXp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
|
||||||
{
|
{
|
||||||
hspin(0)=fspin(0)+timesI(fspin(3));
|
//hspin(0)=fspin(0)+timesI(fspin(3));
|
||||||
hspin(1)=fspin(1)+timesI(fspin(2));
|
//hspin(1)=fspin(1)+timesI(fspin(2));
|
||||||
|
hspin(0)=addTimesI(fspin(0), fspin(3));
|
||||||
|
hspin(1)=addTimesI(fspin(1), fspin(2));
|
||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjXm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjXm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
|
||||||
{
|
{
|
||||||
hspin(0)=fspin(0)-timesI(fspin(3));
|
//hspin(0)=fspin(0)-timesI(fspin(3));
|
||||||
hspin(1)=fspin(1)-timesI(fspin(2));
|
//hspin(1)=fspin(1)-timesI(fspin(2));
|
||||||
|
hspin(0)=subTimesI(fspin(0), fspin(3));
|
||||||
|
hspin(1)=subTimesI(fspin(1), fspin(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
// 0 0 0 -1 [0] -+ [3]
|
// 0 0 0 -1 [0] -+ [3]
|
||||||
@ -105,14 +109,18 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
|||||||
*/
|
*/
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjZp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjZp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
|
||||||
{
|
{
|
||||||
hspin(0)=fspin(0)+timesI(fspin(2));
|
//hspin(0)=fspin(0)+timesI(fspin(2));
|
||||||
hspin(1)=fspin(1)-timesI(fspin(3));
|
//hspin(1)=fspin(1)-timesI(fspin(3));
|
||||||
|
hspin(0)=addTimesI(fspin(0), fspin(2));
|
||||||
|
hspin(1)=subTimesI(fspin(1), fspin(3));
|
||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjZm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjZm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||||
hspin(0)=fspin(0)-timesI(fspin(2));
|
//hspin(0)=fspin(0)-timesI(fspin(2));
|
||||||
hspin(1)=fspin(1)+timesI(fspin(3));
|
//hspin(1)=fspin(1)+timesI(fspin(3));
|
||||||
|
hspin(0)=subTimesI(fspin(0), fspin(2));
|
||||||
|
hspin(1)=addTimesI(fspin(1), fspin(3));
|
||||||
}
|
}
|
||||||
/*Gt
|
/*Gt
|
||||||
* 0 0 1 0 [0]+-[2]
|
* 0 0 1 0 [0]+-[2]
|
||||||
@ -202,16 +210,20 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
|||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||||
fspin(0)+=hspin(0);
|
fspin(0)+=hspin(0);
|
||||||
fspin(1)+=hspin(1);
|
fspin(1)+=hspin(1);
|
||||||
fspin(2)-=timesI(hspin(1));
|
//fspin(2)-=timesI(hspin(1));
|
||||||
fspin(3)-=timesI(hspin(0));
|
//fspin(3)-=timesI(hspin(0));
|
||||||
|
fspin(2)=subTimesI(fspin(2), hspin(1));
|
||||||
|
fspin(3)=subTimesI(fspin(3), hspin(0));
|
||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||||
fspin(0)+=hspin(0);
|
fspin(0)+=hspin(0);
|
||||||
fspin(1)+=hspin(1);
|
fspin(1)+=hspin(1);
|
||||||
fspin(2)+=timesI(hspin(1));
|
//fspin(2)+=timesI(hspin(1));
|
||||||
fspin(3)+=timesI(hspin(0));
|
//fspin(3)+=timesI(hspin(0));
|
||||||
|
fspin(2)=addTimesI(fspin(2), hspin(1));
|
||||||
|
fspin(3)=addTimesI(fspin(3), hspin(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
// 0 0 0 -1 [0] -+ [3]
|
// 0 0 0 -1 [0] -+ [3]
|
||||||
@ -279,16 +291,20 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
|||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||||
fspin(0)+=hspin(0);
|
fspin(0)+=hspin(0);
|
||||||
fspin(1)+=hspin(1);
|
fspin(1)+=hspin(1);
|
||||||
fspin(2)-=timesI(hspin(0));
|
//fspin(2)-=timesI(hspin(0));
|
||||||
fspin(3)+=timesI(hspin(1));
|
//fspin(3)+=timesI(hspin(1));
|
||||||
|
fspin(2)=subTimesI(fspin(2), hspin(0));
|
||||||
|
fspin(3)=addTimesI(fspin(3), hspin(1));
|
||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||||
fspin(0)+=hspin(0);
|
fspin(0)+=hspin(0);
|
||||||
fspin(1)+=hspin(1);
|
fspin(1)+=hspin(1);
|
||||||
fspin(2)+=timesI(hspin(0));
|
//fspin(2)+=timesI(hspin(0));
|
||||||
fspin(3)-=timesI(hspin(1));
|
//fspin(3)-=timesI(hspin(1));
|
||||||
|
fspin(2)=addTimesI(fspin(2), hspin(0));
|
||||||
|
fspin(3)=subTimesI(fspin(3), hspin(1));
|
||||||
}
|
}
|
||||||
/*Gt
|
/*Gt
|
||||||
* 0 0 1 0 [0]+-[2]
|
* 0 0 1 0 [0]+-[2]
|
||||||
|
@ -443,8 +443,8 @@ struct TimesMinusI{
|
|||||||
};
|
};
|
||||||
|
|
||||||
// alternative implementation using fcadd
|
// alternative implementation using fcadd
|
||||||
// this is not optimal because we have op1 = op2 + TimesMinusI(op3) etc
|
// this is not optimal because we have op1 = op2 + TimesMinusI(op3) = op2 - TimesI(op3) etc
|
||||||
// ideally we have AddTimesMinusI(op1,op2,op3)
|
// but ideally we have op1 = SubTimesI(op2,op3)
|
||||||
//
|
//
|
||||||
// makes performance worse in Benchmark_wilson using MPI
|
// makes performance worse in Benchmark_wilson using MPI
|
||||||
// increases halogtime and gathertime
|
// increases halogtime and gathertime
|
||||||
@ -467,6 +467,34 @@ struct TimesMinusI{
|
|||||||
};
|
};
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// SVE only, fcadd returns a +- i*b
|
||||||
|
// a + i * b
|
||||||
|
struct AddTimesI{
|
||||||
|
// Complex float
|
||||||
|
inline vecf operator()(vecf a, vecf b){
|
||||||
|
pred pg1 = acle<float>::pg1();
|
||||||
|
return svcadd_x(pg1, a, b, 90);
|
||||||
|
}
|
||||||
|
// Complex double
|
||||||
|
inline vecd operator()(vecd a, vecd b){
|
||||||
|
pred pg1 = acle<double>::pg1();
|
||||||
|
return svcadd_x(pg1, a, b, 90);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// a - i * b
|
||||||
|
struct SubTimesI{
|
||||||
|
// Complex float
|
||||||
|
inline vecf operator()(vecf a, vecf b){
|
||||||
|
pred pg1 = acle<float>::pg1();
|
||||||
|
return svcadd_x(pg1, a, b, 270);
|
||||||
|
}
|
||||||
|
// Complex double
|
||||||
|
inline vecd operator()(vecd a, vecd b){
|
||||||
|
pred pg1 = acle<double>::pg1();
|
||||||
|
return svcadd_x(pg1, a, b, 270);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct TimesI{
|
struct TimesI{
|
||||||
// Complex float
|
// Complex float
|
||||||
inline vecf operator()(vecf a, vecf b){
|
inline vecf operator()(vecf a, vecf b){
|
||||||
@ -493,7 +521,7 @@ struct TimesI{
|
|||||||
|
|
||||||
// alternative implementation using fcadd
|
// alternative implementation using fcadd
|
||||||
// this is not optimal because we have op1 = op2 + TimesI(op3) etc
|
// this is not optimal because we have op1 = op2 + TimesI(op3) etc
|
||||||
// ideally we have AddTimesI(op1,op2,op3)
|
// ideally we have op1 = AddTimesI(op2,op3)
|
||||||
//
|
//
|
||||||
// makes performance worse in Benchmark_wilson using MPI
|
// makes performance worse in Benchmark_wilson using MPI
|
||||||
// increases halogtime and gathertime
|
// increases halogtime and gathertime
|
||||||
@ -800,7 +828,7 @@ typedef veci SIMD_Itype; // Integer type
|
|||||||
// prefetch utilities
|
// prefetch utilities
|
||||||
inline void v_prefetch0(int size, const char *ptr){};
|
inline void v_prefetch0(int size, const char *ptr){};
|
||||||
|
|
||||||
/* PF 256 worse than PF 64
|
/* PF 256
|
||||||
inline void prefetch_HINT_T0(const char *ptr){
|
inline void prefetch_HINT_T0(const char *ptr){
|
||||||
static int64_t last_ptr;
|
static int64_t last_ptr;
|
||||||
int64_t vptr = reinterpret_cast<std::intptr_t>(ptr) & 0x7fffffffffffff00ll;
|
int64_t vptr = reinterpret_cast<std::intptr_t>(ptr) & 0x7fffffffffffff00ll;
|
||||||
@ -812,7 +840,7 @@ inline void prefetch_HINT_T0(const char *ptr){
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
*/
|
*/
|
||||||
/* beneficial for operators?
|
/* PF 64
|
||||||
inline void prefetch_HINT_T0(const char *ptr){
|
inline void prefetch_HINT_T0(const char *ptr){
|
||||||
pred pg1 = Optimization::acle<double>::pg1();
|
pred pg1 = Optimization::acle<double>::pg1();
|
||||||
svprfd(pg1, ptr, SV_PLDL1STRM);
|
svprfd(pg1, ptr, SV_PLDL1STRM);
|
||||||
@ -839,5 +867,8 @@ typedef Optimization::MaddRealPart MaddRealPartSIMD;
|
|||||||
typedef Optimization::Conj ConjSIMD;
|
typedef Optimization::Conj ConjSIMD;
|
||||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||||
typedef Optimization::TimesI TimesISIMD;
|
typedef Optimization::TimesI TimesISIMD;
|
||||||
|
typedef Optimization::AddTimesI AddTimesISIMD;
|
||||||
|
typedef Optimization::SubTimesI SubTimesISIMD;
|
||||||
|
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
@ -298,7 +298,7 @@ public:
|
|||||||
|
|
||||||
// FIXME -- alias this to an accelerator_inline MAC struct.
|
// FIXME -- alias this to an accelerator_inline MAC struct.
|
||||||
|
|
||||||
// FIXME VLA build error
|
// specialize mac for A64FX
|
||||||
#if defined(A64FX) || defined(A64FXFIXEDSIZE)
|
#if defined(A64FX) || defined(A64FXFIXEDSIZE)
|
||||||
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
|
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
|
||||||
const Grid_simd *__restrict__ a,
|
const Grid_simd *__restrict__ a,
|
||||||
@ -894,6 +894,47 @@ accelerator_inline Grid_simd<S, V> timesI(const Grid_simd<S, V> &in) {
|
|||||||
return in;
|
return in;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// SVE only
|
||||||
|
///////////////////////
|
||||||
|
// AddTimesI
|
||||||
|
///////////////////////
|
||||||
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
|
accelerator_inline void addTimesI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
|
||||||
|
ret.v = binary<V>(in1.v, in2.v, AddTimesISIMD());
|
||||||
|
}
|
||||||
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
|
accelerator_inline Grid_simd<S, V> addTimesI(const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
|
||||||
|
Grid_simd<S, V> ret;
|
||||||
|
ret = addTimesI(in1, in2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
template <class S, class V, IfNotComplex<S> = 0>
|
||||||
|
accelerator_inline Grid_simd<S, V> addTimesI(const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
|
||||||
|
return in1;
|
||||||
|
}
|
||||||
|
///////////////////////
|
||||||
|
// SubTimesI
|
||||||
|
///////////////////////
|
||||||
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
|
accelerator_inline void subTimesI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
|
||||||
|
ret.v = binary<V>(in1.v, in2.v, SubTimesISIMD());
|
||||||
|
}
|
||||||
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
|
accelerator_inline Grid_simd<S, V> subTimesI(const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
|
||||||
|
Grid_simd<S, V> ret;
|
||||||
|
ret = subTimesI(in1, in2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
template <class S, class V, IfNotComplex<S> = 0>
|
||||||
|
accelerator_inline Grid_simd<S, V> subTimesI(const Grid_simd<S, V> &in1, const Grid_simd<S, V> &in2) {
|
||||||
|
return in1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// end SVE
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
/////////////////////
|
/////////////////////
|
||||||
// Inner, outer
|
// Inner, outer
|
||||||
/////////////////////
|
/////////////////////
|
||||||
|
@ -120,6 +120,96 @@ template<class vtype,int N> accelerator_inline void timesMinusI(iMatrix<vtype,N
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
// SVE
|
||||||
|
|
||||||
|
// r1 + i*r2 for a scalar tensor level, returned by value.
// Forwards to the in-place addTimesI of the wrapped element type.
template<class vtype> accelerator_inline iScalar<vtype> addTimesI(const iScalar<vtype>&r1, const iScalar<vtype>&r2)
{
  iScalar<vtype> sum;
  addTimesI(sum._internal, r1._internal, r2._internal);
  return sum;
}
|
||||||
|
// r1 + i*r2 for a vector tensor level, returned by value.
// Each of the N components is handled by the element type's in-place addTimesI.
template<class vtype,int N> accelerator_inline iVector<vtype,N> addTimesI(const iVector<vtype,N>&r1, const iVector<vtype,N>&r2)
{
  iVector<vtype,N> sum;
  for (int c = 0; c < N; ++c) {
    addTimesI(sum._internal[c], r1._internal[c], r2._internal[c]);
  }
  return sum;
}
|
||||||
|
// r1 + i*r2 for a matrix tensor level, returned by value.
// Each of the N x N entries is handled by the element type's in-place addTimesI.
template<class vtype,int N> accelerator_inline iMatrix<vtype,N> addTimesI(const iMatrix<vtype,N>&r1, const iMatrix<vtype,N>&r2)
{
  iMatrix<vtype,N> sum;
  for (int row = 0; row < N; ++row) {
    for (int col = 0; col < N; ++col) {
      addTimesI(sum._internal[row][col], r1._internal[row][col], r2._internal[row][col]);
    }
  }
  return sum;
}
|
||||||
|
|
||||||
|
// In-place ret = r1 + i*r2 for a scalar tensor level; forwards to the
// addTimesI overload of the wrapped element type.
template<class vtype> accelerator_inline void addTimesI(iScalar<vtype> &ret,const iScalar<vtype>&r1,const iScalar<vtype>&r2)
{
  auto &dst = ret._internal;
  addTimesI(dst, r1._internal, r2._internal);
}
|
||||||
|
// In-place ret = r1 + i*r2 for a vector tensor level, applied to each of
// the N components in turn.
template<class vtype,int N> accelerator_inline void addTimesI(iVector<vtype,N> &ret,const iVector<vtype,N>&r1,const iVector<vtype,N>&r2)
{
  for (int c = 0; c < N; ++c) {
    addTimesI(ret._internal[c], r1._internal[c], r2._internal[c]);
  }
}
|
||||||
|
// In-place ret = r1 + i*r2 for a matrix tensor level, applied to each of
// the N x N entries in row-major visiting order.
template<class vtype,int N> accelerator_inline void addTimesI(iMatrix<vtype,N> &ret,const iMatrix<vtype,N>&r1,const iMatrix<vtype,N>&r2)
{
  for (int row = 0; row < N; ++row) {
    for (int col = 0; col < N; ++col) {
      addTimesI(ret._internal[row][col], r1._internal[row][col], r2._internal[row][col]);
    }
  }
}
|
||||||
|
|
||||||
|
// r1 - i*r2 for a scalar tensor level, returned by value.
// Forwards to the in-place subTimesI of the wrapped element type.
template<class vtype> accelerator_inline iScalar<vtype> subTimesI(const iScalar<vtype>&r1, const iScalar<vtype>&r2)
{
  iScalar<vtype> diff;
  subTimesI(diff._internal, r1._internal, r2._internal);
  return diff;
}
|
||||||
|
// r1 - i*r2 for a vector tensor level, returned by value.
// Each of the N components is handled by the element type's in-place subTimesI.
template<class vtype,int N> accelerator_inline iVector<vtype,N> subTimesI(const iVector<vtype,N>&r1, const iVector<vtype,N>&r2)
{
  iVector<vtype,N> diff;
  for (int c = 0; c < N; ++c) {
    subTimesI(diff._internal[c], r1._internal[c], r2._internal[c]);
  }
  return diff;
}
|
||||||
|
// r1 - i*r2 for a matrix tensor level, returned by value.
// Each of the N x N entries is handled by the element type's in-place subTimesI.
template<class vtype,int N> accelerator_inline iMatrix<vtype,N> subTimesI(const iMatrix<vtype,N>&r1, const iMatrix<vtype,N>&r2)
{
  iMatrix<vtype,N> diff;
  for (int row = 0; row < N; ++row) {
    for (int col = 0; col < N; ++col) {
      subTimesI(diff._internal[row][col], r1._internal[row][col], r2._internal[row][col]);
    }
  }
  return diff;
}
|
||||||
|
|
||||||
|
// In-place ret = r1 - i*r2 for a scalar tensor level; forwards to the
// subTimesI overload of the wrapped element type.
template<class vtype> accelerator_inline void subTimesI(iScalar<vtype> &ret,const iScalar<vtype>&r1,const iScalar<vtype>&r2)
{
  auto &dst = ret._internal;
  subTimesI(dst, r1._internal, r2._internal);
}
|
||||||
|
// In-place ret = r1 - i*r2 for a vector tensor level, applied to each of
// the N components in turn.
template<class vtype,int N> accelerator_inline void subTimesI(iVector<vtype,N> &ret,const iVector<vtype,N>&r1,const iVector<vtype,N>&r2)
{
  for (int c = 0; c < N; ++c) {
    subTimesI(ret._internal[c], r1._internal[c], r2._internal[c]);
  }
}
|
||||||
|
// In-place ret = r1 - i*r2 for a matrix tensor level, applied to each of
// the N x N entries in row-major visiting order.
template<class vtype,int N> accelerator_inline void subTimesI(iMatrix<vtype,N> &ret,const iMatrix<vtype,N>&r1,const iMatrix<vtype,N>&r2)
{
  for (int row = 0; row < N; ++row) {
    for (int col = 0; col < N; ++col) {
      subTimesI(ret._internal[row][col], r1._internal[row][col], r2._internal[row][col]);
    }
  }
}
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
// end SVE
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// Conj function for scalar, vector, matrix
|
// Conj function for scalar, vector, matrix
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
|
Loading…
x
Reference in New Issue
Block a user