mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Fix ProjectOnGroup on GPU: the failure was traced to a name collision between CUDA's rsqrt and Grid's rsqrt (reciprocal square root)
This commit is contained in:
parent
d10422ded8
commit
cc9c993f74
@ -350,7 +350,6 @@ GridUnopClass(UnaryTimesI, timesI(a));
|
|||||||
GridUnopClass(UnaryTimesMinusI, timesMinusI(a));
|
GridUnopClass(UnaryTimesMinusI, timesMinusI(a));
|
||||||
GridUnopClass(UnaryAbs, abs(a));
|
GridUnopClass(UnaryAbs, abs(a));
|
||||||
GridUnopClass(UnarySqrt, sqrt(a));
|
GridUnopClass(UnarySqrt, sqrt(a));
|
||||||
GridUnopClass(UnaryRsqrt, rsqrt(a));
|
|
||||||
GridUnopClass(UnarySin, sin(a));
|
GridUnopClass(UnarySin, sin(a));
|
||||||
GridUnopClass(UnaryCos, cos(a));
|
GridUnopClass(UnaryCos, cos(a));
|
||||||
GridUnopClass(UnaryAsin, asin(a));
|
GridUnopClass(UnaryAsin, asin(a));
|
||||||
@ -463,7 +462,6 @@ GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI);
|
|||||||
GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the
|
GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the
|
||||||
// abs-fabs-dabs-labs thing
|
// abs-fabs-dabs-labs thing
|
||||||
GRID_DEF_UNOP(sqrt, UnarySqrt);
|
GRID_DEF_UNOP(sqrt, UnarySqrt);
|
||||||
GRID_DEF_UNOP(rsqrt, UnaryRsqrt);
|
|
||||||
GRID_DEF_UNOP(sin, UnarySin);
|
GRID_DEF_UNOP(sin, UnarySin);
|
||||||
GRID_DEF_UNOP(cos, UnaryCos);
|
GRID_DEF_UNOP(cos, UnaryCos);
|
||||||
GRID_DEF_UNOP(asin, UnaryAsin);
|
GRID_DEF_UNOP(asin, UnaryAsin);
|
||||||
|
@ -125,14 +125,6 @@ accelerator_inline Grid_simd<S, V> sqrt(const Grid_simd<S, V> &r) {
|
|||||||
return SimdApply(SqrtRealFunctor<S>(), r);
|
return SimdApply(SqrtRealFunctor<S>(), r);
|
||||||
}
|
}
|
||||||
template <class S, class V>
|
template <class S, class V>
|
||||||
accelerator_inline Grid_simd<S, V> rsqrt(const Grid_simd<S, V> &r) {
|
|
||||||
return SimdApply(RSqrtRealFunctor<S>(), r);
|
|
||||||
}
|
|
||||||
template <class Scalar>
|
|
||||||
accelerator_inline Scalar rsqrt(const Scalar &r) {
|
|
||||||
return (RSqrtRealFunctor<Scalar>(), r);
|
|
||||||
}
|
|
||||||
template <class S, class V>
|
|
||||||
accelerator_inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) {
|
accelerator_inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) {
|
||||||
return SimdApply(CosRealFunctor<S>(), r);
|
return SimdApply(CosRealFunctor<S>(), r);
|
||||||
}
|
}
|
||||||
|
@ -92,17 +92,22 @@ accelerator_inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
|
|||||||
{
|
{
|
||||||
// need a check for the group type?
|
// need a check for the group type?
|
||||||
iMatrix<vtype,N> ret(arg);
|
iMatrix<vtype,N> ret(arg);
|
||||||
|
vtype rnrm;
|
||||||
vtype nrm;
|
vtype nrm;
|
||||||
vtype inner;
|
vtype inner;
|
||||||
for(int c1=0;c1<N;c1++){
|
for(int c1=0;c1<N;c1++){
|
||||||
|
|
||||||
|
// Normalises row c1
|
||||||
zeroit(inner);
|
zeroit(inner);
|
||||||
for(int c2=0;c2<N;c2++)
|
for(int c2=0;c2<N;c2++)
|
||||||
inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
|
inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
|
||||||
|
|
||||||
nrm = rsqrt(inner);
|
nrm = sqrt(inner);
|
||||||
|
nrm = 1.0/nrm;
|
||||||
for(int c2=0;c2<N;c2++)
|
for(int c2=0;c2<N;c2++)
|
||||||
ret._internal[c1][c2]*= nrm;
|
ret._internal[c1][c2]*= nrm;
|
||||||
|
|
||||||
|
// Remove c1 from rows c1+1...N-1
|
||||||
for (int b=c1+1; b<N; ++b){
|
for (int b=c1+1; b<N; ++b){
|
||||||
decltype(ret._internal[b][b]*ret._internal[b][b]) pr;
|
decltype(ret._internal[b][b]*ret._internal[b][b]) pr;
|
||||||
zeroit(pr);
|
zeroit(pr);
|
||||||
|
@ -84,7 +84,6 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
}
|
}
|
||||||
|
|
||||||
UNARY(sqrt);
|
UNARY(sqrt);
|
||||||
UNARY(rsqrt);
|
|
||||||
UNARY(sin);
|
UNARY(sin);
|
||||||
UNARY(cos);
|
UNARY(cos);
|
||||||
UNARY(asin);
|
UNARY(asin);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user