mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Project on group fix on GPU tracked to reciprocal sqrt collision between CUDA and Grid rsqrt
This commit is contained in:
parent
d10422ded8
commit
cc9c993f74
@ -350,7 +350,6 @@ GridUnopClass(UnaryTimesI, timesI(a));
|
||||
GridUnopClass(UnaryTimesMinusI, timesMinusI(a));
|
||||
GridUnopClass(UnaryAbs, abs(a));
|
||||
GridUnopClass(UnarySqrt, sqrt(a));
|
||||
GridUnopClass(UnaryRsqrt, rsqrt(a));
|
||||
GridUnopClass(UnarySin, sin(a));
|
||||
GridUnopClass(UnaryCos, cos(a));
|
||||
GridUnopClass(UnaryAsin, asin(a));
|
||||
@ -463,7 +462,6 @@ GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI);
|
||||
GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the
|
||||
// abs-fabs-dabs-labs thing
|
||||
GRID_DEF_UNOP(sqrt, UnarySqrt);
|
||||
GRID_DEF_UNOP(rsqrt, UnaryRsqrt);
|
||||
GRID_DEF_UNOP(sin, UnarySin);
|
||||
GRID_DEF_UNOP(cos, UnaryCos);
|
||||
GRID_DEF_UNOP(asin, UnaryAsin);
|
||||
|
@ -125,14 +125,6 @@ accelerator_inline Grid_simd<S, V> sqrt(const Grid_simd<S, V> &r) {
|
||||
return SimdApply(SqrtRealFunctor<S>(), r);
|
||||
}
|
||||
template <class S, class V>
|
||||
accelerator_inline Grid_simd<S, V> rsqrt(const Grid_simd<S, V> &r) {
|
||||
return SimdApply(RSqrtRealFunctor<S>(), r);
|
||||
}
|
||||
template <class Scalar>
|
||||
accelerator_inline Scalar rsqrt(const Scalar &r) {
|
||||
return (RSqrtRealFunctor<Scalar>(), r);
|
||||
}
|
||||
template <class S, class V>
|
||||
accelerator_inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) {
|
||||
return SimdApply(CosRealFunctor<S>(), r);
|
||||
}
|
||||
|
@ -92,17 +92,22 @@ accelerator_inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
|
||||
{
|
||||
// need a check for the group type?
|
||||
iMatrix<vtype,N> ret(arg);
|
||||
vtype rnrm;
|
||||
vtype nrm;
|
||||
vtype inner;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
|
||||
// Normalises row c1
|
||||
zeroit(inner);
|
||||
for(int c2=0;c2<N;c2++)
|
||||
inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
|
||||
|
||||
nrm = rsqrt(inner);
|
||||
nrm = sqrt(inner);
|
||||
nrm = 1.0/nrm;
|
||||
for(int c2=0;c2<N;c2++)
|
||||
ret._internal[c1][c2]*= nrm;
|
||||
|
||||
// Remove c1 from rows c1+1...N-1
|
||||
for (int b=c1+1; b<N; ++b){
|
||||
decltype(ret._internal[b][b]*ret._internal[b][b]) pr;
|
||||
zeroit(pr);
|
||||
|
@ -84,7 +84,6 @@ NAMESPACE_BEGIN(Grid);
|
||||
}
|
||||
|
||||
UNARY(sqrt);
|
||||
UNARY(rsqrt);
|
||||
UNARY(sin);
|
||||
UNARY(cos);
|
||||
UNARY(asin);
|
||||
|
Loading…
Reference in New Issue
Block a user