mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-25 13:15:55 +01:00
Make the tests get past nvcc. Different NVCC versions are proving tricky to keep happy; this is version 9.2.
This commit is contained in:
parent
a4d9200293
commit
e73b909a48
@ -481,7 +481,7 @@ struct Rotate{
|
|||||||
// Some Template specialization
|
// Some Template specialization
|
||||||
|
|
||||||
// Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
|
// Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
|
||||||
#ifndef __INTEL_COMPILER
|
#if 0
|
||||||
#warning "Slow reduction due to incomplete reduce intrinsics"
|
#warning "Slow reduction due to incomplete reduce intrinsics"
|
||||||
//Complex float Reduce
|
//Complex float Reduce
|
||||||
template<>
|
template<>
|
||||||
|
@ -747,10 +747,11 @@ accelerator_inline Grid_simd<S, V> operator/(Grid_simd<S, V> a, Grid_simd<S, V>
|
|||||||
ret = a * conjugate(b) ;
|
ret = a * conjugate(b) ;
|
||||||
den = b * conjugate(b) ;
|
den = b * conjugate(b) ;
|
||||||
|
|
||||||
auto real_den = toReal(den);
|
// duplicates real part
|
||||||
|
auto real_den = toReal(den);
|
||||||
ret.v=binary<V>(ret.v, real_den.v, DivSIMD());
|
simd zden;
|
||||||
|
memcpy((void *)&zden.v,(void *)&real_den.v,sizeof(zden));
|
||||||
|
ret.v=binary<V>(ret.v, zden.v, DivSIMD());
|
||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -839,26 +840,28 @@ accelerator_inline Grid_simd<S, V> trace(const Grid_simd<S, V> &arg) {
|
|||||||
// insert real into complex and zero imag;
|
// insert real into complex and zero imag;
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
|
template <class T> struct toRealMapper {};
|
||||||
|
template<> struct toRealMapper<vComplexF> { typedef vRealF Realified; };
|
||||||
|
template<> struct toRealMapper<vComplexD> { typedef vRealD Realified; };
|
||||||
// real = toReal( complex )
|
// real = toReal( complex )
|
||||||
template <class S, class V, IfReal<S> = 0>
|
template <class Csimd> // must be a real arg
|
||||||
accelerator_inline Grid_simd<S, V> toReal(const Grid_simd<complex<S>, V> &in) {
|
accelerator_inline typename toRealMapper<Csimd>::Realified toReal(const Csimd &in) {
|
||||||
typedef Grid_simd<S, V> simd;
|
typedef typename toRealMapper<Csimd>::Realified Rsimd;
|
||||||
simd ret;
|
Rsimd ret;
|
||||||
typename simd::conv_t conv;
|
typename Rsimd::conv_t conv;
|
||||||
conv.v = in.v; // copy the vector content (bytewise)
|
memcpy((void *)&conv.v,(void *)&in.v,sizeof(conv.v));
|
||||||
for (int i = 0; i < simd::Nsimd(); i += 2) {
|
for (int i = 0; i < Rsimd::Nsimd(); i += 2) {
|
||||||
conv.s[i + 1] = conv.s[i]; // duplicate (r,r);(r,r);(r,r); etc...
|
conv.s[i + 1] = conv.s[i]; // duplicate (r,r);(r,r);(r,r); etc...
|
||||||
}
|
}
|
||||||
ret.v = conv.v;
|
memcpy((void *)&ret.v,(void *)&conv.v,sizeof(ret.v));
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <class T> struct toComplexMapper {};
|
template <class T> struct toComplexMapper {};
|
||||||
template<> struct toComplexMapper<vRealF> { typedef vComplexF Complexified; };
|
template<> struct toComplexMapper<vRealF> { typedef vComplexF Complexified; };
|
||||||
template<> struct toComplexMapper<vRealD> { typedef vComplexD Complexified; };
|
template<> struct toComplexMapper<vRealD> { typedef vComplexD Complexified; };
|
||||||
|
|
||||||
|
|
||||||
// complex = toComplex( real )
|
// complex = toComplex( real )
|
||||||
template <class Rsimd> // must be a real arg
|
template <class Rsimd> // must be a real arg
|
||||||
accelerator_inline typename toComplexMapper<Rsimd>::Complexified toComplex(const Rsimd &in) {
|
accelerator_inline typename toComplexMapper<Rsimd>::Complexified toComplex(const Rsimd &in) {
|
||||||
|
@ -55,6 +55,7 @@ template<class vtype, int N> accelerator_inline iVector<vtype, N> Exponentiate(c
|
|||||||
|
|
||||||
|
|
||||||
// Specialisation: Cayley-Hamilton exponential for SU(3)
|
// Specialisation: Cayley-Hamilton exponential for SU(3)
|
||||||
|
#ifndef GRID_NVCC
|
||||||
template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr>
|
template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr>
|
||||||
accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP )
|
accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP )
|
||||||
{
|
{
|
||||||
@ -114,7 +115,7 @@ accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, Re
|
|||||||
|
|
||||||
return (f0 * unit + timesMinusI(f1) * arg*alpha - f2 * iQ2);
|
return (f0 * unit + timesMinusI(f1) * arg*alpha - f2 * iQ2);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// General exponential
|
// General exponential
|
||||||
|
@ -179,7 +179,7 @@ int main(int argc, char **argv) {
|
|||||||
std::cout << "Norm2 LatticeReal : "<< norm2(BarReal) << std::endl;
|
std::cout << "Norm2 LatticeReal : "<< norm2(BarReal) << std::endl;
|
||||||
std::cout << "Norm2 LatticeComplex : "<< norm2(BarComplex) << std::endl;
|
std::cout << "Norm2 LatticeComplex : "<< norm2(BarComplex) << std::endl;
|
||||||
|
|
||||||
exit(0);
|
// exit(0);
|
||||||
|
|
||||||
TComplex tr = trace(cmat);
|
TComplex tr = trace(cmat);
|
||||||
|
|
||||||
|
@ -87,9 +87,9 @@ int main (int argc, char ** argv)
|
|||||||
auto Uprime_v = Uprime.View();
|
auto Uprime_v = Uprime.View();
|
||||||
auto U_v = U.View();
|
auto U_v = U.View();
|
||||||
auto mom_v = mom.View();
|
auto mom_v = mom.View();
|
||||||
parallel_for(auto i=mom_v.begin();i<mom_v.end();i++){ // exp(pmu dt) * Umu
|
thread_loop( (auto i=mom_v.begin();i<mom_v.end();i++),{ // exp(pmu dt) * Umu
|
||||||
Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt ;
|
Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt ;
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
ComplexD Sprime = Action.S(Uprime);
|
ComplexD Sprime = Action.S(Uprime);
|
||||||
|
@ -109,10 +109,10 @@ int main(int argc, char **argv)
|
|||||||
auto Uprime_v = Uprime.View();
|
auto Uprime_v = Uprime.View();
|
||||||
auto U_v = U.View();
|
auto U_v = U.View();
|
||||||
auto mom_v = mom.View();
|
auto mom_v = mom.View();
|
||||||
parallel_for(int ss = 0; ss < mom.Grid()->oSites(); ss++)
|
thread_loop( (int ss = 0; ss < mom.Grid()->oSites(); ss++),
|
||||||
{
|
{
|
||||||
Uprime_v[ss]._internal[mu] = ProjectOnGroup(Exponentiate(mom_v[ss]._internal[mu], dt, 12) * U_v[ss]._internal[mu]);
|
Uprime_v[ss]._internal[mu] = ProjectOnGroup(Exponentiate(mom_v[ss]._internal[mu], dt, 12) * U_v[ss]._internal[mu]);
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Initial mom hamiltonian is " << Hmom << std::endl;
|
std::cout << GridLogMessage << "Initial mom hamiltonian is " << Hmom << std::endl;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user