1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Merge pull request #118 from Lanny91/hotfix/bgq

Hotfix/bgq
This commit is contained in:
Antonin Portelli 2017-06-23 16:59:27 +01:00 committed by GitHub
commit 4372d04ad4
6 changed files with 176 additions and 18 deletions

View File

@ -701,9 +701,28 @@ namespace Optimization {
//Integer Reduce
template<>
inline Integer Reduce<Integer, __m256i>::operator()(__m256i in){
// FIXME unimplemented
printf("Reduce : Missing integer implementation -> FIX\n");
assert(0);
__m128i ret;
#if defined (AVX2)
// AVX2 horizontal adds within upper and lower halves of register; use
// SSE to add upper and lower halves for result.
__m256i v1, v2;
__m128i u1, u2;
v1 = _mm256_hadd_epi32(in, in);
v2 = _mm256_hadd_epi32(v1, v1);
u1 = _mm256_castsi256_si128(v2); // upper half
u2 = _mm256_extracti128_si256(v2, 1); // lower half
ret = _mm_add_epi32(u1, u2);
#else
// No AVX horizontal add; extract upper and lower halves of register & use
// SSE intrinsics.
__m128i u1, u2, u3;
u1 = _mm256_extractf128_si256(in, 0); // upper half
u2 = _mm256_extractf128_si256(in, 1); // lower half
u3 = _mm_add_epi32(u1, u2);
u1 = _mm_hadd_epi32(u3, u3);
ret = _mm_hadd_epi32(u1, u1);
#endif
return _mm_cvtsi128_si32(ret);
}
}

View File

@ -543,6 +543,24 @@ namespace Optimization {
u512d conv; conv.v = v1;
return conv.f[0];
}
//Integer Reduce
template<>
inline Integer Reduce<Integer, __m512i>::operator()(__m512i in){
// No full vector reduce, use AVX to add upper and lower halves of register
// and perform AVX reduction.
__m256i v1, v2, v3;
__m128i u1, u2, ret;
v1 = _mm512_castsi512_si256(in); // upper half
v2 = _mm512_extracti32x8_epi32(in, 1); // lower half
v3 = _mm256_add_epi32(v1, v2);
v1 = _mm256_hadd_epi32(v3, v3);
v2 = _mm256_hadd_epi32(v1, v1);
u1 = _mm256_castsi256_si128(v2) // upper half
u2 = _mm256_extracti128_si256(v2, 1); // lower half
ret = _mm_add_epi32(u1, u2);
return _mm_cvtsi128_si32(ret);
}
#else
//Complex float Reduce
template<>
@ -570,9 +588,7 @@ namespace Optimization {
//Integer Reduce
template<>
inline Integer Reduce<Integer, __m512i>::operator()(__m512i in){
// FIXME unimplemented
printf("Reduce : Missing integer implementation -> FIX\n");
assert(0);
return _mm512_reduce_add_epi32(in);
}
#endif

View File

@ -401,9 +401,7 @@ namespace Optimization {
//Integer Reduce
template<>
inline Integer Reduce<Integer, __m512i>::operator()(__m512i in){
// FIXME unimplemented
printf("Reduce : Missing integer implementation -> FIX\n");
assert(0);
return _mm512_reduce_add_epi32(in);
}

View File

@ -374,6 +374,84 @@ namespace Optimization {
// Complex float
FLOAT_WRAP_2(operator(), inline)
};
#define USE_FP16
struct PrecisionChange {
static inline vech StoH (const vector4float &a, const vector4float &b) {
vech ret;
std::cout << GridLogError << "QPX single to half precision conversion not yet supported." << std::endl;
assert(0);
return ret;
}
static inline void HtoS (vech h, vector4float &sa, vector4float &sb) {
std::cout << GridLogError << "QPX half to single precision conversion not yet supported." << std::endl;
assert(0);
}
static inline vector4float DtoS (vector4double a, vector4double b) {
vector4float ret;
std::cout << GridLogError << "QPX double to single precision conversion not yet supported." << std::endl;
assert(0);
return ret;
}
static inline void StoD (vector4float s, vector4double &a, vector4double &b) {
std::cout << GridLogError << "QPX single to double precision conversion not yet supported." << std::endl;
assert(0);
}
static inline vech DtoH (vector4double a, vector4double b,
vector4double c, vector4double d) {
vech ret;
std::cout << GridLogError << "QPX double to half precision conversion not yet supported." << std::endl;
assert(0);
return ret;
}
static inline void HtoD (vech h, vector4double &a, vector4double &b,
vector4double &c, vector4double &d) {
std::cout << GridLogError << "QPX half to double precision conversion not yet supported." << std::endl;
assert(0);
}
};
//////////////////////////////////////////////
// Exchange support
#define FLOAT_WRAP_EXCHANGE(fn) \
static inline void fn(vector4float &out1, vector4float &out2, \
vector4float in1, vector4float in2) \
{ \
vector4double out1d, out2d, in1d, in2d; \
in1d = Vset()(in1); \
in2d = Vset()(in2); \
fn(out1d, out2d, in1d, in2d); \
Vstore()(out1d, out1); \
Vstore()(out2d, out2); \
}
struct Exchange{
// double precision
static inline void Exchange0(vector4double &out1, vector4double &out2,
vector4double in1, vector4double in2) {
out1 = vec_perm(in1, in2, vec_gpci(0145));
out2 = vec_perm(in1, in2, vec_gpci(02367));
}
static inline void Exchange1(vector4double &out1, vector4double &out2,
vector4double in1, vector4double in2) {
out1 = vec_perm(in1, in2, vec_gpci(0426));
out2 = vec_perm(in1, in2, vec_gpci(01537));
}
static inline void Exchange2(vector4double &out1, vector4double &out2,
vector4double in1, vector4double in2) {
assert(0);
}
static inline void Exchange3(vector4double &out1, vector4double &out2,
vector4double in1, vector4double in2) {
assert(0);
}
// single precision
FLOAT_WRAP_EXCHANGE(Exchange0);
FLOAT_WRAP_EXCHANGE(Exchange1);
FLOAT_WRAP_EXCHANGE(Exchange2);
FLOAT_WRAP_EXCHANGE(Exchange3);
};
struct Permute{
//Complex double
@ -497,15 +575,19 @@ namespace Optimization {
//Integer Reduce
template<>
inline Integer Reduce<Integer, int>::operator()(int in){
// FIXME unimplemented
printf("Reduce : Missing integer implementation -> FIX\n");
assert(0);
inline Integer Reduce<Integer, veci>::operator()(veci in){
Integer a = 0;
for (unsigned int i = 0; i < W<Integer>::r; ++i)
{
a += in.v[i];
}
return a;
}
}
////////////////////////////////////////////////////////////////////////////////
// Here assign types
typedef Optimization::vech SIMD_Htype; // Half precision type
typedef Optimization::vector4float SIMD_Ftype; // Single precision type
typedef vector4double SIMD_Dtype; // Double precision type
typedef Optimization::veci SIMD_Itype; // Integer type

View File

@ -570,9 +570,9 @@ namespace Optimization {
//Integer Reduce
template<>
inline Integer Reduce<Integer, __m128i>::operator()(__m128i in){
// FIXME unimplemented
printf("Reduce : Missing integer implementation -> FIX\n");
assert(0);
__m128i v1 = _mm_hadd_epi32(in, in);
__m128i v2 = _mm_hadd_epi32(v1, v1);
return _mm_cvtsi128_si32(v2);
}
}

View File

@ -183,8 +183,6 @@ void IntTester(const functor &func)
{
typedef Integer scal;
typedef vInteger vec;
GridSerialRNG sRNG;
sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
int Nsimd = vec::Nsimd();
@ -287,6 +285,50 @@ void ReductionTester(const functor &func)
}
template<class reduced,class scal, class vec,class functor >
void IntReductionTester(const functor &func)
{
int Nsimd = vec::Nsimd();
std::vector<scal> input1(Nsimd);
std::vector<scal> input2(Nsimd);
reduced result(0);
reduced reference(0);
reduced tmp;
std::vector<vec,alignedAllocator<vec> > buf(3);
vec & v_input1 = buf[0];
vec & v_input2 = buf[1];
for(int i=0;i<Nsimd;i++){
input1[i] = (i + 1) * 30;
input2[i] = (i + 1) * 20;
}
merge<vec,scal>(v_input1,input1);
merge<vec,scal>(v_input2,input2);
func.template vfunc<reduced,vec>(result,v_input1,v_input2);
for(int i=0;i<Nsimd;i++) {
func.template sfunc<reduced,scal>(tmp,input1[i],input2[i]);
reference+=tmp;
}
std::cout<<GridLogMessage << " " << func.name()<<std::endl;
int ok=0;
if ( reference-result != 0 ){
std::cout<<GridLogMessage<< "*****" << std::endl;
std::cout<<GridLogMessage<< reference-result << " " <<reference<< " " << result<<std::endl;
ok++;
}
if ( ok==0 ) {
std::cout<<GridLogMessage << " OK!" <<std::endl;
}
assert(ok==0);
}
class funcPermute {
public:
@ -691,6 +733,7 @@ int main (int argc, char ** argv)
IntTester(funcPlus());
IntTester(funcMinus());
IntTester(funcTimes());
IntReductionTester<Integer, Integer, vInteger>(funcReduce());
std::cout<<GridLogMessage << "==================================="<< std::endl;
std::cout<<GridLogMessage << "Testing precisionChange "<< std::endl;