From 3a3f54932aa4eab5f72d914d3e7b19a09fed3669 Mon Sep 17 00:00:00 2001 From: neo Date: Wed, 20 May 2015 17:22:40 +0900 Subject: [PATCH] Implemented all SSE4 functions. A test code Grid_simd_new.cc has been created to test the new class. Tests are all OK. --- lib/simd/Grid_sse4.h | 170 +++++++++++++++++++++++++----- lib/simd/Grid_vector_types.h | 194 ++++++++++++++++++++++++----------- tests/Grid_main.cc | 16 ++- tests/Grid_simd_new.cc | 165 +++++++++++++++++++++++++++++ tests/Makefile.am | 5 +- 5 files changed, 458 insertions(+), 92 deletions(-) create mode 100644 tests/Grid_simd_new.cc diff --git a/lib/simd/Grid_sse4.h b/lib/simd/Grid_sse4.h index ed4039b7..ddc3490b 100644 --- a/lib/simd/Grid_sse4.h +++ b/lib/simd/Grid_sse4.h @@ -1,8 +1,10 @@ //---------------------------------------------------------------------- /*! @file Grid_sse4.h - @brief Optimization libraries + @brief Optimization libraries for SSE4 instructions set + + Using intrinsics */ -// Time-stamp: <2015-05-19 17:06:51 neo> +// Time-stamp: <2015-05-20 16:45:39 neo> //---------------------------------------------------------------------- #include @@ -49,6 +51,20 @@ namespace Optimization { }; + struct Vstream{ + //Float + inline void operator()(__m128 a, __m128 b){ + _mm_stream_ps((float *)&a,b); + } + //Double + inline void operator()(__m128d a, __m128d b){ + _mm_stream_pd((double *)&a,b); + } + + + }; + + struct Vset{ // Complex float @@ -75,27 +91,20 @@ namespace Optimization { }; + template struct Reduce{ - //Complex float - inline Grid::ComplexF operator()(__m128 in){ - union { - __m128 v1; - float f[4]; - } u128; - u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros - return Grid::ComplexF(u128.f[0], u128.f[1]); + //Need templated class to overload output type + //General form must generate error if compiled + inline Out_type operator()(In_type in){ + printf("Error, using wrong Reduce function\n"); + exit(1); + return 0; } - //Complex double - inline Grid::ComplexD operator()(__m128d in){ - printf("Missing complex double implementation -> FIX\n"); - return Grid::ComplexD(0,0); // FIXME wrong - } - - - }; + + ///////////////////////////////////////////////////// // Arithmetic operations ///////////////////////////////////////////////////// @@ -129,25 +138,26 @@ namespace Optimization { } }; + struct MultComplex{ // Complex float inline __m128 operator()(__m128 a, __m128 b){ __m128 ymm0,ymm1,ymm2; ymm0 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar, - ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br + ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br ymm1 = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi ymm2 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai - ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi + ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi return _mm_addsub_ps(ymm0,ymm1); } // Complex double inline __m128d operator()(__m128d a, __m128d b){ __m128d ymm0,ymm1,ymm2; - ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, + ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, ymm0 = _mm_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br - ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi b01 - ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai b11 - ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi + ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi b01 + ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai b11 + ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi return _mm_addsub_pd(ymm0,ymm1); } }; @@ -165,14 +175,112 @@ namespace Optimization { inline __m128i operator()(__m128i a, __m128i b){ return _mm_mul_epi32(a,b); } + }; + + + struct Conj{ + // Complex single + inline __m128 operator()(__m128 in){ + return _mm_xor_ps(_mm_addsub_ps(_mm_setzero_ps(),in), _mm_set1_ps(-0.f)); + } + // Complex double + inline __m128d operator()(__m128d in){ + return _mm_xor_pd(_mm_addsub_pd(_mm_setzero_pd(),in), _mm_set1_pd(-0.f));//untested + } + // do not define for integer input + }; + + struct TimesMinusI{ + //Complex single + inline __m128 operator()(__m128 in, __m128 ret){ + __m128 tmp =_mm_addsub_ps(_mm_setzero_ps(),in); // r,-i + return _mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1)); + } + //Complex double + inline __m128d operator()(__m128d in, __m128d ret){ + __m128d tmp =_mm_addsub_pd(_mm_setzero_pd(),in); // r,-i + return _mm_shuffle_pd(tmp,tmp,0x1); + } + + + }; + + struct TimesI{ + //Complex single + inline __m128 operator()(__m128 in, __m128 ret){ + __m128 tmp =_mm_shuffle_ps(in,in,_MM_SHUFFLE(2,3,0,1)); + return _mm_addsub_ps(_mm_setzero_ps(),tmp); // r,-i + } + //Complex double + inline __m128d operator()(__m128d in, __m128d ret){ + __m128d tmp = _mm_shuffle_pd(in,in,0x1); + return _mm_addsub_pd(_mm_setzero_pd(),tmp); // r,-i + } + }; + + ////////////////////////////////////////////// + // Some Template specialization + + //Complex float Reduce + template<> + inline Grid::ComplexF Reduce::operator()(__m128 in){ + union { + __m128 v1; + float f[4]; + } u128; + u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros + return Grid::ComplexF(u128.f[0], u128.f[1]); + } + //Real float Reduce + template<> + inline Grid::RealF Reduce::operator()(__m128 in){ + // FIXME Hack + const Grid::RealF * ptr = (const Grid::RealF *) ∈ + Grid::RealF ret = 0; + for(int i=0;i< 4 ;i++){ // 4 number of simd lanes for float + ret = ret+ptr[i]; + } + return ret; + } + + + //Complex double Reduce + template<> + inline Grid::ComplexD Reduce::operator()(__m128d in){ + printf("Reduce : Missing good complex double implementation -> FIX\n"); + return Grid::ComplexD(in[0], in[1]); // inefficient + } + + //Real double Reduce + template<> + inline Grid::RealD Reduce::operator()(__m128d in){ + // FIXME Hack + const Grid::RealD * ptr =(const Grid::RealD *) ∈ + Grid::RealD ret = 0; + for(int i=0;i< 2 ;i++){// 2 number of simd lanes for float + ret = ret+ptr[i]; + } + return ret; + } + + //Integer Reduce + template<> + inline Integer Reduce::operator()(__m128i in){ + // FIXME unimplemented + printf("Reduce : Missing integer implementation -> FIX\n"); + assert(0); + } + + } +////////////////////////////////////////////////////////////////////////////////////// // Here assign types namespace Grid { typedef __m128 SIMD_Ftype; // Single precision type @@ -180,15 +288,21 @@ namespace Grid { typedef __m128i SIMD_Itype; // Integer type - // Function names - typedef Optimization::Vsplat VsplatSIMD; - typedef Optimization::Vstore VstoreSIMD; + // Function name aliases + typedef Optimization::Vsplat VsplatSIMD; + typedef Optimization::Vstore VstoreSIMD; + typedef Optimization::Vset VsetSIMD; + typedef Optimization::Vstream VstreamSIMD; + template using ReduceSIMD = Optimization::Reduce; + // Arithmetic operations typedef Optimization::Sum SumSIMD; typedef Optimization::Sub SubSIMD; typedef Optimization::Mult MultSIMD; typedef Optimization::MultComplex MultComplexSIMD; - typedef Optimization::Vset VsetSIMD; + typedef Optimization::Conj ConjSIMD; + typedef Optimization::TimesMinusI TimesMinusISIMD; + typedef Optimization::TimesI TimesISIMD; } diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index 030a8a79..442c5871 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -2,7 +2,7 @@ /*! @file Grid_vector_types.h @brief Defines templated class Grid_simd to deal with inner vector types */ -// Time-stamp: <2015-05-19 17:20:36 neo> +// Time-stamp: <2015-05-20 17:21:52 neo> //--------------------------------------------------------------------------- #ifndef GRID_VECTOR_TYPES #define GRID_VECTOR_TYPES @@ -22,6 +22,16 @@ namespace Grid { typedef T type; }; + // type alias used to simplify the syntax of std::enable_if + template using Invoke = + typename T::type; + template using EnableIf = + Invoke>; + template using NotEnableIf = + Invoke>; + + + //////////////////////////////////////////////////////// // Check for complexity with type traits template @@ -93,31 +103,32 @@ namespace Grid { // Initialise to 1,0,i for the correct types /////////////////////////////////////////////// // if not complex overload here - template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 > + template < class S = Scalar_type, NotEnableIf,int> = 0 > friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0); } - template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 > + template < class S = Scalar_type, NotEnableIf,int> = 0 > friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); } - // overload for complex type - template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > + // For complex types + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0,0.0); } - template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0,0.0); }// use xor? - - // For integral type - template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > + template < class S = Scalar_type, EnableIf, int> = 0 > + friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);} + + // For integral types + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline void vone(Grid_simd &ret) { vsplat(ret,1); } - template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); } - template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);} - template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline void vfalse(vInteger &ret){vsplat(ret,0);} - // do not compile if real or integer, send an error message from the compiler - template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > - friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);} + + //////////////////////////////////// // Arithmetic operator overloads +,-,* @@ -137,7 +148,7 @@ namespace Grid { }; // Distinguish between complex types and others - template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 > + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline Grid_simd operator * (Grid_simd a, Grid_simd b) { Grid_simd ret; @@ -146,7 +157,7 @@ namespace Grid { }; // Real/Integer types - template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 > + template < class S = Scalar_type, NotEnableIf, int> = 0 > friend inline Grid_simd operator * (Grid_simd a, Grid_simd b) { Grid_simd ret; @@ -155,8 +166,6 @@ namespace Grid { }; - - //////////////////////////////////////////////////////////////////////// // FIXME: gonna remove these load/store, get, set, prefetch //////////////////////////////////////////////////////////////////////// @@ -169,14 +178,14 @@ namespace Grid { /////////////////////// // overload if complex template < class S = Scalar_type > - friend inline void vsplat(Grid_simd &ret, typename std::enable_if< is_complex < S >::value, S>::type c){ + friend inline void vsplat(Grid_simd &ret, EnableIf, S> c){ Real a = real(c); Real b = imag(c); vsplat(ret,a,b); } - // this only for the complex version - template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 > + // this is only for the complex version + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline void vsplat(Grid_simd &ret,Real a, Real b){ ret.v = binary(a, b, VsplatSIMD()); } @@ -186,22 +195,45 @@ namespace Grid { ret.v = unary(a, VsplatSIMD()); } - + /////////////////////// + // Vstore + /////////////////////// friend inline void vstore(const Grid_simd &ret, Scalar_type *a){ binary(ret.v, (Real*)a, VstoreSIMD()); } + /////////////////////// + // Vstream + /////////////////////// + friend inline void vstream(Grid_simd &out,const Grid_simd &in){ + binary(out.v, in.v, VstreamSIMD()); + } + + template < class S = Scalar_type, EnableIf, int> = 0 > + friend inline void vstream(Grid_simd &out,const Grid_simd &in){ + out=in; + } + + /////////////////////// + // Vprefetch + /////////////////////// friend inline void vprefetch(const Grid_simd &v) { _mm_prefetch((const char*)&v.v,_MM_HINT_T0); } + /////////////////////// + // Reduce + /////////////////////// friend inline Scalar_type Reduce(const Grid_simd & in) { - // FIXME add operator + return unary(in.v, ReduceSIMD()); } + //////////////////////////// + // opreator scalar * simd + //////////////////////////// friend inline Grid_simd operator * (const Scalar_type &a, Grid_simd b){ Grid_simd va; vsplat(va,a); @@ -214,25 +246,63 @@ namespace Grid { /////////////////////// // Conjugate /////////////////////// - + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline Grid_simd conj(const Grid_simd &in){ - Grid_simd ret ; vzero(ret); - // FIXME add operator + Grid_simd ret ; + ret.v = unary(in.v, ConjSIMD()); return ret; } + template < class S = Scalar_type, NotEnableIf, int> = 0 > + friend inline Grid_simd conj(const Grid_simd &in){ + return in; // for real objects + } + + + /////////////////////// + // timesMinusI + /////////////////////// + template < class S = Scalar_type, EnableIf, int> = 0 > + friend inline void timesMinusI( Grid_simd &ret,const Grid_simd &in){ + ret.v = binary(in.v, ret.v, TimesMinusISIMD()); + } + + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline Grid_simd timesMinusI(const Grid_simd &in){ Grid_simd ret; - vzero(ret); - // FIXME add operator + timesMinusI(ret,in); return ret; } - friend inline Grid_simd timesI(const Grid_simd &in){ - Grid_simd ret; vzero(ret); - // FIXME add operator - return ret; + + template < class S = Scalar_type, NotEnableIf, int> = 0 > + friend inline Grid_simd timesMinusI(const Grid_simd &in){ + return in; + } + + + /////////////////////// + // timesI + /////////////////////// + template < class S = Scalar_type, EnableIf, int> = 0 > + friend inline void timesI(Grid_simd &ret,const Grid_simd &in){ + ret.v = binary(in.v, ret.v, TimesISIMD()); } + template < class S = Scalar_type, EnableIf, int> = 0 > + friend inline Grid_simd timesI(const Grid_simd &in){ + Grid_simd ret; + timesI(ret,in); + return ret; + } + + template < class S = Scalar_type, NotEnableIf, int> = 0 > + friend inline Grid_simd timesI(const Grid_simd &in){ + return in; + } + + + /////////////////////// // Unary negation + /////////////////////// friend inline Grid_simd operator -(const Grid_simd &r) { vComplexF ret; vzero(ret); @@ -256,41 +326,22 @@ namespace Grid { - friend inline void permute(Grid_simd &y,Grid_simd b,int perm) - { - Gpermute(y,b,perm); - } - /* + //////////////////////////////////////////////////////////////////// + // General permute; assumes vector length is same across + // all subtypes; may not be a good assumption, but could + // add the vector width as a template param for BG/Q for example + //////////////////////////////////////////////////////////////////// friend inline void permute(Grid_simd &y,Grid_simd b,int perm) { Gpermute(y,b,perm); } - friend inline void merge(Grid_simd &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(const Grid_simd &y,std::vector &extracted) - { - Gextract(y,extracted); - } - friend inline void merge(Grid_simd &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(const Grid_simd &y,std::vector &extracted) - { - Gextract(y,extracted); - } - */ - + + };// end of Grid_simd class definition - - - template inline Grid_simd< scalar_type, vector_type> innerProduct(const Grid_simd< scalar_type, vector_type> & l, const Grid_simd< scalar_type, vector_type> & r) { @@ -314,7 +365,7 @@ namespace Grid { } - // Define available types (now change names to avoid clashing) + // Define available types (now change names to avoid clashing with the rest of the code) typedef Grid_simd< float , SIMD_Ftype > MyRealF; typedef Grid_simd< double , SIMD_Dtype > MyRealD; @@ -323,6 +374,29 @@ namespace Grid { + + //////////////////////////////////////////////////////////////////// + // Temporary hack to keep independent from the rest of the code + template<> struct isGridTensor { + static const bool value = false; + static const bool notvalue = true; + }; + template<> struct isGridTensor { + static const bool value = false; + static const bool notvalue = true; + }; + template<> struct isGridTensor { + static const bool value = false; + static const bool notvalue = true; + }; + template<> struct isGridTensor { + static const bool value = false; + static const bool notvalue = true; + }; + + + + } #endif diff --git a/tests/Grid_main.cc b/tests/Grid_main.cc index 17e36df9..2aa44a0f 100644 --- a/tests/Grid_main.cc +++ b/tests/Grid_main.cc @@ -161,30 +161,40 @@ int main (int argc, char ** argv) ///////// Tests the new class Grid_simd std::complex ctest(3.0,2.0); std::complex ctestf(3.0,2.0); - MyComplexF TestMe1(1.0); // fill real part + MyComplexF TestMe1(1.0); // fills only real part MyComplexD TestMe2(ctest); MyComplexD TestMe3(ctest);// compiler generate conversion of basic types //MyRealF TestMe5(ctest);// Must generate compiler error - MyRealD TestMe4(2.0); + MyRealD TestRe1(2.0); + MyRealF TestRe2(3.0); + vone(TestRe2); + MyComplexF TestMe6(ctestf); MyComplexF TestMe7(ctestf); MyComplexD TheSum= TestMe2*TestMe3; MyComplexF TheSumF= TestMe6*TestMe7; + + double dsum[2]; _mm_store_pd(dsum, TheSum.v); for (int i =0; i< 2; i++) printf("%f\n", dsum[i]); + MyComplexD TheSumI = timesMinusI(TheSum); + MyComplexF TheSumIF = timesMinusI(TheSumF); float fsum[4]; _mm_store_ps(fsum, TheSumF.v); for (int i =0; i< 4; i++) printf("%f\n", fsum[i]); - vstore(TheSum, &ctest); + vstore(TheSumI, &ctest); + std::complex sum = Reduce(TheSumF); std::cout << ctest<< std::endl; + std::cout << sum<< std::endl; + #endif /////////////////////// diff --git a/tests/Grid_simd_new.cc b/tests/Grid_simd_new.cc new file mode 100644 index 00000000..3de12231 --- /dev/null +++ b/tests/Grid_simd_new.cc @@ -0,0 +1,165 @@ +#include +#include "simd/Grid_vector_types.h" +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +class funcPlus { +public: + funcPlus() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1+i2;} + std::string name(void) const { return std::string("Plus"); } +}; +class funcMinus { +public: + funcMinus() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1-i2;} + std::string name(void) const { return std::string("Minus"); } +}; +class funcTimes { +public: + funcTimes() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1*i2;} + std::string name(void) const { return std::string("Times"); } +}; +class funcConj { +public: + funcConj() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = conj(i1);} + std::string name(void) const { return std::string("Conj"); } +}; +class funcAdj { +public: + funcAdj() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);} + std::string name(void) const { return std::string("Adj"); } +}; + +class funcTimesI { +public: + funcTimesI() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesI(i1);} + std::string name(void) const { return std::string("timesI"); } +}; + +class funcTimesMinusI { +public: + funcTimesMinusI() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesMinusI(i1);} + std::string name(void) const { return std::string("timesMinusI"); } +}; + +template +void Tester(const functor &func) +{ + GridSerialRNG sRNG; + sRNG.SeedRandomDevice(); + + int Nsimd = vec::Nsimd(); + + std::vector input1(Nsimd); + std::vector input2(Nsimd); + std::vector result(Nsimd); + std::vector reference(Nsimd); + + std::vector > buf(3); + vec & v_input1 = buf[0]; + vec & v_input2 = buf[1]; + vec & v_result = buf[2]; + + + for(int i=0;i(v_input1,input1); + merge(v_input2,input2); + merge(v_result,result); + + func(v_result,v_input1,v_input2); + + for(int i=0;i(v_result,result); + std::cout << " " << func.name()<0){ + std::cout<< "*****" << std::endl; + std::cout<< "["< latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(4,MyComplexF::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + std::vector seeds({1,2,3,4}); + + // Insist that operations on random scalars gives + // identical results to on vectors. + + std::cout << "==================================="<< std::endl; + std::cout << "Testing MyComplexF "<(funcTimesI()); + Tester(funcTimesMinusI()); + Tester(funcPlus()); + Tester(funcMinus()); + Tester(funcTimes()); + Tester(funcConj()); + Tester(funcAdj()); + + std::cout << "==================================="<< std::endl; + std::cout << "Testing MyComplexD "<(funcTimesI()); + Tester(funcTimesMinusI()); + Tester(funcPlus()); + Tester(funcMinus()); + Tester(funcTimes()); + Tester(funcConj()); + Tester(funcAdj()); + + std::cout << "==================================="<< std::endl; + std::cout << "Testing MyRealF "<(funcPlus()); + Tester(funcMinus()); + Tester(funcTimes()); + Tester(funcAdj()); + + std::cout << "==================================="<< std::endl; + std::cout << "Testing MyRealD "<(funcPlus()); + Tester(funcMinus()); + Tester(funcTimes()); + Tester(funcAdj()); + + Grid_finalize(); +} diff --git a/tests/Makefile.am b/tests/Makefile.am index 82d1b3be..8b68855d 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -5,7 +5,7 @@ AM_LDFLAGS = -L$(top_builddir)/lib # # Test code # -bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez +bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez Grid_simd_new Grid_main_SOURCES = Grid_main.cc Grid_main_LDADD = -lGrid @@ -30,3 +30,6 @@ Grid_stencil_LDADD = -lGrid Grid_simd_SOURCES = Grid_simd.cc Grid_simd_LDADD = -lGrid + +Grid_simd_new_SOURCES = Grid_simd_new.cc +Grid_simd_new_LDADD = -lGrid