1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-17 23:37:06 +01:00

Checked performance of new vector libraries.

Added a check for C++11 support in configure.ac.
This commit is contained in:
neo
2015-05-26 12:02:54 +09:00
parent 1c862dc15b
commit ece86f717b
12 changed files with 398 additions and 59 deletions

View File

@ -16,6 +16,9 @@
/* GRID_COMMS_NONE */
#define GRID_COMMS_NONE 1
/* define if the compiler supports basic C++11 syntax */
/* #undef HAVE_CXX11 */
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
don't. */
#define HAVE_DECL_BE64TOH 1

View File

@ -15,6 +15,9 @@
/* GRID_COMMS_NONE */
#undef GRID_COMMS_NONE
/* define if the compiler supports basic C++11 syntax */
#undef HAVE_CXX11
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
don't. */
#undef HAVE_DECL_BE64TOH

View File

@ -103,6 +103,10 @@ namespace Grid {
// RealF pointer overloads: store the difference / sum of *l and *r into *y.
// All three pointers are declared __restrict__.
inline void sub (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r)
{
  *y = *l - *r;
}
inline void add (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r)
{
  *y = *l + *r;
}
// Scalar overloads of vstream(): for the plain (non-vectorised) complex and
// real types the operation is an ordinary assignment of r into l.
inline void vstream(ComplexF &l, const ComplexF &r)
{
  l = r;
}
inline void vstream(ComplexD &l, const ComplexD &r)
{
  l = r;
}
inline void vstream(RealF &l, const RealF &r)
{
  l = r;
}
inline void vstream(RealD &l, const RealD &r)
{
  l = r;
}
// Empty tag type: it carries no data and is used only to select the
// `const Zero &` overloads of constructors and assignment operators.
class Zero{};

View File

@ -160,13 +160,21 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int
PARALLEL_NESTED_LOOP2
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
/*
int o =n*rhs._grid->_slice_stride[dimension];
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
if ( ocb&cbmask ) {
lhs._odata[lo+o+b]=rhs._odata[ro+o+b];
}
*/
int o =n*rhs._grid->_slice_stride[dimension]+b;
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
if ( ocb&cbmask ) {
//lhs._odata[lo+o]=rhs._odata[ro+o];
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
}
}
}

View File

@ -38,6 +38,10 @@ public:
iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
iScalar(const Zero &z){ *this = zero; };
iScalar<vtype> & operator= (const Zero &hero){
zeroit(*this);
return *this;
@ -206,6 +210,16 @@ public:
iMatrix(const Zero &z){ *this = zero; };
iMatrix() =default;
// No copy constructor...
// Copy assignment: walks the N x N block in row-major order and hands each
// element pair to vstream() rather than assigning it directly.
iMatrix& operator=(const iMatrix& rhs){
  for(int row=0;row<N;row++){
    for(int col=0;col<N;col++){
      vstream(_internal[row][col],rhs._internal[row][col]);
    }
  }
  return *this;
};
iMatrix<vtype,N> & operator= (const Zero &hero){
zeroit(*this);

View File

@ -4,7 +4,7 @@
Using intrinsics
*/
// Time-stamp: <2015-05-22 15:51:24 neo>
// Time-stamp: <2015-05-22 18:58:27 neo>
//----------------------------------------------------------------------
#include <immintrin.h>
@ -307,9 +307,7 @@ namespace Optimization {
conv.v = b;
switch (perm){
// 8x32 bits=>3 permutes
case 2:
conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1));
break;
case 2: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break;
default: assert(0); break;