mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-17 23:37:06 +01:00
checked performance of new vector libaries.
Added check for c++11 support on the configure.ac
This commit is contained in:
@ -16,6 +16,9 @@
|
||||
/* GRID_COMMS_NONE */
|
||||
#define GRID_COMMS_NONE 1
|
||||
|
||||
/* define if the compiler supports basic C++11 syntax */
|
||||
/* #undef HAVE_CXX11 */
|
||||
|
||||
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
|
||||
don't. */
|
||||
#define HAVE_DECL_BE64TOH 1
|
||||
|
@ -15,6 +15,9 @@
|
||||
/* GRID_COMMS_NONE */
|
||||
#undef GRID_COMMS_NONE
|
||||
|
||||
/* define if the compiler supports basic C++11 syntax */
|
||||
#undef HAVE_CXX11
|
||||
|
||||
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
|
||||
don't. */
|
||||
#undef HAVE_DECL_BE64TOH
|
||||
|
@ -103,6 +103,10 @@ namespace Grid {
|
||||
inline void sub (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) - (*r); }
|
||||
inline void add (RealF * __restrict__ y,const RealF * __restrict__ l,const RealF *__restrict__ r){ *y = (*l) + (*r); }
|
||||
|
||||
inline void vstream(ComplexF &l, const ComplexF &r){ l=r;}
|
||||
inline void vstream(ComplexD &l, const ComplexD &r){ l=r;}
|
||||
inline void vstream(RealF &l, const RealF &r){ l=r;}
|
||||
inline void vstream(RealD &l, const RealD &r){ l=r;}
|
||||
|
||||
|
||||
class Zero{};
|
||||
|
@ -160,13 +160,21 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int
|
||||
PARALLEL_NESTED_LOOP2
|
||||
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||
for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
|
||||
|
||||
/*
|
||||
int o =n*rhs._grid->_slice_stride[dimension];
|
||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
|
||||
if ( ocb&cbmask ) {
|
||||
lhs._odata[lo+o+b]=rhs._odata[ro+o+b];
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
int o =n*rhs._grid->_slice_stride[dimension]+b;
|
||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
|
||||
if ( ocb&cbmask ) {
|
||||
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
||||
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -38,6 +38,10 @@ public:
|
||||
iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type
|
||||
iScalar(const Zero &z){ *this = zero; };
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
iScalar<vtype> & operator= (const Zero &hero){
|
||||
zeroit(*this);
|
||||
return *this;
|
||||
@ -206,6 +210,16 @@ public:
|
||||
iMatrix(const Zero &z){ *this = zero; };
|
||||
iMatrix() =default;
|
||||
|
||||
// No copy constructor...
|
||||
|
||||
iMatrix& operator=(const iMatrix& rhs){
|
||||
for(int i=0;i<N;i++)
|
||||
for(int j=0;j<N;j++)
|
||||
vstream(_internal[i][j],rhs._internal[i][j]);
|
||||
return *this;
|
||||
};
|
||||
|
||||
|
||||
|
||||
iMatrix<vtype,N> & operator= (const Zero &hero){
|
||||
zeroit(*this);
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
Using intrinsics
|
||||
*/
|
||||
// Time-stamp: <2015-05-22 15:51:24 neo>
|
||||
// Time-stamp: <2015-05-22 18:58:27 neo>
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
#include <immintrin.h>
|
||||
@ -307,9 +307,7 @@ namespace Optimization {
|
||||
conv.v = b;
|
||||
switch (perm){
|
||||
// 8x32 bits=>3 permutes
|
||||
case 2:
|
||||
conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1));
|
||||
break;
|
||||
case 2: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
|
||||
case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
|
||||
case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break;
|
||||
default: assert(0); break;
|
||||
|
Reference in New Issue
Block a user