1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-14 05:07:05 +01:00
This commit is contained in:
Azusa Yamaguchi
2015-06-16 20:47:31 +01:00
6 changed files with 75 additions and 18 deletions

View File

@ -4,7 +4,7 @@
Using intrinsics
*/
// Time-stamp: <2015-06-09 14:26:59 neo>
// Time-stamp: <2015-06-16 23:30:41 neo>
//----------------------------------------------------------------------
#include <immintrin.h>
@ -248,7 +248,7 @@ namespace Optimization {
return _mm256_set_m128i(a1,a0);
#endif
#if defined (AVX2)
return _mm256_mul_epi32(a,b);
return _mm256_mullo_epi32(a,b);
#endif
}

View File

@ -4,7 +4,7 @@
Using intrinsics
*/
// Time-stamp: <2015-06-09 14:24:01 neo>
// Time-stamp: <2015-06-16 23:27:54 neo>
//----------------------------------------------------------------------
#include <pmmintrin.h>
@ -97,7 +97,7 @@ namespace Optimization {
}
// Integer
// Packs the four Integers a[0]..a[3] into one __m128i.
// _mm_set_epi32 takes its arguments from the highest 32-bit lane down to the
// lowest, so the indices are passed in descending order to put a[0] in lane 0.
// The ascending-order call that preceded this one reversed the lanes and made
// the corrected return unreachable; only the corrected call is kept.
inline __m128i operator()(Integer *a){
  return _mm_set_epi32(a[3],a[2],a[1],a[0]);
}
@ -181,7 +181,7 @@ namespace Optimization {
}
// Integer
// Lane-wise product of two vectors of four 32-bit integers.
// _mm_mullo_epi32 keeps the low 32 bits of each of the four products.
// The _mm_mul_epi32 call that preceded it multiplies only the low 32-bit half
// of each 64-bit element and yields two 64-bit products, which is not a
// per-lane multiply; it also made the corrected return unreachable.
// NOTE(review): _mm_mullo_epi32 is an SSE4.1 intrinsic - confirm the header
// that declares it is included by this file.
inline __m128i operator()(__m128i a, __m128i b){
  return _mm_mullo_epi32(a,b);
}
};

View File

@ -63,26 +63,70 @@ namespace Grid {
for(int c1=0;c1<N;c1++){
nrm = 0.0;
for(int c2=0;c2<N;c2++)
nrm = real(innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]));
nrm += real(innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]));
nrm = 1.0/sqrt(nrm);
std::cout << "norm : "<< nrm << "\n";
for(int c2=0;c2<N;c2++)
ret._internal[c1][c2]*= nrm;
for (int b=c1+1; b<N; ++b){
decltype(ret._internal[b][b]*ret._internal[b][b]) pr = 0.0;
for(int c=0; c<N; ++c)
pr += ret._internal[c1][c]*ret._internal[b][c];
pr += conjugate(ret._internal[c1][c])*ret._internal[b][c];
std::cout << "pr : "<< pr << "\n";
for(int c=0; c<N; ++c){
ret._internal[b][c] -= pr * ret._internal[c1][c];
}
}
}
// assuming the determinant is ok
return ret;
}
///////////////////////////////////////////////
// Determinant function for scalar, vector, matrix
///////////////////////////////////////////////
// Base cases: the determinant of a bare real or complex number is the number
// itself, so every overload hands its argument back unchanged.
inline ComplexF Determinant(const ComplexF &arg)
{
  return arg;
}
inline ComplexD Determinant(const ComplexD &arg)
{
  return arg;
}
inline RealF Determinant(const RealF &arg)
{
  return arg;
}
inline RealD Determinant(const RealD &arg)
{
  return arg;
}
// Determinant of an iScalar: take the determinant of the wrapped object and
// re-wrap the result in an iScalar of the corresponding result type.
template<class vtype>
inline auto Determinant(const iScalar<vtype> &r) -> iScalar<decltype(Determinant(r._internal))>
{
  typedef iScalar<decltype(Determinant(r._internal))> result_t;
  result_t wrapped;
  wrapped._internal = Determinant(r._internal);
  return wrapped;
}
// Determinant of an N x N iMatrix; enabled only when
// GridTypeMapper<vtype>::TensorLevel == 0.
//
// A copy of the argument is reduced to upper-triangular form by subtracting
// multiples of each pivot row from the rows below it; the determinant is then
// the product of the diagonal entries of the reduced copy.
//
// No row exchange (pivoting) is performed: every step divides by the current
// diagonal entry exactly as it stands.
template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
inline iScalar<vtype> Determinant(const iMatrix<vtype,N> &arg)
{
  iMatrix<vtype,N> work(arg);
  iScalar<vtype> det = vtype(1.0);

  // Eliminate the entries below the diagonal, one pivot column at a time.
  for(int piv = 0; piv < N; ++piv){
    for(int row = piv+1; row < N; ++row){
      // The factor is fixed before the row is modified.
      vtype factor = work._internal[row][piv]/work._internal[piv][piv];
      for(int col = 0; col < N; ++col){
        work._internal[row][col] -= factor * work._internal[piv][col];
      }
    }
  }

  // Multiply up the diagonal of the triangular matrix.
  for(int d = 0; d < N; ++d){
    det *= work._internal[d][d];
  }
  return det;
}
///////////////////////////////////////////////
// Exponentiate function for scalar, vector, matrix
///////////////////////////////////////////////

View File

@ -57,7 +57,7 @@ inline void extract(typename std::enable_if<!isGridTensor<vsimd>::value, const v
extracted[i]=buf[i*s];
for(int ii=1;ii<s;ii++){
if ( buf[i*s]!=buf[i*s+ii] ){
std::cout << " SIMD extract failure splat="<<s<<" ii "<<ii<<" " <<Nextr<<" "<< Nsimd<<" "<<std::endl;
std::cout << " SIMD extract failure splat = "<<s<<" ii "<<ii<<" " <<Nextr<<" "<< Nsimd<<" "<<std::endl;
for(int vv=0;vv<Nsimd;vv++) {
std::cout<< buf[vv]<<" ";
}