mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-31 03:54:33 +00:00 
			
		
		
		
	Merge branch 'develop' of https://github.com/paboyle/Grid into develop
This commit is contained in:
		| @@ -87,15 +87,22 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr, | |||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   sliceInnerProductMatrix(m_rr,R,R,Orthog); |   sliceInnerProductMatrix(m_rr,R,R,Orthog); | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |   // Force manifest hermitian to avoid rounding related | ||||||
|   // Cholesky from Eigen |   m_rr = 0.5*(m_rr+m_rr.adjoint()); | ||||||
|   // There exists a ldlt that is documented as more stable |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   Eigen::MatrixXcd L    = m_rr.llt().matrixL();  |  | ||||||
|  |  | ||||||
|  | #if 0 | ||||||
|  |   std::cout << " Calling Cholesky  ldlt on m_rr "  << m_rr <<std::endl; | ||||||
|  |   Eigen::MatrixXcd L_ldlt = m_rr.ldlt().matrixL();  | ||||||
|  |   std::cout << " Called Cholesky  ldlt on m_rr "  << L_ldlt <<std::endl; | ||||||
|  |   auto  D_ldlt = m_rr.ldlt().vectorD();  | ||||||
|  |   std::cout << " Called Cholesky  ldlt on m_rr "  << D_ldlt <<std::endl; | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |   //  std::cout << " Calling Cholesky  llt on m_rr "  <<std::endl; | ||||||
|  |   Eigen::MatrixXcd L    = m_rr.llt().matrixL();  | ||||||
|  |   //  std::cout << " Called Cholesky  llt on m_rr "  << L <<std::endl; | ||||||
|   C    = L.adjoint(); |   C    = L.adjoint(); | ||||||
|   Cinv = C.inverse(); |   Cinv = C.inverse(); | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   // Q = R C^{-1} |   // Q = R C^{-1} | ||||||
|   // |   // | ||||||
| @@ -103,7 +110,6 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr, | |||||||
|   // |   // | ||||||
|   // NB maddMatrix conventions are Right multiplication X[j] a[j,i] already |   // NB maddMatrix conventions are Right multiplication X[j] a[j,i] already | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   // FIXME:: make a sliceMulMatrix to avoid zero vector |  | ||||||
|   sliceMulMatrix(Q,Cinv,R,Orthog); |   sliceMulMatrix(Q,Cinv,R,Orthog); | ||||||
| } | } | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -1,7 +1,5 @@ | |||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #include <Grid/GridCore.h> | #include <Grid/GridCore.h> | ||||||
|  | #include <fcntl.h> | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| @@ -63,4 +61,37 @@ void *PointerCache::Lookup(size_t bytes) { | |||||||
|   return NULL; |   return NULL; | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | void check_huge_pages(void *Buf,uint64_t BYTES) | ||||||
|  | { | ||||||
|  | #ifdef __linux__ | ||||||
|  |   int fd = open("/proc/self/pagemap", O_RDONLY); | ||||||
|  |   assert(fd >= 0); | ||||||
|  |   const int page_size = 4096; | ||||||
|  |   uint64_t virt_pfn = (uint64_t)Buf / page_size; | ||||||
|  |   off_t offset = sizeof(uint64_t) * virt_pfn; | ||||||
|  |   uint64_t npages = (BYTES + page_size-1) / page_size; | ||||||
|  |   uint64_t pagedata[npages]; | ||||||
|  |   uint64_t ret = lseek(fd, offset, SEEK_SET); | ||||||
|  |   assert(ret == offset); | ||||||
|  |   ret = ::read(fd, pagedata, sizeof(uint64_t)*npages); | ||||||
|  |   assert(ret == sizeof(uint64_t) * npages); | ||||||
|  |   int nhugepages = npages / 512; | ||||||
|  |   int n4ktotal, nnothuge; | ||||||
|  |   n4ktotal = 0; | ||||||
|  |   nnothuge = 0; | ||||||
|  |   for (int i = 0; i < nhugepages; ++i) { | ||||||
|  |     uint64_t baseaddr = (pagedata[i*512] & 0x7fffffffffffffULL) * page_size; | ||||||
|  |     for (int j = 0; j < 512; ++j) { | ||||||
|  |       uint64_t pageaddr = (pagedata[i*512+j] & 0x7fffffffffffffULL) * page_size; | ||||||
|  |       ++n4ktotal; | ||||||
|  |       if (pageaddr != baseaddr + j * page_size) | ||||||
|  | 	++nnothuge; | ||||||
|  |       } | ||||||
|  |   } | ||||||
|  |   int rank = CartesianCommunicator::RankWorld(); | ||||||
|  |   printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge); | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  |  | ||||||
| } | } | ||||||
|   | |||||||
| @@ -64,6 +64,8 @@ namespace Grid { | |||||||
|  |  | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|  |   void check_huge_pages(void *Buf,uint64_t BYTES); | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////// | ||||||
| // A lattice of something, but assume the something is SIMDized. | // A lattice of something, but assume the something is SIMDized. | ||||||
| //////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////// | ||||||
|   | |||||||
							
								
								
									
										16252
									
								
								lib/json/json.hpp
									
									
									
									
									
								
							
							
						
						
									
										16252
									
								
								lib/json/json.hpp
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -86,7 +86,7 @@ namespace Grid { | |||||||
|                                       or element<T>::is_number; |                                       or element<T>::is_number; | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
|   // Vector flatening utility class //////////////////////////////////////////// |   // Vector flattening utility class //////////////////////////////////////////// | ||||||
|   // Class to flatten a multidimensional std::vector |   // Class to flatten a multidimensional std::vector | ||||||
|   template <typename V> |   template <typename V> | ||||||
|   class Flatten |   class Flatten | ||||||
|   | |||||||
| @@ -42,6 +42,7 @@ JSONWriter::~JSONWriter(void) | |||||||
|  |  | ||||||
|   // write prettified JSON to file |   // write prettified JSON to file | ||||||
|   std::ofstream os(fileName_); |   std::ofstream os(fileName_); | ||||||
|  |   //std::cout << "JSONWriter::~JSONWriter" << std::endl; | ||||||
|   os << std::setw(2) << json::parse(ss_.str()) << std::endl; |   os << std::setw(2) << json::parse(ss_.str()) << std::endl; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -56,6 +57,7 @@ void JSONWriter::push(const string &s) | |||||||
|  |  | ||||||
| void JSONWriter::pop(void) | void JSONWriter::pop(void) | ||||||
| { | { | ||||||
|  |   //std::cout << "JSONWriter::pop" << std::endl; | ||||||
|   delete_comma(); |   delete_comma(); | ||||||
|   ss_ << "},"; |   ss_ << "},"; | ||||||
| } | } | ||||||
| @@ -67,20 +69,22 @@ void JSONWriter::delete_comma() | |||||||
|   ss_.str(dlast); |   ss_.str(dlast); | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| // here we are hitting a g++ bug (Bug 56480) | // here we are hitting a g++ bug (Bug 56480) | ||||||
| // compiles fine with clang | // compiles fine with clang | ||||||
| // have to wrap in the Grid namespace | // have to wrap in the Grid namespace | ||||||
| // annoying, but necessary for TravisCI | // annoying, but necessary for TravisCI | ||||||
| namespace Grid | namespace Grid | ||||||
| { | { | ||||||
|   template<> |   void JSONWriter::writeDefault(const std::string &s,	const std::string &x) | ||||||
|   void JSONWriter::writeDefault(const std::string &s, |  | ||||||
| 				const std::string &x) |  | ||||||
|   { |   { | ||||||
|  |     //std::cout << "JSONWriter::writeDefault(string) : " << s <<  std::endl; | ||||||
|  |     std::ostringstream os; | ||||||
|  |     os << std::boolalpha << x; | ||||||
|     if (s.size()) |     if (s.size()) | ||||||
|       ss_ << "\""<< s << "\" : \"" << x << "\" ," ;  |       ss_ << "\""<< s << "\" : \"" << os.str() << "\" ," ; | ||||||
|     else |     else | ||||||
|       ss_ << "\"" << x << "\" ," ; |      ss_ << os.str() << " ," ; | ||||||
|   } |   } | ||||||
| }// namespace Grid  | }// namespace Grid  | ||||||
|  |  | ||||||
| @@ -138,6 +142,7 @@ void JSONReader::pop(void) | |||||||
|  |  | ||||||
| bool JSONReader::nextElement(const std::string &s) | bool JSONReader::nextElement(const std::string &s) | ||||||
| { | { | ||||||
|  |   // Work in progress | ||||||
|   // JSON dictionaries do not support multiple names  |   // JSON dictionaries do not support multiple names  | ||||||
|   // Same name objects must be packed in vectors |   // Same name objects must be packed in vectors | ||||||
|   ++it_; |   ++it_; | ||||||
|   | |||||||
| @@ -58,10 +58,15 @@ namespace Grid | |||||||
|     void writeDefault(const std::string &s, const std::complex<U> &x); |     void writeDefault(const std::string &s, const std::complex<U> &x); | ||||||
|     template <typename U> |     template <typename U> | ||||||
|     void writeDefault(const std::string &s, const std::vector<U> &x); |     void writeDefault(const std::string &s, const std::vector<U> &x); | ||||||
|  |     template <typename U, typename P> | ||||||
|  |     void writeDefault(const std::string &s, const std::pair<U,P> &x); | ||||||
|  |  | ||||||
|     template<std::size_t N> |     template<std::size_t N> | ||||||
|     void writeDefault(const std::string &s, const char(&x)[N]); |     void writeDefault(const std::string &s, const char(&x)[N]); | ||||||
|  |  | ||||||
|  |     void writeDefault(const std::string &s, const std::string &x); | ||||||
|  |  | ||||||
|  |  | ||||||
|   private: |   private: | ||||||
|     void delete_comma(); |     void delete_comma(); | ||||||
|     std::string         fileName_; |     std::string         fileName_; | ||||||
| @@ -82,6 +87,8 @@ namespace Grid | |||||||
|     void readDefault(const std::string &s, std::complex<U> &output); |     void readDefault(const std::string &s, std::complex<U> &output); | ||||||
|     template <typename U> |     template <typename U> | ||||||
|     void readDefault(const std::string &s, std::vector<U> &output); |     void readDefault(const std::string &s, std::vector<U> &output); | ||||||
|  |     template <typename U, typename P> | ||||||
|  |     void readDefault(const std::string &s, std::pair<U,P> &output); | ||||||
|   private: |   private: | ||||||
|     json                jobject_; // main object |     json                jobject_; // main object | ||||||
|     json                jcur_;  // current json object |     json                jcur_;  // current json object | ||||||
| @@ -106,7 +113,7 @@ namespace Grid | |||||||
|   template <typename U> |   template <typename U> | ||||||
|   void JSONWriter::writeDefault(const std::string &s, const U &x) |   void JSONWriter::writeDefault(const std::string &s, const U &x) | ||||||
|   { |   { | ||||||
|     //std::cout << "JSONReader::writeDefault(U) : " << s <<  std::endl; |     //std::cout << "JSONWriter::writeDefault(U) : " << s <<  " " << x <<std::endl; | ||||||
|     std::ostringstream os; |     std::ostringstream os; | ||||||
|     os << std::boolalpha << x; |     os << std::boolalpha << x; | ||||||
|     if (s.size()) |     if (s.size()) | ||||||
| @@ -118,7 +125,7 @@ namespace Grid | |||||||
|   template <typename U> |   template <typename U> | ||||||
|   void JSONWriter::writeDefault(const std::string &s, const std::complex<U> &x) |   void JSONWriter::writeDefault(const std::string &s, const std::complex<U> &x) | ||||||
|   { |   { | ||||||
|     //std::cout << "JSONReader::writeDefault(complex) : " << s <<  std::endl; |     //std::cout << "JSONWriter::writeDefault(complex) : " << s <<  " " << x <<  std::endl; | ||||||
|     std::ostringstream os; |     std::ostringstream os; | ||||||
|     os << "["<< std::boolalpha << x.real() << ", " << x.imag() << "]"; |     os << "["<< std::boolalpha << x.real() << ", " << x.imag() << "]"; | ||||||
|     if (s.size()) |     if (s.size()) | ||||||
| @@ -127,10 +134,22 @@ namespace Grid | |||||||
|      ss_ << os.str() << " ," ; |      ss_ << os.str() << " ," ; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   template <typename U, typename P> | ||||||
|  |   void JSONWriter::writeDefault(const std::string &s, const std::pair<U,P> &x) | ||||||
|  |   { | ||||||
|  |     //std::cout << "JSONWriter::writeDefault(pair) : " << s <<  " " << x <<  std::endl; | ||||||
|  |     std::ostringstream os; | ||||||
|  |     os << "["<< std::boolalpha << "\""<< x.first << "\" , \"" << x.second << "\" ]"; | ||||||
|  |     if (s.size()) | ||||||
|  |       ss_ << "\""<< s << "\" : " << os.str() << " ," ; | ||||||
|  |     else | ||||||
|  |      ss_ << os.str() << " ," ; | ||||||
|  |   } | ||||||
|  |  | ||||||
|   template <typename U> |   template <typename U> | ||||||
|   void JSONWriter::writeDefault(const std::string &s, const std::vector<U> &x) |   void JSONWriter::writeDefault(const std::string &s, const std::vector<U> &x) | ||||||
|   { |   { | ||||||
|     //std::cout << "JSONReader::writeDefault(vec U) : " << s <<  std::endl; |     //std::cout << "JSONWriter::writeDefault(vec U) : " << s <<  std::endl; | ||||||
|  |  | ||||||
|     if (s.size()) |     if (s.size()) | ||||||
|       ss_ << " \""<<s<<"\" : ["; |       ss_ << " \""<<s<<"\" : ["; | ||||||
| @@ -146,12 +165,12 @@ namespace Grid | |||||||
|  |  | ||||||
|   template<std::size_t N> |   template<std::size_t N> | ||||||
|   void JSONWriter::writeDefault(const std::string &s, const char(&x)[N]){ |   void JSONWriter::writeDefault(const std::string &s, const char(&x)[N]){ | ||||||
|     //std::cout << "JSONReader::writeDefault(char U) : " << s <<  std::endl; |     //std::cout << "JSONWriter::writeDefault(char U) : " << s <<  "  " << x << std::endl; | ||||||
|  |  | ||||||
|     if (s.size()) |     if (s.size()) | ||||||
|     ss_ << "\""<< s << "\" : \"" << x << "\" ," ; |       ss_ << "\""<< s << "\" : \"" << x << "\" ," ; | ||||||
|     else |     else | ||||||
|     ss_ << "\"" << x << "\" ," ; |       ss_ << "\"" << x << "\" ," ; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   // Reader template implementation //////////////////////////////////////////// |   // Reader template implementation //////////////////////////////////////////// | ||||||
| @@ -173,11 +192,35 @@ namespace Grid | |||||||
|  |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   // Reader template implementation //////////////////////////////////////////// | ||||||
|  |   template <typename U, typename P> | ||||||
|  |   void JSONReader::readDefault(const std::string &s, std::pair<U,P> &output) | ||||||
|  |   { | ||||||
|  |     U first; | ||||||
|  |     P second; | ||||||
|  |     json j; | ||||||
|  |     if (s.size()){ | ||||||
|  |       //std::cout << "JSONReader::readDefault(pair) : " << s << "  |  "<< jcur_[s] << std::endl; | ||||||
|  |       j = jcur_[s]; | ||||||
|  |     } else { | ||||||
|  |       j = jcur_; | ||||||
|  |     } | ||||||
|  |     json::iterator it = j.begin(); | ||||||
|  |     jcur_ = *it; | ||||||
|  |     read("", first); | ||||||
|  |     it++; | ||||||
|  |     jcur_ = *it; | ||||||
|  |     read("", second); | ||||||
|  |     output = std::pair<U,P>(first,second); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   template <typename U> |   template <typename U> | ||||||
|   void JSONReader::readDefault(const std::string &s, std::complex<U> &output) |   void JSONReader::readDefault(const std::string &s, std::complex<U> &output) | ||||||
|   { |   { | ||||||
|     U tmp1, tmp2; |     U tmp1, tmp2; | ||||||
|     //std::cout << "JSONReader::readDefault( complex U) : " << s << "  :  "<< jcur_ << std::endl; |     //std::cout << "JSONReader::readDefault(complex U) : " << s << "  :  "<< jcur_ << std::endl; | ||||||
|     json j = jcur_; |     json j = jcur_; | ||||||
|     json::iterator it = j.begin(); |     json::iterator it = j.begin(); | ||||||
|     jcur_ = *it; |     jcur_ = *it; | ||||||
|   | |||||||
| @@ -82,11 +82,11 @@ namespace Optimization { | |||||||
|       double tmp[2]={a,b}; |       double tmp[2]={a,b}; | ||||||
|       return vld1q_f64(tmp); |       return vld1q_f64(tmp); | ||||||
|     } |     } | ||||||
|     //Real double // N:tbc |     //Real double | ||||||
|     inline float64x2_t operator()(double a){ |     inline float64x2_t operator()(double a){ | ||||||
|       return vdupq_n_f64(a); |       return vdupq_n_f64(a); | ||||||
|     } |     } | ||||||
|     //Integer // N:tbc |     //Integer | ||||||
|     inline uint32x4_t operator()(Integer a){ |     inline uint32x4_t operator()(Integer a){ | ||||||
|       return vdupq_n_u32(a); |       return vdupq_n_u32(a); | ||||||
|     } |     } | ||||||
| @@ -124,33 +124,32 @@ namespace Optimization { | |||||||
|   // Nils: Vset untested; not used currently in Grid at all; |   // Nils: Vset untested; not used currently in Grid at all; | ||||||
|   // git commit 4a8c4ccfba1d05159348d21a9698028ea847e77b |   // git commit 4a8c4ccfba1d05159348d21a9698028ea847e77b | ||||||
|   struct Vset{ |   struct Vset{ | ||||||
|     // Complex float // N:ok |     // Complex float | ||||||
|     inline float32x4_t operator()(Grid::ComplexF *a){ |     inline float32x4_t operator()(Grid::ComplexF *a){ | ||||||
|       float tmp[4]={a[1].imag(),a[1].real(),a[0].imag(),a[0].real()}; |       float tmp[4]={a[1].imag(),a[1].real(),a[0].imag(),a[0].real()}; | ||||||
|       return vld1q_f32(tmp); |       return vld1q_f32(tmp); | ||||||
|     } |     } | ||||||
|     // Complex double // N:ok |     // Complex double | ||||||
|     inline float64x2_t operator()(Grid::ComplexD *a){ |     inline float64x2_t operator()(Grid::ComplexD *a){ | ||||||
|       double tmp[2]={a[0].imag(),a[0].real()}; |       double tmp[2]={a[0].imag(),a[0].real()}; | ||||||
|       return vld1q_f64(tmp); |       return vld1q_f64(tmp); | ||||||
|     } |     } | ||||||
|     // Real float // N:ok |     // Real float | ||||||
|     inline float32x4_t operator()(float *a){ |     inline float32x4_t operator()(float *a){ | ||||||
|       float tmp[4]={a[3],a[2],a[1],a[0]}; |       float tmp[4]={a[3],a[2],a[1],a[0]}; | ||||||
|       return vld1q_f32(tmp); |       return vld1q_f32(tmp); | ||||||
|     } |     } | ||||||
|     // Real double // N:ok |     // Real double | ||||||
|     inline float64x2_t operator()(double *a){ |     inline float64x2_t operator()(double *a){ | ||||||
|       double tmp[2]={a[1],a[0]}; |       double tmp[2]={a[1],a[0]}; | ||||||
|       return vld1q_f64(tmp); |       return vld1q_f64(tmp); | ||||||
|     } |     } | ||||||
|     // Integer // N:ok |     // Integer | ||||||
|     inline uint32x4_t operator()(Integer *a){ |     inline uint32x4_t operator()(Integer *a){ | ||||||
|       return vld1q_dup_u32(a); |       return vld1q_dup_u32(a); | ||||||
|     } |     } | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   // N:leaving as is |  | ||||||
|   template <typename Out_type, typename In_type> |   template <typename Out_type, typename In_type> | ||||||
|   struct Reduce{ |   struct Reduce{ | ||||||
|     //Need templated class to overload output type |     //Need templated class to overload output type | ||||||
| @@ -249,9 +248,9 @@ namespace Optimization { | |||||||
|       return vfmaq_f32(r4, r0, a); //  ar*br-ai*bi ai*br+ar*bi ... |       return vfmaq_f32(r4, r0, a); //  ar*br-ai*bi ai*br+ar*bi ... | ||||||
|  |  | ||||||
|       // no fma, use mul and add |       // no fma, use mul and add | ||||||
|       //float32x4_t r5; |       // float32x4_t r5; | ||||||
|       //r5 = vmulq_f32(r0, a); |       // r5 = vmulq_f32(r0, a); | ||||||
|       //return vaddq_f32(r4, r5); |       // return vaddq_f32(r4, r5); | ||||||
|     } |     } | ||||||
|     // Complex double |     // Complex double | ||||||
|     inline float64x2_t operator()(float64x2_t a, float64x2_t b){ |     inline float64x2_t operator()(float64x2_t a, float64x2_t b){ | ||||||
| @@ -272,9 +271,9 @@ namespace Optimization { | |||||||
|       return vfmaq_f64(r4, r0, a); //  ar*br-ai*bi ai*br+ar*bi |       return vfmaq_f64(r4, r0, a); //  ar*br-ai*bi ai*br+ar*bi | ||||||
|  |  | ||||||
|       // no fma, use mul and add |       // no fma, use mul and add | ||||||
|       //float64x2_t r5; |       // float64x2_t r5; | ||||||
|       //r5 = vmulq_f64(r0, a); |       // r5 = vmulq_f64(r0, a); | ||||||
|       //return vaddq_f64(r4, r5); |       // return vaddq_f64(r4, r5); | ||||||
|     } |     } | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
| @@ -421,11 +420,6 @@ namespace Optimization { | |||||||
|       } |       } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| // working, but no restriction on n |  | ||||||
| //    template<int n> static inline float32x4_t tRotate(float32x4_t in){ return vextq_f32(in,in,n); }; |  | ||||||
| //    template<int n> static inline float64x2_t tRotate(float64x2_t in){ return vextq_f64(in,in,n); }; |  | ||||||
|  |  | ||||||
| // restriction on n |  | ||||||
|     template<int n> static inline float32x4_t tRotate(float32x4_t in){ return vextq_f32(in,in,n%4); }; |     template<int n> static inline float32x4_t tRotate(float32x4_t in){ return vextq_f32(in,in,n%4); }; | ||||||
|     template<int n> static inline float64x2_t tRotate(float64x2_t in){ return vextq_f64(in,in,n%2); }; |     template<int n> static inline float64x2_t tRotate(float64x2_t in){ return vextq_f64(in,in,n%2); }; | ||||||
|  |  | ||||||
| @@ -441,7 +435,7 @@ namespace Optimization { | |||||||
|       sb = vcvt_high_f32_f16(h); |       sb = vcvt_high_f32_f16(h); | ||||||
|       // there is no direct conversion from lower float32x4_t to float64x2_t |       // there is no direct conversion from lower float32x4_t to float64x2_t | ||||||
|       // vextq_f16 not supported by clang 3.8 / 4.0 / arm clang |       // vextq_f16 not supported by clang 3.8 / 4.0 / arm clang | ||||||
|       //float16x8_t h1 = vextq_f16(h, h, 4); // correct, but not supported by clang |       // float16x8_t h1 = vextq_f16(h, h, 4); // correct, but not supported by clang | ||||||
|       // workaround for clang |       // workaround for clang | ||||||
|       uint32x4_t h1u = reinterpret_cast<uint32x4_t>(h); |       uint32x4_t h1u = reinterpret_cast<uint32x4_t>(h); | ||||||
|       float16x8_t h1 = reinterpret_cast<float16x8_t>(vextq_u32(h1u, h1u, 2)); |       float16x8_t h1 = reinterpret_cast<float16x8_t>(vextq_u32(h1u, h1u, 2)); | ||||||
| @@ -547,7 +541,7 @@ namespace Optimization { | |||||||
|  |  | ||||||
|  |  | ||||||
|   //Complex double Reduce |   //Complex double Reduce | ||||||
|   template<> // N:by Boyle |   template<> | ||||||
|   inline Grid::ComplexD Reduce<Grid::ComplexD, float64x2_t>::operator()(float64x2_t in){ |   inline Grid::ComplexD Reduce<Grid::ComplexD, float64x2_t>::operator()(float64x2_t in){ | ||||||
|     u128d conv; conv.v = in; |     u128d conv; conv.v = in; | ||||||
|     return Grid::ComplexD(conv.f[0],conv.f[1]); |     return Grid::ComplexD(conv.f[0],conv.f[1]); | ||||||
| @@ -562,9 +556,7 @@ namespace Optimization { | |||||||
|   //Integer Reduce |   //Integer Reduce | ||||||
|   template<> |   template<> | ||||||
|   inline Integer Reduce<Integer, uint32x4_t>::operator()(uint32x4_t in){ |   inline Integer Reduce<Integer, uint32x4_t>::operator()(uint32x4_t in){ | ||||||
|     // FIXME unimplemented |     return vaddvq_u32(in); | ||||||
|     printf("Reduce : Missing integer implementation -> FIX\n"); |  | ||||||
|     assert(0); |  | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -604,3 +596,4 @@ namespace Optimization { | |||||||
|   typedef Optimization::TimesI      TimesISIMD; |   typedef Optimization::TimesI      TimesISIMD; | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -400,11 +400,13 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal | |||||||
|       if ( sshift[0] == sshift[1] ) { |       if ( sshift[0] == sshift[1] ) { | ||||||
| 	if (splice_dim) { | 	if (splice_dim) { | ||||||
| 	  splicetime-=usecond(); | 	  splicetime-=usecond(); | ||||||
| 	  same_node = same_node && GatherSimd(source,dimension,shift,0x3,compress,face_idx); | 	  auto tmp  = GatherSimd(source,dimension,shift,0x3,compress,face_idx); | ||||||
|  | 	  same_node = same_node && tmp; | ||||||
| 	  splicetime+=usecond(); | 	  splicetime+=usecond(); | ||||||
| 	} else {  | 	} else {  | ||||||
| 	  nosplicetime-=usecond(); | 	  nosplicetime-=usecond(); | ||||||
| 	  same_node = same_node && Gather(source,dimension,shift,0x3,compress,face_idx); | 	  auto tmp  = Gather(source,dimension,shift,0x3,compress,face_idx); | ||||||
|  | 	  same_node = same_node && tmp; | ||||||
| 	  nosplicetime+=usecond(); | 	  nosplicetime+=usecond(); | ||||||
| 	} | 	} | ||||||
|       } else { |       } else { | ||||||
| @@ -412,13 +414,15 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal | |||||||
| 	  splicetime-=usecond(); | 	  splicetime-=usecond(); | ||||||
| 	  // if checkerboard is unfavourable take two passes | 	  // if checkerboard is unfavourable take two passes | ||||||
| 	  // both with block stride loop iteration | 	  // both with block stride loop iteration | ||||||
| 	  same_node = same_node && GatherSimd(source,dimension,shift,0x1,compress,face_idx); | 	  auto tmp1 =  GatherSimd(source,dimension,shift,0x1,compress,face_idx); | ||||||
| 	  same_node = same_node && GatherSimd(source,dimension,shift,0x2,compress,face_idx); | 	  auto tmp2 =  GatherSimd(source,dimension,shift,0x2,compress,face_idx); | ||||||
|  | 	  same_node = same_node && tmp1 && tmp2; | ||||||
| 	  splicetime+=usecond(); | 	  splicetime+=usecond(); | ||||||
| 	} else { | 	} else { | ||||||
| 	  nosplicetime-=usecond(); | 	  nosplicetime-=usecond(); | ||||||
| 	  same_node = same_node && Gather(source,dimension,shift,0x1,compress,face_idx); | 	  auto tmp1 = Gather(source,dimension,shift,0x1,compress,face_idx); | ||||||
| 	  same_node = same_node && Gather(source,dimension,shift,0x2,compress,face_idx); | 	  auto tmp2 = Gather(source,dimension,shift,0x2,compress,face_idx); | ||||||
|  | 	  same_node = same_node && tmp1 && tmp2; | ||||||
| 	  nosplicetime+=usecond(); | 	  nosplicetime+=usecond(); | ||||||
| 	} | 	} | ||||||
|       } |       } | ||||||
|   | |||||||
| @@ -29,7 +29,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| #include <Grid/Grid.h> | #include <Grid/Grid.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| using namespace Grid; | using namespace Grid; | ||||||
| using namespace Grid::QCD; | using namespace Grid::QCD; | ||||||
|  |  | ||||||
| @@ -151,6 +150,11 @@ int main(int argc,char **argv) | |||||||
|   ioTest<TextWriter, TextReader>("iotest.dat", obj, "text   (object)           "); |   ioTest<TextWriter, TextReader>("iotest.dat", obj, "text   (object)           "); | ||||||
|   ioTest<TextWriter, TextReader>("iotest.dat", vec, "text   (vector of objects)"); |   ioTest<TextWriter, TextReader>("iotest.dat", vec, "text   (vector of objects)"); | ||||||
|   ioTest<TextWriter, TextReader>("iotest.dat", pair, "text   (pair of objects)"); |   ioTest<TextWriter, TextReader>("iotest.dat", pair, "text   (pair of objects)"); | ||||||
|  |   //// text | ||||||
|  |   ioTest<JSONWriter, JSONReader>("iotest.json", obj,  "JSON   (object)           "); | ||||||
|  |   ioTest<JSONWriter, JSONReader>("iotest.json", vec,  "JSON   (vector of objects)"); | ||||||
|  |   ioTest<JSONWriter, JSONReader>("iotest.json", pair, "JSON   (pair of objects)"); | ||||||
|  |  | ||||||
|   //// HDF5 |   //// HDF5 | ||||||
| #undef HAVE_HDF5 | #undef HAVE_HDF5 | ||||||
| #ifdef HAVE_HDF5 | #ifdef HAVE_HDF5 | ||||||
| @@ -201,8 +205,10 @@ int main(int argc,char **argv) | |||||||
|     JSONWriter JW("bother.json"); |     JSONWriter JW("bother.json"); | ||||||
|  |  | ||||||
|     // test basic type writing |     // test basic type writing | ||||||
|  |     myenum a = myenum::red; | ||||||
|     push(JW,"BasicTypes"); |     push(JW,"BasicTypes"); | ||||||
|     write(JW,std::string("i16"),i16); |     write(JW,std::string("i16"),i16); | ||||||
|  |     write(JW,"myenum",a); | ||||||
|     write(JW,"u16",u16); |     write(JW,"u16",u16); | ||||||
|     write(JW,"i32",i32); |     write(JW,"i32",i32); | ||||||
|     write(JW,"u32",u32); |     write(JW,"u32",u32); | ||||||
| @@ -213,13 +219,14 @@ int main(int argc,char **argv) | |||||||
|     write(JW,"b",b); |     write(JW,"b",b); | ||||||
|     pop(JW); |     pop(JW); | ||||||
|  |  | ||||||
|  |  | ||||||
|     // test serializable class writing |     // test serializable class writing | ||||||
|     myclass obj(1234); // non-trivial constructor |     myclass obj(1234); // non-trivial constructor | ||||||
|  |     std::cout << obj << std::endl; | ||||||
|     std::cout << "-- serialisable class writing to 'bother.json'..." << std::endl; |     std::cout << "-- serialisable class writing to 'bother.json'..." << std::endl; | ||||||
|     write(JW,"obj",obj); |     write(JW,"obj",obj); | ||||||
|     JW.write("obj2", obj); |     JW.write("obj2", obj); | ||||||
|  |  | ||||||
|     std::cout << obj << std::endl; |  | ||||||
|  |  | ||||||
|     std::vector<myclass> vec; |     std::vector<myclass> vec; | ||||||
|     vec.push_back(myclass(1234)); |     vec.push_back(myclass(1234)); | ||||||
| @@ -229,6 +236,7 @@ int main(int argc,char **argv) | |||||||
|  |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|   { |   { | ||||||
|     JSONReader RD("bother.json"); |     JSONReader RD("bother.json"); | ||||||
|     myclass jcopy1; |     myclass jcopy1; | ||||||
| @@ -239,6 +247,7 @@ int main(int argc,char **argv) | |||||||
|     std::cout << jcopy1 << std::endl << jveccopy1 << std::endl; |     std::cout << jcopy1 << std::endl << jveccopy1 << std::endl; | ||||||
|   } |   } | ||||||
|   |   | ||||||
|  |  | ||||||
| /* | /* | ||||||
|   // This is still work in progress |   // This is still work in progress | ||||||
|   { |   { | ||||||
|   | |||||||
							
								
								
									
										87
									
								
								tests/solver/Test_staggered_cg_prec.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								tests/solver/Test_staggered_cg_prec.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,87 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./tests/Test_wilson_cg_unprec.cc | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #include <Grid/Grid.h> | ||||||
|  | #include <Grid/algorithms/iterative/BlockConjugateGradient.h> | ||||||
|  |  | ||||||
|  | using namespace std; | ||||||
|  | using namespace Grid; | ||||||
|  | using namespace Grid::QCD; | ||||||
|  |  | ||||||
|  | template<class d> | ||||||
|  | struct scal { | ||||||
|  |   d internal; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |   Gamma::Algebra Gmu [] = { | ||||||
|  |     Gamma::Algebra::GammaX, | ||||||
|  |     Gamma::Algebra::GammaY, | ||||||
|  |     Gamma::Algebra::GammaZ, | ||||||
|  |     Gamma::Algebra::GammaT | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  | int main (int argc, char ** argv) | ||||||
|  | { | ||||||
|  |   typedef typename ImprovedStaggeredFermionR::FermionField FermionField;  | ||||||
|  |   typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;  | ||||||
|  |   typename ImprovedStaggeredFermionR::ImplParams params;  | ||||||
|  |  | ||||||
|  |   Grid_init(&argc,&argv); | ||||||
|  |  | ||||||
|  |   std::vector<int> latt_size   = GridDefaultLatt(); | ||||||
|  |   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||||
|  |   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||||
|  |   GridCartesian               Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |   GridRedBlackCartesian     RBGrid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|  |   std::vector<int> seeds({1,2,3,4}); | ||||||
|  |   GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds); | ||||||
|  |  | ||||||
|  |   FermionField src(&Grid); random(pRNG,src); | ||||||
|  |   RealD nrm = norm2(src); | ||||||
|  |   LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); | ||||||
|  |  | ||||||
|  |   double volume=1; | ||||||
|  |   for(int mu=0;mu<Nd;mu++){ | ||||||
|  |     volume=volume*latt_size[mu]; | ||||||
|  |   }   | ||||||
|  |    | ||||||
|  |   RealD mass=0.1; | ||||||
|  |   ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass); | ||||||
|  |  | ||||||
|  |   FermionField res_o(&RBGrid);  | ||||||
|  |   FermionField src_o(&RBGrid);  | ||||||
|  |   pickCheckerboard(Odd,src_o,src); | ||||||
|  |   res_o=zero; | ||||||
|  |  | ||||||
|  |   SchurDiagMooeeOperator<ImprovedStaggeredFermionR,FermionField> HermOpEO(Ds); | ||||||
|  |   ConjugateGradient<FermionField> CG(1.0e-8,10000); | ||||||
|  |   CG(HermOpEO,src_o,res_o); | ||||||
|  |  | ||||||
|  |   Grid_finalize(); | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user