From ad31cd0c23ce5128b2745bd524f48d59a0efe3af Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Fri, 3 Apr 2015 22:54:13 +0100
Subject: [PATCH] Clean up but no major changes

---
 Grid_Cartesian.h     |  13 ++----
 Grid_Lattice.h       | 108 +++++++++++++++----------------------------
 Grid_cshift_common.h |  27 ++++++-----
 Grid_cshift_mpi.h    |  82 ++------------------------------
 Grid_math_types.h    |   2 +-
 Grid_mpi.cc          |  22 +++------
 Grid_vComplexD.h     |   8 ++--
 Grid_vComplexF.h     |   7 ++-
 Grid_vRealD.h        |   5 +-
 Grid_vRealF.h        |   5 +-
 10 files changed, 74 insertions(+), 205 deletions(-)
diff --git a/Grid_Cartesian.h b/Grid_Cartesian.h
index ced06825..a983639c 100644
--- a/Grid_Cartesian.h
+++ b/Grid_Cartesian.h
@@ -201,10 +201,8 @@ public:
             block = block*_rdimensions[d];
         }
             
-        if ( _isites != vComplex::Nsimd()) {
-            printf("bad layout for grid isites %d Nsimd %d\n",_isites,vComplex::Nsimd());
-            exit(0);
-        }
+        assert( _isites == vComplex::Nsimd());
+
     };
 };
  
@@ -235,10 +233,8 @@ public:
       int ocb=CheckerBoardFromOsite(osite);
 	  
       if ( (source_cb+ocb)&1 ) {
-	printf("Checkerboard shift %d\n",(shift)/2);
 	return (shift)/2;
       } else {
-	printf("Checkerboard shift %d\n",(shift+1)/2);
 	return (shift+1)/2;
       }
     }
@@ -314,10 +310,7 @@ public:
             block = block*_rdimensions[d];
         }
             
-        if ( _isites != vComplex::Nsimd()) {
-            printf("bad layout for grid isites %d Nsimd %d\n",_isites,vComplex::Nsimd());
-            exit(0);
-        }
+        assert ( _isites == vComplex::Nsimd());
     };
 protected:
     virtual int oIndex(std::vector<int> &coor)
diff --git a/Grid_Lattice.h b/Grid_Lattice.h
index 6219235a..aab4fec9 100644
--- a/Grid_Lattice.h
+++ b/Grid_Lattice.h
@@ -30,9 +30,7 @@ public:
 
     Lattice(SimdGrid *grid) : _grid(grid) {
         _odata.reserve(_grid->oSites());
-        if ( ((uint64_t)&_odata[0])&0xF) {
-            exit(-1);
-        }
+        assert((((uint64_t)&_odata[0])&0xF) ==0);
         checkerboard=0;
     }
     
@@ -97,26 +95,25 @@ public:
     template<class sobj>
     friend void pokeSite(const sobj &s,Lattice<vobj> &l,std::vector<int> &site){
 
-        if ( l.checkerboard != l._grid->CheckerBoard(site)){
-	  printf("Poking wrong checkerboard\n");
-	  exit(EXIT_FAILURE);
-        }
+      typedef typename vobj::scalar_type stype;
+      typedef typename vobj::vector_type vtype;
 
-        int o_index = l._grid->oIndex(site);
-        int i_index = l._grid->iIndex(site);
+      assert( l.checkerboard == l._grid->CheckerBoard(site));
+
+      int o_index = l._grid->oIndex(site);
+      int i_index = l._grid->iIndex(site);
+      
+      stype *v_ptr = (stype *)&l._odata[o_index];
+      stype *s_ptr = (stype *)&s;
+      v_ptr = v_ptr + 2*i_index;
         
-	// BUGGY. This assumes complex real
-        Real *v_ptr = (Real *)&l._odata[o_index];
-        Real *s_ptr = (Real *)&s;
-        v_ptr = v_ptr + 2*i_index;
-        
-        for(int i=0;i<sizeof(sobj);i+=2*sizeof(Real)){
-            v_ptr[0] = s_ptr[0];
-            v_ptr[1] = s_ptr[1];
-            v_ptr+=2*vComplex::Nsimd();
-            s_ptr+=2;
-        }
-        return;
+      for(int i=0;i<sizeof(sobj);i+=2*sizeof(stype)){
+	v_ptr[0] = s_ptr[0];
+	v_ptr[1] = s_ptr[1];
+	v_ptr+=2*vtype::Nsimd();
+	s_ptr+=2;
+      }
+      return;
     };
     
     
@@ -124,25 +121,25 @@ public:
     template<class sobj>
     friend void peekSite(sobj &s,const Lattice<vobj> &l,std::vector<int> &site){
         
-        // FIXME : define exceptions set and throw up.
-        if ( l.checkerboard != l._grid->CheckerBoard(site)){
-            printf("Peeking wrong checkerboard\n");
-            exit(EXIT_FAILURE);
-        }
-        int o_index = l._grid->oIndex(site);
-        int i_index = l._grid->iIndex(site);
-        
-        Real *v_ptr = (Real *)&l._odata[o_index];
-        Real *s_ptr = (Real *)&s;
-        v_ptr = v_ptr + 2*i_index;
-        
-        for(int i=0;i<sizeof(sobj);i+=2*sizeof(Real)){
-            s_ptr[0] = v_ptr[0];
-            s_ptr[1] = v_ptr[1];
-            v_ptr+=2*vComplex::Nsimd();
-            s_ptr+=2;
-        }
-        return;
+      typedef typename vobj::scalar_type stype;
+      typedef typename vobj::vector_type vtype;
+
+      assert( l.checkerboard== l._grid->CheckerBoard(site));
+
+      int o_index = l._grid->oIndex(site);
+      int i_index = l._grid->iIndex(site);
+      
+      stype *v_ptr = (stype *)&l._odata[o_index];
+      stype *s_ptr = (stype *)&s;
+      v_ptr = v_ptr + 2*i_index;
+      
+      for(int i=0;i<sizeof(sobj);i+=2*sizeof(stype)){
+	s_ptr[0] = v_ptr[0];
+	s_ptr[1] = v_ptr[1];
+	v_ptr+=2*vtype::Nsimd();
+	s_ptr+=2;
+      }
+      return;
     };
     
     // Randomise
@@ -291,15 +288,6 @@ public:
     }
 }; // class Lattice
 
-    /* Need to implement the multiplication return type matching S S -> S, S M -> M, M S -> M through
-     all nested possibilities.
-     template<template<class> class lhs,template<class> class rhs>
-     class MultTypeSelector {
-     template<typename vtype> using ltype = lhs
-     typedef lhs type;
-     };
-     */
-    
     template<class obj1,class obj2>
     void conformable(const Lattice<obj1> &lhs,const Lattice<obj2> &rhs)
     {
@@ -313,28 +301,6 @@ public:
 	uint32_t vec_len = lhs._grid->oSites();
 #pragma omp parallel for
         for(int ss=0;ss<vec_len;ss++){
-
-	  const char * ptr =(const char*)&lhs._odata[ss];
-#ifdef PREFETCH
-          v_prefetch0(sizeof(obj2), ptr);
-#endif
-
-	  for(int i=0;i<sizeof(obj2);i+=64){
-	    _mm_prefetch(ptr+i+4096,_MM_HINT_T1);
-	    _mm_prefetch(ptr+i+256,_MM_HINT_T0);
-	  }
-
-	  ptr =(const char*)&rhs._odata[ss];
-#ifdef PREFETCH
-          v_prefetch0(sizeof(obj3), ptr);
-#endif
-
-	  for(int i=0;i<sizeof(obj3);i+=64){
-	    _mm_prefetch(ptr+i+4096,_MM_HINT_T1);
-	    _mm_prefetch(ptr+i+256,_MM_HINT_T0);
-	  }
-
-
 	  mult(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
         }
     }
diff --git a/Grid_cshift_common.h b/Grid_cshift_common.h
index b8624826..203bf2f5 100644
--- a/Grid_cshift_common.h
+++ b/Grid_cshift_common.h
@@ -1,5 +1,17 @@
 #ifndef _GRID_CSHIFT_COMMON_H_
 #define _GRID_CSHIFT_COMMON_H_
+//////////////////////////////////////////////////////////////////////////////////////////
+// Must not lose sight of the goal: to be able to construct really efficient
+// gather-to-a-point stencil code. CSHIFT is not the best way to do that, so
+// additional stencil support is probably needed.
+//
+// Stencil based code could pre-exchange haloes and use a table lookup for neighbours
+//
+// Lattice <foo> could also allocate haloes which get used for stencil code.
+//
+// Grid could create a neighbour index table for a given stencil.
+// Could also implement CovariantCshift.
+//////////////////////////////////////////////////////////////////////////////////////////
 
 //////////////////////////////////////////////////////
 // Gather for when there is no need to SIMD split
@@ -8,8 +20,6 @@ friend void Gather_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllo
 {
   int rd = rhs._grid->_rdimensions[dimension];
 
-  //  printf("Gather plane _simple mask %d\n",cbmask);
-
   if ( !rhs._grid->CheckerBoarded(dimension) ) {
 
     int so  = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane 
@@ -31,7 +41,6 @@ friend void Gather_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllo
     int o   = 0;                                      // relative offset to base within plane
     int bo  = 0;                                      // offset in buffer
 
-    //    int jjj=0;
 #pragma omp parallel for collapse(2)
     for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
       for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
@@ -39,11 +48,6 @@ friend void Gather_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllo
 	int ocb=1<<rhs._grid->CheckerBoardFromOsite(o+b);// Could easily be a table lookup
 	if ( ocb &cbmask ) {
 	  buffer[bo]=rhs._odata[so+o+b];
-	  //	  float * ptr = (float *)& rhs._odata[so+o+b];
-	  //	  if( (cbmask!=3)&&(jjj<8)){
-	  //	    printf("Gather_plane_simple %d %le bo %d\n",so+o+b,*ptr,bo);
-	  //	    jjj++;
-	  //	  }
 	  bo++;
 	}
 
@@ -215,7 +219,7 @@ friend void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimension,int
     int ro  = rplane*rhs._grid->_ostride[dimension]; // base offset for start of plane 
     int lo  = lplane*lhs._grid->_ostride[dimension]; // base offset for start of plane 
     int o   = 0;                                     // relative offset to base within plane
-    //    int jjj=0;    
+
 #pragma omp parallel for collapse(2)
     for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
       for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
@@ -224,11 +228,6 @@ friend void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimension,int
 
 	if ( ocb&cbmask ) {
 	  lhs._odata[lo+o+b]=rhs._odata[ro+o+b];
-	  //	  float *ptr =(float *) &rhs._odata[ro+o+b];
-	  //	  if((cbmask!=0x3)&&jjj<8) {
-	    //	    printf("Copy_plane %d %le n,b=%d,%d mask %d ocb %d\n",ro+o+b,*ptr,n,b,cbmask,ocb);
-	    //	    jjj++;
-	    //	  }
 	}
 
       }
diff --git a/Grid_cshift_mpi.h b/Grid_cshift_mpi.h
index 4029f152..17aff2f4 100644
--- a/Grid_cshift_mpi.h
+++ b/Grid_cshift_mpi.h
@@ -1,36 +1,10 @@
 #ifndef _GRID_MPI_CSHIFT_H_
 #define _GRID_MPI_CSHIFT_H_
 
+#ifndef MAX
 #define MAX(x,y) ((x)>(y)?(x):(y))
 #define MIN(x,y) ((x)>(y)?(y):(x))
-//////////////////////////////////////////////////////////////////////////////////////////
-// Must not lose sight that goal is to be able to construct really efficient
-// gather to a point stencil code. CSHIFT is not the best way, so probably need
-// additional stencil support.
-//
-// Could still do a templated syntax tree and make CSHIFT return lattice vector.
-//
-// Stencil based code could pre-exchange haloes and use a table lookup for neighbours
-//
-// Lattice <foo> could also allocate haloes which get used for stencil code.
-//
-// Grid could create a neighbour index table for a given stencil.
-// Could also implement CovariantCshift.
-//////////////////////////////////////////////////////////////////////////////////////////
-
-      
-/////////////////////////////////////////////////////////////
-// Q. Further split this into separate sub functions?
-/////////////////////////////////////////////////////////////
-
-// CshiftCB_local
-// CshiftCB_local_permute
-
-// Cshift_comms_splice
-// Cshift_comms
-// Cshift_local
-// Cshift_local_permute
-
+#endif
 
 friend Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
 {
@@ -71,16 +45,10 @@ friend void Cshift_comms(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int
   sshift[1] = rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,1);
 
   if ( sshift[0] == sshift[1] ) {
-    //    printf("Cshift_comms : single pass\n");
     Cshift_comms(ret,rhs,dimension,shift,0x3);
   } else {
-    //    printf("Cshift_comms : two pass\n");
-    //    printf("call1\n");
     Cshift_comms(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
-    //    printf("call2\n");
     Cshift_comms(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
-    //    printf("done\n");
-
   }
 }
 
@@ -94,11 +62,8 @@ friend void Cshift_comms_simd(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimensio
   if ( sshift[0] == sshift[1] ) {
     Cshift_comms_simd(ret,rhs,dimension,shift,0x3);
   } else {
-    //    printf("call1 0x1 cb=even\n");
     Cshift_comms_simd(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
-    //    printf("call2 0x2 cb=odd\n");
     Cshift_comms_simd(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
-    //    printf("done\n");
   }
 }
 
@@ -120,13 +85,10 @@ friend void Cshift_comms(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int
   assert(shift>=0);
   assert(shift<fd);
   
-  // Packed gather sequence is clean
   int buffer_size = rhs._grid->_slice_nblock[dimension]*rhs._grid->_slice_block[dimension];
   std::vector<vobj,alignedAllocator<vobj> > send_buf(buffer_size);
   std::vector<vobj,alignedAllocator<vobj> > recv_buf(buffer_size);
 
-  // This code could be simplified by multiple calls to single routine with extra params to
-  // encapsulate the difference in the code paths.
   int cb= (cbmask==0x2)? 1 : 0;
   int sshift= rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);
 
@@ -137,8 +99,9 @@ friend void Cshift_comms(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int
     int comm_proc = (x+sshift)/rd;
     
     if (!offnode) {
-      //      printf("local x %d sshift %d offnode %d rd %d cb %d\n",x,sshift,offnode,rd,cb);
+
       Copy_plane(ret,rhs,dimension,x,sx,cbmask); 
+
     } else {
 
       int words = send_buf.size();
@@ -146,29 +109,13 @@ friend void Cshift_comms(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int
 
       int bytes = words * sizeof(vobj);
 
-      //      printf("nonlocal x %d sx %d sshift %d offnode %d rd %d cb %d cbmask %d rhscb %d comm_proc %d\n",
-      //	     x,sx,sshift,offnode,rd,cb,cbmask,rhs.checkerboard,comm_proc);
-      //      Copy_plane(temp,rhs,dimension,x,sx,cbmask); 
-
-      // Bug found; cbmask may differ between sx plan and rx plane.
       Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask);
-      //      for(int i=0;i<MIN(words,8);i++){
-      //	float *ptr = (float *)&send_buf[i];
-      //	printf("send buf shift %d cbmask %d i %d %le\n",sshift,cbmask,i,*ptr);
-      //      }
-      //      Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask^0x3);
-      //      for(int i=0;i<MIN(words,8);i++){
-      //	float *ptr = (float *)&send_buf[i];
-      //	printf("send buf shift %d cbmask %d i %d %le\n",sshift,cbmask,i,*ptr);
-      //      }
-      //      recv_buf=send_buf;
 
       int rank           = grid->_processor;
       int recv_from_rank;
       int xmit_to_rank;
       grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
 
-      //      printf("bytes %d node %d sending to %d receiving from %d\n",bytes,rank,xmit_to_rank,recv_from_rank );
       grid->SendToRecvFrom((void *)&send_buf[0],
 			   xmit_to_rank,
 			   (void *)&recv_buf[0],
@@ -224,46 +171,29 @@ friend void  Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimensi
   int cb    = (cbmask==0x2)? 1 : 0;
   int sshift= grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);
   
-  //  printf("cshift-comms-simd: shift = %d ; sshift = %d ; cbmask %d ; simd_layout %d\n",shift,sshift,cbmask,simd_layout);
   std::vector<int> comm_offnode(simd_layout);
   std::vector<int> comm_proc   (simd_layout);  //relative processor coord in dim=dimension
 
-  // Strategy
-  //
-  //*  Loop over source planes
-  //*    if any communication needed extract and send
-  //*    if communication needed extract and send
-
   for(int x=0;x<rd;x++){       
 
     int comm_any = 0;
     for(int s=0;s<simd_layout;s++) {
-      // does shift to "neighbour" takes us off node?
-      // coordinates (reduce plane, simd_lane) of neighbour?
-      // how many nodes away is this shift?
-      // where we should send to?
-      // where we should receive from?
       int shifted_x   = x+s*rd+sshift;
       comm_offnode[s] = shifted_x >= ld; 
       comm_any        = comm_any | comm_offnode[s];
       comm_proc[s]    = shifted_x/ld;     
-      //      printf("rd %d x %d shifted %d s=%d comm_any %d\n",rd, x,shifted_x,s,comm_any);
     }
     
     int o    = 0;
     int bo   = x*grid->_ostride[dimension];
     int sx   = (x+sshift)%rd;
 
-    // Need Convenience function in _grid. Move this in
     if ( comm_any ) {
 
       for(int i=0;i<Nsimd;i++){
 	pointers[i] = (scalar_type *)&send_buf_extract[i][0];
       }
       Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
-      //      for(int i=0;i<Nsimd;i++){
-      //	printf("extracted %d %le\n",i,real(send_buf_extract[i][0]));
-      //      }
 
       for(int i=0;i<Nsimd;i++){
 
@@ -283,14 +213,11 @@ friend void  Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimensi
 			    recv_from_rank,
 			    bytes);
 
-	  //	  printf("Cshift_simd comms %d %le %le\n",i,real(recv_buf_extract[i][0]),real(send_buf_extract[i][0]));
-
 	  rpointers[i] = (scalar_type *)&recv_buf_extract[i][0];
 
 	} else { 
 
 	  rpointers[i] = (scalar_type *)&send_buf_extract[i][0];
-	  //	  printf("Cshift_simd local %d %le \n",i,real(send_buf_extract[i][0]));
 
 	}
 
@@ -311,7 +238,6 @@ friend void  Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimensi
 	} else {
 	  pointers[i] = rpointers[i];
 	}
-	//	printf("Cshift_simd perm %d num %d wrap %d swiz %d %le unswiz %le\n",permute_slice,num,wrap,i,real(pointers[i][0]),real(rpointers[i][0]));
       }
 
       Scatter_plane_merge(ret,pointers,dimension,x,cbmask);
diff --git a/Grid_math_types.h b/Grid_math_types.h
index 533ce424..3f7714d5 100644
--- a/Grid_math_types.h
+++ b/Grid_math_types.h
@@ -830,7 +830,7 @@ template<class vtype,int N> inline iMatrix<vtype,N> adj(const iMatrix<vtype,N> &
 
 /////////////////////////////////////////////////////////////////
 // Can only take the real/imag part of scalar objects, since
-// lattice objects of different complexity are non-conformable.
+// lattice objects of different complex nature are non-conformable.
 /////////////////////////////////////////////////////////////////
 template<class itype> inline auto real(const iScalar<itype> &z) -> iScalar<decltype(real(z._internal))>
 {
diff --git a/Grid_mpi.cc b/Grid_mpi.cc
index e00e3dce..c9b91210 100644
--- a/Grid_mpi.cc
+++ b/Grid_mpi.cc
@@ -17,13 +17,15 @@ CartesianCommunicator::CartesianCommunicator(std::vector<int> &processors)
   MPI_Cart_create(MPI_COMM_WORLD, _ndimension,&_processors[0],&periodic[0],1,&communicator);
   MPI_Comm_rank(communicator,&_processor);
   MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
-  printf("Hello world from processor [");
+
   for(int i=0;i<_ndimension;i++){
-    printf("%d ",_processor_coor[i]);
     _Nprocessors*=_processors[i];
   }
-  printf("]\n");
-  fflush(stdout);
+  
+  int Size; 
+  MPI_Comm_size(communicator,&Size);
+  
+  assert(Size==_Nprocessors);
 }
 
 void CartesianCommunicator::GlobalSumF(float &f){
@@ -71,15 +73,3 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
 
 }
 
-#if 0
-
-// Could possibly do a direct block strided send?
-  int MPI_Type_vector(
-		      int count,
-		      int blocklength,
-		      int stride,
-		      MPI_Datatype old_type,
-  MPI_Datatype *newtype_p
-		      );
-
-#endif
diff --git a/Grid_vComplexD.h b/Grid_vComplexD.h
index f575f3ad..d1caefb1 100644
--- a/Grid_vComplexD.h
+++ b/Grid_vComplexD.h
@@ -48,7 +48,6 @@ namespace Grid {
 #endif
 #ifdef AVX512
             ret.v = _mm512_add_pd(a.v,b.v);
-            //printf("%s %f\n",__func__,_mm512_reduce_mul_pd(ret.v));
 #endif
 #ifdef QPX
             ret.v = vec_add(a.v,b.v);
@@ -210,7 +209,7 @@ namespace Grid {
 #ifdef QPX
 #error // Not implemented yet
 #endif
-                default: exit(EXIT_FAILURE); break;
+ 	        default: assert(0); break;
             }
         };
         void vload(zvec& a){
@@ -265,8 +264,7 @@ friend inline void vstore(vComplexD &ret, ComplexD *a){
    //Note v has a3 a2 a1 a0
 #endif
 #ifdef QPX
-	printf("%s Not implemented\n",__func__);
-	exit(-1);
+	assert(0);
 #endif
         }
       friend inline void vprefetch(const vComplexD &v)
@@ -294,7 +292,7 @@ friend inline void vstore(vComplexD &ret, ComplexD *a){
              
 #endif
 #ifdef QPX
-            exit(0); // not implemented
+	     assert(0);
 #endif
             return ret;
         }
diff --git a/Grid_vComplexF.h b/Grid_vComplexF.h
index 6c3e9eac..578228a3 100644
--- a/Grid_vComplexF.h
+++ b/Grid_vComplexF.h
@@ -193,7 +193,7 @@ namespace Grid {
 #ifdef QPX
 #error
 #endif
-                default: exit(EXIT_FAILURE); break;
+	        default: assert(0); break;
             }
         };
         
@@ -235,8 +235,7 @@ friend inline void vstore(vComplexF &ret, ComplexF *a){
 //Note v has a3 a2 a1 a0
 #endif
 #ifdef QPX
-	printf("%s Not implemented\n",__func__);
-exit(-1);
+	assert(0);
 #endif
 }
       friend inline void vprefetch(const vComplexF &v)
@@ -333,7 +332,7 @@ exit(-1);
             ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // Zero out 0+real 0-imag 
 #endif
 #ifdef QPX
-            exit(0); // not implemented
+            assert(0);
 #endif
             return ret;
         }
diff --git a/Grid_vRealD.h b/Grid_vRealD.h
index 34150e4b..1abc0804 100644
--- a/Grid_vRealD.h
+++ b/Grid_vRealD.h
@@ -161,7 +161,7 @@ namespace Grid {
 #ifdef QPX
 #error
 #endif
-                default: exit(EXIT_FAILURE); break;
+	    default: assert(0);break;
             }
         };
 // gona be bye bye
@@ -214,8 +214,7 @@ namespace Grid {
             // Note v has a7 a6 a5ba4 a3 a2 a1 a0
 #endif
 #ifdef QPX
-          printf("%s Not implemented\n",__func__); 
-          exit(-1);
+	    assert(0);
 #endif
 	}
         friend inline void vprefetch(const vRealD &v)
diff --git a/Grid_vRealF.h b/Grid_vRealF.h
index 94875f7e..22809b83 100644
--- a/Grid_vRealF.h
+++ b/Grid_vRealF.h
@@ -185,7 +185,7 @@ namespace Grid {
 #ifdef QPX
 #error not implemented
 #endif
-                default: exit(EXIT_FAILURE); break;
+	    default: assert(0); break;
             }
         };
         
@@ -236,8 +236,7 @@ friend inline void vstore(vRealF &ret, float *a){
 	// Note v has a7 a6 a5ba4 a3 a2 a1 a0
 #endif
 #ifdef QPX
-	printf("%s Not implemented\n",__func__);
-	exit(-1);
+	assert(0);
 #endif
         }