mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	I have made Cshift work successfully with OpenMP threading in
every routine. Collapse(2) is now working under clang-omp++.
This commit is contained in:
		@@ -205,12 +205,9 @@ AUTOCONF = @AUTOCONF@
 | 
			
		||||
AUTOHEADER = @AUTOHEADER@
 | 
			
		||||
AUTOMAKE = @AUTOMAKE@
 | 
			
		||||
AWK = @AWK@
 | 
			
		||||
CC = @CC@
 | 
			
		||||
CCDEPMODE = @CCDEPMODE@
 | 
			
		||||
CFLAGS = @CFLAGS@
 | 
			
		||||
CPP = @CPP@
 | 
			
		||||
CPPFLAGS = @CPPFLAGS@
 | 
			
		||||
CXX = @CXX@
 | 
			
		||||
CXXCPP = @CXXCPP@
 | 
			
		||||
CXXDEPMODE = @CXXDEPMODE@
 | 
			
		||||
CXXFLAGS = @CXXFLAGS@
 | 
			
		||||
CYGPATH_W = @CYGPATH_W@
 | 
			
		||||
@@ -234,7 +231,7 @@ LTLIBOBJS = @LTLIBOBJS@
 | 
			
		||||
MAKEINFO = @MAKEINFO@
 | 
			
		||||
MKDIR_P = @MKDIR_P@
 | 
			
		||||
OBJEXT = @OBJEXT@
 | 
			
		||||
OPENMP_CFLAGS = @OPENMP_CFLAGS@
 | 
			
		||||
OPENMP_CXXFLAGS = @OPENMP_CXXFLAGS@
 | 
			
		||||
PACKAGE = @PACKAGE@
 | 
			
		||||
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
 | 
			
		||||
PACKAGE_NAME = @PACKAGE_NAME@
 | 
			
		||||
@@ -252,7 +249,6 @@ abs_builddir = @abs_builddir@
 | 
			
		||||
abs_srcdir = @abs_srcdir@
 | 
			
		||||
abs_top_builddir = @abs_top_builddir@
 | 
			
		||||
abs_top_srcdir = @abs_top_srcdir@
 | 
			
		||||
ac_ct_CC = @ac_ct_CC@
 | 
			
		||||
ac_ct_CXX = @ac_ct_CXX@
 | 
			
		||||
am__include = @am__include@
 | 
			
		||||
am__leading_dot = @am__leading_dot@
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										21
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										21
									
								
								TODO
									
									
									
									
									
								
							@@ -2,10 +2,7 @@
 | 
			
		||||
*** Hacks and bug fixes to clean up and Audits
 | 
			
		||||
================================================================
 | 
			
		||||
* Base class to share common code between vRealF, VComplexF etc... 
 | 
			
		||||
 | 
			
		||||
* Performance check on Guido's reimplementation strategy
 | 
			
		||||
 | 
			
		||||
* Bug in SeedFixedIntegers gives same output on each site. -- Think I fixed but NOT checked for sure
 | 
			
		||||
  - Performance check on Guido's reimplementation strategy
 | 
			
		||||
 | 
			
		||||
* FIXME audit
 | 
			
		||||
* const audit
 | 
			
		||||
@@ -20,15 +17,16 @@
 | 
			
		||||
*** New Functionality
 | 
			
		||||
================================================================
 | 
			
		||||
 | 
			
		||||
* Implement where to take template scheme.
 | 
			
		||||
* Implement where within expression template scheme.
 | 
			
		||||
 | 
			
		||||
* - BinaryWriter, TextWriter etc...
 | 
			
		||||
  - use protocol buffers? replace xmlReader/Writer etc.
 | 
			
		||||
  - Binary use htonll, htonl
 | 
			
		||||
 | 
			
		||||
* Expression template engine:
 | 
			
		||||
* Bug in SeedFixedIntegers gave the same output on each site. -- I think this is fixed, but it has NOT been verified.
 | 
			
		||||
  Implement and use lattice IO to verify this.
 | 
			
		||||
 | 
			
		||||
   -- Audit
 | 
			
		||||
* Expression template engine: -- DONE
 | 
			
		||||
   -- Norm2(expression) problem: introduce norm2 unary op, or Introduce conversion automatic from expression to Lattice<vobj>
 | 
			
		||||
 | 
			
		||||
* CovariantShift support                             -----Use a class to store gauge field? (parallel transport?)
 | 
			
		||||
@@ -56,8 +54,9 @@
 | 
			
		||||
* TaProj
 | 
			
		||||
* FFTnD ?
 | 
			
		||||
 | 
			
		||||
* Parallel MPI2 IO
 | 
			
		||||
  Plaquette checks into nersc reader.
 | 
			
		||||
* Parallel io improvements
 | 
			
		||||
  - optional parallel MPI2 IO
 | 
			
		||||
  - move Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test.
 | 
			
		||||
  
 | 
			
		||||
* rb4d support for 5th dimension in Mobius.
 | 
			
		||||
 | 
			
		||||
@@ -70,13 +69,15 @@
 | 
			
		||||
   // copyMask.
 | 
			
		||||
   // localMaxAbs
 | 
			
		||||
   // Fourier transform equivalent.
 | 
			
		||||
Actions
 | 
			
		||||
 | 
			
		||||
Actions -- coherent framework for implementing actions and their forces.
 | 
			
		||||
* Fermion
 | 
			
		||||
  - Wilson
 | 
			
		||||
  - Clover
 | 
			
		||||
  - DomainWall
 | 
			
		||||
  - Mobius
 | 
			
		||||
  - z-Mobius
 | 
			
		||||
 | 
			
		||||
* Gauge
 | 
			
		||||
  - Wilson, Symanzik, Iwasaki
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										64
									
								
								aclocal.m4
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										64
									
								
								aclocal.m4
									
									
									
									
										vendored
									
									
								
							@@ -765,70 +765,6 @@ AC_DEFUN([_AM_SET_OPTIONS],
 | 
			
		||||
AC_DEFUN([_AM_IF_OPTION],
 | 
			
		||||
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
 | 
			
		||||
 | 
			
		||||
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
 | 
			
		||||
#
 | 
			
		||||
# This file is free software; the Free Software Foundation
 | 
			
		||||
# gives unlimited permission to copy and/or distribute it,
 | 
			
		||||
# with or without modifications, as long as this notice is preserved.
 | 
			
		||||
 | 
			
		||||
# _AM_PROG_CC_C_O
 | 
			
		||||
# ---------------
 | 
			
		||||
# Like AC_PROG_CC_C_O, but changed for automake.  We rewrite AC_PROG_CC
 | 
			
		||||
# to automatically call this.
 | 
			
		||||
AC_DEFUN([_AM_PROG_CC_C_O],
 | 
			
		||||
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
 | 
			
		||||
AC_REQUIRE_AUX_FILE([compile])dnl
 | 
			
		||||
AC_LANG_PUSH([C])dnl
 | 
			
		||||
AC_CACHE_CHECK(
 | 
			
		||||
  [whether $CC understands -c and -o together],
 | 
			
		||||
  [am_cv_prog_cc_c_o],
 | 
			
		||||
  [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])])
 | 
			
		||||
  # Make sure it works both with $CC and with simple cc.
 | 
			
		||||
  # Following AC_PROG_CC_C_O, we do the test twice because some
 | 
			
		||||
  # compilers refuse to overwrite an existing .o file with -o,
 | 
			
		||||
  # though they will create one.
 | 
			
		||||
  am_cv_prog_cc_c_o=yes
 | 
			
		||||
  for am_i in 1 2; do
 | 
			
		||||
    if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \
 | 
			
		||||
         && test -f conftest2.$ac_objext; then
 | 
			
		||||
      : OK
 | 
			
		||||
    else
 | 
			
		||||
      am_cv_prog_cc_c_o=no
 | 
			
		||||
      break
 | 
			
		||||
    fi
 | 
			
		||||
  done
 | 
			
		||||
  rm -f core conftest*
 | 
			
		||||
  unset am_i])
 | 
			
		||||
if test "$am_cv_prog_cc_c_o" != yes; then
 | 
			
		||||
   # Losing compiler, so override with the script.
 | 
			
		||||
   # FIXME: It is wrong to rewrite CC.
 | 
			
		||||
   # But if we don't then we get into trouble of one sort or another.
 | 
			
		||||
   # A longer-term fix would be to have automake use am__CC in this case,
 | 
			
		||||
   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
 | 
			
		||||
   CC="$am_aux_dir/compile $CC"
 | 
			
		||||
fi
 | 
			
		||||
AC_LANG_POP([C])])
 | 
			
		||||
 | 
			
		||||
# For backward compatibility.
 | 
			
		||||
AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
 | 
			
		||||
 | 
			
		||||
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
 | 
			
		||||
#
 | 
			
		||||
# This file is free software; the Free Software Foundation
 | 
			
		||||
# gives unlimited permission to copy and/or distribute it,
 | 
			
		||||
# with or without modifications, as long as this notice is preserved.
 | 
			
		||||
 | 
			
		||||
# AM_RUN_LOG(COMMAND)
 | 
			
		||||
# -------------------
 | 
			
		||||
# Run COMMAND, save the exit status in ac_status, and log it.
 | 
			
		||||
# (This has been adapted from Autoconf's _AC_RUN_LOG macro.)
 | 
			
		||||
AC_DEFUN([AM_RUN_LOG],
 | 
			
		||||
[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD
 | 
			
		||||
   ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD
 | 
			
		||||
   ac_status=$?
 | 
			
		||||
   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
 | 
			
		||||
   (exit $ac_status); }])
 | 
			
		||||
 | 
			
		||||
# Check to make sure that the build environment is sane.    -*- Autoconf -*-
 | 
			
		||||
 | 
			
		||||
# Copyright (C) 1996-2014 Free Software Foundation, Inc.
 | 
			
		||||
 
 | 
			
		||||
@@ -77,7 +77,7 @@ int main (int argc, char ** argv)
 | 
			
		||||
  WilsonMatrix Dw(Umu,mass);
 | 
			
		||||
  
 | 
			
		||||
  std::cout << "Calling Dw"<<std::endl;
 | 
			
		||||
  int ncall=10000;
 | 
			
		||||
  int ncall=1000;
 | 
			
		||||
  double t0=usecond();
 | 
			
		||||
  for(int i=0;i<ncall;i++){
 | 
			
		||||
    Dw.multiply(src,result);
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,4 @@
 | 
			
		||||
CXX=clang-omp++ ./configure --enable-simd=AVX CXXFLAGS="-mavx -fopenmp -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx -fopenmp"  --enable-comms=mpi
 | 
			
		||||
CXX=clang++ ./configure --enable-simd=AVX CXXFLAGS="-mavx -O3 -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx"  --enable-comms=mpi
 | 
			
		||||
CXX=clang++ ./configure --enable-simd=AVX CXXFLAGS="-mavx -g -I/opt/local/include/openmpi-mp/ -std=c++11" LDFLAGS=-L/opt/local/lib/openmpi-mp/ LIBS="-lmpi -lmpi_cxx"  --enable-comms=mpi
 | 
			
		||||
CXX=clang++ ./configure --enable-simd=AVX CXXFLAGS="-mavx -g  -std=c++11" LDFLAGS= LIBS=-lmpi  --enable-comms=fake
 | 
			
		||||
 
 | 
			
		||||
@@ -6,6 +6,7 @@ AC_CONFIG_SRCDIR([lib/Grid.h])
 | 
			
		||||
AC_CONFIG_HEADERS([lib/Grid_config.h])
 | 
			
		||||
 | 
			
		||||
# Checks for programs.
 | 
			
		||||
AC_LANG(C++)
 | 
			
		||||
AC_PROG_CXX
 | 
			
		||||
AC_OPENMP
 | 
			
		||||
AC_PROG_RANLIB
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										10
									
								
								lib/Grid.h
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								lib/Grid.h
									
									
									
									
									
								
							@@ -24,21 +24,21 @@
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <signal.h>
 | 
			
		||||
 | 
			
		||||
#ifndef MAX
 | 
			
		||||
#define MAX(x,y) ((x)>(y)?(x):(y))
 | 
			
		||||
#define MIN(x,y) ((x)>(y)?(y):(x))
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#include <Grid_config.h>
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////
 | 
			
		||||
// Tunable header includes
 | 
			
		||||
////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_OPENMP
 | 
			
		||||
#define OMP
 | 
			
		||||
#include <omp.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_MALLOC_MALLOC_H
 | 
			
		||||
#include <malloc/malloc.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_MALLOC_H
 | 
			
		||||
#include <malloc.h>
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -250,7 +250,11 @@ namespace Grid {
 | 
			
		||||
	  int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
 | 
			
		||||
	  int words = sizeof(cobj)/sizeof(vector_type);
 | 
			
		||||
 | 
			
		||||
	  /* FIXME ALTERNATE BUFFER DETERMINATION ; possibly slow to allocate*/
 | 
			
		||||
	  /*
 | 
			
		||||
	   * possibly slow to allocate
 | 
			
		||||
	   * Doesn't matter in this test, but may want to preallocate in the 
 | 
			
		||||
	   * dirac operators
 | 
			
		||||
	   */
 | 
			
		||||
	  std::vector<std::vector<scalar_object> > send_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) ); 
 | 
			
		||||
	  std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
 | 
			
		||||
	  int bytes = buffer_size*sizeof(scalar_object);
 | 
			
		||||
 
 | 
			
		||||
@@ -1,13 +1,17 @@
 | 
			
		||||
#ifndef GRID_THREADS_H
 | 
			
		||||
#define GRID_THREADS_H
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_OPENMP
 | 
			
		||||
#ifdef _OPENMP
 | 
			
		||||
#define GRID_OMP
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_OMP
 | 
			
		||||
#include <omp.h>
 | 
			
		||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for")
 | 
			
		||||
#define PARALLEL_NESTED_LOOP(n) _Pragma("omp parallel for collapse(" #n ")")
 | 
			
		||||
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
 | 
			
		||||
#else
 | 
			
		||||
#define PARALLEL_FOR_LOOP 
 | 
			
		||||
#define PARALLEL_NESTED_LOOP(n) 
 | 
			
		||||
#define PARALLEL_NESTED_LOOP2
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
@@ -20,7 +24,7 @@ class GridThread {
 | 
			
		||||
  static int _threads;
 | 
			
		||||
 | 
			
		||||
  static void SetThreads(int thr) { 
 | 
			
		||||
#ifdef HAVE_OPENMP
 | 
			
		||||
#ifdef GRID_OMP
 | 
			
		||||
    _threads = MIN(thr,omp_get_max_threads()) ;
 | 
			
		||||
    omp_set_num_threads(_threads);
 | 
			
		||||
#else 
 | 
			
		||||
@@ -28,7 +32,7 @@ class GridThread {
 | 
			
		||||
#endif
 | 
			
		||||
  };
 | 
			
		||||
  static void SetMaxThreads(void) { 
 | 
			
		||||
#ifdef HAVE_OPENMP
 | 
			
		||||
#ifdef GRID_OMP
 | 
			
		||||
    _threads = omp_get_max_threads();
 | 
			
		||||
    omp_set_num_threads(_threads);
 | 
			
		||||
#else 
 | 
			
		||||
@@ -58,7 +62,7 @@ class GridThread {
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static int  ThreadBarrier(void) {
 | 
			
		||||
#ifdef HAVE_OPENMP
 | 
			
		||||
#ifdef GRID_OMP
 | 
			
		||||
#pragma omp barrier
 | 
			
		||||
    return omp_get_thread_num();
 | 
			
		||||
#else
 | 
			
		||||
 
 | 
			
		||||
@@ -26,16 +26,15 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  int so  = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane 
 | 
			
		||||
  int bo  = 0;                                      // offset in buffer
 | 
			
		||||
  
 | 
			
		||||
PARALLEL_NESTED_LOOP(2)
 | 
			
		||||
PARALLEL_NESTED_LOOP2
 | 
			
		||||
  for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
 | 
			
		||||
    for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
 | 
			
		||||
      int o = n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
      int o  = n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
      int bo = n*rhs._grid->_slice_block[dimension];
 | 
			
		||||
      int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
 | 
			
		||||
      if ( ocb &cbmask ) {
 | 
			
		||||
	buffer[bo]=compress(rhs._odata[so+o+b]);
 | 
			
		||||
	bo++;
 | 
			
		||||
	buffer[bo+b]=compress(rhs._odata[so+o+b]);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
@@ -55,9 +54,8 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  int so  = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane 
 | 
			
		||||
  int bo  = 0;                                      // offset in buffer
 | 
			
		||||
    
 | 
			
		||||
PARALLEL_NESTED_LOOP(2)
 | 
			
		||||
PARALLEL_NESTED_LOOP2
 | 
			
		||||
  for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
 | 
			
		||||
    for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
 | 
			
		||||
 | 
			
		||||
@@ -104,15 +102,15 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<v
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  int so  = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane 
 | 
			
		||||
  int bo  = 0;                                      // offset in buffer
 | 
			
		||||
    
 | 
			
		||||
PARALLEL_NESTED_LOOP(2)
 | 
			
		||||
PARALLEL_NESTED_LOOP2
 | 
			
		||||
  for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
 | 
			
		||||
    for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
 | 
			
		||||
      int o=n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
      int o   =n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
      int bo  =n*rhs._grid->_slice_block[dimension];
 | 
			
		||||
      int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
 | 
			
		||||
      if ( ocb & cbmask ) {
 | 
			
		||||
	rhs._odata[so+o+b]=buffer[bo++];
 | 
			
		||||
	rhs._odata[so+o+b]=buffer[bo+b];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
@@ -131,7 +129,7 @@ PARALLEL_NESTED_LOOP(2)
 | 
			
		||||
 | 
			
		||||
  int so  = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane 
 | 
			
		||||
    
 | 
			
		||||
PARALLEL_NESTED_LOOP(2)
 | 
			
		||||
PARALLEL_NESTED_LOOP2
 | 
			
		||||
  for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
 | 
			
		||||
    for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
 | 
			
		||||
 | 
			
		||||
@@ -160,7 +158,7 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int
 | 
			
		||||
  int ro  = rplane*rhs._grid->_ostride[dimension]; // base offset for start of plane 
 | 
			
		||||
  int lo  = lplane*lhs._grid->_ostride[dimension]; // base offset for start of plane 
 | 
			
		||||
  
 | 
			
		||||
PARALLEL_NESTED_LOOP(2)
 | 
			
		||||
PARALLEL_NESTED_LOOP2
 | 
			
		||||
  for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
 | 
			
		||||
    for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
 | 
			
		||||
      
 | 
			
		||||
@@ -185,7 +183,7 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,Lattice<vobj> &r
 | 
			
		||||
  int ro  = rplane*rhs._grid->_ostride[dimension]; // base offset for start of plane 
 | 
			
		||||
  int lo  = lplane*lhs._grid->_ostride[dimension]; // base offset for start of plane 
 | 
			
		||||
  
 | 
			
		||||
PARALLEL_NESTED_LOOP(2)
 | 
			
		||||
PARALLEL_NESTED_LOOP2
 | 
			
		||||
  for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
 | 
			
		||||
    for(int b=0;b<rhs._grid->_slice_block[dimension];b++){
 | 
			
		||||
      int o =n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
 
 | 
			
		||||
@@ -1,10 +1,6 @@
 | 
			
		||||
#ifndef _GRID_CSHIFT_MPI_H_
 | 
			
		||||
#define _GRID_CSHIFT_MPI_H_
 | 
			
		||||
 | 
			
		||||
#ifndef MAX
 | 
			
		||||
#define MAX(x,y) ((x)>(y)?(x):(y))
 | 
			
		||||
#define MIN(x,y) ((x)>(y)?(y):(x))
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -100,22 +100,21 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
 | 
			
		||||
  vHalfSpinColourVector  chi;    
 | 
			
		||||
  vSpinColourVector result;
 | 
			
		||||
  vHalfSpinColourVector Uchi;
 | 
			
		||||
  vHalfSpinColourVector *chi_p;
 | 
			
		||||
  int offset,local,perm, ptype;
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int sss=0;sss<grid->oSites();sss++){
 | 
			
		||||
 | 
			
		||||
    int ss = sss;
 | 
			
		||||
    int ssu= sss;
 | 
			
		||||
    //int ss = Stencil._LebesgueReorder[sss];
 | 
			
		||||
    int ssu= ss;
 | 
			
		||||
    //    int ss = Stencil._LebesgueReorder[sss];
 | 
			
		||||
 | 
			
		||||
    // Xp
 | 
			
		||||
    offset = Stencil._offsets [Xp][ss];
 | 
			
		||||
    local  = Stencil._is_local[Xp][ss];
 | 
			
		||||
    perm   = Stencil._permute[Xp][ss];
 | 
			
		||||
    ptype  = Stencil._permute_type[Xp];
 | 
			
		||||
    chi_p  = &comm_buf[offset];
 | 
			
		||||
 | 
			
		||||
    if ( local && perm ) 
 | 
			
		||||
    {
 | 
			
		||||
      spProjXp(tmp,in._odata[offset]);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user