Grid/lib/communicator/Communicator_none.cc

    /*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

    Source file: ./lib/communicator/Communicator_none.cc

    Copyright (C) 2015

Author: Peter Boyle <paboyle@ph.ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
#include <Grid/GridCore.h>

namespace Grid {

///////////////////////////////////////////////////////////////////////////////////////////////////
// Info that is set up once and is independent of the cartesian layout
///////////////////////////////////////////////////////////////////////////////////////////////////

// One-time initialisation for the communicator in this build.
//
// argc / arv : command-line handles; neither is read by this implementation.
//
// The only work performed is the call to ShmInitGeneric().
void CartesianCommunicator::Init(int *argc, char *** arv)
{
  // The command line is not consulted here; discard the arguments explicitly.
  (void)argc;
  (void)arv;

  ShmInitGeneric();
}

// Build the communicator for a processor grid described by `processors`
// (one entry per dimension).
//
// This implementation only supports a 1^N grid: every entry of `processors`
// is asserted to be 1. The object then records a single processor (rank 0)
// located at the origin of the processor grid.
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
  // Exactly one process, and it is rank 0.
  _Nprocessors = 1;
  _processor   = 0;

  // Remember the requested layout and its dimensionality.
  _processors = processors;
  _ndimension = processors.size();

  // Require 1^N processor grid for fake
  for(int d=0;d<_ndimension;d++) {
    assert(_processors[d]==1);
  }

  // The sole processor sits at coordinate 0 in every dimension.
  _processor_coor.resize(_ndimension);
  for(auto &coordinate : _processor_coor) {
    coordinate = 0;
  }
}

// Global reductions.
//
// Every body below is intentionally empty: the argument is left exactly as
// the caller passed it. The constructor in this file fixes the processor
// count at one, so there is nothing to combine with.

// Scalar sums.
void CartesianCommunicator::GlobalSum(float &)
{
}

void CartesianCommunicator::GlobalSum(double &)
{
}

void CartesianCommunicator::GlobalSum(uint32_t &)
{
}

void CartesianCommunicator::GlobalSum(uint64_t &)
{
}

// Vector sums over N elements; the buffer is not touched.
void CartesianCommunicator::GlobalSumVector(float *,int N)
{
}

void CartesianCommunicator::GlobalSumVector(double *,int N)
{
}

// Bitwise XOR reductions.
void CartesianCommunicator::GlobalXOR(uint32_t &)
{
}

void CartesianCommunicator::GlobalXOR(uint64_t &)
{
}

// Packet exchange is not implemented in this build: the body is just
// assert(0).
//
// When assertions are enabled the call aborts. When they are compiled out
// (NDEBUG) the function returns without reading `xmit` or writing `recv`.
void CartesianCommunicator::SendRecvPacket(void *xmit,
                                           void *recv,
                                           int xmit_to_rank,
                                           int recv_from_rank,
                                           int bytes)
{
  // None of the arguments are used.
  (void)xmit;
  (void)recv;
  (void)xmit_to_rank;
  (void)recv_from_rank;
  (void)bytes;

  assert(0);
}


// Basic halo communications primitive -- should never be called on a single node.
//
// The body is just assert(0): the call aborts when assertions are enabled.
// When they are compiled out (NDEBUG) the function returns without reading
// `xmit` or writing `recv`.
void CartesianCommunicator::SendToRecvFrom(void *xmit,
                                           int dest,
                                           void *recv,
                                           int from,
                                           int bytes)
{
  // None of the arguments are used.
  (void)xmit;
  (void)dest;
  (void)recv;
  (void)from;
  (void)bytes;

  assert(0);
}
// Begin half of the split send/receive.
//
// The body is just assert(0): the call aborts when assertions are enabled.
// When they are compiled out (NDEBUG) the function returns without
// appending to `list`, reading `xmit`, or writing `recv`.
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                void *xmit,
                                                int dest,
                                                void *recv,
                                                int from,
                                                int bytes)
{
  // None of the arguments are used.
  (void)list;
  (void)xmit;
  (void)dest;
  (void)recv;
  (void)from;
  (void)bytes;

  assert(0);
}

// Completion half of the split send/receive.
//
// The body is just assert(0): the call aborts when assertions are enabled.
// When they are compiled out (NDEBUG) the function returns and `list` is
// left unmodified.
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
  (void)list;  // not used

  assert(0);
}

// Always reports rank 0.
int CartesianCommunicator::RankWorld(void)
{
  return 0;
}

// Empty body: returns immediately.
void CartesianCommunicator::Barrier(void)
{
}

// Empty body: `data` is left exactly as the caller supplied it.
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
{
}

// Empty body: `data` is left exactly as the caller supplied it.
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
{
}

// Returns rank 0 for any `coor`; the coordinate is not inspected.
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
{
  return 0;
}

// Overwrites `coor` with this communicator's own processor coordinate.
// `rank` is ignored.
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
{
  coor = _processor_coor;
}
// Reports the source and destination ranks for a shift of `shift` along
// dimension `dim`.
//
// Both outputs are always set to 0; `dim` and `shift` are not consulted.
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
{
  dest   = 0;
  source = 0;
}


}
Global edit adding copyright and license info to every source file. 2016-01-02 14:51:32 +00:00			`/*************************************************************************************`

			`Grid physics library, www.github.com/paboyle/Grid`

			`Source file: ./lib/communicator/Communicator_none.cc`

			`Copyright (C) 2015`

			`Author: Peter Boyle <paboyle@ph.ed.ac.uk>`

			`This program is free software; you can redistribute it and/or modify`
			`it under the terms of the GNU General Public License as published by`
			`the Free Software Foundation; either version 2 of the License, or`
			`(at your option) any later version.`

			`This program is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`GNU General Public License for more details.`

			`You should have received a copy of the GNU General Public License along`
			`with this program; if not, write to the Free Software Foundation, Inc.,`
			`51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.`

			`See the full license in the file "LICENSE" in the top level distribution directory`
			`*************************************************************************************/`
			`/* END LEGAL */`
Refactoring header layout 2017-02-22 18:09:33 +00:00			`#include <Grid/GridCore.h>`

Renamed the namespace to Grid 2015-04-03 05:29:54 +01:00			`namespace Grid {`
Fixing the Checkerboarding cshift. Implemented "fake" communications in preparation for the leap to MPI. 2015-03-29 20:35:37 +01:00
Update to use shared memory to contain the stencil comms buffers Tested on 2.1.1.1 1.2.1.1 4.1.1.1 1.4.1.1 2.2.1.1 subnode decompositions 2016-10-24 17:30:43 +01:00			`///////////////////////////////////////////////////////////////////////////////////////////////////`
			`// Info that is setup once and indept of cartesian layout`
			`///////////////////////////////////////////////////////////////////////////////////////////////////`
Shmem comms [NO MPI] target added. The dwf test runs and passes. Not really shaken out to my satisfaction though as I want more tests done, so don't declare as working. But committing my current while I try a few experimentals. 2016-02-14 20:24:38 +00:00
Update to use shared memory to contain the stencil comms buffers Tested on 2.1.1.1 1.2.1.1 4.1.1.1 1.4.1.1 2.2.1.1 subnode decompositions 2016-10-24 17:30:43 +01:00			`void CartesianCommunicator::Init(int argc, char ** arv)`
Internal SHM comms in non-simd directions working Need to fix simd directions 2016-10-22 18:14:27 +01:00			`{`
Update to use shared memory to contain the stencil comms buffers Tested on 2.1.1.1 1.2.1.1 4.1.1.1 1.4.1.1 2.2.1.1 subnode decompositions 2016-10-24 17:30:43 +01:00			`ShmInitGeneric();`
Internal SHM comms in non-simd directions working Need to fix simd directions 2016-10-22 18:14:27 +01:00			`}`
Shmem comms [NO MPI] target added. The dwf test runs and passes. Not really shaken out to my satisfaction though as I want more tests done, so don't declare as working. But committing my current while I try a few experimentals. 2016-02-14 20:24:38 +00:00
Domain wall fermions now invert ; have the basis set up for Tanh/Zolo * (Cayley/PartFrac/ContFrac) * (Mobius/Shamir/Wilson) Approx Representation Kernel. All are done with space-time taking part in checkerboarding, Ls uncheckerboarded Have only so far tested the Domain Wall limit of mobius, and at that only checked that it i) Inverts ii) 5dim DW == Ls copies of 4dim D2 iii) MeeInv Mee == 1 iv) Meo+Mee+Moe+Moo == M unprec. v) MpcDagMpc is hermitan vi) Mdag is the adjoint of M between stochastic vectors. That said, the RB schur solve, RB MpcDagMpc solve, Unprec solve all converge and the true residual becomes small; so pretty good tests. 2015-06-02 16:57:12 +01:00			`CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)`
Fixing the Checkerboarding cshift. Implemented "fake" communications in preparation for the leap to MPI. 2015-03-29 20:35:37 +01:00			`{`
			`_processors = processors;`
Fixing breakage in the Comms non compile 2015-05-10 15:23:09 +01:00			`_ndimension = processors.size();`
			`_processor_coor.resize(_ndimension);`
Fixing the Checkerboarding cshift. Implemented "fake" communications in preparation for the leap to MPI. 2015-03-29 20:35:37 +01:00
			`// Require 1^N processor grid for fake`
Fixing breakage in the Comms non compile 2015-05-10 15:23:09 +01:00			`_Nprocessors=1;`
			`_processor = 0;`
			`for(int d=0;d<_ndimension;d++) {`
			`assert(_processors[d]==1);`
			`_processor_coor[d] = 0;`
			`}`
Fixing the Checkerboarding cshift. Implemented "fake" communications in preparation for the leap to MPI. 2015-03-29 20:35:37 +01:00			`}`

Patch for comms none nocompile 2015-04-10 04:21:48 +01:00			`void CartesianCommunicator::GlobalSum(float &){}`
			`void CartesianCommunicator::GlobalSumVector(float *,int N){}`
			`void CartesianCommunicator::GlobalSum(double &){}`
Got the NERSC IO working and fixed a bug in cshift. 2015-04-22 22:46:48 +01:00			`void CartesianCommunicator::GlobalSum(uint32_t &){}`
Uint64 sum for IO rates 2016-03-16 09:27:22 +00:00			`void CartesianCommunicator::GlobalSum(uint64_t &){}`
Patch for comms none nocompile 2015-04-10 04:21:48 +01:00			`void CartesianCommunicator::GlobalSumVector(double *,int N){}`
I/O improvements 2017-06-11 23:14:10 +01:00			`void CartesianCommunicator::GlobalXOR(uint32_t &){}`
			`void CartesianCommunicator::GlobalXOR(uint64_t &){}`
Fixing the Checkerboarding cshift. Implemented "fake" communications in preparation for the leap to MPI. 2015-03-29 20:35:37 +01:00
Fix a nocompile 2016-03-03 20:33:28 +00:00			`void CartesianCommunicator::SendRecvPacket(void *xmit,`
			`void *recv,`
			`int xmit_to_rank,`
			`int recv_from_rank,`
			`int bytes)`
Binary IO file for generic Grid array parallel I/O. Number of IO MPI tasks can be varied by selecting which dimensions use parallel IO and which dimensions use Serial send to boss I/O. Thus can neck down from, say 1024 nodes = 4x4x8x8 to {1,8,32,64,128,256,1024} nodes doing the I/O. Interpolates nicely between ALL nodes write their data, a single boss per time-plane in processor space [old UKQCD fortran code did this], and a single node doing all I/O. Not sure I have the transfer sizes big enough and am not overly convinced fstream is guaranteed to not give buffer inconsistencies unless I set streambuf size to zero. Practically it has worked on 8 tasks, 2x1x2x2 writing /cloning NERSC configurations on my MacOS + OpenMPI and Clang environment. It is VERY easy to switch to pwrite at a later date, and also easy to send x-strips around from each node in order to gather bigger chunks at the syscall level. That would push us up to the circa 8x 1848 == 4KB size write chunk, and by taking, say, x/y non parallel we get to 16MB contiguous chunks written in multi 4KB transactions per IOnode in 64^3 lattices for configuration I/O. I suspect this is fine for system performance. 2015-08-26 13:40:29 +01:00			`{`
			`assert(0);`
			`}`


Added a comms benchmark 2015-05-02 23:42:30 +01:00			`// Basic Halo comms primitive -- should never call in single node`
Fixing the Checkerboarding cshift. Implemented "fake" communications in preparation for the leap to MPI. 2015-03-29 20:35:37 +01:00			`void CartesianCommunicator::SendToRecvFrom(void *xmit,`
Patch for comms none nocompile 2015-04-10 04:21:48 +01:00			`int dest,`
			`void *recv,`
			`int from,`
			`int bytes)`
Fixing the Checkerboarding cshift. Implemented "fake" communications in preparation for the leap to MPI. 2015-03-29 20:35:37 +01:00			`{`
Command line args and a general clean up 2015-05-11 12:43:10 +01:00			`assert(0);`
Fixing the Checkerboarding cshift. Implemented "fake" communications in preparation for the leap to MPI. 2015-03-29 20:35:37 +01:00			`}`
Added a comms benchmark 2015-05-02 23:42:30 +01:00			`void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,`
			`void *xmit,`
			`int dest,`
			`void *recv,`
			`int from,`
			`int bytes)`
			`{`
Command line args and a general clean up 2015-05-11 12:43:10 +01:00			`assert(0);`
Added a comms benchmark 2015-05-02 23:42:30 +01:00			`}`
Merge branch 'develop' of https://github.com/paboyle/Grid into feature/staggering 2016-11-22 13:49:11 +00:00
Added a comms benchmark 2015-05-02 23:42:30 +01:00			`void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)`
			`{`
Command line args and a general clean up 2015-05-11 12:43:10 +01:00			`assert(0);`
Added a comms benchmark 2015-05-02 23:42:30 +01:00			`}`
Fixing the Checkerboarding cshift. Implemented "fake" communications in preparation for the leap to MPI. 2015-03-29 20:35:37 +01:00
Comms improvements 2016-11-01 11:35:43 +00:00			`int CartesianCommunicator::RankWorld(void){return 0;}`
Update to use shared memory to contain the stencil comms buffers Tested on 2.1.1.1 1.2.1.1 4.1.1.1 1.4.1.1 2.2.1.1 subnode decompositions 2016-10-24 17:30:43 +01:00			`void CartesianCommunicator::Barrier(void){}`
			`void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}`
			`void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }`
			`int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) { return 0;}`
Compiles now 2016-11-03 16:58:23 +00:00			`void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){ coor = _processor_coor; }`
Patch for comms none nocompile 2015-04-10 04:21:48 +01:00			`void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)`
			`{`
Fixing breakage in the Comms non compile 2015-05-10 15:23:09 +01:00			`source =0;`
			`dest=0;`
Patch for comms none nocompile 2015-04-10 04:21:48 +01:00			`}`


Fixing the Checkerboarding cshift. Implemented "fake" communications in preparation for the leap to MPI. 2015-03-29 20:35:37 +01:00			`}`