2016-01-02 14:51:32 +00:00
|
|
|
/*************************************************************************************
|
|
|
|
|
|
|
|
Grid physics library, www.github.com/paboyle/Grid
|
|
|
|
|
|
|
|
Source file: ./lib/Grid.h
|
|
|
|
|
|
|
|
Copyright (C) 2015
|
|
|
|
|
|
|
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
|
|
Author: azusayamaguchi <ayamaguc@YAMAKAZE.local>
|
|
|
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
|
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
|
|
|
|
See the full license in the file "LICENSE" in the top level distribution directory
|
|
|
|
*************************************************************************************/
|
|
|
|
/* END LEGAL */
|
2015-03-04 03:12:19 +00:00
|
|
|
//
|
2015-04-18 20:44:19 +01:00
|
|
|
// Grid.h
|
2015-03-04 03:12:19 +00:00
|
|
|
// simd
|
|
|
|
//
|
|
|
|
// Created by Peter Boyle on 09/05/2014.
|
|
|
|
// Copyright (c) 2014 University of Edinburgh. All rights reserved.
|
|
|
|
//
|
|
|
|
|
2015-04-18 18:36:48 +01:00
|
|
|
#ifndef GRID_H
|
|
|
|
#define GRID_H
|
2015-03-04 03:12:19 +00:00
|
|
|
|
2015-07-27 10:32:28 +01:00
|
|
|
///////////////////
|
|
|
|
// Std C++ dependencies
|
|
|
|
///////////////////
|
2015-05-18 07:47:05 +01:00
|
|
|
#include <cassert>
|
2015-03-04 03:12:19 +00:00
|
|
|
#include <complex>
|
|
|
|
#include <vector>
|
|
|
|
#include <iostream>
|
2015-05-18 07:47:05 +01:00
|
|
|
#include <iomanip>
|
2015-03-04 03:12:19 +00:00
|
|
|
#include <random>
|
|
|
|
#include <functional>
|
2015-05-18 07:47:05 +01:00
|
|
|
#include <stdio.h>
|
2015-03-04 03:12:19 +00:00
|
|
|
#include <stdlib.h>
|
2015-03-04 04:13:07 +00:00
|
|
|
#include <stdio.h>
|
2015-03-04 13:25:23 +00:00
|
|
|
#include <signal.h>
|
2015-07-27 10:32:28 +01:00
|
|
|
#include <ctime>
|
|
|
|
#include <sys/time.h>
|
|
|
|
#include <chrono>
|
|
|
|
|
|
|
|
///////////////////
|
|
|
|
// Grid headers
|
|
|
|
///////////////////
|
2015-08-20 23:04:38 +01:00
|
|
|
#include <serialisation/Serialisation.h>
|
2015-07-27 10:32:28 +01:00
|
|
|
#include <Config.h>
|
|
|
|
#include <Timer.h>
|
2016-03-27 05:24:07 +01:00
|
|
|
#include <PerfCount.h>
|
2015-07-27 10:32:28 +01:00
|
|
|
#include <Log.h>
|
2015-06-03 12:47:05 +01:00
|
|
|
#include <AlignedAllocator.h>
|
|
|
|
#include <Simd.h>
|
|
|
|
#include <Threads.h>
|
2016-02-11 13:37:39 +00:00
|
|
|
#include <Lexicographic.h>
|
2015-07-27 10:32:28 +01:00
|
|
|
#include <Communicator.h>
|
|
|
|
#include <Cartesian.h>
|
|
|
|
#include <Tensors.h>
|
|
|
|
#include <Lattice.h>
|
|
|
|
#include <Cshift.h>
|
|
|
|
#include <Stencil.h>
|
|
|
|
#include <Algorithms.h>
|
Binary IO file for generic Grid array parallel I/O.
The number of I/O MPI tasks can be varied by selecting which
dimensions use parallel I/O and which dimensions use serial send-to-boss
I/O.
Thus we can neck down from, say, 1024 nodes = 4x4x8x8 to any of {1,8,32,64,128,256,1024} nodes
doing the I/O.
This interpolates nicely between ALL nodes writing their own data, a single boss per time-plane
in processor space [the old UKQCD Fortran code did this], and a single node doing all I/O.
I am not sure the transfer sizes are big enough, and I am not fully convinced that fstream
is guaranteed not to give buffer inconsistencies unless I set the streambuf size to zero.
In practice it has worked on 8 tasks (2x1x2x2), writing/cloning NERSC configurations
on my MacOS + OpenMPI and Clang environment.
It is VERY easy to switch to pwrite at a later date, and also easy to send x-strips around from
each node in order to gather bigger chunks at the syscall level.
That would push us up to a write chunk of circa 8 x 18*4*8 bytes (roughly 4KB), and by taking, say, x/y
non-parallel we get to 16MB contiguous chunks written in multiple 4KB transactions
per I/O node in 64^3 lattices for configuration I/O.
I suspect this is fine for system performance.
2015-08-26 13:40:29 +01:00
|
|
|
#include <parallelIO/BinaryIO.h>
|
2015-12-20 02:29:51 +00:00
|
|
|
#include <qcd/QCD.h>
|
2015-06-03 12:47:05 +01:00
|
|
|
#include <parallelIO/NerscIO.h>
|
2015-12-22 11:19:25 +00:00
|
|
|
#include <Init.h>
|
|
|
|
|
2015-12-20 02:29:51 +00:00
|
|
|
#include <qcd/hmc/NerscCheckpointer.h>
|
2015-12-22 11:19:25 +00:00
|
|
|
#include <qcd/hmc/HmcRunner.h>
|
2015-03-04 03:12:19 +00:00
|
|
|
|
2015-05-11 12:43:10 +01:00
|
|
|
|
2015-03-04 03:12:19 +00:00
|
|
|
|
|
|
|
#endif
|