Mirror of https://github.com/paboyle/Grid.git

Crusher updates

Author: Peter Boyle
Date:   2022-05-25 12:36:09 -04:00
commit 136d843ce7, parent 239afb18fb
8 changed files with 62 additions and 62 deletions

View File

@@ -53,6 +53,9 @@ NAMESPACE_BEGIN(Grid);
     MultiShiftFunction PowerQuarter;
     MultiShiftFunction PowerNegQuarter;
+    MultiShiftFunction MDPowerNegHalf;
+    MultiShiftFunction MDPowerQuarter;

   private:
     FermionOperator<Impl> & NumOp;// the basic operator
@@ -81,11 +84,13 @@ NAMESPACE_BEGIN(Grid);
       remez.generateApprox(param.degree,1,2);
       PowerHalf.Init(remez,param.tolerance,false);
       PowerNegHalf.Init(remez,param.tolerance,true);
+      MDPowerNegHalf.Init(remez,param.mdtolerance,true);

       // MdagM^(+- 1/4)
       std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/4)"<<std::endl;
       remez.generateApprox(param.degree,1,4);
       PowerQuarter.Init(remez,param.tolerance,false);
+      MDPowerQuarter.Init(remez,param.mdtolerance,false);
       PowerNegQuarter.Init(remez,param.tolerance,true);
     };
@@ -226,8 +231,8 @@ NAMESPACE_BEGIN(Grid);
     virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
-      const int n_f  = PowerNegHalf.poles.size();
-      const int n_pv = PowerQuarter.poles.size();
+      const int n_f  = MDPowerNegHalf.poles.size();
+      const int n_pv = MDPowerQuarter.poles.size();

       std::vector<FermionField> MpvPhi_k     (n_pv,NumOp.FermionRedBlackGrid());
       std::vector<FermionField> MpvMfMpvPhi_k(n_pv,NumOp.FermionRedBlackGrid());
@@ -246,8 +251,8 @@ NAMESPACE_BEGIN(Grid);
       SchurDifferentiableOperator<Impl> VdagV(NumOp);
       SchurDifferentiableOperator<Impl> MdagM(DenOp);

-      ConjugateGradientMultiShift<FermionField> msCG_V(param.MaxIter,PowerQuarter);
-      ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerNegHalf);
+      ConjugateGradientMultiShift<FermionField> msCG_V(param.MaxIter,MDPowerQuarter);
+      ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,MDPowerNegHalf);

       msCG_V(VdagV,PhiOdd,MpvPhi_k,MpvPhi);
       msCG_M(MdagM,MpvPhi,MfMpvPhi_k,MfMpvPhi);
@@ -266,7 +271,7 @@ NAMESPACE_BEGIN(Grid);
       //(1)
       for(int k=0;k<n_f;k++){
-        ak = PowerNegHalf.residues[k];
+        ak = MDPowerNegHalf.residues[k];
         MdagM.Mpc(MfMpvPhi_k[k],Y);
         MdagM.MpcDagDeriv(tmp , MfMpvPhi_k[k], Y );  dSdU=dSdU+ak*tmp;
         MdagM.MpcDeriv(tmp , Y, MfMpvPhi_k[k] );     dSdU=dSdU+ak*tmp;
@@ -276,7 +281,7 @@ NAMESPACE_BEGIN(Grid);
       //(3)
       for(int k=0;k<n_pv;k++){
-        ak = PowerQuarter.residues[k];
+        ak = MDPowerQuarter.residues[k];
         VdagV.Mpc(MpvPhi_k[k],Y);
         VdagV.MpcDagDeriv(tmp,MpvMfMpvPhi_k[k],Y); dSdU=dSdU+ak*tmp;
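
The change in this file splits the rational-approximation coefficient sets in two: the tight tolerance set (PowerHalf, PowerNegHalf, PowerQuarter, PowerNegQuarter) is kept for the action and heatbath, while a looser mdtolerance set (MDPowerNegHalf, MDPowerQuarter) is used only inside deriv(), where the multishift CG force solves dominate the cost. A minimal sketch of the pattern, using only the Grid calls that appear in this diff (variable names are illustrative):

    // Sketch: one Remez approximation, two shift sets at different tolerances.
    AlgRemez remez(param.lo, param.hi, param.precision);
    remez.generateApprox(param.degree, 1, 2);              // rational approx to x^(1/2)
    MultiShiftFunction PowerNegHalf, MDPowerNegHalf;
    PowerNegHalf.Init  (remez, param.tolerance,   true);   // tight: action / accept-reject
    MDPowerNegHalf.Init(remez, param.mdtolerance, true);   // loose: MD force only
    // The force solve uses the loose shifts; the resulting force error only
    // perturbs the trajectory and is corrected by the exact Metropolis step,
    // which still evaluates the action with the tight shifts.
    ConjugateGradientMultiShift<FermionField> msCG_MD(param.MaxIter, MDPowerNegHalf);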

View File

@@ -75,16 +75,14 @@ NAMESPACE_BEGIN(Grid);
       remez.generateApprox(param.degree,1,2);
       PowerHalf.Init(remez,param.tolerance,false);
       PowerNegHalf.Init(remez,param.tolerance,true);
+      MDPowerNegHalf.Init(remez,param.mdtolerance,true);

       // MdagM^(+- 1/4)
       std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/4)"<<std::endl;
       remez.generateApprox(param.degree,1,4);
       PowerQuarter.Init(remez,param.tolerance,false);
-      PowerNegQuarter.Init(remez,param.tolerance,true);
-      // Derive solves different tol
       MDPowerQuarter.Init(remez,param.mdtolerance,false);
-      MDPowerNegHalf.Init(remez,param.mdtolerance,true);
+      PowerNegQuarter.Init(remez,param.tolerance,true);
     };

     virtual std::string action_name(){return "OneFlavourRatioRationalPseudoFermionAction";}

View File

@@ -6,6 +6,13 @@ uint32_t accelerator_threads=2;
 uint32_t acceleratorThreads(void)       {return accelerator_threads;};
 void     acceleratorThreads(uint32_t t) {accelerator_threads = t;};

+#define ENV_LOCAL_RANK_OMPI    "OMPI_COMM_WORLD_LOCAL_RANK"
+#define ENV_RANK_OMPI          "OMPI_COMM_WORLD_RANK"
+#define ENV_LOCAL_RANK_SLURM   "SLURM_LOCALID"
+#define ENV_RANK_SLURM         "SLURM_PROCID"
+#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
+#define ENV_RANK_MVAPICH       "MV2_COMM_WORLD_RANK"

 #ifdef GRID_CUDA
 cudaDeviceProp *gpu_props;
 cudaStream_t copyStream;
@@ -17,12 +24,6 @@ void acceleratorInit(void)
   char * localRankStr = NULL;
   int rank = 0, world_rank=0;
-#define ENV_LOCAL_RANK_OMPI    "OMPI_COMM_WORLD_LOCAL_RANK"
-#define ENV_RANK_OMPI          "OMPI_COMM_WORLD_RANK"
-#define ENV_LOCAL_RANK_SLURM   "SLURM_LOCALID"
-#define ENV_RANK_SLURM         "SLURM_PROCID"
-#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
-#define ENV_RANK_MVAPICH       "MV2_COMM_WORLD_RANK"
   if ((localRankStr = getenv(ENV_RANK_OMPI   )) != NULL) { world_rank = atoi(localRankStr);}
   if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
   if ((localRankStr = getenv(ENV_RANK_SLURM  )) != NULL) { world_rank = atoi(localRankStr);}
@@ -119,10 +120,6 @@ void acceleratorInit(void)
   char * localRankStr = NULL;
   int rank = 0, world_rank=0;
-#define ENV_LOCAL_RANK_OMPI    "OMPI_COMM_WORLD_LOCAL_RANK"
-#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
-#define ENV_RANK_OMPI          "OMPI_COMM_WORLD_RANK"
-#define ENV_RANK_MVAPICH       "MV2_COMM_WORLD_RANK"
   // We extract the local rank initialization using an environment variable
   if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
   {
@@ -134,8 +131,10 @@ void acceleratorInit(void)
   }
   if ((localRankStr = getenv(ENV_RANK_OMPI   )) != NULL) { world_rank = atoi(localRankStr);}
   if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
-  printf("world_rank %d has %d devices\n",world_rank,nDevices);
+  if ((localRankStr = getenv(ENV_RANK_SLURM  )) != NULL) { world_rank = atoi(localRankStr);}
+  if ( world_rank == 0 )
+    printf("world_rank %d has %d devices\n",world_rank,nDevices);

   size_t totalDeviceMem=0;
   for (int i = 0; i < nDevices; i++) {
@@ -208,10 +207,7 @@ void acceleratorInit(void)
   char * localRankStr = NULL;
   int rank = 0, world_rank=0;
-#define ENV_LOCAL_RANK_OMPI    "OMPI_COMM_WORLD_LOCAL_RANK"
-#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
-#define ENV_RANK_OMPI          "OMPI_COMM_WORLD_RANK"
-#define ENV_RANK_MVAPICH       "MV2_COMM_WORLD_RANK"
   // We extract the local rank initialization using an environment variable
   if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
   {
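
Hoisting the ENV_* macros to file scope makes the rank-detection logic shared across the CUDA, HIP and SYCL branches of acceleratorInit: each process reads its node-local rank from whichever launcher variable is set, then binds one device per rank. A minimal sketch of that pattern for the CUDA path, assuming only the standard CUDA runtime API (select_device is an illustrative name, not Grid's):

    #include <cstdlib>
    #include <cuda_runtime.h>

    // Bind the calling process to a GPU chosen by its node-local MPI rank.
    void select_device(void)
    {
      int local_rank = 0;
      char *s = NULL;
      // Probe OpenMPI, MVAPICH and SLURM in turn, as acceleratorInit does.
      if ((s = getenv("OMPI_COMM_WORLD_LOCAL_RANK")) != NULL) local_rank = atoi(s);
      if ((s = getenv("MV2_COMM_WORLD_LOCAL_RANK"))  != NULL) local_rank = atoi(s);
      if ((s = getenv("SLURM_LOCALID"))              != NULL) local_rank = atoi(s);

      int nDevices = 0;
      cudaGetDeviceCount(&nDevices);
      if (nDevices > 0) cudaSetDevice(local_rank % nDevices); // one GPU per local rank
    }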

View File

@@ -102,7 +102,7 @@ int main(int argc, char **argv) {
   SFRp.hi       = 30.0;
   SFRp.MaxIter  = 10000;
   SFRp.tolerance= 1.0e-8;
-  SFRp.mdtolerance= 1.0e-6;
+  SFRp.mdtolerance= 1.0e-5;
   SFRp.degree   = 16;
   SFRp.precision= 50;
   SFRp.BoundsCheckFreq=5;
@@ -112,7 +112,7 @@ int main(int argc, char **argv) {
   OFRp.hi       = 30.0;
   OFRp.MaxIter  = 10000;
   OFRp.tolerance= 1.0e-8;
-  OFRp.mdtolerance= 1.0e-6;
+  OFRp.mdtolerance= 1.0e-5;
   OFRp.degree   = 16;
   OFRp.precision= 50;
   OFRp.BoundsCheckFreq=5;
@@ -162,15 +162,17 @@ int main(int argc, char **argv) {
   FermionAction::ImplParams Params(boundary);

   double StoppingCondition = 1e-8;
+  double MDStoppingCondition = 1e-6;
   double MaxCGIterations = 30000;
   ConjugateGradient<FermionField>  CG(StoppingCondition,MaxCGIterations);
+  ConjugateGradient<FermionField>  MDCG(MDStoppingCondition,MaxCGIterations);

   ////////////////////////////////////
   // Collect actions
   ////////////////////////////////////
   ActionLevel<HMCWrapper::Field> Level1(1);
   ActionLevel<HMCWrapper::Field> Level2(4);
-  ActionLevel<HMCWrapper::Field> Level3(6);
+  ActionLevel<HMCWrapper::Field> Level3(8);

   ////////////////////////////////////
   // Strange action
@@ -226,7 +228,7 @@ int main(int argc, char **argv) {
     Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
     Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
     if(h!=0) {
-      Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],CG,CG));
+      Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],MDCG,CG));
     } else {
       Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
       Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
@@ -241,7 +243,7 @@ int main(int argc, char **argv) {
   for(int h=0;h<nquo-1;h++){
     Level2.push_back(Quotients[h]);
   }
-  Level1.push_back(Quotients[nquo-1]); // PV dirichlet fix on coarse timestep
+  Level2.push_back(Quotients[nquo-1]);

   /////////////////////////////////////////////////////////////
   // Gauge action
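
Two solver conventions are worth noting in this test. First, the new MDCG (1e-6) is passed as the first constructor argument of the quotient action and CG (1e-8) as the second; in Grid's two-flavour ratio pseudofermion actions the first solver drives the molecular-dynamics derivative and the second the action/heatbath solves, so the force is computed loosely and the accept/reject exactly, mirroring the tolerance/mdtolerance split above. Second, if the integer handed to ActionLevel counts integration steps per step of the next-coarser level, as in Grid's nested integrators, then Level1(1), Level2(4), Level3(8) integrates a Level3 action 4 x 8 = 32 times per outermost step; promoting the last quotient from Level1 to Level2 and raising Level3 from 6 to 8 rebalances where those force evaluations are spent.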

View File

@@ -9,6 +9,7 @@
 --with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
 --disable-gparity \
 CXX=hipcc MPICXX=mpicxx \
-CXXFLAGS="-fPIC -I/opt/rocm-4.5.0/include/ -std=c++14 -I${MPICH_DIR}/include " \
-LDFLAGS=" -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa "
+CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -std=c++14 -I${MPICH_DIR}/include " \
+LDFLAGS=" -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 "
+HIPFLAGS = --amdgpu-target=gfx90a

View File

@@ -12,19 +12,21 @@
 #SBATCH --gpu-bind=map_gpu:0,1,2,3,7,6,5,4

 DIR=.
-module list
+source sourceme.sh

 export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
 export MPICH_GPU_SUPPORT_ENABLED=1
-#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
-export MPICH_SMP_SINGLE_COPY_MODE=NONE
-#export MPICH_SMP_SINGLE_COPY_MODE=CMA
+export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
 export OMP_NUM_THREADS=1

 echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE

-PARAMS=" --accelerator-threads 16 --grid 32.32.32.256 --mpi 1.1.1.8 --comms-overlap --shm 2048 --shm-mpi 0"
-echo $PARAMS
+echo working directory
+pwd

+PARAMS=" --accelerator-threads 8 --grid 32.32.32.32 --mpi 1.1.1.1 --comms-sequential --shm 2048 --shm-mpi 0"
+srun --gpus-per-task 1 -n1 ./benchmarks/Benchmark_dwf_fp32 $PARAMS

+PARAMS=" --accelerator-threads 8 --grid 64.64.64.32 --mpi 2.2.2.1 --comms-sequential --shm 2048 --shm-mpi 0"
 srun --gpus-per-task 1 -n8 ./benchmarks/Benchmark_dwf_fp32 $PARAMS

View File

@@ -12,37 +12,30 @@
 #SBATCH --gpu-bind=map_gpu:0,1,2,3,7,6,5,4

 DIR=.
-module list
+source setup.sh

 export MPICH_OFI_NIC_POLICY=GPU
 export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
 export MPICH_GPU_SUPPORT_ENABLED=1
-#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
+export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
 #export MPICH_SMP_SINGLE_COPY_MODE=CMA
-export MPICH_SMP_SINGLE_COPY_MODE=NONE
+#export MPICH_SMP_SINGLE_COPY_MODE=NONE
 export OMP_NUM_THREADS=1

 echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE

-PARAMS=" --accelerator-threads 16 --grid 64.64.64.256 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 0"
+for vol in 64.64.64.256 64.64.64.128 32.32.32.256 32.32.32.128
+do
+PARAMS=" --accelerator-threads 8 --grid $vol --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 1"
 echo $PARAMS
-#srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.64.64.64.256.8node
+srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.${vol}.8node.shm-mpi1
+done

-PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 4.4.4.1 --comms-overlap --shm 2048 --shm-mpi 1"
+PARAMS=" --accelerator-threads 8 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 1"
 echo $PARAMS
-srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.64.64.64.32.8node
+srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node

-PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 4.4.4.1 --comms-overlap --shm 2048 --shm-mpi 0"
+PARAMS=" --accelerator-threads 8 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 0"
 echo $PARAMS
-#srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.64.64.64.32.8node.shm0
-
-PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 1"
-echo $PARAMS
-#srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node
-
-PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 0"
-echo $PARAMS
-#srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node_shm0
+srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node_shm0

View File

@@ -1,6 +1,9 @@
 module load PrgEnv-gnu
-module load rocm/4.5.0
+module load rocm/5.1.0
+module load cray-mpich/8.1.15
 module load gmp
-module load cray-fftw
+#module load cray-fftw
 module load craype-accel-amd-gfx90a
 export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH
+#Hack for lib
+export LD_LIBRARY_PATH=`pwd`:$LD_LIBRARY_PATH