Makefile.include.nvhpc acc: Difference between revisions
No edit summary |
No edit summary |
||
Line 3: | Line 3: | ||
# Default precompiler options | # Default precompiler options | ||
CPP_OPTIONS = -DHOST=\"LinuxNV\" \ | CPP_OPTIONS = -DHOST=\"LinuxNV\" \ | ||
-DMPI -DMPI_BLOCK=8000 -Duse_collective \ | -DMPI -DMPI_INPLACE -DMPI_BLOCK=8000 -Duse_collective \ | ||
-DscaLAPACK \ | -DscaLAPACK \ | ||
-DCACHE_SIZE=4000 \ | -DCACHE_SIZE=4000 \ | ||
Line 13: | Line 13: | ||
-Dfock_dblbuf \ | -Dfock_dblbuf \ | ||
-D_OPENACC \ | -D_OPENACC \ | ||
-DUSENCCL | -DUSENCCL | ||
CPP = nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX) > $*$(SUFFIX) | CPP = nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX) > $*$(SUFFIX) | ||
Line 19: | Line 19: | ||
# N.B.: you might need to change the cuda-version here | # N.B.: you might need to change the cuda-version here | ||
# to one that comes with your NVIDIA-HPC SDK | # to one that comes with your NVIDIA-HPC SDK | ||
FC = mpif90 -acc -gpu=cc60,cc70,cc80,cuda11. | CC = mpicc -acc -gpu=cc60,cc70,cc80,cuda11.8 | ||
FCL = mpif90 -acc -gpu=cc60,cc70,cc80,cuda11. | FC = mpif90 -acc -gpu=cc60,cc70,cc80,cuda11.8 | ||
FCL = mpif90 -acc -gpu=cc60,cc70,cc80,cuda11.8 -c++libs | |||
FREE = -Mfree | FREE = -Mfree | ||
Line 35: | Line 36: | ||
# Redefine the standard list of O1 and O2 objects | # Redefine the standard list of O1 and O2 objects | ||
SOURCE_O1 := pade_fit.o | SOURCE_O1 := pade_fit.o minimax_dependence.o | ||
SOURCE_O2 := pead.o | SOURCE_O2 := pead.o | ||
# For what used to be vasp.5.lib | # For what used to be vasp.5.lib | ||
CPP_LIB = $(CPP) | CPP_LIB = $(CPP) | ||
FC_LIB = | FC_LIB = $(FC) | ||
CC_LIB = | CC_LIB = $(CC) | ||
CFLAGS_LIB = -O | CFLAGS_LIB = -O -w | ||
FFLAGS_LIB = -O1 -Mfixed | FFLAGS_LIB = -O1 -Mfixed | ||
FREE_LIB = $(FREE) | FREE_LIB = $(FREE) | ||
Line 56: | Line 57: | ||
## necessary ... | ## necessary ... | ||
## | ## | ||
# When compiling on the target machine itself, change this to the | |||
# relevant target when cross-compiling for another architecture | |||
VASP_TARGET_CPU ?= -tp host | |||
FFLAGS += $(VASP_TARGET_CPU) | |||
# Specify your NV HPC-SDK installation (mandatory) | # Specify your NV HPC-SDK installation (mandatory) | ||
Line 65: | Line 70: | ||
#NVVERSION = 21.11 | #NVVERSION = 21.11 | ||
#NVROOT = $(NVHPC)/Linux_x86_64/$(NVVERSION) | #NVROOT = $(NVHPC)/Linux_x86_64/$(NVVERSION) | ||
## Improves performance when using NV HPC-SDK >=21.11 and CUDA >11.2 | |||
#OFLAG_IN = -fast -Mwarperf | |||
#SOURCE_IN := nonlr.o | |||
# Software emulation of quadruple precision (mandatory) | # Software emulation of quadruple precision (mandatory) | ||
Line 71: | Line 80: | ||
INCS += -I$(QD)/include/qd | INCS += -I$(QD)/include/qd | ||
# BLAS | # BLAS (mandatory) | ||
BLAS = -lblas | |||
# LAPACK (mandatory) | |||
LAPACK = -llapack | |||
# scaLAPACK (mandatory) | # scaLAPACK (mandatory) | ||
LLIBS += | SCALAPACK = -Mscalapack | ||
LLIBS += $(SCALAPACK) $(LAPACK) $(BLAS) | |||
# FFTW (mandatory) | # FFTW (mandatory) | ||
Line 81: | Line 95: | ||
LLIBS += -L$(FFTW_ROOT)/lib -lfftw3 | LLIBS += -L$(FFTW_ROOT)/lib -lfftw3 | ||
INCS += -I$(FFTW_ROOT)/include | INCS += -I$(FFTW_ROOT)/include | ||
# Use cusolvermp (optional) | |||
# supported as of NVHPC-SDK 24.1 (and needs CUDA-11.8) | |||
#CPP_OPTIONS+= -DCUSOLVERMP | |||
#LLIBS += -cudalib=cusolvermp | |||
#CFLAGS_LIB += -cudalib=cusolvermp | |||
#OBJECTS_LIB+= cal_mpi.o | |||
# HDF5-support (optional but strongly recommended) | # HDF5-support (optional but strongly recommended) | ||
Line 94: | Line 115: | ||
</pre> | </pre> | ||
---- | ---- | ||
[[makefile.include]] | |||
[[Category:VASP]] | [[Category:VASP]] | ||
[[Category:Installation]] | [[Category:Installation]] |
Revision as of 15:09, 6 February 2024
# makefile.include fragment: NVIDIA HPC SDK compilers (nvfortran / nvc++ via
# the MPI wrappers) with OpenACC enabled (-acc). It only defines variables;
# $(FUFFIX) and $(SUFFIX) are expected to come from the including makefile.

# Default precompiler options
CPP_OPTIONS = -DHOST=\"LinuxNV\" \
              -DMPI -DMPI_INPLACE -DMPI_BLOCK=8000 -Duse_collective \
              -DscaLAPACK \
              -DCACHE_SIZE=4000 \
              -Davoidalloc \
              -Dvasp6 \
              -Duse_bse_te \
              -Dtbdyn \
              -Dqd_emulate \
              -Dfock_dblbuf \
              -D_OPENACC \
              -DUSENCCL

# Kept recursive (=) on purpose: $* is only meaningful inside a recipe, and
# CPP_OPTIONS may still be extended further down in this file.
CPP         = nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX) > $*$(SUFFIX)

# N.B.: you might need to change the cuda-version here
#       to one that comes with your NVIDIA-HPC SDK
CC          = mpicc  -acc -gpu=cc60,cc70,cc80,cuda11.8
FC          = mpif90 -acc -gpu=cc60,cc70,cc80,cuda11.8
FCL         = mpif90 -acc -gpu=cc60,cc70,cc80,cuda11.8 -c++libs

FREE        = -Mfree

FFLAGS      = -Mbackslash -Mlarge_arrays

OFLAG       = -fast

DEBUG       = -Mfree -O0 -traceback

OBJECTS     = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o

LLIBS       = -cudalib=cublas,cusolver,cufft,nccl -cuda

# Redefine the standard list of O1 and O2 objects
SOURCE_O1  := pade_fit.o minimax_dependence.o
SOURCE_O2  := pead.o

# For what used to be vasp.5.lib
CPP_LIB     = $(CPP)
FC_LIB      = $(FC)
CC_LIB      = $(CC)
CFLAGS_LIB  = -O -w
FFLAGS_LIB  = -O1 -Mfixed
FREE_LIB    = $(FREE)

OBJECTS_LIB = linpack_double.o

# For the parser library
CXX_PARS    = nvc++ --no_warnings

##
## Customize as of this point! Of course you may change the preceding
## part of this file as well if you like, but it should rarely be
## necessary ...
##
# When compiling on the target machine itself, change this to the
# relevant target when cross-compiling for another architecture
VASP_TARGET_CPU ?= -tp host
FFLAGS     += $(VASP_TARGET_CPU)

# Specify your NV HPC-SDK installation (mandatory)
#... first try to set it automatically
# Simple assignment (:=) so that `which | awk` runs once at parse time.
# With a recursive `=` the command would be re-executed on every expansion
# of $(QD), and therefore of $(LLIBS) and $(INCS).
NVROOT     := $(shell which nvfortran | awk -F /compilers/bin/nvfortran '{ print $$1 }')

# If the above fails, then NVROOT needs to be set manually
#NVHPC      ?= /opt/nvidia/hpc_sdk
#NVVERSION   = 21.11
#NVROOT      = $(NVHPC)/Linux_x86_64/$(NVVERSION)

## Improves performance when using NV HPC-SDK >=21.11 and CUDA >11.2
#OFLAG_IN   = -fast -Mwarperf
#SOURCE_IN  := nonlr.o

# Software emulation of quadruple precision (mandatory)
QD         ?= $(NVROOT)/compilers/extras/qd
LLIBS      += -L$(QD)/lib -lqdmod -lqd
INCS       += -I$(QD)/include/qd

# BLAS (mandatory)
BLAS        = -lblas

# LAPACK (mandatory)
LAPACK      = -llapack

# scaLAPACK (mandatory)
SCALAPACK   = -Mscalapack

LLIBS      += $(SCALAPACK) $(LAPACK) $(BLAS)

# FFTW (mandatory)
FFTW_ROOT  ?= /path/to/your/fftw/installation
LLIBS      += -L$(FFTW_ROOT)/lib -lfftw3
INCS       += -I$(FFTW_ROOT)/include

# Use cusolvermp (optional)
# supported as of NVHPC-SDK 24.1 (and needs CUDA-11.8)
#CPP_OPTIONS+= -DCUSOLVERMP
#LLIBS      += -cudalib=cusolvermp
#CFLAGS_LIB += -cudalib=cusolvermp
#OBJECTS_LIB+= cal_mpi.o

# HDF5-support (optional but strongly recommended)
#CPP_OPTIONS+= -DVASP_HDF5
#HDF5_ROOT  ?= /path/to/your/hdf5/installation
#LLIBS      += -L$(HDF5_ROOT)/lib -lhdf5_fortran
#INCS       += -I$(HDF5_ROOT)/include

# For the VASP-2-Wannier90 interface (optional)
#CPP_OPTIONS    += -DVASP2WANNIER90
#WANNIER90_ROOT ?= /path/to/your/wannier90/installation
#LLIBS          += -L$(WANNIER90_ROOT)/lib -lwannier