. $SHELLPACK_INCLUDE/include-sizes.sh
get_numa_details

# Benchmark selected by this configuration
export MMTESTS="specfem3d"

# Monitor settings
export RUN_MONITOR="yes"
export MONITORS_ALWAYS=""
export MONITORS_GZIP="proc-vmstat mpstat turbostat"
export MONITORS_WITH_LATENCY="vmstat"
export MONITOR_UPDATE_FREQUENCY="10"

# SPECFEM3D
# MMTESTS_MPI_SELFBUILD is only set when MMTESTS_TOOLCHAIN is non-empty
if [ -n "$MMTESTS_TOOLCHAIN" ]; then
	export MMTESTS_MPI_SELFBUILD="yes"
fi

# MPI binary and library directories are derived from the OpenMPI
# version name and MMTESTS_LIBDIR
export SPECFEM3D_OPENMPI_VERSION="openmpi3"
export SPECFEM3D_MPI_PATH="/usr/${MMTESTS_LIBDIR}/mpi/gcc/${SPECFEM3D_OPENMPI_VERSION}/bin"
export SPECFEM3D_MPI_LIBPATH="/usr/${MMTESTS_LIBDIR}/mpi/gcc/${SPECFEM3D_OPENMPI_VERSION}/${MMTESTS_LIBDIR}"

# Model name, NEX values and initial chunk count
export SPECFEM3D_MODEL="small_s362"
export SPECFEM3D_NEX_XI="64"
export SPECFEM3D_NEX_ETA="64"
export SPECFEM3D_NCHUNKS="3"

# Print the largest process-grid dimension N usable for one chunk side.
#   $1 - number of CPUs available
#   $2 - NEX_XI
#   $3 - number of chunks
# N is the largest integer >= 1 for which N*N*$3 <= $1 and $2 is a
# multiple of 8*N, i.e. there is a value of c that satisfies the equation
# N*c*8 == NEX_XI. When no N satisfies both constraints, 1 is printed so
# the result is never 0.
specfem3d_max_nproc() {
	local cpus=${1:-0} nex=${2:-0} chunks=${3:-1} n=1

	# Missing or non-numeric inputs are normalised instead of being fed
	# to the arithmetic below. A chunk count below 1 is raised to 1 as
	# the search loop would otherwise never terminate.
	case "$cpus" in ''|*[!0-9]*) cpus=0 ;; esac
	case "$nex" in ''|*[!0-9]*) nex=0 ;; esac
	case "$chunks" in ''|*[!0-9]*) chunks=1 ;; esac
	[ "$chunks" -ge 1 ] || chunks=1

	# Find highest possible N based on the number of CPUs
	while [ $(( (n + 1) * (n + 1) * chunks )) -le "$cpus" ]; do
		n=$((n + 1))
	done

	# Step down until NEX_XI is a multiple of 8*N. N is tested against 1
	# first so the modulo never sees a zero divisor.
	while [ "$n" -gt 1 ] && [ $(( nex % (8 * n) )) -ne 0 ]; do
		n=$((n - 1))
	done

	printf '%s\n' "$n"
}

MIN_NPROC=$(specfem3d_max_nproc "$NUMCPUS" "$SPECFEM3D_NEX_XI" "$SPECFEM3D_NCHUNKS")

# Chunks must be 1, 2, 3 or 6. Check after scaling if nchunks == 6
# would still fit within the number of CPUs. MIN_NPROC is at least 1 here,
# so a machine too small for the grid no longer selects 6 chunks by accident.
if [ $((MIN_NPROC * MIN_NPROC * 6)) -le "${NUMCPUS:-0}" ]; then
	SPECFEM3D_NCHUNKS=6
fi

export SPECFEM3D_NPROC_ETA=$MIN_NPROC
export SPECFEM3D_NPROC_XI=$MIN_NPROC

# Extra MPI options, appended to whatever SPECFEM3D_MPI_OPTS already holds.
# --use-hwthread-cpus is added when /proc/cpuinfo contains " ht " and the
# total process count exceeds half of NUMCPUS.
if grep -q " ht " /proc/cpuinfo &&
   [ $((SPECFEM3D_NPROC_XI * SPECFEM3D_NPROC_ETA * SPECFEM3D_NCHUNKS)) -gt $((NUMCPUS / 2)) ]; then
	echo WARNING: Setting use-hwthread-cpus because HT siblings must be used
	SPECFEM3D_MPI_OPTS="${SPECFEM3D_MPI_OPTS} --use-hwthread-cpus"
fi

# --allow-run-as-root is added when the current user is root
case "$(whoami)" in
root)
	SPECFEM3D_MPI_OPTS="${SPECFEM3D_MPI_OPTS} --allow-run-as-root"
	;;
esac
export SPECFEM3D_MPI_OPTS
