...
 
Commits (297)
*.swp
precision.pdf
[submodule "AMG"]
path = AMG
url = https://github.com/LLNL/AMG.git
[submodule "CANDLE"]
path = CANDLE
url = https://github.com/ECP-CANDLE/Benchmarks.git
[submodule "CoMD"]
path = CoMD
url = https://github.com/ECP-copa/CoMD.git
[submodule "Laghos"]
path = Laghos
url = https://github.com/CEED/Laghos
[submodule "dep/mfem"]
path = dep/mfem
url = https://github.com/mfem/mfem.git
[submodule "MACSio"]
path = MACSio
url = https://github.com/LLNL/MACSio.git
[submodule "dep/json-cwx"]
path = dep/json-cwx
url = https://github.com/LLNL/json-cwx
[submodule "MiniAMR"]
path = MiniAMR
url = https://github.com/Mantevo/miniAMR.git
[submodule "MiniFE"]
path = MiniFE
url = https://github.com/Mantevo/miniFE.git
[submodule "MiniTri"]
path = MiniTri
url = https://github.com/Mantevo/miniTri.git
[submodule "Nekbone"]
path = Nekbone
url = https://github.com/Nek5000/Nekbone.git
[submodule "SW4lite"]
path = SW4lite
url = https://github.com/geodynamics/sw4lite.git
[submodule "SWFFT"]
path = SWFFT
url = https://xgitlab.cels.anl.gov/hacc/SWFFT.git
[submodule "XSBench"]
path = XSBench
url = https://github.com/ANL-CESAR/XSBench.git
[submodule "dep/fftw3"]
path = dep/fftw3
url = https://github.com/FFTW/fftw3.git
[submodule "QCD"]
path = QCD
url = https://github.com/fiber-miniapp/ccs-qcd.git
[submodule "FFVC"]
path = FFVC
url = https://github.com/fiber-miniapp/ffvc-mini.git
[submodule "NICAM"]
path = NICAM
url = https://github.com/fiber-miniapp/nicam-dc-mini.git
[submodule "MVMC"]
path = MVMC
url = https://github.com/fiber-miniapp/mVMC-mini.git
[submodule "NGSAnalyzer"]
path = NGSAnalyzer
url = https://github.com/fiber-miniapp/ngsa-mini.git
[submodule "NTChem"]
path = NTChem
url = https://github.com/fiber-miniapp/ntchem-mini.git
[submodule "FFB"]
path = FFB
url = https://github.com/fiber-miniapp/ffb-mini.git
[submodule "HPCG"]
path = HPCG
url = https://github.com/hpcg-benchmark/hpcg.git
[submodule "dep/msr-safe"]
path = dep/msr-safe
url = https://github.com/LLNL/msr-safe.git
[submodule "dep/likwid"]
path = dep/likwid
url = https://github.com/RRZE-HPC/likwid.git
[submodule "dep/intel-pcm"]
path = dep/intel-pcm
url = https://github.com/opcm/pcm.git
[submodule "BabelStream"]
path = BabelStream
url = https://github.com/UoB-HPC/BabelStream.git
# Links
- Spreadsheet: https://docs.google.com/spreadsheets/d/1un0TIi31LXI9yURmwobPkCOatNXTvVPofXbEu_W-SvA/
# Settings
```sh
source /opt/intel/parallel_studio_xe_2018.1.038/bin/psxevars.sh intel64 >/dev/null
export I_MPI_CC=icc
export I_MPI_CXX=icpc
export I_MPI_F77=ifort
export I_MPI_F90=ifort
ulimit -s unlimited
ulimit -n 4096
```
- `OMP_NUM_THREADS` **must** be greater than one (i.e. run with several threads).
- The number of MPI processes **should** be one.
# MEMORY THROUGHPUTS : LYON0
```sh
lyon0 % perf stat -e l2_requests.miss sleep 1 2>&1 >/dev/null | grep l2_requests | tr -d ',' | awk '{printf ("%d B\n", $1 * 64)}'
487488 B
```
- Note: To calculate bandwidth, divide the number of bytes by the elapsed time reported by `perf`.
- Each tile has LLC(L2) of 1MB.
- List of counters : https://github.com/TomTheBear/perfmondb/blob/master/KNL/KnightsLanding_core_V6.tsv
### MPI
```sh
lyon0% mkdir p
lyon0% mpiexec -n 8 bash -c 'perf stat -e mem_load_uops_retired.l3_miss sleep 1 >/dev/null 2>p/"$MPI_LOCALRANKID".txt'
lyon0% { for i in p/*.txt; do cat $i | egrep 'sec|miss' | tr -d ',' | sed -e 's/\s\+/ /g' | cut -d ' ' -f 2 | tr '\n' ' '; echo; done } | awk '{ s += $1 / $2 } END { printf ("%f GB/sec\n", s * 64 / (1000 ** 3)) }'
0.000300 GB/sec
```
# MEMORY THROUGHPUTS : mill\[0-1\] (KNM)
```sh
mill0 % perf stat -e cache-misses sleep 1 2>&1 >/dev/null | grep cache-misses | tr -d ',' | awk '{printf ("%d B\n", $1 * 64)}'
366528 B
```
- The `cache-misses` above is the same as the raw counter `r412e` of KNL's `l2_requests.miss`.
# MEMORY THROUGHPUTS : KIEV0
```sh
kiev0 % perf stat -e mem_load_uops_retired.l3_miss sleep 1 2>&1 >/dev/null | grep mem_load | tr -d ',' | awk '{printf ("%d B\n", $1 * 64)}'
10880 B
```
- Note: To calculate bandwidth, divide the number of bytes by the elapsed time reported by `perf`.
- LLC is L3 of 30MB.
- This follows the approach of https://github.com/RRZE-HPC/likwid/blob/master/groups/broadwell/L3CACHE.txt
According to ["Detecting Memory-Boundedness with Hardware Performance Counters", Daniel Molka et al.](http://www.readex.eu/wp-content/uploads/2017/06/ICPE2017_authors_version.pdf):
```
Therefore, the proportion of main memory accesses is severely underestimated by the OFFCORE_RESPONSE events.
However, the sum of the L3 hit and L3 miss events is very close to the number of L1 misses in both cases,
so the number of cache line transfers from the uncore to each core can be measured quite accurately.
```
# FLOP
- Note: `-knl` option must be replaced by `-bdw` on KIEV0.
- `sde` refers to the [Intel SDE](https://software.intel.com/en-us/articles/intel-software-development-emulator) (Software Development Emulator).
- _Considering `FMA FLOP` and `MASKED FLOP` is future work._
- https://software.intel.com/en-us/articles/calculating-flop-using-intel-software-development-emulator-intel-sde
( https://matsulab.slack.com/files/U755Q4FC0/F8XL55459/calculate.py )
```py
#!/bin/python
##################################
#
# First, get result.txt:
#     mpirun -np 1 bash -c '../../sde-external-8.12.0-2017-10-23-lin/sde64 -knl -iform 1 -omix tmp/"$MPI_LOCALRANKID".txt -- ./exe'
#     for i in tmp/*.txt; do cat $i | egrep '\*total|elements' | sort -t ' ' -k1,1 -k 2rn | uniq -w 22; done >> result.txt
#
# Second, get time.txt
#     (time mpirun -n 1 ./exe 2>/dev/null;)2>time.txt
#
##################################
"""Summarise the element counters in result.txt and the run time in time.txt.

Reads the two files produced by the commands above, accumulates the
per-key counters, classifies them as single precision, double precision
or integer work, and prints percentages and GFLOPS figures.

Works on both Python 2 and Python 3.
"""


def read_elapsed_seconds(path):
    """Return the wall time in seconds taken from the second line of `path`.

    The second line is expected to look like ``real<TAB>0m1.234s`` (the
    first line is skipped, exactly as the original script did).
    """
    with open(path, "r") as handle:
        handle.readline()
        fields = handle.readline().split('\t')
    minutes = fields[1].split('m')[0]
    seconds = fields[1].split('m')[1].split('s')[0]
    return float(seconds) + float(minutes) * 60


def read_counters(path):
    """Return a dict mapping each counter key in `path` to its summed value.

    The key is the first space-separated token of a line and the value is
    the last one; repeated keys are accumulated. Blank lines are ignored.
    """
    counters = {}
    with open(path, "r") as handle:
        for line in handle:
            if not line.strip():
                continue
            tokens = line.split(' ')
            counters[tokens[0]] = counters.get(tokens[0], 0) + int(tokens[-1])
    return counters


def classify(counters):
    """Fold the raw counters into single/double/int/total operation counts."""
    result = {'single': 0, 'double': 0, 'int': 0, 'total': 0}
    for name, count in counters.items():
        parts = name.split('_')
        # A trailing "masked" marker hides the element count one field earlier.
        if parts[-1] == 'masked':
            parts[-1] = parts[-2]
        if parts[0] == '*total':
            continue
        if parts[1][0] == 'i':
            # Integer work: element width times element count gives the bit
            # width, which is normalised to 64-bit units when wider.
            bits = int(parts[1][1:]) * int(parts[-1])
            if bits <= 64:
                result['int'] += count
            else:
                # Floor division keeps the Python 2 integer semantics.
                result['int'] += count * bits // 64
        elif parts[2] == 'single':
            result['single'] += count * int(parts[-1])
        else:
            result['double'] += count * int(parts[-1])
    result['total'] = result['single'] + result['double'] + result['int']
    return result


def _percent(part, total):
    """Return part/total as a percentage, or 0.0 when nothing was counted."""
    if not total:
        return 0.0
    return part * 100.0 / total


def main(result_path="result.txt", time_path="time.txt"):
    """Print the summary for the given input files and return the result dict."""
    elapsed = read_elapsed_seconds(time_path)
    counters = read_counters(result_path)
    print('read file success!')
    result = classify(counters)
    print(result)
    print('Percentage of FP64: %2.2f' % _percent(result['double'], result['total']))
    print('Percentage of FP32: %2.2f' % _percent(result['single'], result['total']))
    print('Percentage of INT: %2.2f' % _percent(result['int'], result['total']))
    print('total time is: %fs' % (elapsed))
    print('Performance (sp): %f GFLOPS' % (result['single'] / elapsed / 1000 / 1000 / 1000))
    print('Performance (dp): %f GFLOPS' % (result['double'] / elapsed / 1000 / 1000 / 1000))
    print('TOTAL GFLOPS (sp): %f GFLOP' % (result['single'] / 1.0 / 1000 / 1000 / 1000))
    print('TOTAL GFLOPS (dp): %f GFLOP' % (result['double'] / 1.0 / 1000 / 1000 / 1000))
    return result


if __name__ == "__main__":
    main()
```
git clone --recurse-submodules git@gitlab.m.gsic.titech.ac.jp:Jens/precision.git
./inst/amg.sh
sudo tuned-adm profile latency-performance; tuned-adm verify
./run/amg/test.sh
Subproject commit 295de9693eaabf6f7330ac3a35fd9bd4ad030522
Subproject commit d9b089a0f94e9423b0653ca7ca533bd04c8501cb
Subproject commit ea14ed86d3e612f56383c56a6cff6f77210f7412
Subproject commit 3d48396b77ca8caa3124bc2391f9139c3ffb556c
Subproject commit e273244b65c7d340cc101ae596a55301359024dd
Subproject commit 890a3f9bb3a5cf358504063a1751383b7d46f86d
Subproject commit 5422fecd0a009a8731d0bd96b957d443297a53bc
Subproject commit 9a074521257434e0b9acff9e59ff10e3e881bc32
Subproject commit e8bece99bfa5eab9355549bb587ee36aec9d6c67
Subproject commit 7c58766b180ccb1035e4c220208b64ace3c49cf2
Subproject commit 62f60f2a70407c40dc3a2ed2dd3d69191e4f38dd
Subproject commit daeddf3bfaf3b521a932245fad9871336b53c166
Subproject commit 9771c71f3d25023fc50bc6e84a905d6d50e81151
Subproject commit 694b38eed8a4c09160045895a1bf86fcb35e85a3
Subproject commit 3f758000ffce6ee95a27fb6099f654ecdc5e3add
Subproject commit fcafcc4fec195d8a81c19affd1a3b83f7bab4285
Subproject commit d681db43f45f5437e5258b3663b5a92c078cfb57
Subproject commit 07277047b170529caa5fcd164afd814e70286ce4
Subproject commit 5ab8063ecdc94bdb59a5e65396c85bd54f9e0916
SWFFT @ d0ef3145
Subproject commit d0ef31454577740fbb87618cc35789b7ef838238
Subproject commit 4772cf0194e2ae6d6752c5cacb8cf063fbfef7d0
#!/bin/bash
# Run settings for the AMG benchmark.
#
# TESTCONF lists the configurations to try and BESTCONF the one selected for
# the host. Each entry is a '|'-separated tuple whose last three fields fill
# the PX/PY/PZ placeholders of INPUT and whose first field equals PX*PY*PZ
# (presumably the MPI rank count, with the second field being the threads per
# rank -- TODO confirm against the run script).
#
# The host is picked by matching $HOSTNAME against XEONHOST, IKNLHOST and
# IKNMHOST, which must already be set when this file is read.
export APPDIR="./AMG"
export BINARY="./test/amg"
export MAXXYZ=$((2*2*2*2*2*2*2*2))
export INPUT="-problem 1 -P PX PY PZ -n NX NY NZ"
export NumRunsTEST=3
export NumRunsBEST=10
export MAXTIME="1m"
export RUNSDE="yes"
export RUNPCM="yes"
export RUNVTUNE="yes"
if [[ $HOSTNAME = *"${XEONHOST}"* ]]; then
  # on "normal" Xeon
  export TESTCONF="1|6|1|1|1 1|12|1|1|1 1|24|1|1|1 1|32|1|1|1 1|48|1|1|1 1|96|1|1|1
2|6|2|1|1 2|12|2|1|1 2|24|2|1|1
4|1|2|2|1 4|2|2|2|1 4|4|2|2|1 4|6|2|2|1 4|12|2|2|1
8|1|2|2|2 8|2|2|2|2 8|4|2|2|2 8|6|2|2|2
16|1|4|2|2 16|2|4|2|2 16|4|4|2|2
32|1|4|4|2 32|2|4|4|2
64|1|4|4|4"
  export BESTCONF="8|6|2|2|2"
elif [[ $HOSTNAME = *"${IKNLHOST}"* ]]; then
  # on one of the Phi (knl)
  # All 64-way entries use the 4x4x4 grid so the grid matches the first field.
  export TESTCONF="1|64|1|1|1 1|128|1|1|1 1|192|1|1|1 1|256|1|1|1
4|16|2|2|1 4|32|2|2|1 4|48|2|2|1 4|64|2|2|1
16|4|4|2|2 16|8|4|2|2 16|12|4|2|2 16|16|4|2|2
32|2|4|4|2 32|4|4|4|2 32|6|4|4|2 32|8|4|4|2
64|1|4|4|4 64|2|4|4|4 64|4|4|4|4 64|6|4|4|4
128|1|8|4|4 128|2|8|4|4 128|3|8|4|4
256|1|8|8|4 256|2|8|8|4"
  export BESTCONF="1|128|1|1|1"
elif [[ $HOSTNAME = *"${IKNMHOST}"* ]]; then
  # on one of the Phi (knm)
  # All 64-way entries use the 4x4x4 grid so the grid matches the first field.
  export TESTCONF="1|64|1|1|1 1|72|1|1|1 1|128|1|1|1 1|144|1|1|1 1|192|1|1|1 1|256|1|1|1 1|288|1|1|1
4|18|2|2|1 4|36|2|2|1 4|54|2|2|1 4|72|2|2|1
16|4|4|2|2 16|8|4|2|2 16|12|4|2|2 16|16|4|2|2
18|4|3|3|2 18|8|3|3|2 18|12|3|3|2 18|16|3|3|2
32|2|4|4|2 32|4|4|4|2 32|6|4|4|2 32|8|4|4|2
64|1|4|4|4 64|2|4|4|4 64|4|4|4|4 64|6|4|4|4
72|1|6|4|3 72|2|6|4|3 72|4|6|4|3
128|1|8|4|4 128|2|8|4|4 128|3|8|4|4
144|1|6|6|4 144|2|6|6|4
256|1|8|8|4 256|2|8|8|4
288|1|8|6|6 288|2|8|6|6"
  export BESTCONF="1|128|1|1|1"
else
  echo "Unsupported host"
  exit
fi
#!/bin/bash
# Run settings for the BabelStream benchmark; the host-specific part is
# chosen by matching $HOSTNAME against XEONHOST, IKNLHOST and IKNMHOST.
export APPDIR="./BabelStream"
export BINARYS="./omp-stream_2 ./omp-stream_14"
export INPUT="-s SIZE -n 10"
export NumRunsTEST=3 NumRunsBEST=10
export MAXTIME="2m"
export RUNSDE="yes" RUNPCM="yes" RUNVTUNE="yes"

case "$HOSTNAME" in
  *"${XEONHOST}"*)
    # on "normal" Xeon (uses a longer time limit than the default above)
    export MAXTIME="10m"
    export TESTCONF="2 6 12 18 24"
    export BESTCONF="12"
    ;;
  *"${IKNLHOST}"*)
    # on one of the Phi (knl)
    export TESTCONF="8 16 32 64 96 128"
    export BESTCONF="64"
    ;;
  *"${IKNMHOST}"*)
    # on one of the Phi (knm)
    export TESTCONF="9 18 36 72 108 144"
    export BESTCONF="72"
    ;;
  *)
    echo "Unsupported host"
    exit
    ;;
esac
#!/bin/bash
# Run settings for the CANDLE benchmarks; the host-specific part is chosen
# by matching $HOSTNAME against XEONHOST, IKNLHOST and IKNMHOST.
export APPDIR="./CANDLE"
export BINARYS="p1b1_baseline_keras2.py" # p1b2_baseline_keras2.py p1b3_baseline_keras2.py p2b1_baseline_keras2.py p2b2_baseline_keras2.py p3b1_baseline_keras2.py p3b2_baseline_keras2.py"
export INPUT=""
# Put the Anaconda installation under $ROOTDIR first on the search path.
export PATH=$ROOTDIR/dep/anaconda2/bin:$PATH
export MKL_THREADING_LAYER=GNU
export NumRunsTEST=3 NumRunsBEST=10
export MAXTIME="10m"
export RUNSDE="yes" RUNPCM="yes" RUNVTUNE="yes"

case "$HOSTNAME" in
  *"${XEONHOST}"*)
    # on "normal" Xeon
    export MAXTIME="10m"
    export TESTCONF="2 6 12 24 48"
    export BESTCONF="12"
    ;;
  *"${IKNLHOST}"*)
    # on one of the Phi (knl)
    export TESTCONF="32 64 128 192 256"
    export BESTCONF="32"
    ;;
  *"${IKNMHOST}"*)
    # on one of the Phi (knm)
    export TESTCONF="36 72 144 216 288"
    export BESTCONF="144"
    ;;
  *)
    echo "Unsupported host"
    exit
    ;;
esac
#!/bin/bash
# Run settings for the CoMD benchmark.
#
# TESTCONF lists the configurations to try and BESTCONF the one selected for
# the host. Each entry is a '|'-separated tuple whose last three fields fill
# the PX/PY/PZ placeholders of INPUT and whose first field equals PX*PY*PZ
# (presumably the MPI rank count, with the second field being the threads per
# rank -- TODO confirm against the run script).
#
# The host is picked by matching $HOSTNAME against XEONHOST, IKNLHOST and
# IKNMHOST, which must already be set when this file is read.
export APPDIR="./CoMD"
export BINARY="./bin/CoMD-openmp-mpi"
export INPUT="-iPX -jPY -kPZ -x 40 -y 40 -z 40"
export NumRunsTEST=3
export NumRunsBEST=10
export MAXTIME="1m"
export RUNSDE="yes"
export RUNPCM="yes"
export RUNVTUNE="yes"
if [[ $HOSTNAME = *"${XEONHOST}"* ]]; then
  # on "normal" Xeon
  export TESTCONF="1|96|1|1|1 1|48|1|1|1 1|24|1|1|1 1|12|1|1|1
2|24|2|1|1 2|12|2|1|1
4|12|2|2|1 4|6|2|2|1
12|2|3|2|2
24|1|4|3|2
32|1|4|4|2
48|1|4|4|3
96|1|6|4|4"
  export BESTCONF="48|1|4|4|3"
elif [[ $HOSTNAME = *"${IKNLHOST}"* ]]; then
  # on one of the Phi (knl)
  # All 64-way entries use the 4x4x4 grid so the grid matches the first field.
  export TESTCONF="1|64|1|1|1 1|128|1|1|1 1|192|1|1|1 1|256|1|1|1
4|16|2|2|1 4|32|2|2|1 4|48|2|2|1 4|64|2|2|1
16|4|4|2|2 16|8|4|2|2 16|12|4|2|2 16|16|4|2|2
32|2|4|4|2 32|4|4|4|2 32|6|4|4|2 32|8|4|4|2
64|1|4|4|4 64|2|4|4|4 64|4|4|4|4 64|6|4|4|4
96|1|6|4|4 96|2|6|4|4 96|3|6|4|4
128|1|8|4|4 128|2|8|4|4
192|1|8|6|4
256|1|8|8|4"
  export BESTCONF="32|8|4|4|2"
elif [[ $HOSTNAME = *"${IKNMHOST}"* ]]; then
  # on one of the Phi (knm)
  # All 64-way entries use the 4x4x4 grid so the grid matches the first field.
  export TESTCONF="1|64|1|1|1 1|72|1|1|1 1|128|1|1|1 1|144|1|1|1 1|192|1|1|1 1|256|1|1|1 1|288|1|1|1
4|18|2|2|1 4|36|2|2|1 4|54|2|2|1 4|72|2|2|1
16|4|4|2|2 16|8|4|2|2 16|12|4|2|2 16|16|4|2|2
18|4|3|3|2 18|8|3|3|2 18|12|3|3|2 18|16|3|3|2
32|2|4|4|2 32|4|4|4|2 32|6|4|4|2 32|8|4|4|2
64|1|4|4|4 64|2|4|4|4 64|4|4|4|4 64|6|4|4|4
72|1|6|4|3 72|2|6|4|3 72|4|6|4|3
128|1|8|4|4 128|2|8|4|4 128|3|8|4|4
144|1|6|6|4 144|2|6|6|4
256|1|8|8|4 256|2|8|8|4
288|1|8|6|6 288|2|8|6|6"
  export BESTCONF="72|2|6|4|3"
else
  echo "Unsupported host"
  exit
fi
#!/bin/bash
# Run settings for the FFB benchmark.
#
# TESTCONF lists the configurations to try and BESTCONF the one selected for
# the host. Each entry is a '|'-separated tuple whose last three fields fill
# the PX/PY/PZ placeholders of INPUT and whose first field equals PX*PY*PZ
# (presumably the MPI rank count, with the second field being the threads per
# rank -- TODO confirm against the run script).
#
# The host is picked by matching $HOSTNAME against XEONHOST, IKNLHOST and
# IKNMHOST, which must already be set when this file is read.
export APPDIR="./FFB/bin/"
export BINARY="./ffb_mini"
export MAXDCZ=$((50*50*50))
export INPUT="PX PY PZ DCZ"
export NumRunsTEST=3
export NumRunsBEST=10
export MAXTIME="1m"
export RUNSDE="yes"
export RUNPCM="yes"
export RUNVTUNE="yes"
if [[ $HOSTNAME = *"${XEONHOST}"* ]]; then
  # on "normal" Xeon
  export TESTCONF="1|6|1|1|1 1|12|1|1|1 1|24|1|1|1 1|32|1|1|1 1|48|1|1|1 1|96|1|1|1
2|6|2|1|1 2|12|2|1|1 2|24|2|1|1
4|1|2|2|1 4|2|2|2|1 4|4|2|2|1 4|6|2|2|1 4|12|2|2|1
6|1|3|2|1 6|2|3|2|1 6|4|3|2|1 6|8|3|2|1
12|1|3|2|2 12|2|3|2|2 12|4|3|2|2
24|1|4|3|2 24|2|4|3|2
32|1|4|4|2 32|2|4|4|2
48|1|4|4|3
96|1|6|4|4"
  export BESTCONF="24|1|4|3|2"
elif [[ $HOSTNAME = *"${IKNLHOST}"* ]]; then
  # on one of the Phi (knl)
  # All 64-way entries use the 4x4x4 grid so the grid matches the first field.
  export TESTCONF="1|64|1|1|1 1|128|1|1|1 1|192|1|1|1 1|256|1|1|1
4|16|2|2|1 4|32|2|2|1 4|48|2|2|1 4|64|2|2|1
16|4|4|2|2 16|8|4|2|2 16|12|4|2|2 16|16|4|2|2
32|2|4|4|2 32|4|4|4|2 32|6|4|4|2 32|8|4|4|2
64|1|4|4|4 64|2|4|4|4 64|4|4|4|4 64|6|4|4|4
96|1|6|4|4 96|2|6|4|4 96|3|6|4|4
128|1|8|4|4 128|2|8|4|4
192|1|8|6|4
256|1|8|8|4"
  export BESTCONF="64|2|4|4|4"
elif [[ $HOSTNAME = *"${IKNMHOST}"* ]]; then
  # on one of the Phi (knm)
  # All 64-way entries use the 4x4x4 grid so the grid matches the first field.
  export TESTCONF="1|64|1|1|1 1|72|1|1|1 1|128|1|1|1 1|144|1|1|1 1|192|1|1|1 1|256|1|1|1 1|288|1|1|1
4|18|2|2|1 4|36|2|2|1 4|54|2|2|1 4|72|2|2|1
16|4|4|2|2 16|8|4|2|2 16|12|4|2|2 16|16|4|2|2
18|4|3|3|2 18|8|3|3|2 18|12|3|3|2 18|16|3|3|2
32|2|4|4|2 32|4|4|4|2 32|6|4|4|2 32|8|4|4|2
64|1|4|4|4 64|2|4|4|4 64|4|4|4|4 64|6|4|4|4
72|1|6|4|3 72|2|6|4|3 72|4|6|4|3
128|1|8|4|4 128|2|8|4|4 128|3|8|4|4
144|1|6|6|4 144|2|6|6|4
256|1|8|8|4 256|2|8|8|4
288|1|8|6|6 288|2|8|6|6"
  export BESTCONF="64|2|4|4|4"
else
  echo "Unsupported host"
  exit
fi
#!/bin/bash
# Run settings for the FFVC benchmark.
#
# TESTCONF lists the configurations to try and BESTCONF the one selected for
# the host. Each entry is a '|'-separated tuple; its last three fields
# presumably fill the DX/DY/DZ placeholders of INPUT (TODO confirm against
# the run script) and its first field equals their product.
#
# The host is picked by matching $HOSTNAME against XEONHOST, IKNLHOST and
# IKNMHOST, which must already be set when this file is read.
export APPDIR="./FFVC"
export BINARY="./bin/ffvc_mini"
export INPUT="--scale=strong --size=144 --division=DXxDYxDZ"
export NumRunsTEST=3
export NumRunsBEST=10
export MAXTIME="1m"
export RUNSDE="yes"
export RUNPCM="yes"
export RUNVTUNE="yes"
if [[ $HOSTNAME = *"${XEONHOST}"* ]]; then
  # on "normal" Xeon
  export TESTCONF="1|6|1|1|1 1|12|1|1|1 1|24|1|1|1 1|32|1|1|1 1|48|1|1|1 1|96|1|1|1
2|6|2|1|1 2|12|2|1|1 2|24|2|1|1
4|1|2|2|1 4|2|2|2|1 4|4|2|2|1 4|6|2|2|1 4|12|2|2|1
6|1|3|2|1 6|2|3|2|1 6|4|3|2|1 6|8|3|2|1
12|1|3|2|2 12|2|3|2|2 12|4|3|2|2
24|1|4|3|2 24|2|4|3|2
32|1|4|4|2 32|2|4|4|2
48|1|4|4|3
96|1|6|4|4"
  export BESTCONF="12|4|3|2|2"
elif [[ $HOSTNAME = *"${IKNLHOST}"* ]]; then
  # on one of the Phi (knl)
  # All 64-way entries use the 4x4x4 grid so the grid matches the first field.
  export TESTCONF="1|64|1|1|1 1|128|1|1|1 1|192|1|1|1 1|256|1|1|1
4|16|2|2|1 4|32|2|2|1 4|48|2|2|1 4|64|2|2|1
16|4|4|2|2 16|8|4|2|2 16|12|4|2|2 16|16|4|2|2
32|2|4|4|2 32|4|4|4|2 32|6|4|4|2 32|8|4|4|2
64|1|4|4|4 64|2|4|4|4 64|4|4|4|4 64|6|4|4|4
96|1|6|4|4 96|2|6|4|4 96|3|6|4|4
128|1|8|4|4 128|2|8|4|4
192|1|8|6|4
256|1|8|8|4"
  export BESTCONF="1|64|1|1|1"
elif [[ $HOSTNAME = *"${IKNMHOST}"* ]]; then
  # on one of the Phi (knm)
  # All 64-way entries use the 4x4x4 grid so the grid matches the first field.
  export TESTCONF="1|64|1|1|1 1|72|1|1|1 1|128|1|1|1 1|144|1|1|1 1|192|1|1|1 1|256|1|1|1 1|288|1|1|1
4|18|2|2|1 4|36|2|2|1 4|54|2|2|1 4|72|2|2|1
16|4|4|2|2 16|8|4|2|2 16|12|4|2|2 16|16|4|2|2
18|4|3|3|2 18|8|3|3|2 18|12|3|3|2 18|16|3|3|2
32|2|4|4|2 32|4|4|4|2 32|6|4|4|2 32|8|4|4|2
64|1|4|4|4 64|2|4|4|4 64|4|4|4|4 64|6|4|4|4
72|1|6|4|3 72|2|6|4|3 72|4|6|4|3
128|1|8|4|4 128|2|8|4|4 128|3|8|4|4
144|1|6|6|4 144|2|6|6|4
256|1|8|8|4 256|2|8|8|4
288|1|8|6|6 288|2|8|6|6"
  export BESTCONF="1|72|1|1|1"
else
  echo "Unsupported host"
  exit
fi
# Host-name fragments used to recognise each machine type, plus the list of
# FREQR values exported for whichever fragment occurs in $HOSTNAME.
export XEONHOST="kiev" IKNLHOST="lyon" IKNMHOST="mill"

# The three checks are independent of one another (not an if/elif chain).
case "$HOSTNAME" in
  *"${XEONHOST}"*) export FREQR="1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2 2.1 2.2" ;;
esac
case "$HOSTNAME" in
  *"${IKNLHOST}"*) export FREQR="1 1.1 1.2 1.3" ;;
esac
case "$HOSTNAME" in
  *"${IKNMHOST}"*) export FREQR="1 1.1 1.2 1.3 1.4 1.5" ;;
esac
#!/bin/bash
# Run settings for the HPCG benchmark; the host-specific part is chosen by
# matching $HOSTNAME against XEONHOST, IKNLHOST and IKNMHOST.
export APPDIR="./HPCG/build"
export BINARY="./bin/xhpcg"
export MAXXYZ=$((2*2*2*3*3*5))
export INPUT="--nx=NX --ny=NY --nz=NZ"
export NumRunsTEST=3 NumRunsBEST=10
export MAXTIME="10m"
export RUNSDE="yes" RUNPCM="yes" RUNVTUNE="yes"

case "$HOSTNAME" in
  *"${XEONHOST}"*)
    # on "normal" Xeon
    export TESTCONF="1|12 1|24 1|32 1|48
2|6 2|12 2|18 2|24
12|1 12|2 12|3 12|4
24|1 24|2"
    export BESTCONF="2|24"
    ;;
  *"${IKNLHOST}"*)
    # on one of the Phi (knl)
    export TESTCONF="1|64 1|128 1|192 1|256
4|16 4|32 4|48 4|64
16|4 16|8 16|12 16|16
32|2 32|4 32|6 32|8
64|1 64|2 64|4 64|6
96|1 96|2 96|3
128|1 128|2
192|1
256|1"
    export BESTCONF="96|1"
    ;;
  *"${IKNMHOST}"*)
    # on one of the Phi (knm)
    export TESTCONF="1|64 1|72 1|128 1|144 1|192 1|256 1|288
4|18 4|36 4|54 4|72
16|4 16|8 16|12 16|16
18|4 18|8 18|12 18|16
32|2 32|4 32|6 32|8
64|1 64|2 64|4 64|6
72|1 72|2 72|4
96|1 96|2 96|3
128|1 128|2
144|1 144|2
192|1
256|1
288|1"
    export BESTCONF="64|1"
    ;;
  *)
    echo "Unsupported host"
    exit
    ;;
esac
#!/bin/bash
# Run settings for the HPL benchmark; the host-specific part (including the
# HPLNB value) is chosen by matching $HOSTNAME against XEONHOST, IKNLHOST
# and IKNMHOST.
export APPDIR="./HPL/bin/Linux_Intel64"
export BINARY="./xhpl"
export HPLNS=$((336*192))
export INPUT=""
export NumRunsTEST=3 NumRunsBEST=10
export MAXTIME="5m"
export RUNSDE="yes" RUNPCM="yes" RUNVTUNE="yes"

case "$HOSTNAME" in
  *"${XEONHOST}"*)
    # on "normal" Xeon
    export TESTCONF="1|12|1|1 1|24|1|1 1|32|1|1 1|48|1|1
2|6|2|1 2|12|2|1 2|18|2|1 2|24|2|1
12|1|4|3 12|2|4|3 12|3|4|3 12|4|4|3
24|1|6|4 24|2|6|4"
    export BESTCONF="24|1|6|4"
    export HPLNB="192"
    ;;
  *"${IKNLHOST}"*)
    # on one of the Phi (knl)
    export TESTCONF="1|48|1|1 1|64|1|1 1|96|1|1 1|128|1|1 1|256|1|1
2|32|2|1 2|64|2|1 2|96|2|1 2|128|2|1
4|8|2|2 4|16|2|2 4|32|2|2 4|64|2|2
8|4|4|2 8|8|4|2 8|16|4|2
16|4|4|4 16|6|4|4 16|8|4|4
32|1|8|4 32|2|8|4 32|3|8|4
64|1|8|8 64|2|8|8
128|1|16|8
256|1|16|16"
    export BESTCONF="64|1|8|8"
    export HPLNB="336"
    ;;
  *"${IKNMHOST}"*)
    # on one of the Phi (knm)
    export TESTCONF="1|64|1|1 1|72|1|1 1|128|1|1 1|144|1|1 1|192|1|1 1|256|1|1 1|288|1|1
4|18|2|2 4|36|2|2 4|54|2|2 4|72|2|2
16|4|4|4 16|8|4|4 16|12|4|4 16|16|4|4
18|4|6|3 18|8|6|3 18|12|6|3 18|16|6|3
32|2|8|4 32|4|8|4 32|6|8|4 32|8|8|4
64|1|8|8 64|2|8|8 64|4|8|8 64|6|8|8
72|1|9|8 72|2|9|8 72|4|9|8
96|1|16|6 96|2|16|6 96|3|16|6
128|1|16|8 128|2|16|8
144|1|16|9 144|2|16|9
192|1|16|12
256|1|16|16
288|1|18|16"
    export BESTCONF="72|1|9|8"
    export HPLNB="336"
    ;;
  *)
    echo "Unsupported host"
    exit
    ;;
esac
# Path to the Intel Parallel Studio XE 2018.3.051 psxevars.sh script.
# NOTE(review): not exported here; presumably sourced by the scripts that
# read this file to set up the Intel tool chain -- confirm against callers.
INTEL_PACKAGE="/opt/intel/parallel_studio_xe_2018.3.051/bin/psxevars.sh"
#!/bin/bash
# Run settings for the Laghos benchmark; the host-specific part is chosen by
# matching $HOSTNAME against XEONHOST, IKNLHOST and IKNMHOST.
export APPDIR="./Laghos"
export BINARY="./laghos"
export INPUT="-p 1 -m data/square01_quad.mesh -rs 3 -tf 0.8 -no-vis -pa"
export NumRunsTEST=3 NumRunsBEST=10
export MAXTIME="1m"
export RUNSDE="yes" RUNPCM="yes" RUNVTUNE="yes"

case "$HOSTNAME" in
  *"${XEONHOST}"*)
    # on "normal" Xeon
    export TESTCONF="1|6 1|12 1|24 1|32 1|48 1|96
2|6 2|12 2|24
4|1 4|2 4|4 4|6 4|12
6|1 6|2 6|4 6|8
12|1 12|2 12|4
24|1 24|2
32|1 32|2
48|1
96|1"
    export BESTCONF="24|1"
    ;;
  *"${IKNLHOST}"*)
    # on one of the Phi (knl)
    export TESTCONF="1|64 1|128 1|192 1|256
4|16 4|32 4|48 4|64
16|4 16|8 16|12 16|16
32|2 32|4 32|6 32|8
64|1 64|2 64|4 64|6
96|1 96|2 96|3
128|1 128|2
192|1
256|1"
    export BESTCONF="64|4"
    ;;
  *"${IKNMHOST}"*)
    # on one of the Phi (knm)
    export TESTCONF="1|64 1|72 1|128 1|144 1|192 1|256 1|288
4|18 4|36 4|54 4|72
16|4 16|8 16|12 16|16
18|4 18|8 18|12 18|16
32|2 32|4 32|6 32|8
64|1 64|2 64|4 64|6
72|1 72|2 72|4
96|1 96|2 96|3
128|1 128|2
144|1 144|2
192|1
256|1
288|1"
    export BESTCONF="64|4"
    ;;
  *)
    echo "Unsupported host"
    exit
    ;;
esac
#!/bin/bash
# Run settings for the MACSio benchmark; the host-specific part is chosen by
# matching $HOSTNAME against XEONHOST, IKNLHOST and IKNMHOST.
export APPDIR="./MACSio"
export BINARY="./macsio/macsio"
export MAXNDPP=$((2*2*2*2*2*3*3))
export INPUT="--units_prefix_system decimal --num_dumps NDPP"
export NumRunsTEST=3 NumRunsBEST=10
export MAXTIME="2m"
export RUNSDE="yes" RUNPCM="yes" RUNVTUNE="yes"

case "$HOSTNAME" in
  *"${XEONHOST}"*)
    # on "normal" Xeon
    export TESTCONF="1|1 4|1 6|1 12|1 24|1 32|1 48|1 96|1"
    export BESTCONF="4|1"
    ;;
  *"${IKNLHOST}"*)
    # on one of the Phi (knl)
    export TESTCONF="16|1 32|1 64|1 96|1 128|1 192|1 256|1"
    export BESTCONF="64|1"
    ;;
  *"${IKNMHOST}"*)
    # on one of the Phi (knm)
    export TESTCONF="16|1 18|1 32|1 64|1 72|1 96|1 128|1 144|1 192|1 256|1 288|1"
    export BESTCONF="64|1"
    ;;
  *)
    echo "Unsupported host"
    exit
    ;;
esac
#!/bin/bash
# Run settings for the MiniAMR benchmark; the host-specific part is chosen
# by matching $HOSTNAME against XEONHOST, IKNLHOST and IKNMHOST.
export APPDIR="./MiniAMR/ref"
export BINARY="./ma.x"
export INPUT="--num_refine 4 --max_blocks 9000 --npx PX --npy PY --npz PZ --nx 2 --ny 2 --nz 2 --num_objects 1 --object 2 0 -1.71 -1.71 -1.71 0.04 0.04 0.04 1.7 1.7 1.7 0.0 0.0 0.0 --num_tsteps 100 --checksum_freq 1"
export NumRunsTEST=3 NumRunsBEST=10
export MAXTIME="7m"
export RUNSDE="yes" RUNPCM="yes" RUNVTUNE="yes"

case "$HOSTNAME" in
  *"${XEONHOST}"*)
    # on "normal" Xeon
    export TESTCONF="2|1|2|1|1 4|1|2|2|1 6|1|3|2|1 12|1|3|2|2 24|1|4|3|2 32|1|4|4|2 48|1|4|4|3 96|1|6|4|4"
    export BESTCONF="96|1|6|4|4"
    ;;
  *"${IKNLHOST}"*)
    # on one of the Phi (knl)
    export TESTCONF="32|1|4|4|2 48|1|4|4|3 64|1|4|4|4 80|1|5|4|4 96|1|6|4|4 128|1|8|4|4 192|1|8|6|4 256|1|8|8|4"
    export BESTCONF="128|1|8|4|4" #"256|1|8|8|4" better but also crashes
    ;;
  *"${IKNMHOST}"*)
    # on one of the Phi (knm)
    export TESTCONF="32|1|4|4|2 36|1|6|3|2 48|1|4|4|3 64|1|4|4|4 72|1|6|4|3 80|1|5|4|4 96|1|6|4|4 128|1|8|4|4 144|1|6|6|4 192|1|8|6|4 256|1|8|8|4 288|1|8|6|6"
    export BESTCONF="128|1|8|4|4" #"192|1|8|6|4" use same on KNM to have comparable results, since it's not strong-scaling
    ;;
  *)
    echo "Unsupported host"
    exit
    ;;
esac
#!/bin/bash
# Run settings for the MiniFE benchmark; the host-specific part (including
# the BBINARY choice) is selected by matching $HOSTNAME against XEONHOST,
# IKNLHOST and IKNMHOST.
export APPDIR="./MiniFE"
export BINARYS="./mkl/src/miniFE.x ./openmp-opt-knl/src/miniFE.x ./openmp-opt/src/miniFE.x"
export INPUT="-nx 128 -ny 128 -nz 128"
export NumRunsTEST=3 NumRunsBEST=10
export MAXTIME="1m"
export RUNSDE="yes" RUNPCM="yes" RUNVTUNE="yes"

case "$HOSTNAME" in
  *"${XEONHOST}"*)
    # on "normal" Xeon
    export TESTCONF="1|6 1|12 1|24 1|32 1|48 1|96
2|6 2|12 2|24
4|1 4|2 4|4 4|6 4|12
6|1 6|2 6|4 6|8
12|1 12|2 12|4
24|1 24|2
32|1 32|2
48|1
96|1"
    export BBINARY="./mkl/src/miniFE.x"
    export BESTCONF="24|1"
    ;;
  *"${IKNLHOST}"*)
    # on one of the Phi (knl)
    export TESTCONF="1|64 1|128 1|192 1|256
4|16 4|32 4|48 4|64
16|4 16|8 16|12 16|16
32|2 32|4 32|6 32|8
64|1 64|2 64|4 64|6
96|1 96|2 96|3
128|1 128|2
192|1
256|1"
    export BBINARY="./openmp-opt/src/miniFE.x"
    export BESTCONF="1|256"
    ;;
  *"${IKNMHOST}"*)
    # on one of the Phi (knm)
    export TESTCONF="1|64 1|72 1|128 1|144 1|192 1|256 1|288
4|18 4|36 4|54 4|72
16|4 16|8 16|12 16|16
18|4 18|8 18|12 18|16
32|2 32|4 32|6 32|8
64|1 64|2 64|4 64|6
72|1 72|2 72|4
96|1 96|2 96|3
128|1 128|2
144|1 144|2
192|1
256|1
288|1"
    export BBINARY="./mkl/src/miniFE.x"
    export BESTCONF="72|1"
    ;;
  *)
    echo "Unsupported host"
    exit
    ;;
esac
#!/bin/bash
# Run settings for the MiniTri benchmark (separate MPI and OpenMP binaries
# and inputs); the host-specific part is chosen by matching $HOSTNAME
# against XEONHOST, IKNLHOST and IKNMHOST.
export APPDIR="./MiniTri"
export BINARYMPI="./miniTri/linearAlgebra/MPI/miniTri.exe"
export BINARYOMP="./miniTri/linearAlgebra/openmp/miniTri.exe"
export INPUTMPI="./bcsstk30.mtx MM"
export INPUTOMP="./bcsstk30.mtx 16 OMPNT MM"
export NumRunsTEST=3 NumRunsBEST=10
export MAXTIME="1m"
export RUNSDE="yes" RUNPCM="yes" RUNVTUNE="yes"

case "$HOSTNAME" in
  *"${XEONHOST}"*)
    # on "normal" Xeon
    export TESTCONF="1|6 1|12 1|24 1|32 1|48 1|96
4|1
6|1
12|1
24|1
32|1
48|1
96|1"
    export BESTCONF="1|48"
    ;;
  *"${IKNLHOST}"*)
    # on one of the Phi (knl)
    export TESTCONF="1|64 1|128 1|192 1|256
16|1
32|1
64|1
96|1
128|1
192|1
256|1"
    export BESTCONF="1|128"
    ;;
  *"${IKNMHOST}"*)
    # on one of the Phi (knm)
    export TESTCONF="1|64 1|72 1|128 1|144 1|192 1|256 1|288
64|1
72|1
96|1
128|1
144|1
192|1
256|1
288|1"
    export BESTCONF="1|128"
    ;;
  *)
    echo "Unsupported host"
    exit
    ;;
esac
#!/bin/bash
# Run settings for the MODYLAS benchmark; the host-specific part is chosen
# by matching $HOSTNAME against XEONHOST, IKNLHOST and IKNMHOST.
export APPDIR="./MODYLAS/data/wat222"
export BINARY="../../src/modylas_mini"
export INPUT="./wat222"
export NumRunsTEST=3 NumRunsBEST=10
export MAXTIME="1m"
export RUNSDE="yes" RUNPCM="yes" RUNVTUNE="yes"

case "$HOSTNAME" in
  *"${XEONHOST}"*)
    # on "normal" Xeon
    export TESTCONF="8|1 8|2 8|3 8|4 8|6 8|8 8|10
16|1 16|2 16|3 16|4
32|1 32|2
64|1"
    export BESTCONF="16|3"
    ;;
  *"${IKNLHOST}"*)
    # on one of the Phi (knl)
    export TESTCONF="8|4 8|8 8|16 8|32
16|4 16|8 16|12 16|16
32|2 32|4 32|6 32|8
64|1 64|2 64|4 64|6
128|1 128|2 128|3
256|1 256|2"
    export BESTCONF="64|4"
    ;;
  *"${IKNMHOST}"*)
    # on one of the Phi (knm)
    export TESTCONF="8|4 8|8 8|16 8|32
16|4 16|8 16|12 16|16
32|2 32|4 32|6 32|8
64|1 64|2 64|4 64|6
128|1 128|2 128|3
256|1 256|2"
    export BESTCONF="64|4"
    ;;
  *)
    echo "Unsupported host"
    exit
    ;;
esac
#!/bin/bash
# Run settings for the MVMC benchmark; the host-specific part is chosen by
# matching $HOSTNAME against XEONHOST, IKNLHOST and IKNMHOST.
export APPDIR="./MVMC"
export BINARY="../src/vmc.out"
export INPUT="./multiDir.def"
# Put the Anaconda installation under $ROOTDIR first on the search path.
export PATH=$ROOTDIR/dep/anaconda2/bin:$PATH
export NumRunsTEST=3 NumRunsBEST=10
export MAXTIME="1m"
export RUNSDE="yes" RUNPCM="yes" RUNVTUNE="yes"

case "$HOSTNAME" in
  *"${XEONHOST}"*)
    # on "normal" Xeon
    export TESTCONF="1|6 1|12 1|24 1|32 1|48 1|96
2|6 2|12 2|24
4|1 4|2 4|4 4|6 4|12
6|1 6|2 6|4 6|8
12|1 12|2 12|4
24|1 24|2
32|1 32|2
48|1 48|2
96|1"
    export BESTCONF="24|2"
    ;;
  *"${IKNLHOST}"*)
    # on one of the Phi (knl)
    export TESTCONF="1|64 1|128 1|192 1|256
4|16 4|32 4|48 4|64
16|4 16|8 16|12 16|16
32|2 32|4 32|6 32|8
64|1 64|2 64|4 64|6
96|1 96|2 96|3
128|1 128|2
192|1
256|1"
    export BESTCONF="32|6"
    ;;
  *"${IKNMHOST}"*)
    # on one of the Phi (knm)
    export TESTCONF="1|64 1|72 1|128 1|144 1|192 1|256 1|288
4|18 4|36 4|54 4|72
16|4 16|8 16|12 16|16
18|4 18|8 18|12 18|16
32|2 32|4 32|6 32|8
64|1 64|2 64|4 64|6
72|1 72|2 72|4
96|1 96|2 96|3
128|1 128|2
144|1 144|2
192|1
256|1
288|1"
    export BESTCONF="72|4"
    ;;
  *)
    echo "Unsupported host"
    exit
    ;;
esac
#!/bin/bash
# Run settings for the Nekbone benchmark; the host-specific part is chosen
# by matching $HOSTNAME against XEONHOST, IKNLHOST and IKNMHOST.
export APPDIR="./Nekbone/test/nek_mgrid"
export BINARY="./nekbone"
export INPUT=""
export NumRunsTEST=3 NumRunsBEST=10
export ielN=13824
export MAXTIME="1m"
export RUNSDE="yes" RUNPCM="yes" RUNVTUNE="yes"

case "$HOSTNAME" in
  *"${XEONHOST}"*)
    # on "normal" Xeon
    export TESTCONF="6|1 12|1 16|1 24|1 32|1 48|1 96|1"
    export BESTCONF="96|1"
    ;;
  *"${IKNLHOST}"*)
    # on one of the Phi (knl)
    export TESTCONF="16|1 32|1 64|1 96|1 128|1 192|1 256|1"
    export BESTCONF="128|1"
    ;;
  *"${IKNMHOST}"*)
    # on one of the Phi (knm)
    export TESTCONF="64|1 72|1 96|1 128|1 144|1 192|1 256|1 288|1"
    export BESTCONF="144|1"
    ;;
  *)
    echo "Unsupported host"
    exit
    ;;
esac
#!/bin/bash
# Run settings for the NGSAnalyzer benchmark; the host-specific part is
# chosen by matching $HOSTNAME against XEONHOST, IKNLHOST and IKNMHOST.
export APPDIR="./NGSAnalyzer"
export BINARY="./bin/workflow"
export INPUTDIR="./ngsa_mini_input"
# INPUT is built from INPUTDIR, so INPUTDIR has to be assigned first.
export INPUT="$INPUTDIR/bwa_db/reference.fa $INPUTDIR/seq_contig.md $INPUTDIR/reference.fa $INPUTDIR/reference.fa.fai $INPUTDIR/00-read-rank"
export NumRunsTEST=3 NumRunsBEST=10
export MAXTIME="20m"
export RUNSDE="yes" RUNPCM="yes" RUNVTUNE="yes"

case "$HOSTNAME" in
  *"${XEONHOST}"*)
    # on "normal" Xeon (shorter time limit than the default above)
    export MAXTIME="10m"
    export TESTCONF="1|6 1|12 1|24 1|32 1|48 1|96
2|6 2|12 2|24
4|1 4|2 4|4 4|6 4|12
6|1 6|2 6|4 6|8
12|1 12|2 12|4
24|1 24|2
32|1 32|2
48|1 48|2
96|1"
    export BESTCONF="12|4"
    ;;
  *"${IKNLHOST}"*)
    # on one of the Phi (knl)
    export TESTCONF="1|64 1|128 1|192 1|256
4|16 4|32 4|48 4|64
16|4 16|8 16|12 16|16
32|2 32|4 32|6 32|8
64|1 64|2 64|4 64|6"
    export BESTCONF="4|32"
    ;;
  *"${IKNMHOST}"*)
    # on one of the Phi (knm)
    export TESTCONF="1|64 1|72 1|128 1|144 1|192 1|256 1|288
4|18 4|36 4|54 4|72
16|4 16|8 16|12 16|16
18|4 18|8 18|12 18|16
32|2 32|4 32|6 32|8
64|1 64|2 64|4 64|6
72|1 72|2 72|4"
    export BESTCONF="4|18"
    ;;
  *)
    echo "Unsupported host"
    exit
    ;;
esac
#!/bin/bash
# Run settings for the NICAM benchmark; the host-specific part is chosen by
# matching $HOSTNAME against XEONHOST, IKNLHOST and IKNMHOST.
export APPDIR="./NICAM/test/case/jablonowski"
export BINARY="./nhm_driver"
export NICAM_SYS=Linux64-intel-impi
export INPUT="./gl05rl00z40pe10"
export NumRunsTEST=3 NumRunsBEST=10
export MAXTIME="2m"
export RUNSDE="yes" RUNPCM="yes" RUNVTUNE="yes"

case "$HOSTNAME" in
  *"${XEONHOST}"*)
    # on "normal" Xeon
    export TESTCONF="10|1 10|2 10|3 10|4 10|5 10|6"
    export BESTCONF="10|6"
    ;;
  *"${IKNLHOST}"*)
    # on one of the Phi (knl)
    export TESTCONF="10|5 10|6 10|7 10|10 10|15 10|20 10|25"
    export BESTCONF="10|15"
    ;;
  *"${IKNMHOST}"*)
    # on one of the Phi (knm)
    export TESTCONF="10|5 10|6 10|7 10|10 10|14 10|16 10|20 10|28 10|30"
    export BESTCONF="10|7"
    ;;
  *)
    echo "Unsupported host"
    exit
    ;;
esac
#!/bin/bash
# Run settings for the NTChem benchmark. Unlike a fixed relative path,
# APPDIR is built from the first positional argument ($1). The host-specific
# part is chosen by matching $HOSTNAME against XEONHOST, IKNLHOST and
# IKNMHOST.
export APPDIR="$1/NTChem"
export BINARY="./bin/rimp2.exe"
export INPUT=""
# NTCHEM_DIR and DATA_DIR are derived from the values assigned just before
# them, so these assignments must stay in this order.
export NTCHEM_DIR=$APPDIR
export MODEL="h2o"
export DATA_DIR=${NTCHEM_DIR}/tests/${MODEL}
export NumRunsTEST=3 NumRunsBEST=10
export MAXTIME="1m"
export RUNSDE="yes" RUNPCM="yes" RUNVTUNE="yes"

case "$HOSTNAME" in
  *"${XEONHOST}"*)
    # on "normal" Xeon
    export TESTCONF="1|6 1|12 1|24 1|32 1|48 1|96
2|6 2|12 2|24
4|1 4|2 4|4 4|6 4|12
6|1 6|2 6|4 6|8
12|1 12|2 12|4
24|1 24|2
32|1 32|2
48|1 48|2
96|1"
    export BESTCONF="24|1"
    ;;
  *"${IKNLHOST}"*)
    # on one of the Phi (knl)
    export TESTCONF="1|64 1|128 1|192 1|256
4|16 4|32 4|48 4|64
16|4 16|8 16|12 16|16
32|2 32|4 32|6 32|8
64|1 64|2 64|4 64|6
96|1 96|2 96|3
128|1 128|2
192|1
256|1"
    export BESTCONF="16|8"
    ;;
  *"${IKNMHOST}"*)
    # on one of the Phi (knm)
    export TESTCONF="1|64 1|72 1|128 1|144 1|192 1|256 1|288
4|18 4|36 4|54 4|72
16|4 16|8 16|12 16|16
18|4 18|8 18|12 18|16
32|2 32|4 32|6 32|8
64|1 64|2 64|4 64|6
72|1 72|2 72|4
96|1 96|2 96|3
128|1 128|2
144|1 144|2
192|1
256|1
288|1"
    export BESTCONF="72|2"
    ;;
  *)
    echo "Unsupported host"
    exit
    ;;
esac
#!/bin/bash
export APPDIR="./QCD/src"
export BINARY="./ccs_qcd_solver_bench_class2"
export INPUT=""
export NumRunsTEST=3
export NumRunsBEST=10
export MAXTIME="1m"
export RUNSDE="yes"
export RUNPCM="yes"
export RUNVTUNE="yes"
if [[ $HOSTNAME = *"${XEONHOST}"* ]]; then