Commit 3653da46 authored by Fabian Wein's avatar Fabian Wein Committed by Florian Toth
Browse files

Add option CFS_NATIVE for -march=native optimization for cfs and cfsdeps

* CFS_NATIVE has max 20% benefit for Xeon and much less on AMD and Mac
* fix compile errors and warnings for clang-12
* optimization flags are in CFS_OPT_FLAGS
* CFS_OPT_FLAGS is set in compiler.cmake and applied in
  FindCFSDEPS.cmake for CFSDEPS_*_FLAGS
* CFSDEPS_*_FLAGS could deserve some love for C++/C/Fortran differences
* Intel could deserve some love
parent 64ac4257
......@@ -41,7 +41,7 @@ include("cmake_modules/CFS_macros.cmake")
# CFS version number. Simply the last modification date.
# It is good practice to keep this up to date (CFS_NAME can stay)
set(CFS_VERSION_YEAR "21")
set(CFS_VERSION_MONTH "01")
set(CFS_VERSION_MONTH "03")
set(CFS_VERSION "${CFS_VERSION_YEAR}.${CFS_VERSION_MONTH}")
# Version Name. Every major release gets a label/name
......@@ -102,6 +102,7 @@ set_property(CACHE CFS_PARDISO PROPERTY STRINGS MKL SCHENK)
# Switch on/off usage of cfsdepscache
option(CFS_DEPS_PRECOMPILED "Use (create) precompiled cfsdeps packages" ${CFS_DEPS_PRECOMPILED_DEFAULT})
option(DEBUG "Build debug version of the code which allows debugging, logging and has asserts - up to 10 times slower." ${DEBUG_DEFAULT})
option(CFS_NATIVE "Compile for native CPU for speed, might not be transferable" ${CFS_NATIVE_DEFAULT})
option(CFS_PROFILING "Set profiling compiler options for Release for vtune and valgrind (-g)." OFF)
option(CFS_COVERAGE "Turn on coverage compiler options (gcc, clang) to be analyzed by gcov." OFF)
option(CFS_FSANITIZE "Use -fsanitize to check for memory leaks and general errors. Very costly during runtime!" OFF)
......
Subproject commit 2047e4ca4900fa7d936de146d2fdbf2efd60508b
Subproject commit 2b89b683a4a49a568781bde9d867d62179bc6c05
......@@ -35,11 +35,27 @@ SET(CFSDEPS_DIR "${CFS_SOURCE_DIR}/cfsdeps")
# We do not want to see warnings from external projects, since they would
# show up on CDash.
#-----------------------------------------------------------------------------
if(CMAKE_COMPILER_IS_GNUCXX)
set(CFSDEPS_C_FLAGS "-w")
set(CFSDEPS_CXX_FLAGS "-w")
set(CFSDEPS_Fortran_FLAGS "-w")
# Flags handed to the external cfsdeps projects when the C++ compiler is GCC or
# Clang: the shared optimization flags plus -w, which silences all warnings.
if(CFS_CXX_COMPILER_NAME STREQUAL "GCC" OR CFS_CXX_COMPILER_NAME STREQUAL "CLANG")
  if(NOT CFS_OPT_FLAGS)
    # An empty CFS_OPT_FLAGS presumably means this file was processed before
    # compiler.cmake had a chance to set it.
    message(STATUS "CFS_OPT_FLAGS not set, check order with compiler.cmake")
  endif()
  # The C flags are set from scratch; the C++ flags keep whatever
  # CFSDEPS_CXX_FLAGS already contained.
  set(CFSDEPS_C_FLAGS "${CFS_OPT_FLAGS} -w")
  set(CFSDEPS_CXX_FLAGS "${CFS_OPT_FLAGS} -w ${CFSDEPS_CXX_FLAGS}")
  if(USE_CGAL) # remove when we use header only CGAL
    set(CFSDEPS_C_FLAGS "-frounding-math ${CFSDEPS_C_FLAGS}")
    set(CFSDEPS_CXX_FLAGS "-frounding-math ${CFSDEPS_CXX_FLAGS}")
  endif()
endif()
# Fortran flags for the external cfsdeps projects when the Fortran compiler is
# GCC or FLANG: the shared optimization flags plus -w to silence warnings.
# Both tests use the Fortran compiler name, and CFS_OPT_FLAGS is dereferenced
# with ${...} so its value, not the literal text, reaches the compiler.
if(CFS_FORTRAN_COMPILER_NAME STREQUAL "GCC" OR CFS_FORTRAN_COMPILER_NAME STREQUAL "FLANG")
  set(CFSDEPS_Fortran_FLAGS "${CFS_OPT_FLAGS} -w")
endif()
# TODO: Intel is missing, but there is a lot of CFSDEPS_ stuff for Intel in compiler.cmake
#message(STATUS "CFS_OPT_FLAGS = ${CFS_OPT_FLAGS}")
#message(STATUS "CMAKE_COMPILER_IS_GNUCXX = ${CMAKE_COMPILER_IS_GNUCXX}")
#message(STATUS "CFS_CXX_COMPILER_NAME = ${CFS_CXX_COMPILER_NAME}")
#message(STATUS "CFS_FORTRAN_COMPILER_NAME = ${CFS_FORTRAN_COMPILER_NAME}")
#message(STATUS "CFSDEPS_CXX_FLAGS = ${CFSDEPS_CXX_FLAGS}")
# handle gfortran >= 10.
if(${CMAKE_Fortran_COMPILER_ID} MATCHES "GNU" AND (NOT ${FC_VERSION} VERSION_LESS 10))
......
......@@ -121,9 +121,8 @@ IF(CFS_CXX_COMPILER_NAME STREQUAL "GCC" OR CFS_CXX_COMPILER_NAME STREQUAL "CLANG
STRING(REPLACE "." ";" CFS_CXX_COMPILER_VER_LIST ${CFS_CXX_COMPILER_VER})
LIST(GET CFS_CXX_COMPILER_VER_LIST 0 CFS_CXX_COMPILER_MAJOR_VER)
# we assume C++11 for CFS for any compiler
set(CFS_CXX_FLAGS "-std=c++11 -Wuninitialized -Wno-error=unused-variable -Wno-error=maybe-uninitialized -DBOOST_NO_AUTO_PTR ${CFS_CXX_FLAGS}")
set(CFSDEPS_CXX_FLAGS "-std=c++11 ${CFSDEPS_CXX_FLAGS}")
# we assume C++14 for CFS for any compiler (including icc below)
set(CFS_CXX_FLAGS "-std=c++14 -Wuninitialized -Wno-error=unused-variable -Wno-error=maybe-uninitialized -DBOOST_NO_AUTO_PTR ${CFS_CXX_FLAGS}")
set(CFS_C_FLAGS "-std=c11")
IF(CFS_FSANITIZE)
......@@ -146,7 +145,6 @@ IF(CFS_CXX_COMPILER_NAME STREQUAL "GCC" OR CFS_CXX_COMPILER_NAME STREQUAL "CLANG
# -frounding-math: is needed for CGAL library
IF(USE_CGAL)
SET(CFS_CXX_FLAGS "${CFS_CXX_FLAGS} -frounding-math")
SET(CFSDEPS_CXX_FLAGS "-frounding-math ${CFSDEPS_CXX_FLAGS}")
ENDIF(USE_CGAL)
SET(CHECK_MEM_ALLOC 1)
......@@ -155,9 +153,22 @@ IF(CFS_CXX_COMPILER_NAME STREQUAL "GCC" OR CFS_CXX_COMPILER_NAME STREQUAL "CLANG
SET(CFS_C_FLAGS "-Wall -fmessage-length=0 ${CFS_C_FLAGS}")
SET(CFS_CXX_FLAGS "${CFS_CXX_FLAGS} -Wall -ftemplate-depth-100")
IF(CFS_ARCH STREQUAL "X86_64")
SET(CFS_OPT_FLAGS "-m64 -march=k8 -msse2")
ENDIF()
# the CFS_OPT_FLAGS are also used in FindCFSDEPS.cmake for CFSDEPS_*_FLAGS
# Portable default: the AMD K8 target is a years-old legacy that has worked;
# a better portable -march id has not been identified yet.
set(CFS_OPT_FLAGS "-m64 -march=k8")
if(CFS_NATIVE)
  # Tune for the CPU of the build host instead. Notes recorded for the candidate flags:
  #   -m64                          32 bit int and 64 bit pointers and long
  #   -march=native                 up to 20% boost against k8, slightly different results on some tests
  #   -Ofast                        maximize performance - almost no additional effect
  #   -funroll-all-loops            enable unrolling - almost no additional effect
  #   -flto                         link time optimization - extremely slow linking on gcc with almost no effect
  #   --param prefetch-latency=300  generate memory preload instructions - negative effect
  # Further candidates, including some for AMD Clang:
  # https://developer.amd.com/wordpress/media/2020/04/Compiler%20Options%20Quick%20Ref%20Guide%20for%20AMD%20EPYC%207xx2%20Series%20Processors.pdf
  set(CFS_OPT_FLAGS "-m64 -march=native -Ofast ")
endif()
ENDIF() # end debug/release
......@@ -240,7 +251,7 @@ main ()
ENDIF()
IF(USE_CGAL)
IF(USE_CGAL) # does CGAL really use C?
SET(CFS_C_FLAGS "-frounding-math ${CFS_C_FLAGS}")
SET(CFS_CXX_FLAGS "-frounding-math ${CFS_CXX_FLAGS}")
ENDIF()
......@@ -289,17 +300,12 @@ ELSEIF(CFS_CXX_COMPILER_NAME STREQUAL "ICC")
IF(UNIX)
IF(DEBUG)
SET(CFS_C_FLAGS "-g -c99 -w1 -Wcheck -Werror ${CFS_C_FLAGS}")
# -std=c++11 fails on tumbleweed because the stdlib of gcc 6.2 has a bug.
# However, it works without a flag; maybe the Intel compiler checks the stdlib.
# On woody one needs to add -std=c++11, e.g. in CXX_FLAGS via ccmake, when using gcc 4.8 stdlib.
# It's annoying that Intel depends on the system stdlib :(
SET(CFS_CXX_FLAGS "-std=c++14 -g -w1 -Wcheck -Werror ${CFS_CXX_FLAGS}")
SET(CFSDEPS_CXX_FLAGS "-std=c++14 -g ${CFSDEPS_CXX_FLAGS}")
SET(CFSDEPS_CXX_FLAGS "-std=c++14 -g ${CFSDEPS_CXX_FLAGS}") # TODO move this to FindCFSDEPS.cmake
SET(CHECK_MEM_ALLOC 1)
ELSE()
# release case
SET(CFS_C_FLAGS "-c99 -w0 -Werror ${CFS_C_FLAGS}")
# see above with -std=c++11
SET(CFS_CXX_FLAGS "-std=c++14 -w0 -Werror ${CFS_CXX_FLAGS}")
SET(CFSDEPS_CXX_FLAGS "-std=c++14 -w0 ${CFSDEPS_CXX_FLAGS}")
SET(CFS_SUPPRESSIONS "-wd1125,654,980 -Wno-unknown-pragmas -Wno-comment")
......
......@@ -26,6 +26,7 @@ set(TESTSUITE_DIR_DEFAULT "${CFS_SOURCE_DIR}/Testsuite")
set(EXPLICIT_TEMPLATE_INSTANTIATION_DEFAULT ON)
set(CFS_DEPS_CACHE_DIR_DEFAULT "${CFS_BINARY_DIR}/cfsdeps/cache")
set(CFS_DEPS_PRECOMPILED_DEFAULT ON)
set(CFS_NATIVE_DEFAULT OFF)
set(USE_GIDPOST_DEFAULT ON)
set(USE_GMV_DEFAULT ON)
......
......@@ -2208,8 +2208,6 @@ namespace CoupledField {
std::string regnam,regtype;
std::vector< std::string > tokens(32);
std::vector<std::string>::const_iterator it, end;
UInt numdata;
int *dataVal;
......@@ -2276,8 +2274,6 @@ namespace CoupledField {
std::string line;
std::string regnam,regtype;
std::vector<std::string>::const_iterator it, end;
for (UInt ib=0; ib<linePtsCMCmnds_.size(); ib++) {
if (!GetLine(line,linePtsCMCmnds_[ib])) {
......@@ -2317,8 +2313,6 @@ namespace CoupledField {
std::string line;
std::string regnam,regtype;
std::vector<std::string>::const_iterator it, end;
UInt ansElemNum;
StdVector<UInt> elemNumbers;
......@@ -2380,8 +2374,6 @@ namespace CoupledField {
std::string line;
std::string regnam,regtype;
std::vector<std::string>::const_iterator it, end;
UInt sfeElemNum;
StdVector<UInt> elemNumbers;
std::string actSfeID="", lastSfeID="", strElemNum="";
......@@ -2636,8 +2628,6 @@ namespace CoupledField {
std::ostringstream errMsg;
std::string line;
std::vector<std::string>::const_iterator it, end;
UInt matNum;
UInt numdata;
StdVector<UInt> dataVal;
......@@ -2755,8 +2745,6 @@ namespace CoupledField {
std::ostringstream errMsg;
std::string line;
std::vector<std::string>::const_iterator it, end;
UInt numdata;
StdVector<UInt> dataVal;
......
......@@ -368,8 +368,6 @@ namespace CoupledField {
inFile_ >> elemNum >> str;
inFile_.ignore(100,'\n');
std::vector<std::string>::iterator it, end;
if ( elemNames.Find(str) == -1 ) {
elemNames.Push_back(str);
}
......
......@@ -242,8 +242,9 @@ namespace CoupledField
CoefFunction::CoefInverseType type = ptCoef->GetInverseType();
if ( type == CoefFunction::INVSOURCE ) {
rhsSource_ = ptCoef;
if ( approxSourceWithDeltaFnc_ )
rhsSource_->SetInverseSourceApproxType(CoefFunction::DELTA);
if ( approxSourceWithDeltaFnc_ ) {
rhsSource_->SetInverseSourceApproxType(CoefFunction::DELTA);
}
isRHSsource = true;
num++;
}
......
......@@ -314,11 +314,11 @@ void SurfaceNitscheABInt<COEF_DATA_TYPE, B_DATA_TYPE>
for(UInt i=0;i<aIsoOrder.GetSize();i++)
order1 = (aIsoOrder[i]>order1)? aIsoOrder[i] : order1;
if(order1 == 0 && this->ptFeSpace1_->GetSpaceType() == this->ptFeSpace1_->HCURL){
// that's a bit dirty because zero order Nedelec elements are neither zero nor first order...
// but here we set it to first, otherwise we divide by zero
order1 = 1;
}
if(order1 == 0 && this->ptFeSpace1_->GetSpaceType() == this->ptFeSpace1_->HCURL){
// that's a bit dirty because zero order Nedelec elements are neither zero nor first order...
// but here we set it to first, otherwise we divide by zero
order1 = 1;
}
}
MAT_DATA_TYPE surface1(min/(Double)order1);
......
......@@ -1377,7 +1377,7 @@ StdVector<Condition*> ConditionContainer::GetList(Condition::Type type, DesignEl
if(access != Function::NO_ACCESS && g->GetAccess() != access)
continue;
result.Push_back(g);
result.Push_back(g);
}
return result;
}
......
......@@ -324,7 +324,7 @@ DesignSpace::DesignSpace(StdVector<RegionIdType>& reg_data, PtrParamNode pn, Ers
}
}
de.SetDesign(random ? (((float) rand()/RAND_MAX) * (upper - lower) + lower) : initial);
de.SetDesign(random ? (((float) rand()/(float) RAND_MAX) * (upper - lower) + lower) : initial);
data.Push_back(de);
totalElements_.Push_back(&data.Last());
......
......@@ -194,9 +194,10 @@ void SGP::PostInit()
filtering_gap_grad.Resize(obj->outer_grad.GetSize());
filtering_gaps_bound += constr[i]->GetBoundValue();
}
if (constr[i]->GetType() == Condition::VOLUME || constr[i]->GetType() == Condition::GLOBAL_TWO_SCALE_VOL || constr[i]->GetType() == Condition::GLOBAL_TENSOR_TRACE)
if (constr[i]->GetType() == Condition::VOLUME || constr[i]->GetType() == Condition::GLOBAL_TWO_SCALE_VOL || constr[i]->GetType() == Condition::GLOBAL_TENSOR_TRACE) {
volume_bound = constr[i]->GetBoundValue();
volume_grad.Resize(obj->outer_grad.GetSize());
volume_grad.Resize(obj->outer_grad.GetSize()); // originally this was w/o bracket but indent - probably a bug?!
}
}
cc.view->Done();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment