Merge branch 'PGI-support' into development

2020-01-24 12:24:38 +01:00 · 2020-01-24 12:24:38 +01:00 · 3d6ec6955f
parent 13ffe45e23 4c09c23374
commit 3d6ec6955f
5 changed files with 93 additions and 22 deletions
--- a/cmake/Compiler-GNU.cmake
+++ b/cmake/Compiler-GNU.cmake
@ -103,9 +103,9 @@ set (COMPILE_FLAGS "${COMPILE_FLAGS} -ffpe-summary=all")
 # print summary of floating point exceptions (invalid,zero,overflow,underflow,inexact,denormal)

 # Additional options
-# -Warray-temporarieswarnings:   because we have many temporary arrays (performance issue?):
+# -Warray-temporaries:           warnings, because we have many temporary arrays (performance issue?)
 # -Wimplicit-interface:          no interfaces for lapack/MPI routines
-# -Wunsafe-loop-optimizations:   warn if the loop cannot be optimized due to nontrivial assumptions.
+# -Wunsafe-loop-optimizations:   warn if the loop cannot be optimized due to nontrivial assumptions

 #------------------------------------------------------------------------------------------------
 # Runtime debugging
@ -122,6 +122,7 @@ set (DEBUG_FLAGS "${DEBUG_FLAGS} -fcheck=all")

 # Additional options
 # -ffpe-trap=precision,denormal,underflow
+
 #------------------------------------------------------------------------------------------------
 #  precision settings
 set (PRECISION_FLAGS "${PRECISION_FLAGS} -fdefault-real-8")
--- a/cmake/Compiler-PGI.cmake
+++ b/cmake/Compiler-PGI.cmake
@ -2,23 +2,51 @@
 # PGI Compiler
 ###################################################################################################

+if (OPENMP)
+  set (OPENMP_FLAGS "-mp")
+else ()
+  set (OPENMP_FLAGS "-nomp")
+endif ()
+
+
 if (OPTIMIZATION STREQUAL "OFF")
  set (OPTIMIZATION_FLAGS "-O0"       )
 elseif (OPTIMIZATION STREQUAL "DEFENSIVE")
-  set (OPTIMIZATION_FLAGS "-O2")
+  set (OPTIMIZATION_FLAGS "-O2 -fast")
 elseif (OPTIMIZATION STREQUAL "AGGRESSIVE")
-  set (OPTIMIZATION_FLAGS "-O3")
+  set (OPTIMIZATION_FLAGS "-O4 -fast -Mvect=sse")
 endif ()

+set (STANDARD_CHECK "-Mallocatable=03 -Mstandard")

 #------------------------------------------------------------------------------------------------
 # Fine tuning compilation options
 set (COMPILE_FLAGS "${COMPILE_FLAGS} -Mpreprocess")
 # preprocessor

-set (STANDARD_CHECK "-Mallocatable=03")
+set (COMPILE_FLAGS "${COMPILE_FLAGS} -Minfo=all")
+# instructs the compiler to produce information on standard error

-#------------------------------------------------------------------------------------------------
+set (COMPILE_FLAGS "${COMPILE_FLAGS} -Minform=warn")
+# instructs the compiler to display error messages at the specified and higher levels
+
+set (COMPILE_FLAGS "${COMPILE_FLAGS} -Mdclchk")
+# instructs the compiler to require that all program variables be declared
+
+#------------------------------------------------------------------------------------------------
 # Runtime debugging
 set (DEBUG_FLAGS "${DEBUG_FLAGS} -g")
 # Includes debugging information in the object module; sets the optimization level to zero unless a -O option is present on the command line
+set (DEBUG_FLAGS "${DEBUG_FLAGS} -C")
+# Generates code to check array bounds
+set (DEBUG_FLAGS "${DEBUG_FLAGS} -Mchkptr")
+# Check for NULL pointers (pgf95, pgfortran only)
+set (DEBUG_FLAGS "${DEBUG_FLAGS} -Mchkstk")
+# Check the stack for available space upon entry to and before the start of a parallel region. Useful when many private variables are declared
+set (DEBUG_FLAGS "${DEBUG_FLAGS} -Mbounds")
+# Enables array bounds checking (overlaps with -C above)
+
+#------------------------------------------------------------------------------------------------
+#  precision settings
+set (PRECISION_FLAGS "${PRECISION_FLAGS} -r8")
+# Promotes REAL variables and constants to DOUBLE PRECISION
--- a/src/constitutive.f90
+++ b/src/constitutive.f90
@ -433,10 +433,10 @@ subroutine constitutive_hooke_SandItsTangents(S, dS_dFe, dS_dFi, &
  E = 0.5_pReal*(matmul(transpose(Fe),Fe)-math_I3)                                                  !< Green-Lagrange strain in unloaded configuration
  S = math_mul3333xx33(C,matmul(matmul(transpose(Fi),E),Fi))                                        !< 2PK stress in lattice configuration in work conjugate with GL strain pulled back to lattice configuration

-  forall (i=1:3, j=1:3)
+  do i =1, 3;do j=1,3
    dS_dFe(i,j,1:3,1:3) = matmul(Fe,matmul(matmul(Fi,C(i,j,1:3,1:3)),transpose(Fi)))                !< dS_ij/dFe_kl = C_ijmn * Fi_lm * Fi_on * Fe_ko
    dS_dFi(i,j,1:3,1:3) = 2.0_pReal*matmul(matmul(E,Fi),C(i,j,1:3,1:3))                             !< dS_ij/dFi_kl = C_ijln * E_km * Fe_mn
-  end forall
+  enddo; enddo

 end subroutine constitutive_hooke_SandItsTangents

--- a/src/lattice.f90
+++ b/src/lattice.f90
@ -43,10 +43,17 @@ module lattice
    LATTICE_FCC_NCLEAVAGESYSTEM = [3, 4]                                                            !< # of cleavage systems per family for fcc
 
  integer, parameter  :: &
+#ifndef __PGI
    LATTICE_FCC_NSLIP     = sum(LATTICE_FCC_NSLIPSYSTEM), &                                         !< total # of slip systems for fcc
    LATTICE_FCC_NTWIN     = sum(LATTICE_FCC_NTWINSYSTEM), &                                         !< total # of twin systems for fcc
    LATTICE_FCC_NTRANS    = sum(LATTICE_FCC_NTRANSSYSTEM), &                                        !< total # of transformation systems for fcc
    LATTICE_FCC_NCLEAVAGE = sum(LATTICE_FCC_NCLEAVAGESYSTEM)                                        !< total # of cleavage systems for fcc
+#else
+    LATTICE_FCC_NSLIP     = 18, &
+    LATTICE_FCC_NTWIN     = 12, &
+    LATTICE_FCC_NTRANS    = 12, &
+    LATTICE_FCC_NCLEAVAGE = 7
+#endif

  real(pReal), dimension(3+3,LATTICE_FCC_NSLIP), parameter :: &
    LATTICE_FCC_SYSTEMSLIP = reshape(real([&
@ -128,9 +135,15 @@ module lattice
    LATTICE_BCC_NCLEAVAGESYSTEM = [3, 6]                                                            !< # of cleavage systems per family for bcc
 
  integer, parameter  :: &
+#ifndef __PGI
    LATTICE_BCC_NSLIP     = sum(LATTICE_BCC_NSLIPSYSTEM), &                                         !< total # of slip systems for bcc
    LATTICE_BCC_NTWIN     = sum(LATTICE_BCC_NTWINSYSTEM), &                                         !< total # of twin systems for bcc
    LATTICE_BCC_NCLEAVAGE = sum(LATTICE_BCC_NCLEAVAGESYSTEM)                                        !< total # of cleavage systems for bcc
+#else
+    LATTICE_BCC_NSLIP     = 24, &
+    LATTICE_BCC_NTWIN     = 12, &
+    LATTICE_BCC_NCLEAVAGE = 9
+#endif

  real(pReal), dimension(3+3,LATTICE_BCC_NSLIP), parameter :: &
    LATTICE_BCC_SYSTEMSLIP = reshape(real([&
@ -206,9 +219,15 @@ module lattice
    LATTICE_HEX_NCLEAVAGESYSTEM = [3]                                                               !< # of cleavage systems per family for hex
 
  integer, parameter  :: &
+#ifndef __PGI
    LATTICE_HEX_NSLIP     = sum(LATTICE_HEX_NSLIPSYSTEM), &                                         !< total # of slip systems for hex
    LATTICE_HEX_NTWIN     = sum(LATTICE_HEX_NTWINSYSTEM), &                                         !< total # of twin systems for hex
    LATTICE_HEX_NCLEAVAGE = sum(LATTICE_HEX_NCLEAVAGESYSTEM)                                        !< total # of cleavage systems for hex
+#else
+    LATTICE_HEX_NSLIP     = 33, &
+    LATTICE_HEX_NTWIN     = 24, &
+    LATTICE_HEX_NCLEAVAGE = 3
+#endif

  real(pReal), dimension(4+4,LATTICE_HEX_NSLIP), parameter :: &
    LATTICE_HEX_SYSTEMSLIP = reshape(real([&
@ -301,7 +320,11 @@ module lattice
    LATTICE_BCT_NSLIPSYSTEM = [2, 2, 2, 4, 2, 4, 2, 2, 4, 8, 4, 8, 8 ]                              !< # of slip systems per family for bct (Sn) Bieler J. Electr Mater 2009
 
  integer, parameter :: &
+#ifndef __PGI
    LATTICE_BCT_NSLIP = sum(LATTICE_BCT_NSLIPSYSTEM)                                                !< total # of slip systems for bct
+#else
+    LATTICE_BCT_NSLIP = 52
+#endif
 
  real(pReal), dimension(3+3,LATTICE_BCT_NSLIP), parameter :: &
    LATTICE_BCT_SYSTEMSLIP = reshape(real([&
@ -379,7 +402,11 @@ module lattice
    LATTICE_ISO_NCLEAVAGESYSTEM = [3]                                                               !< # of cleavage systems per family for iso
 
  integer, parameter  :: &
+#ifndef __PGI
    LATTICE_ISO_NCLEAVAGE = sum(LATTICE_ISO_NCLEAVAGESYSTEM)                                        !< total # of cleavage systems for iso
+#else
+    LATTICE_ISO_NCLEAVAGE = 3
+#endif

  real(pReal), dimension(3+3,LATTICE_ISO_NCLEAVAGE), parameter :: &
    LATTICE_ISO_SYSTEMCLEAVAGE= reshape(real([&
@ -396,7 +423,11 @@ module lattice
    LATTICE_ORT_NCLEAVAGESYSTEM = [1, 1, 1]                                                         !< # of cleavage systems per family for ortho
 
  integer, parameter  :: &
+#ifndef __PGI
    LATTICE_ORT_NCLEAVAGE = sum(LATTICE_ORT_NCLEAVAGESYSTEM)                                        !< total # of cleavage systems for ortho
+#else
+    LATTICE_ORT_NCLEAVAGE = 3
+#endif

  real(pReal), dimension(3+3,LATTICE_ORT_NCLEAVAGE), parameter :: &
    LATTICE_ORT_SYSTEMCLEAVAGE = reshape(real([&
--- a/src/rotations.f90
+++ b/src/rotations.f90
@ -610,7 +610,7 @@ function om2ax(om) result(ax)
  else
    call dgeev('N','V',3,om_,3,Wr,Wi,devNull,3,VR,3,work,size(work,1),ierr)
    if (ierr /= 0) call IO_error(0,ext_msg='Error in om2ax: DGEEV return not zero')
-#if defined(__GFORTRAN__) &&  __GNUC__<9 || defined(__INTEL_COMPILER) && INTEL_COMPILER<1800
+#if defined(__GFORTRAN__) &&  __GNUC__<9 || defined(__INTEL_COMPILER) && INTEL_COMPILER<1800 || defined(__PGI)
    i = maxloc(merge(1,0,cEq(cmplx(Wr,Wi,pReal),cmplx(1.0_pReal,0.0_pReal,pReal),tol=1.0e-14_pReal)),dim=1)
 #else
    i = findloc(cEq(cmplx(Wr,Wi,pReal),cmplx(1.0_pReal,0.0_pReal,pReal),tol=1.0e-14_pReal),.true.,dim=1) !find eigenvalue (1,0)
@ -1266,38 +1266,49 @@ subroutine unitTest
            sin(2.0_pReal*PI*x(1))*A]
      if(qu(1)<0.0_pReal) qu = qu * (-1.0_pReal)
    endif
-
+#ifndef __PGI
    if(dNeq0(norm2(om2qu(qu2om(qu))-qu),1.0e-12_pReal)) msg = trim(msg)//'om2qu/qu2om,'
    if(dNeq0(norm2(eu2qu(qu2eu(qu))-qu),1.0e-12_pReal)) msg = trim(msg)//'eu2qu/qu2eu,'
    if(dNeq0(norm2(ax2qu(qu2ax(qu))-qu),1.0e-12_pReal)) msg = trim(msg)//'ax2qu/qu2ax,'
    if(dNeq0(norm2(ro2qu(qu2ro(qu))-qu),1.0e-12_pReal)) msg = trim(msg)//'ro2qu/qu2ro,'
    if(dNeq0(norm2(ho2qu(qu2ho(qu))-qu),1.0e-7_pReal))  msg = trim(msg)//'ho2qu/qu2ho,'
    if(dNeq0(norm2(cu2qu(qu2cu(qu))-qu),1.0e-7_pReal))  msg = trim(msg)//'cu2qu/qu2cu,'
+#endif

    om = qu2om(qu)
+#ifndef __PGI
    if(dNeq0(norm2(om2qu(eu2om(om2eu(om)))-qu),1.0e-7_pReal))  msg = trim(msg)//'eu2om/om2eu,'
    if(dNeq0(norm2(om2qu(ax2om(om2ax(om)))-qu),1.0e-7_pReal))  msg = trim(msg)//'ax2om/om2ax,'
    if(dNeq0(norm2(om2qu(ro2om(om2ro(om)))-qu),1.0e-12_pReal)) msg = trim(msg)//'ro2om/om2ro,'
    if(dNeq0(norm2(om2qu(ho2om(om2ho(om)))-qu),1.0e-7_pReal))  msg = trim(msg)//'ho2om/om2ho,'
    if(dNeq0(norm2(om2qu(cu2om(om2cu(om)))-qu),1.0e-7_pReal))  msg = trim(msg)//'cu2om/om2cu,'
+#endif

    eu = qu2eu(qu)
+#ifndef __PGI
    if(dNeq0(norm2(eu2qu(ax2eu(eu2ax(eu)))-qu),1.0e-12_pReal)) msg = trim(msg)//'ax2eu/eu2ax,'
    if(dNeq0(norm2(eu2qu(ro2eu(eu2ro(eu)))-qu),1.0e-12_pReal)) msg = trim(msg)//'ro2eu/eu2ro,'
    if(dNeq0(norm2(eu2qu(ho2eu(eu2ho(eu)))-qu),1.0e-7_pReal))  msg = trim(msg)//'ho2eu/eu2ho,'
    if(dNeq0(norm2(eu2qu(cu2eu(eu2cu(eu)))-qu),1.0e-7_pReal))  msg = trim(msg)//'cu2eu/eu2cu,'
+#endif

    ax = qu2ax(qu)
+#ifndef __PGI
    if(dNeq0(norm2(ax2qu(ro2ax(ax2ro(ax)))-qu),1.0e-12_pReal)) msg = trim(msg)//'ro2ax/ax2ro,'
    if(dNeq0(norm2(ax2qu(ho2ax(ax2ho(ax)))-qu),1.0e-7_pReal))  msg = trim(msg)//'ho2ax/ax2ho,'
    if(dNeq0(norm2(ax2qu(cu2ax(ax2cu(ax)))-qu),1.0e-7_pReal))  msg = trim(msg)//'cu2ax/ax2cu,'
+#endif

    ro = qu2ro(qu)
+#ifndef __PGI
    if(dNeq0(norm2(ro2qu(ho2ro(ro2ho(ro)))-qu),1.0e-7_pReal))  msg = trim(msg)//'ho2ro/ro2ho,'
    if(dNeq0(norm2(ro2qu(cu2ro(ro2cu(ro)))-qu),1.0e-7_pReal))  msg = trim(msg)//'cu2ro/ro2cu,'
+#endif

    ho = qu2ho(qu)
+#ifndef __PGI
    if(dNeq0(norm2(ho2qu(cu2ho(ho2cu(ho)))-qu),1.0e-7_pReal))  msg = trim(msg)//'cu2ho/ho2cu,'
+#endif

    call R%fromMatrix(om)