Merge branch 'no-FMA' into 'development'

let the compiler do the work

See merge request damask/DAMASK!824
This commit is contained in:
Philip Eisenlohr 2023-09-27 12:06:35 +00:00
commit f8cead9e0a
3 changed files with 3 additions and 47 deletions

View File

@ -1049,26 +1049,16 @@ pure subroutine math_eigh33(w,v,m)
U = max(T, T**2) U = max(T, T**2)
threshold = sqrt(5.68e-14_pREAL * U**2) threshold = sqrt(5.68e-14_pREAL * U**2)
#ifndef __INTEL_LLVM_COMPILER
v(1:3,1) = [m(1,3)*w(1) + v(1,2), & v(1:3,1) = [m(1,3)*w(1) + v(1,2), &
m(2,3)*w(1) + v(2,2), & m(2,3)*w(1) + v(2,2), &
#else
v(1:3,1) = [IEEE_FMA(m(1,3),w(1),v(1,2)), &
IEEE_FMA(m(2,3),w(1),v(2,2)), &
#endif
(m(1,1) - w(1)) * (m(2,2) - w(1)) - v(3,2)] (m(1,1) - w(1)) * (m(2,2) - w(1)) - v(3,2)]
norm = norm2(v(1:3, 1)) norm = norm2(v(1:3, 1))
fallback1: if (norm < threshold) then fallback1: if (norm < threshold) then
call math_eigh(w,v,error,m) call math_eigh(w,v,error,m)
else fallback1 else fallback1
v(1:3,1) = v(1:3, 1) / norm v(1:3,1) = v(1:3, 1) / norm
#ifndef __INTEL_LLVM_COMPILER
v(1:3,2) = [m(1,3)*w(2) + v(1,2), & v(1:3,2) = [m(1,3)*w(2) + v(1,2), &
m(2,3)*w(2) + v(2,2), & m(2,3)*w(2) + v(2,2), &
#else
v(1:3,2) = [IEEE_FMA(m(1,3),w(2),v(1,2)), &
IEEE_FMA(m(2,3),w(2),v(2,2)), &
#endif
(m(1,1) - w(2)) * (m(2,2) - w(2)) - v(3,2)] (m(1,1) - w(2)) * (m(2,2) - w(2)) - v(3,2)]
norm = norm2(v(1:3, 2)) norm = norm2(v(1:3, 2))
fallback2: if (norm < threshold) then fallback2: if (norm < threshold) then

View File

@ -693,11 +693,7 @@ function integrateStateEuler(F_0,F,Fp0,Fi0,state0,Delta_t,ph,en) result(broken)
if (any(IEEE_is_NaN(dotState))) return if (any(IEEE_is_NaN(dotState))) return
sizeDotState = plasticState(ph)%sizeDotState sizeDotState = plasticState(ph)%sizeDotState
#ifndef __INTEL_LLVM_COMPILER
plasticState(ph)%state(1:sizeDotState,en) = state0 + dotState*Delta_t plasticState(ph)%state(1:sizeDotState,en) = state0 + dotState*Delta_t
#else
plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,state0)
#endif
broken = plastic_deltaState(ph,en) broken = plastic_deltaState(ph,en)
if (broken) return if (broken) return
@ -736,11 +732,7 @@ function integrateStateAdaptiveEuler(F_0,F,Fp0,Fi0,state0,Delta_t,ph,en) result(
sizeDotState = plasticState(ph)%sizeDotState sizeDotState = plasticState(ph)%sizeDotState
r = - dotState * 0.5_pREAL * Delta_t r = - dotState * 0.5_pREAL * Delta_t
#ifndef __INTEL_LLVM_COMPILER
plasticState(ph)%state(1:sizeDotState,en) = state0 + dotState*Delta_t plasticState(ph)%state(1:sizeDotState,en) = state0 + dotState*Delta_t
#else
plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,state0)
#endif
broken = plastic_deltaState(ph,en) broken = plastic_deltaState(ph,en)
if (broken) return if (broken) return
@ -861,18 +853,10 @@ function integrateStateRK(F_0,F,Fp0,Fi0,state0,Delta_t,ph,en,A,B,C,DB) result(br
dotState = A(1,stage) * plastic_RKdotState(1:sizeDotState,1) dotState = A(1,stage) * plastic_RKdotState(1:sizeDotState,1)
do n = 2, stage do n = 2, stage
#ifndef __INTEL_LLVM_COMPILER
dotState = dotState + A(n,stage)*plastic_RKdotState(1:sizeDotState,n) dotState = dotState + A(n,stage)*plastic_RKdotState(1:sizeDotState,n)
#else
dotState = IEEE_FMA(A(n,stage),plastic_RKdotState(1:sizeDotState,n),dotState)
#endif
end do end do
#ifndef __INTEL_LLVM_COMPILER
plasticState(ph)%state(1:sizeDotState,en) = state0 + dotState*Delta_t plasticState(ph)%state(1:sizeDotState,en) = state0 + dotState*Delta_t
#else
plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,state0)
#endif
broken = integrateStress(F_0+(F-F_0)*Delta_t*C(stage),Fp0,Fi0,Delta_t*C(stage), ph,en) broken = integrateStress(F_0+(F-F_0)*Delta_t*C(stage),Fp0,Fi0,Delta_t*C(stage), ph,en)
if (broken) exit if (broken) exit
@ -886,11 +870,7 @@ function integrateStateRK(F_0,F,Fp0,Fi0,state0,Delta_t,ph,en,A,B,C,DB) result(br
plastic_RKdotState(1:sizeDotState,size(B)) = dotState plastic_RKdotState(1:sizeDotState,size(B)) = dotState
dotState = matmul(plastic_RKdotState,B) dotState = matmul(plastic_RKdotState,B)
#ifndef __INTEL_LLVM_COMPILER
plasticState(ph)%state(1:sizeDotState,en) = state0 + dotState*Delta_t plasticState(ph)%state(1:sizeDotState,en) = state0 + dotState*Delta_t
#else
plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,state0)
#endif
if (present(DB)) & if (present(DB)) &
broken = .not. converged(matmul(plastic_RKdotState(1:sizeDotState,1:size(DB)),DB) * Delta_t, & broken = .not. converged(matmul(plastic_RKdotState(1:sizeDotState,1:size(DB)),DB) * Delta_t, &
@ -1174,18 +1154,12 @@ module function phase_mechanical_dPdF(Delta_t,co,ce) result(dPdF)
else else
lhs_3333 = 0.0_pREAL; rhs_3333 = 0.0_pREAL lhs_3333 = 0.0_pREAL; rhs_3333 = 0.0_pREAL
do o=1,3; do p=1,3 do o=1,3; do p=1,3
#ifndef __INTEL_LLVM_COMPILER
lhs_3333(1:3,1:3,o,p) = lhs_3333(1:3,1:3,o,p) & lhs_3333(1:3,1:3,o,p) = lhs_3333(1:3,1:3,o,p) &
+ matmul(invSubFi0,dLidFi(1:3,1:3,o,p)) * Delta_t + matmul(invSubFi0,dLidFi(1:3,1:3,o,p)) * Delta_t
lhs_3333(1:3,o,1:3,p) = lhs_3333(1:3,o,1:3,p) & lhs_3333(1:3,o,1:3,p) = lhs_3333(1:3,o,1:3,p) &
+ invFi*invFi(p,o) + invFi*invFi(p,o)
rhs_3333(1:3,1:3,o,p) = rhs_3333(1:3,1:3,o,p) & rhs_3333(1:3,1:3,o,p) = rhs_3333(1:3,1:3,o,p) &
- matmul(invSubFi0,dLidS(1:3,1:3,o,p)) * Delta_t - matmul(invSubFi0,dLidS(1:3,1:3,o,p)) * Delta_t
#else
lhs_3333(1:3,1:3,o,p) = IEEE_FMA(matmul(invSubFi0,dLidFi(1:3,1:3,o,p)),Delta_t,lhs_3333(1:3,1:3,o,p))
lhs_3333(1:3,o,1:3,p) = IEEE_FMA(invFi,invFi(p,o),lhs_3333(1:3,o,1:3,p))
rhs_3333(1:3,1:3,o,p) = IEEE_FMA(matmul(invSubFi0,dLidS(1:3,1:3,o,p)),-Delta_t,rhs_3333(1:3,1:3,o,p))
#endif
end do; end do end do; end do
call math_invert(temp_99,error,math_3333to99(lhs_3333)) call math_invert(temp_99,error,math_3333to99(lhs_3333))
if (error) then if (error) then
@ -1214,12 +1188,8 @@ module function phase_mechanical_dPdF(Delta_t,co,ce) result(dPdF)
temp_3333(1:3,1:3,p,o) = matmul(matmul(temp_33_2,dLpdS(1:3,1:3,p,o)), invFi) & temp_3333(1:3,1:3,p,o) = matmul(matmul(temp_33_2,dLpdS(1:3,1:3,p,o)), invFi) &
+ matmul(temp_33_3,dLidS(1:3,1:3,p,o)) + matmul(temp_33_3,dLidS(1:3,1:3,p,o))
end do; end do end do; end do
#ifndef __INTEL_LLVM_COMPILER
lhs_3333 = math_mul3333xx3333(dSdFe,temp_3333) * Delta_t & lhs_3333 = math_mul3333xx3333(dSdFe,temp_3333) * Delta_t &
+ math_mul3333xx3333(dSdFi,dFidS) + math_mul3333xx3333(dSdFi,dFidS)
#else
lhs_3333 = IEEE_FMA(math_mul3333xx3333(dSdFe,temp_3333),Delta_t,math_mul3333xx3333(dSdFi,dFidS))
#endif
call math_invert(temp_99,error,math_eye(9)+math_3333to99(lhs_3333)) call math_invert(temp_99,error,math_eye(9)+math_3333to99(lhs_3333))
if (error) then if (error) then

View File

@ -105,13 +105,9 @@ pure function eval(self,x) result(y)
integer :: o integer :: o
y = self%coef(ubound(self%coef,1)) y = 0.0_pREAL
do o = ubound(self%coef,1)-1, 0, -1 do o = ubound(self%coef,1), 0, -1
#ifndef __INTEL_LLVM_COMPILER
y = y*(x-self%x_ref) + self%coef(o) y = y*(x-self%x_ref) + self%coef(o)
#else
y = IEEE_FMA(y,x-self%x_ref,self%coef(o))
#endif
end do end do
end function eval end function eval