no merge in hot loop

Results in a measurable decrease in run time with both the Intel and GNU compilers.
This commit is contained in:
Martin Diehl 2023-10-04 21:59:20 +02:00
parent 05e675bbdb
commit 0e353d9feb
3 changed files with 12 additions and 9 deletions

View File

@ -295,6 +295,8 @@ pure module subroutine dislotungsten_LpAndItsTangent(Lp,dLp_dMp, &
T !< temperature T !< temperature
real(pREAL), dimension(param(ph)%sum_N_sl) :: & real(pREAL), dimension(param(ph)%sum_N_sl) :: &
dot_gamma, ddot_gamma_dtau dot_gamma, ddot_gamma_dtau
real(pREAL), dimension(3,3,param(ph)%sum_N_sl) :: &
P_nS
T = thermal_T(ph,en) T = thermal_T(ph,en)
@ -304,13 +306,12 @@ pure module subroutine dislotungsten_LpAndItsTangent(Lp,dLp_dMp, &
associate(prm => param(ph)) associate(prm => param(ph))
call kinetics(Mp,T,ph,en, dot_gamma,ddot_gamma_dtau) call kinetics(Mp,T,ph,en, dot_gamma,ddot_gamma_dtau)
P_nS = merge(prm%P_nS_pos,prm%P_nS_neg, spread(spread(dot_gamma,1,3),2,3)>0.0_pREAL) ! faster than 'merge' in loop
do i = 1, prm%sum_N_sl do i = 1, prm%sum_N_sl
Lp = Lp + dot_gamma(i)*prm%P_sl(1:3,1:3,i) Lp = Lp + dot_gamma(i)*prm%P_sl(1:3,1:3,i)
forall (k=1:3,l=1:3,m=1:3,n=1:3) & forall (k=1:3,l=1:3,m=1:3,n=1:3) &
dLp_dMp(k,l,m,n) = dLp_dMp(k,l,m,n) & dLp_dMp(k,l,m,n) = dLp_dMp(k,l,m,n) &
+ ddot_gamma_dtau(i) * prm%P_sl(k,l,i) & + ddot_gamma_dtau(i) * prm%P_sl(k,l,i) * P_nS(m,n,i)
* merge(prm%P_nS_pos(m,n,i), &
prm%P_nS_neg(m,n,i), dot_gamma(i)>0.0_pREAL)
end do end do
end associate end associate

View File

@ -272,6 +272,8 @@ pure module subroutine kinehardening_LpAndItsTangent(Lp,dLp_dMp, Mp,ph,en)
i,k,l,m,n i,k,l,m,n
real(pREAL), dimension(param(ph)%sum_N_sl) :: & real(pREAL), dimension(param(ph)%sum_N_sl) :: &
dot_gamma, ddot_gamma_dtau dot_gamma, ddot_gamma_dtau
real(pREAL), dimension(3,3,param(ph)%sum_N_sl) :: &
P_nS
Lp = 0.0_pREAL Lp = 0.0_pREAL
@ -280,13 +282,12 @@ pure module subroutine kinehardening_LpAndItsTangent(Lp,dLp_dMp, Mp,ph,en)
associate(prm => param(ph)) associate(prm => param(ph))
call kinetics(Mp,ph,en, dot_gamma,ddot_gamma_dtau) call kinetics(Mp,ph,en, dot_gamma,ddot_gamma_dtau)
P_nS = merge(prm%P_nS_pos,prm%P_nS_neg, spread(spread(dot_gamma,1,3),2,3)>0.0_pREAL) ! faster than 'merge' in loop
do i = 1, prm%sum_N_sl do i = 1, prm%sum_N_sl
Lp = Lp + dot_gamma(i)*prm%P(1:3,1:3,i) Lp = Lp + dot_gamma(i)*prm%P(1:3,1:3,i)
forall (k=1:3,l=1:3,m=1:3,n=1:3) & forall (k=1:3,l=1:3,m=1:3,n=1:3) &
dLp_dMp(k,l,m,n) = dLp_dMp(k,l,m,n) & dLp_dMp(k,l,m,n) = dLp_dMp(k,l,m,n) &
+ ddot_gamma_dtau(i) * prm%P(k,l,i) & + ddot_gamma_dtau(i) * prm%P(k,l,i) * P_nS(m,n,i)
* merge(prm%P_nS_pos(m,n,i), &
prm%P_nS_neg(m,n,i), dot_gamma(i)>0.0_pREAL)
end do end do
end associate end associate

View File

@ -312,6 +312,8 @@ pure module subroutine phenopowerlaw_LpAndItsTangent(Lp,dLp_dMp,Mp,ph,en)
i,k,l,m,n i,k,l,m,n
real(pREAL), dimension(param(ph)%sum_N_sl) :: & real(pREAL), dimension(param(ph)%sum_N_sl) :: &
dot_gamma_sl,ddot_gamma_dtau_sl dot_gamma_sl,ddot_gamma_dtau_sl
real(pREAL), dimension(3,3,param(ph)%sum_N_sl) :: &
P_nS
real(pREAL), dimension(param(ph)%sum_N_tw) :: & real(pREAL), dimension(param(ph)%sum_N_tw) :: &
dot_gamma_tw,ddot_gamma_dtau_tw dot_gamma_tw,ddot_gamma_dtau_tw
@ -322,13 +324,12 @@ pure module subroutine phenopowerlaw_LpAndItsTangent(Lp,dLp_dMp,Mp,ph,en)
associate(prm => param(ph)) associate(prm => param(ph))
call kinetics_sl(Mp,ph,en,dot_gamma_sl,ddot_gamma_dtau_sl) call kinetics_sl(Mp,ph,en,dot_gamma_sl,ddot_gamma_dtau_sl)
P_nS = merge(prm%P_nS_pos,prm%P_nS_neg, spread(spread(dot_gamma_sl,1,3),2,3)>0.0_pREAL) ! faster than 'merge' in loop
slipSystems: do i = 1, prm%sum_N_sl slipSystems: do i = 1, prm%sum_N_sl
Lp = Lp + dot_gamma_sl(i)*prm%P_sl(1:3,1:3,i) Lp = Lp + dot_gamma_sl(i)*prm%P_sl(1:3,1:3,i)
forall (k=1:3,l=1:3,m=1:3,n=1:3) & forall (k=1:3,l=1:3,m=1:3,n=1:3) &
dLp_dMp(k,l,m,n) = dLp_dMp(k,l,m,n) & dLp_dMp(k,l,m,n) = dLp_dMp(k,l,m,n) &
+ ddot_gamma_dtau_sl(i) * prm%P_sl(k,l,i) & + ddot_gamma_dtau_sl(i) * prm%P_sl(k,l,i) * P_nS(m,n,i)
* merge(prm%P_nS_pos(m,n,i), &
prm%P_nS_neg(m,n,i), dot_gamma_sl(i)>0.0_pREAL)
end do slipSystems end do slipSystems
call kinetics_tw(Mp,ph,en,dot_gamma_tw,ddot_gamma_dtau_tw) call kinetics_tw(Mp,ph,en,dot_gamma_tw,ddot_gamma_dtau_tw)