replaced almost all calls of matmul by our own code, gives better performance in parallel computations
This commit is contained in:
parent
6abff4b3ba
commit
cf1c7ce82a
|
@ -128,7 +128,7 @@
|
|||
use prec, only: pReal,pInt
|
||||
use FEsolving
|
||||
use debug
|
||||
use math, only: math_init, invnrmMandel, math_identity2nd, math_Mandel3333to66,math_Mandel33to6,math_Mandel6to33,math_det3x3,math_I3
|
||||
use math
|
||||
use mesh, only: mesh_init,mesh_FEasCP, mesh_NcpElems, FE_Nips, FE_mapElemtype, mesh_element
|
||||
use lattice, only: lattice_init
|
||||
use constitutive, only: constitutive_init,constitutive_state_old,constitutive_state_new,material_Cslip_66
|
||||
|
@ -195,9 +195,9 @@
|
|||
CPFEM_calc_done = .true. ! now calc is done
|
||||
endif
|
||||
! translate from P and dP/dF to CS and dCS/dE
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
Kirchhoff_bar = matmul(CPFEM_PK1_bar(:,:,CPFEM_in, cp_en),transpose(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en)))
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
Kirchhoff_bar = math_mul33x33(CPFEM_PK1_bar(:,:,CPFEM_in, cp_en),transpose(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en)))
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
J_inverse = 1.0_pReal/math_det3x3(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en))
|
||||
CPFEM_stress_bar(1:CPFEM_ngens,CPFEM_in,cp_en) = math_Mandel33to6(J_inverse*Kirchhoff_bar)
|
||||
!
|
||||
|
|
|
@ -116,7 +116,7 @@
|
|||
use prec, only: pReal,pInt
|
||||
use FEsolving
|
||||
use debug
|
||||
use math, only: math_init, invnrmMandel, math_identity2nd, math_Mandel3333to66,math_Mandel33to6,math_Mandel6to33,math_det3x3,math_I3
|
||||
use math
|
||||
use mesh, only: mesh_init,mesh_FEasCP, mesh_NcpElems, FE_Nips, FE_mapElemtype, mesh_element
|
||||
use lattice, only: lattice_init
|
||||
use constitutive, only: constitutive_init,constitutive_state_old,constitutive_state_new,material_Cslip_66
|
||||
|
@ -189,9 +189,9 @@
|
|||
CPFEM_calc_done = .true. ! now calc is done
|
||||
endif
|
||||
! translate from P and dP/dF to CS and dCS/dE
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
Kirchhoff_bar = matmul(CPFEM_PK1_bar(:,:,CPFEM_in, cp_en),transpose(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en)))
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
Kirchhoff_bar = math_mul33x33(CPFEM_PK1_bar(:,:,CPFEM_in, cp_en),transpose(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en)))
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
J_inverse = 1.0_pReal/math_det3x3(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en))
|
||||
CPFEM_stress_bar(1:CPFEM_ngens,CPFEM_in,cp_en) = math_Mandel33to6(J_inverse*Kirchhoff_bar)
|
||||
!
|
||||
|
|
|
@ -38,8 +38,7 @@ CONTAINS
|
|||
use debug
|
||||
use constitutive, only: constitutive_Nstatevars,constitutive_Nresults
|
||||
use mesh, only: mesh_element
|
||||
use math, only: math_Mandel6to33,math_Mandel33to6,math_Mandel3333to66,&
|
||||
math_I3,math_det3x3,math_invert3x3
|
||||
use math
|
||||
implicit none
|
||||
!
|
||||
character(len=*) msg
|
||||
|
@ -66,9 +65,9 @@ CONTAINS
|
|||
Fg_aim = Fg_old ! make "new", "aim" a synonym for "old"
|
||||
Fp_new = Fp_old
|
||||
call math_invert3x3(Fp_old,inv,det,error)
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
Fe_new = matmul(Fg_old,inv)
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
Fe_new = math_mul33x33(Fg_old,inv)
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
state_bestguess = state_new ! remember potentially available state guess
|
||||
state_new = state_old
|
||||
!
|
||||
|
@ -91,9 +90,10 @@ CONTAINS
|
|||
state_new = state_bestguess ! try best available guess for state
|
||||
if (dt_aim > 0.0_pReal) then
|
||||
call math_invert3x3(Fg_aim,inv,det,error) ! inv of Fg_aim
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
Lp = 0.5_pReal*Lp + 0.5_pReal*(math_I3 - matmul(Fp_current,matmul(inv,Fe_current)))/dt_aim ! interpolate Lp and L
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
Lp = 0.5_pReal*Lp + 0.5_pReal*(math_I3 - math_mul33x33(Fp_current,&
|
||||
math_mul33x33(inv,Fe_current)))/dt_aim ! interpolate Lp and L
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
endif
|
||||
!!$OMP CRITICAL (timeint)
|
||||
call TimeIntegration(msg,Lp,Fp_new,Fe_new,P,state_new,post_results,.true., & ! def gradients and PK2 at end of time step
|
||||
|
@ -221,7 +221,6 @@ CONTAINS
|
|||
constitutive_homogenizedC,constitutive_dotState,constitutive_LpAndItsTangent,&
|
||||
constitutive_Nresults,constitutive_Microstructure,constitutive_post_results
|
||||
use math
|
||||
|
||||
use IO
|
||||
implicit none
|
||||
!
|
||||
|
@ -249,9 +248,9 @@ CONTAINS
|
|||
return
|
||||
endif
|
||||
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
A = matmul(transpose(invFp_old), matmul(transpose(Fg_new),matmul(Fg_new,invFp_old)))
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
A = math_mul33x33(transpose(invFp_old), math_mul33x33(transpose(Fg_new),math_mul33x33(Fg_new,invFp_old)))
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
!
|
||||
if (all(state == 0.0_pReal)) then
|
||||
state = state_old ! former state guessed, if none specified
|
||||
|
@ -291,11 +290,11 @@ Inner: do ! inner iteration: Lp
|
|||
!
|
||||
B = math_i3 - dt*Lpguess
|
||||
BT = transpose(B)
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
AB = matmul(A,B)
|
||||
BTA = matmul(BT,A)
|
||||
Tstar_v = 0.5_pReal*matmul(C_66,math_mandel33to6(matmul(BT,AB)-math_I3))
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
AB = math_mul33x33(A,B)
|
||||
BTA = math_mul33x33(BT,A)
|
||||
Tstar_v = 0.5_pReal*math_mul66x6(C_66,math_mandel33to6(math_mul33x33(BT,AB)-math_I3))
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
Tstar = math_Mandel6to33(Tstar_v)
|
||||
p_hydro=(Tstar_v(1)+Tstar_v(2)+Tstar_v(3))/3.0_pReal
|
||||
forall(i=1:3) Tstar_v(i) = Tstar_v(i)-p_hydro ! subtract hydrostatic pressure
|
||||
|
@ -371,9 +370,9 @@ Inner: do ! inner iteration: Lp
|
|||
dTdLp(3*(i-1)+j,3*(k-1)+l) = dTdLp(3*(i-1)+j,3*(k-1)+l) + &
|
||||
C(i,j,l,n)*AB(k,n)+C(i,j,m,l)*BTA(m,k)
|
||||
dTdLp = -0.5_pReal*dt*dTdLp
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
dRdLp = eye2 - matmul(dLp,dTdLp) ! calc dR/dLp
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
dRdLp = eye2 - math_mul99x99(dLp,dTdLp) ! calc dR/dLp
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
invdRdLp = 0.0_pReal
|
||||
call math_invert(9,dRdLp,invdRdLp,dummy,failed) ! invert dR/dLp --> dLp/dR
|
||||
if (failed) then
|
||||
|
@ -425,9 +424,9 @@ Inner: do ! inner iteration: Lp
|
|||
debug_OuterLoopDistribution(iOuter) = debug_OuterLoopDistribution(iOuter)+1
|
||||
!$OMP END CRITICAL (out)
|
||||
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
invFp_new = matmul(invFp_old,B)
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
invFp_new = math_mul33x33(invFp_old,B)
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
call math_invert3x3(invFp_new,Fp_new,det,failed)
|
||||
if (failed) then
|
||||
msg = 'inversion Fp_new^-1'
|
||||
|
@ -441,10 +440,10 @@ Inner: do ! inner iteration: Lp
|
|||
!
|
||||
Fp_new = Fp_new*det**(1.0_pReal/3.0_pReal) ! regularize Fp by det = det(InvFp_new) !!
|
||||
forall (i=1:3) Tstar_v(i) = Tstar_v(i)+p_hydro ! add hydrostatic component back
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
Fe_new = matmul(Fg_new,invFp_new) ! calc resulting Fe
|
||||
P = matmul(Fe_new,matmul(Tstar,transpose(invFp_new))) ! first PK stress
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
Fe_new = math_mul33x33(Fg_new,invFp_new) ! calc resulting Fe
|
||||
P = math_mul33x33(Fe_new,math_mul33x33(Tstar,transpose(invFp_new))) ! first PK stress
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
!
|
||||
!!$OMP CRITICAL (write2out)
|
||||
! write(6,*)
|
||||
|
|
|
@ -268,6 +268,24 @@
|
|||
|
||||
END FUNCTION
|
||||
|
||||
!**************************************************************************
|
||||
! matrix multiplication 6x6
|
||||
!**************************************************************************
|
||||
FUNCTION math_mul66x6(A,B)
|
||||
|
||||
use prec, only: pReal, pInt
|
||||
implicit none
|
||||
|
||||
integer(pInt) i
|
||||
real(pReal), dimension(6) :: math_mul66x6,B
|
||||
real(pReal), dimension(6,6) :: A
|
||||
|
||||
forall (i=1:6) math_mul66x6(i) = &
|
||||
A(i,1)*B(1) + A(i,2)*B(2) + A(i,3)*B(3) + &
|
||||
A(i,4)*B(4) + A(i,5)*B(5) + A(i,6)*B(6)
|
||||
return
|
||||
|
||||
END FUNCTION
|
||||
|
||||
!**************************************************************************
|
||||
! matrix multiplication 9x9
|
||||
|
@ -881,10 +899,10 @@
|
|||
real(pReal), dimension(3,3) :: r
|
||||
real(pReal) math_disorient, tr
|
||||
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
|
||||
r = matmul(math_EulerToR(EulerB),transpose(math_EulerToR(EulerA)))
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
r = math_mul33x33(math_EulerToR(EulerB),transpose(math_EulerToR(EulerA)))
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
|
||||
tr = (r(1,1)+r(2,2)+r(3,3)-1.0_pReal)*0.4999999_pReal
|
||||
math_disorient = abs(0.5_pReal*pi-asin(tr))
|
||||
|
@ -951,10 +969,10 @@ endif
|
|||
if (rnd(5) <= exp(-1.0_pReal*(math_disorient(origin,disturb)/scatter)**2)) exit
|
||||
enddo
|
||||
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
|
||||
math_sampleGaussOri = math_RtoEuler(matmul(math_EulerToR(disturb),math_EulerToR(center)))
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
math_sampleGaussOri = math_RtoEuler(math_mul33x33(math_EulerToR(disturb),math_EulerToR(center)))
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
|
||||
return
|
||||
|
||||
|
@ -1029,10 +1047,10 @@ endif
|
|||
pRot = math_RodrigtoR(axis,angle)
|
||||
|
||||
! ---# apply the three rotations #---
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
|
||||
math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
math_sampleFiberOri = math_RtoEuler(math_mul33x33(pRot,math_mul33x33(fRot,oRot)))
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
|
||||
return
|
||||
|
||||
|
@ -1100,20 +1118,20 @@ math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
|
|||
real(pReal) FE(3,3),R(3,3),U(3,3),CE(3,3),EW1,EW2,EW3,EB1(3,3),EB2(3,3),EB3(3,3),UI(3,3),det
|
||||
|
||||
error = .false.
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
|
||||
ce=matmul(transpose(fe),fe)
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
ce=math_mul33x33(transpose(fe),fe)
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
|
||||
CALL math_spectral1(CE,EW1,EW2,EW3,EB1,EB2,EB3)
|
||||
U=DSQRT(EW1)*EB1+DSQRT(EW2)*EB2+DSQRT(EW3)*EB3
|
||||
call math_invert3x3(U,UI,det,error)
|
||||
if (.not. error) then
|
||||
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
|
||||
R = matmul(fe,ui)
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
R = math_mul33x33(fe,ui)
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
|
||||
endif
|
||||
|
||||
|
@ -1173,10 +1191,10 @@ math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
|
|||
D3=1.0_pReal/(EW3-EW1)/(EW3-EW2)
|
||||
M1=M-EW1*math_I3
|
||||
M2=M-EW2*math_I3
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
|
||||
EB3=MATMUL(M1,M2)*D3
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
EB3=math_mul33x33(M1,M2)*D3
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
|
||||
EB1=math_I3-EB3
|
||||
! both EB2 and EW2 are set to zero so that they do not
|
||||
|
@ -1187,10 +1205,10 @@ math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
|
|||
D1=1.0_pReal/(EW1-EW2)/(EW1-EW3)
|
||||
M2=M-math_I3*EW2
|
||||
M3=M-math_I3*EW3
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
|
||||
EB1=MATMUL(M2,M3)*D1
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
EB1=math_mul33x33(M2,M3)*D1
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
|
||||
EB2=math_I3-EB1
|
||||
! both EB3 and EW3 are set to zero so that they do not
|
||||
|
@ -1201,10 +1219,10 @@ math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
|
|||
D2=1.0_pReal/(EW2-EW1)/(EW2-EW3)
|
||||
M1=M-math_I3*EW1
|
||||
M3=M-math_I3*EW3
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
|
||||
EB2=MATMUL(M1,M3)*D2
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
EB2=math_mul33x33(M1,M3)*D2
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
|
||||
EB1=math_I3-EB2
|
||||
! both EB3 and EW3 are set to zero so that they do not
|
||||
|
@ -1218,12 +1236,12 @@ math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
|
|||
M1=M-EW1*math_I3
|
||||
M2=M-EW2*math_I3
|
||||
M3=M-EW3*math_I3
|
||||
!$OMP CRITICAL (evilmatmul)
|
||||
!!$OMP CRITICAL (evilmatmul)
|
||||
|
||||
EB1=MATMUL(M2,M3)*D1
|
||||
EB2=MATMUL(M1,M3)*D2
|
||||
EB3=MATMUL(M1,M2)*D3
|
||||
!$OMP END CRITICAL (evilmatmul)
|
||||
EB1=math_mul33x33(M2,M3)*D1
|
||||
EB2=math_mul33x33(M1,M3)*D2
|
||||
EB3=math_mul33x33(M1,M2)*D3
|
||||
!!$OMP END CRITICAL (evilmatmul)
|
||||
|
||||
END IF
|
||||
END IF
|
||||
|
|
Loading…
Reference in New Issue