replaced almost all calls of matmul by our own code, gives better performance in parallel computations

This commit is contained in:
Franz Roters 2008-07-23 12:49:40 +00:00
parent 6abff4b3ba
commit cf1c7ce82a
4 changed files with 81 additions and 64 deletions

View File

@ -128,7 +128,7 @@
use prec, only: pReal,pInt use prec, only: pReal,pInt
use FEsolving use FEsolving
use debug use debug
use math, only: math_init, invnrmMandel, math_identity2nd, math_Mandel3333to66,math_Mandel33to6,math_Mandel6to33,math_det3x3,math_I3 use math
use mesh, only: mesh_init,mesh_FEasCP, mesh_NcpElems, FE_Nips, FE_mapElemtype, mesh_element use mesh, only: mesh_init,mesh_FEasCP, mesh_NcpElems, FE_Nips, FE_mapElemtype, mesh_element
use lattice, only: lattice_init use lattice, only: lattice_init
use constitutive, only: constitutive_init,constitutive_state_old,constitutive_state_new,material_Cslip_66 use constitutive, only: constitutive_init,constitutive_state_old,constitutive_state_new,material_Cslip_66
@ -195,9 +195,9 @@
CPFEM_calc_done = .true. ! now calc is done CPFEM_calc_done = .true. ! now calc is done
endif endif
! translate from P and dP/dF to CS and dCS/dE ! translate from P and dP/dF to CS and dCS/dE
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
Kirchhoff_bar = matmul(CPFEM_PK1_bar(:,:,CPFEM_in, cp_en),transpose(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en))) Kirchhoff_bar = math_mul33x33(CPFEM_PK1_bar(:,:,CPFEM_in, cp_en),transpose(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en)))
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
J_inverse = 1.0_pReal/math_det3x3(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en)) J_inverse = 1.0_pReal/math_det3x3(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en))
CPFEM_stress_bar(1:CPFEM_ngens,CPFEM_in,cp_en) = math_Mandel33to6(J_inverse*Kirchhoff_bar) CPFEM_stress_bar(1:CPFEM_ngens,CPFEM_in,cp_en) = math_Mandel33to6(J_inverse*Kirchhoff_bar)
! !

View File

@ -116,7 +116,7 @@
use prec, only: pReal,pInt use prec, only: pReal,pInt
use FEsolving use FEsolving
use debug use debug
use math, only: math_init, invnrmMandel, math_identity2nd, math_Mandel3333to66,math_Mandel33to6,math_Mandel6to33,math_det3x3,math_I3 use math
use mesh, only: mesh_init,mesh_FEasCP, mesh_NcpElems, FE_Nips, FE_mapElemtype, mesh_element use mesh, only: mesh_init,mesh_FEasCP, mesh_NcpElems, FE_Nips, FE_mapElemtype, mesh_element
use lattice, only: lattice_init use lattice, only: lattice_init
use constitutive, only: constitutive_init,constitutive_state_old,constitutive_state_new,material_Cslip_66 use constitutive, only: constitutive_init,constitutive_state_old,constitutive_state_new,material_Cslip_66
@ -189,9 +189,9 @@
CPFEM_calc_done = .true. ! now calc is done CPFEM_calc_done = .true. ! now calc is done
endif endif
! translate from P and dP/dF to CS and dCS/dE ! translate from P and dP/dF to CS and dCS/dE
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
Kirchhoff_bar = matmul(CPFEM_PK1_bar(:,:,CPFEM_in, cp_en),transpose(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en))) Kirchhoff_bar = math_mul33x33(CPFEM_PK1_bar(:,:,CPFEM_in, cp_en),transpose(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en)))
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
J_inverse = 1.0_pReal/math_det3x3(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en)) J_inverse = 1.0_pReal/math_det3x3(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en))
CPFEM_stress_bar(1:CPFEM_ngens,CPFEM_in,cp_en) = math_Mandel33to6(J_inverse*Kirchhoff_bar) CPFEM_stress_bar(1:CPFEM_ngens,CPFEM_in,cp_en) = math_Mandel33to6(J_inverse*Kirchhoff_bar)
! !

View File

@ -38,8 +38,7 @@ CONTAINS
use debug use debug
use constitutive, only: constitutive_Nstatevars,constitutive_Nresults use constitutive, only: constitutive_Nstatevars,constitutive_Nresults
use mesh, only: mesh_element use mesh, only: mesh_element
use math, only: math_Mandel6to33,math_Mandel33to6,math_Mandel3333to66,& use math
math_I3,math_det3x3,math_invert3x3
implicit none implicit none
! !
character(len=*) msg character(len=*) msg
@ -66,9 +65,9 @@ CONTAINS
Fg_aim = Fg_old ! make "new", "aim" a synonym for "old" Fg_aim = Fg_old ! make "new", "aim" a synonym for "old"
Fp_new = Fp_old Fp_new = Fp_old
call math_invert3x3(Fp_old,inv,det,error) call math_invert3x3(Fp_old,inv,det,error)
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
Fe_new = matmul(Fg_old,inv) Fe_new = math_mul33x33(Fg_old,inv)
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
state_bestguess = state_new ! remember potentially available state guess state_bestguess = state_new ! remember potentially available state guess
state_new = state_old state_new = state_old
! !
@ -91,9 +90,10 @@ CONTAINS
state_new = state_bestguess ! try best available guess for state state_new = state_bestguess ! try best available guess for state
if (dt_aim > 0.0_pReal) then if (dt_aim > 0.0_pReal) then
call math_invert3x3(Fg_aim,inv,det,error) ! inv of Fg_aim call math_invert3x3(Fg_aim,inv,det,error) ! inv of Fg_aim
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
Lp = 0.5_pReal*Lp + 0.5_pReal*(math_I3 - matmul(Fp_current,matmul(inv,Fe_current)))/dt_aim ! interpolate Lp and L Lp = 0.5_pReal*Lp + 0.5_pReal*(math_I3 - math_mul33x33(Fp_current,&
!$OMP END CRITICAL (evilmatmul) math_mul33x33(inv,Fe_current)))/dt_aim ! interpolate Lp and L
!!$OMP END CRITICAL (evilmatmul)
endif endif
!!$OMP CRITICAL (timeint) !!$OMP CRITICAL (timeint)
call TimeIntegration(msg,Lp,Fp_new,Fe_new,P,state_new,post_results,.true., & ! def gradients and PK2 at end of time step call TimeIntegration(msg,Lp,Fp_new,Fe_new,P,state_new,post_results,.true., & ! def gradients and PK2 at end of time step
@ -221,7 +221,6 @@ CONTAINS
constitutive_homogenizedC,constitutive_dotState,constitutive_LpAndItsTangent,& constitutive_homogenizedC,constitutive_dotState,constitutive_LpAndItsTangent,&
constitutive_Nresults,constitutive_Microstructure,constitutive_post_results constitutive_Nresults,constitutive_Microstructure,constitutive_post_results
use math use math
use IO use IO
implicit none implicit none
! !
@ -249,9 +248,9 @@ CONTAINS
return return
endif endif
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
A = matmul(transpose(invFp_old), matmul(transpose(Fg_new),matmul(Fg_new,invFp_old))) A = math_mul33x33(transpose(invFp_old), math_mul33x33(transpose(Fg_new),math_mul33x33(Fg_new,invFp_old)))
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
! !
if (all(state == 0.0_pReal)) then if (all(state == 0.0_pReal)) then
state = state_old ! former state guessed, if none specified state = state_old ! former state guessed, if none specified
@ -291,11 +290,11 @@ Inner: do ! inner iteration: Lp
! !
B = math_i3 - dt*Lpguess B = math_i3 - dt*Lpguess
BT = transpose(B) BT = transpose(B)
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
AB = matmul(A,B) AB = math_mul33x33(A,B)
BTA = matmul(BT,A) BTA = math_mul33x33(BT,A)
Tstar_v = 0.5_pReal*matmul(C_66,math_mandel33to6(matmul(BT,AB)-math_I3)) Tstar_v = 0.5_pReal*math_mul66x6(C_66,math_mandel33to6(math_mul33x33(BT,AB)-math_I3))
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
Tstar = math_Mandel6to33(Tstar_v) Tstar = math_Mandel6to33(Tstar_v)
p_hydro=(Tstar_v(1)+Tstar_v(2)+Tstar_v(3))/3.0_pReal p_hydro=(Tstar_v(1)+Tstar_v(2)+Tstar_v(3))/3.0_pReal
forall(i=1:3) Tstar_v(i) = Tstar_v(i)-p_hydro ! subtract hydrostatic pressure forall(i=1:3) Tstar_v(i) = Tstar_v(i)-p_hydro ! subtract hydrostatic pressure
@ -371,9 +370,9 @@ Inner: do ! inner iteration: Lp
dTdLp(3*(i-1)+j,3*(k-1)+l) = dTdLp(3*(i-1)+j,3*(k-1)+l) + & dTdLp(3*(i-1)+j,3*(k-1)+l) = dTdLp(3*(i-1)+j,3*(k-1)+l) + &
C(i,j,l,n)*AB(k,n)+C(i,j,m,l)*BTA(m,k) C(i,j,l,n)*AB(k,n)+C(i,j,m,l)*BTA(m,k)
dTdLp = -0.5_pReal*dt*dTdLp dTdLp = -0.5_pReal*dt*dTdLp
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
dRdLp = eye2 - matmul(dLp,dTdLp) ! calc dR/dLp dRdLp = eye2 - math_mul99x99(dLp,dTdLp) ! calc dR/dLp
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
invdRdLp = 0.0_pReal invdRdLp = 0.0_pReal
call math_invert(9,dRdLp,invdRdLp,dummy,failed) ! invert dR/dLp --> dLp/dR call math_invert(9,dRdLp,invdRdLp,dummy,failed) ! invert dR/dLp --> dLp/dR
if (failed) then if (failed) then
@ -425,9 +424,9 @@ Inner: do ! inner iteration: Lp
debug_OuterLoopDistribution(iOuter) = debug_OuterLoopDistribution(iOuter)+1 debug_OuterLoopDistribution(iOuter) = debug_OuterLoopDistribution(iOuter)+1
!$OMP END CRITICAL (out) !$OMP END CRITICAL (out)
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
invFp_new = matmul(invFp_old,B) invFp_new = math_mul33x33(invFp_old,B)
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
call math_invert3x3(invFp_new,Fp_new,det,failed) call math_invert3x3(invFp_new,Fp_new,det,failed)
if (failed) then if (failed) then
msg = 'inversion Fp_new^-1' msg = 'inversion Fp_new^-1'
@ -441,10 +440,10 @@ Inner: do ! inner iteration: Lp
! !
Fp_new = Fp_new*det**(1.0_pReal/3.0_pReal) ! regularize Fp by det = det(InvFp_new) !! Fp_new = Fp_new*det**(1.0_pReal/3.0_pReal) ! regularize Fp by det = det(InvFp_new) !!
forall (i=1:3) Tstar_v(i) = Tstar_v(i)+p_hydro ! add hydrostatic component back forall (i=1:3) Tstar_v(i) = Tstar_v(i)+p_hydro ! add hydrostatic component back
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
Fe_new = matmul(Fg_new,invFp_new) ! calc resulting Fe Fe_new = math_mul33x33(Fg_new,invFp_new) ! calc resulting Fe
P = matmul(Fe_new,matmul(Tstar,transpose(invFp_new))) ! first PK stress P = math_mul33x33(Fe_new,math_mul33x33(Tstar,transpose(invFp_new))) ! first PK stress
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
! !
!!$OMP CRITICAL (write2out) !!$OMP CRITICAL (write2out)
! write(6,*) ! write(6,*)

View File

@ -268,6 +268,24 @@
END FUNCTION END FUNCTION
!**************************************************************************
! matrix multiplication 6x6
!**************************************************************************
FUNCTION math_mul66x6(A,B)
use prec, only: pReal, pInt
implicit none
integer(pInt) i
real(pReal), dimension(6) :: math_mul66x6,B
real(pReal), dimension(6,6) :: A
forall (i=1:6) math_mul66x6(i) = &
A(i,1)*B(1) + A(i,2)*B(2) + A(i,3)*B(3) + &
A(i,4)*B(4) + A(i,5)*B(5) + A(i,6)*B(6)
return
END FUNCTION
!************************************************************************** !**************************************************************************
! matrix multiplication 9x9 ! matrix multiplication 9x9
@ -881,10 +899,10 @@
real(pReal), dimension(3,3) :: r real(pReal), dimension(3,3) :: r
real(pReal) math_disorient, tr real(pReal) math_disorient, tr
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
r = matmul(math_EulerToR(EulerB),transpose(math_EulerToR(EulerA))) r = math_mul33x33(math_EulerToR(EulerB),transpose(math_EulerToR(EulerA)))
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
tr = (r(1,1)+r(2,2)+r(3,3)-1.0_pReal)*0.4999999_pReal tr = (r(1,1)+r(2,2)+r(3,3)-1.0_pReal)*0.4999999_pReal
math_disorient = abs(0.5_pReal*pi-asin(tr)) math_disorient = abs(0.5_pReal*pi-asin(tr))
@ -951,10 +969,10 @@ endif
if (rnd(5) <= exp(-1.0_pReal*(math_disorient(origin,disturb)/scatter)**2)) exit if (rnd(5) <= exp(-1.0_pReal*(math_disorient(origin,disturb)/scatter)**2)) exit
enddo enddo
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
math_sampleGaussOri = math_RtoEuler(matmul(math_EulerToR(disturb),math_EulerToR(center))) math_sampleGaussOri = math_RtoEuler(math_mul33x33(math_EulerToR(disturb),math_EulerToR(center)))
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
return return
@ -1029,10 +1047,10 @@ endif
pRot = math_RodrigtoR(axis,angle) pRot = math_RodrigtoR(axis,angle)
! ---# apply the three rotations #--- ! ---# apply the three rotations #---
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot))) math_sampleFiberOri = math_RtoEuler(math_mul33x33(pRot,math_mul33x33(fRot,oRot)))
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
return return
@ -1100,20 +1118,20 @@ math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
real(pReal) FE(3,3),R(3,3),U(3,3),CE(3,3),EW1,EW2,EW3,EB1(3,3),EB2(3,3),EB3(3,3),UI(3,3),det real(pReal) FE(3,3),R(3,3),U(3,3),CE(3,3),EW1,EW2,EW3,EB1(3,3),EB2(3,3),EB3(3,3),UI(3,3),det
error = .false. error = .false.
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
ce=matmul(transpose(fe),fe) ce=math_mul33x33(transpose(fe),fe)
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
CALL math_spectral1(CE,EW1,EW2,EW3,EB1,EB2,EB3) CALL math_spectral1(CE,EW1,EW2,EW3,EB1,EB2,EB3)
U=DSQRT(EW1)*EB1+DSQRT(EW2)*EB2+DSQRT(EW3)*EB3 U=DSQRT(EW1)*EB1+DSQRT(EW2)*EB2+DSQRT(EW3)*EB3
call math_invert3x3(U,UI,det,error) call math_invert3x3(U,UI,det,error)
if (.not. error) then if (.not. error) then
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
R = matmul(fe,ui) R = math_mul33x33(fe,ui)
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
endif endif
@ -1173,10 +1191,10 @@ math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
D3=1.0_pReal/(EW3-EW1)/(EW3-EW2) D3=1.0_pReal/(EW3-EW1)/(EW3-EW2)
M1=M-EW1*math_I3 M1=M-EW1*math_I3
M2=M-EW2*math_I3 M2=M-EW2*math_I3
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
EB3=MATMUL(M1,M2)*D3 EB3=math_mul33x33(M1,M2)*D3
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
EB1=math_I3-EB3 EB1=math_I3-EB3
! both EB2 and EW2 are set to zero so that they do not ! both EB2 and EW2 are set to zero so that they do not
@ -1187,10 +1205,10 @@ math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
D1=1.0_pReal/(EW1-EW2)/(EW1-EW3) D1=1.0_pReal/(EW1-EW2)/(EW1-EW3)
M2=M-math_I3*EW2 M2=M-math_I3*EW2
M3=M-math_I3*EW3 M3=M-math_I3*EW3
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
EB1=MATMUL(M2,M3)*D1 EB1=math_mul33x33(M2,M3)*D1
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
EB2=math_I3-EB1 EB2=math_I3-EB1
! both EB3 and EW3 are set to zero so that they do not ! both EB3 and EW3 are set to zero so that they do not
@ -1201,10 +1219,10 @@ math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
D2=1.0_pReal/(EW2-EW1)/(EW2-EW3) D2=1.0_pReal/(EW2-EW1)/(EW2-EW3)
M1=M-math_I3*EW1 M1=M-math_I3*EW1
M3=M-math_I3*EW3 M3=M-math_I3*EW3
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
EB2=MATMUL(M1,M3)*D2 EB2=math_mul33x33(M1,M3)*D2
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
EB1=math_I3-EB2 EB1=math_I3-EB2
! both EB3 and EW3 are set to zero so that they do not ! both EB3 and EW3 are set to zero so that they do not
@ -1218,12 +1236,12 @@ math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
M1=M-EW1*math_I3 M1=M-EW1*math_I3
M2=M-EW2*math_I3 M2=M-EW2*math_I3
M3=M-EW3*math_I3 M3=M-EW3*math_I3
!$OMP CRITICAL (evilmatmul) !!$OMP CRITICAL (evilmatmul)
EB1=MATMUL(M2,M3)*D1 EB1=math_mul33x33(M2,M3)*D1
EB2=MATMUL(M1,M3)*D2 EB2=math_mul33x33(M1,M3)*D2
EB3=MATMUL(M1,M2)*D3 EB3=math_mul33x33(M1,M2)*D3
!$OMP END CRITICAL (evilmatmul) !!$OMP END CRITICAL (evilmatmul)
END IF END IF
END IF END IF