replaced almost all calls of matmul by our own code, gives better performance in parallel computations

2008-07-23 12:49:40 +00:00 · 2008-07-23 12:49:40 +00:00 · cf1c7ce82a
parent 6abff4b3ba
commit cf1c7ce82a
4 changed files with 81 additions and 64 deletions
--- a/trunk/CPFEM_GIA8.f90
+++ b/trunk/CPFEM_GIA8.f90
@ -128,7 +128,7 @@
 use prec, only: pReal,pInt
 use FEsolving
 use debug
- use math, only: math_init, invnrmMandel, math_identity2nd, math_Mandel3333to66,math_Mandel33to6,math_Mandel6to33,math_det3x3,math_I3
+ use math
 use mesh, only: mesh_init,mesh_FEasCP, mesh_NcpElems, FE_Nips, FE_mapElemtype, mesh_element
 use lattice, only: lattice_init
 use constitutive, only: constitutive_init,constitutive_state_old,constitutive_state_new,material_Cslip_66
@ -195,9 +195,9 @@
           CPFEM_calc_done = .true.                   ! now calc is done
         endif    
 !       translate from P and dP/dF to CS and dCS/dE
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
-       Kirchhoff_bar = matmul(CPFEM_PK1_bar(:,:,CPFEM_in, cp_en),transpose(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en)))
+       Kirchhoff_bar = math_mul33x33(CPFEM_PK1_bar(:,:,CPFEM_in, cp_en),transpose(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en)))
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
       J_inverse  = 1.0_pReal/math_det3x3(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en))
       CPFEM_stress_bar(1:CPFEM_ngens,CPFEM_in,cp_en) = math_Mandel33to6(J_inverse*Kirchhoff_bar)
 !
--- a/trunk/CPFEM_Taylor.f90
+++ b/trunk/CPFEM_Taylor.f90
@ -116,7 +116,7 @@
 use prec, only: pReal,pInt
 use FEsolving
 use debug
- use math, only: math_init, invnrmMandel, math_identity2nd, math_Mandel3333to66,math_Mandel33to6,math_Mandel6to33,math_det3x3,math_I3
+ use math
 use mesh, only: mesh_init,mesh_FEasCP, mesh_NcpElems, FE_Nips, FE_mapElemtype, mesh_element
 use lattice, only: lattice_init
 use constitutive, only: constitutive_init,constitutive_state_old,constitutive_state_new,material_Cslip_66
@ -189,9 +189,9 @@
           CPFEM_calc_done = .true.                   ! now calc is done
         endif    
 !       translate from P and dP/dF to CS and dCS/dE
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
-         Kirchhoff_bar = matmul(CPFEM_PK1_bar(:,:,CPFEM_in, cp_en),transpose(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en)))
+         Kirchhoff_bar = math_mul33x33(CPFEM_PK1_bar(:,:,CPFEM_in, cp_en),transpose(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en)))
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
         J_inverse  = 1.0_pReal/math_det3x3(CPFEM_ffn1_bar(:,:,CPFEM_in, cp_en))
         CPFEM_stress_bar(1:CPFEM_ngens,CPFEM_in,cp_en) = math_Mandel33to6(J_inverse*Kirchhoff_bar)
 !
--- a/trunk/crystallite.f90
+++ b/trunk/crystallite.f90
@ -38,8 +38,7 @@ CONTAINS
 use debug
 use constitutive, only: constitutive_Nstatevars,constitutive_Nresults
 use mesh, only: mesh_element
- use math, only: math_Mandel6to33,math_Mandel33to6,math_Mandel3333to66,&
+ use math
                 math_I3,math_det3x3,math_invert3x3
 implicit none
 !
 character(len=*) msg
@ -66,9 +65,9 @@ CONTAINS
 Fg_aim = Fg_old                                                   ! make "new", "aim" a synonym for "old"
 Fp_new = Fp_old
 call math_invert3x3(Fp_old,inv,det,error)
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
- Fe_new = matmul(Fg_old,inv)
+ Fe_new = math_mul33x33(Fg_old,inv)
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
 state_bestguess = state_new                                       ! remember potentially available state guess
 state_new = state_old
 !
@ -91,9 +90,10 @@ CONTAINS
      state_new = state_bestguess                                   !  try best available guess for state
      if (dt_aim > 0.0_pReal) then
        call math_invert3x3(Fg_aim,inv,det,error)                   !  inv of Fg_aim
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
-        Lp = 0.5_pReal*Lp + 0.5_pReal*(math_I3 -  matmul(Fp_current,matmul(inv,Fe_current)))/dt_aim  ! interpolate Lp  and L
+        Lp = 0.5_pReal*Lp + 0.5_pReal*(math_I3 -  math_mul33x33(Fp_current,&
-!$OMP END CRITICAL (evilmatmul)
+                            math_mul33x33(inv,Fe_current)))/dt_aim  ! interpolate Lp  and L
 !!$OMP END CRITICAL (evilmatmul)
      endif
 !!$OMP CRITICAL (timeint)
   call TimeIntegration(msg,Lp,Fp_new,Fe_new,P,state_new,post_results,.true., &   ! def gradients and PK2 at end of time step
@ -221,7 +221,6 @@ CONTAINS
                         constitutive_homogenizedC,constitutive_dotState,constitutive_LpAndItsTangent,&
                         constitutive_Nresults,constitutive_Microstructure,constitutive_post_results
 use math
 use IO
 implicit none
 !
@ -249,9 +248,9 @@ CONTAINS
    return
 endif
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
- A = matmul(transpose(invFp_old), matmul(transpose(Fg_new),matmul(Fg_new,invFp_old)))
+ A = math_mul33x33(transpose(invFp_old), math_mul33x33(transpose(Fg_new),math_mul33x33(Fg_new,invFp_old)))
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
 !
 if (all(state == 0.0_pReal)) then
   state = state_old    ! former state guessed, if none specified
@ -291,11 +290,11 @@ Inner: do              ! inner iteration: Lp
 !
         B = math_i3 - dt*Lpguess
         BT = transpose(B)
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
-         AB = matmul(A,B)
+         AB = math_mul33x33(A,B)
-         BTA = matmul(BT,A)
+         BTA = math_mul33x33(BT,A)
-         Tstar_v = 0.5_pReal*matmul(C_66,math_mandel33to6(matmul(BT,AB)-math_I3))
+         Tstar_v = 0.5_pReal*math_mul66x6(C_66,math_mandel33to6(math_mul33x33(BT,AB)-math_I3))
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
         Tstar = math_Mandel6to33(Tstar_v)
         p_hydro=(Tstar_v(1)+Tstar_v(2)+Tstar_v(3))/3.0_pReal
         forall(i=1:3) Tstar_v(i) = Tstar_v(i)-p_hydro                ! subtract hydrostatic pressure
@ -371,9 +370,9 @@ Inner: do              ! inner iteration: Lp
             dTdLp(3*(i-1)+j,3*(k-1)+l) = dTdLp(3*(i-1)+j,3*(k-1)+l) + &
             C(i,j,l,n)*AB(k,n)+C(i,j,m,l)*BTA(m,k)
           dTdLp = -0.5_pReal*dt*dTdLp
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
-           dRdLp = eye2 - matmul(dLp,dTdLp)                           ! calc dR/dLp
+           dRdLp = eye2 - math_mul99x99(dLp,dTdLp)                           ! calc dR/dLp
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
           invdRdLp = 0.0_pReal
           call math_invert(9,dRdLp,invdRdLp,dummy,failed)            ! invert dR/dLp --> dLp/dR
           if (failed) then
@ -425,9 +424,9 @@ Inner: do              ! inner iteration: Lp
 debug_OuterLoopDistribution(iOuter) = debug_OuterLoopDistribution(iOuter)+1
 !$OMP END CRITICAL (out)
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
- invFp_new = matmul(invFp_old,B)
+ invFp_new = math_mul33x33(invFp_old,B)
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
 call math_invert3x3(invFp_new,Fp_new,det,failed)
 if (failed) then
   msg = 'inversion Fp_new^-1'
@ -441,10 +440,10 @@ Inner: do              ! inner iteration: Lp
 !
 Fp_new = Fp_new*det**(1.0_pReal/3.0_pReal)     ! regularize Fp by det = det(InvFp_new) !!
 forall (i=1:3) Tstar_v(i) = Tstar_v(i)+p_hydro ! add hydrostatic component back
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
- Fe_new = matmul(Fg_new,invFp_new)              ! calc resulting Fe
+ Fe_new = math_mul33x33(Fg_new,invFp_new)              ! calc resulting Fe
- P = matmul(Fe_new,matmul(Tstar,transpose(invFp_new)))    ! first PK stress
+ P = math_mul33x33(Fe_new,math_mul33x33(Tstar,transpose(invFp_new)))    ! first PK stress
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
 !
 !!$OMP CRITICAL (write2out)
 !    write(6,*)
--- a/trunk/math.f90
+++ b/trunk/math.f90
@ -268,6 +268,24 @@
 END FUNCTION
 !**************************************************************************
 ! matrix multiplication 6x6
 !**************************************************************************
 FUNCTION math_mul66x6(A,B)  
 use prec, only: pReal, pInt
 implicit none
 integer(pInt)  i
 real(pReal), dimension(6) ::  math_mul66x6,B
 real(pReal), dimension(6,6) ::  A
 forall (i=1:6) math_mul66x6(i) = &
   A(i,1)*B(1) + A(i,2)*B(2) + A(i,3)*B(3) + &
   A(i,4)*B(4) + A(i,5)*B(5) + A(i,6)*B(6)
 return
 END FUNCTION
 !**************************************************************************
 ! matrix multiplication 9x9
@ -881,10 +899,10 @@
 real(pReal), dimension(3,3) :: r
 real(pReal) math_disorient, tr
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
- r = matmul(math_EulerToR(EulerB),transpose(math_EulerToR(EulerA)))
+ r = math_mul33x33(math_EulerToR(EulerB),transpose(math_EulerToR(EulerA)))
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
 tr = (r(1,1)+r(2,2)+r(3,3)-1.0_pReal)*0.4999999_pReal
 math_disorient = abs(0.5_pReal*pi-asin(tr))
@ -951,10 +969,10 @@ endif
   if (rnd(5) <= exp(-1.0_pReal*(math_disorient(origin,disturb)/scatter)**2)) exit   
 enddo
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
- math_sampleGaussOri = math_RtoEuler(matmul(math_EulerToR(disturb),math_EulerToR(center)))
+ math_sampleGaussOri = math_RtoEuler(math_mul33x33(math_EulerToR(disturb),math_EulerToR(center)))
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
 return
@ -1029,10 +1047,10 @@ endif
 pRot = math_RodrigtoR(axis,angle)
 ! ---# apply the three rotations #---
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
-math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot))) 
+math_sampleFiberOri = math_RtoEuler(math_mul33x33(pRot,math_mul33x33(fRot,oRot))) 
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
 return
@ -1100,20 +1118,20 @@ math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
 real(pReal) FE(3,3),R(3,3),U(3,3),CE(3,3),EW1,EW2,EW3,EB1(3,3),EB2(3,3),EB3(3,3),UI(3,3),det
 error = .false.
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
- ce=matmul(transpose(fe),fe)
+ ce=math_mul33x33(transpose(fe),fe)
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
 CALL math_spectral1(CE,EW1,EW2,EW3,EB1,EB2,EB3)
 U=DSQRT(EW1)*EB1+DSQRT(EW2)*EB2+DSQRT(EW3)*EB3
 call math_invert3x3(U,UI,det,error)
 if (.not. error) then
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
- R = matmul(fe,ui)
+ R = math_mul33x33(fe,ui)
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
 endif
@ -1173,10 +1191,10 @@ math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
  D3=1.0_pReal/(EW3-EW1)/(EW3-EW2)
  M1=M-EW1*math_I3
  M2=M-EW2*math_I3
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
-  EB3=MATMUL(M1,M2)*D3
+  EB3=math_mul33x33(M1,M2)*D3
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
  EB1=math_I3-EB3
 !  both EB2 and EW2 are set to zero so that they do not
@ -1187,10 +1205,10 @@ math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
  D1=1.0_pReal/(EW1-EW2)/(EW1-EW3)
  M2=M-math_I3*EW2
  M3=M-math_I3*EW3
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
-  EB1=MATMUL(M2,M3)*D1
+  EB1=math_mul33x33(M2,M3)*D1
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
  EB2=math_I3-EB1
 !  both EB3 and EW3 are set to zero so that they do not
@ -1201,10 +1219,10 @@ math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
  D2=1.0_pReal/(EW2-EW1)/(EW2-EW3) 
  M1=M-math_I3*EW1
  M3=M-math_I3*EW3
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
-  EB2=MATMUL(M1,M3)*D2
+  EB2=math_mul33x33(M1,M3)*D2
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
  EB1=math_I3-EB2
 !  both EB3 and EW3 are set to zero so that they do not
@ -1218,12 +1236,12 @@ math_sampleFiberOri = math_RtoEuler(matmul(pRot,matmul(fRot,oRot)))
  M1=M-EW1*math_I3
  M2=M-EW2*math_I3
  M3=M-EW3*math_I3
-!$OMP CRITICAL (evilmatmul)
+!!$OMP CRITICAL (evilmatmul)
-  EB1=MATMUL(M2,M3)*D1
+  EB1=math_mul33x33(M2,M3)*D1
-  EB2=MATMUL(M1,M3)*D2
+  EB2=math_mul33x33(M1,M3)*D2
-  EB3=MATMUL(M1,M2)*D3
+  EB3=math_mul33x33(M1,M2)*D3
-!$OMP END CRITICAL (evilmatmul)
+!!$OMP END CRITICAL (evilmatmul)
   END IF
 END IF