integer exponents: faster and shorter

This commit is contained in:
Martin Diehl 2021-11-25 20:52:22 +01:00
parent 72c717d51d
commit bfc6b69ee2
8 changed files with 42 additions and 42 deletions

View File

@ -697,9 +697,9 @@ subroutine formJacobian(da_local,x_local,Jac_pre,Jac,dummy,ierr)
!--------------------------------------------------------------------------------------------------
! applying boundary conditions
diag = (C_volAvg(1,1,1,1)/delta(1)**2.0_pReal + &
C_volAvg(2,2,2,2)/delta(2)**2.0_pReal + &
C_volAvg(3,3,3,3)/delta(3)**2.0_pReal)*detJ
diag = (C_volAvg(1,1,1,1)/delta(1)**2 + &
C_volAvg(2,2,2,2)/delta(2)**2 + &
C_volAvg(3,3,3,3)/delta(3)**2)*detJ
call MatZeroRowsColumns(Jac,size(rows),rows,diag,PETSC_NULL_VEC,PETSC_NULL_VEC,ierr)
CHKERRQ(ierr)
call DMGetGlobalVector(da_local,coordinates,ierr); CHKERRQ(ierr)

View File

@ -578,22 +578,22 @@ real(pReal) function utilities_divergenceRMS()
do k = 1, grid3; do j = 1, grid(2)
do i = 2, grid1Red -1 ! Has somewhere a conj. complex counterpart. Therefore count it twice.
utilities_divergenceRMS = utilities_divergenceRMS &
+ 2.0_pReal*(sum (real(matmul(tensorField_fourier(1:3,1:3,i,j,k),& ! (sqrt(real(a)**2 + aimag(a)**2))**2 = real(a)**2 + aimag(a)**2. do not take square root and square again
conjg(-xi1st(1:3,i,j,k))*rescaledGeom))**2.0_pReal)& ! --> sum squared L_2 norm of vector
+ 2.0_pReal*(sum (real(matmul(tensorField_fourier(1:3,1:3,i,j,k), & ! (sqrt(real(a)**2 + aimag(a)**2))**2 = real(a)**2 + aimag(a)**2. do not take square root and square again
conjg(-xi1st(1:3,i,j,k))*rescaledGeom))**2) & ! --> sum squared L_2 norm of vector
+sum(aimag(matmul(tensorField_fourier(1:3,1:3,i,j,k),&
conjg(-xi1st(1:3,i,j,k))*rescaledGeom))**2.0_pReal))
conjg(-xi1st(1:3,i,j,k))*rescaledGeom))**2))
enddo
utilities_divergenceRMS = utilities_divergenceRMS & ! these two layers (DC and Nyquist) do not have a conjugate complex counterpart (if grid(1) /= 1)
+ sum( real(matmul(tensorField_fourier(1:3,1:3,1 ,j,k), &
conjg(-xi1st(1:3,1,j,k))*rescaledGeom))**2.0_pReal) &
conjg(-xi1st(1:3,1,j,k))*rescaledGeom))**2) &
+ sum(aimag(matmul(tensorField_fourier(1:3,1:3,1 ,j,k), &
conjg(-xi1st(1:3,1,j,k))*rescaledGeom))**2.0_pReal) &
conjg(-xi1st(1:3,1,j,k))*rescaledGeom))**2) &
+ sum( real(matmul(tensorField_fourier(1:3,1:3,grid1Red,j,k), &
conjg(-xi1st(1:3,grid1Red,j,k))*rescaledGeom))**2.0_pReal) &
conjg(-xi1st(1:3,grid1Red,j,k))*rescaledGeom))**2) &
+ sum(aimag(matmul(tensorField_fourier(1:3,1:3,grid1Red,j,k), &
conjg(-xi1st(1:3,grid1Red,j,k))*rescaledGeom))**2.0_pReal)
conjg(-xi1st(1:3,grid1Red,j,k))*rescaledGeom))**2)
enddo; enddo
if (grid(1) == 1) utilities_divergenceRMS = utilities_divergenceRMS * 0.5_pReal ! counted twice in case of grid(1) == 1
if (grid(1) == 1) utilities_divergenceRMS = utilities_divergenceRMS * 0.5_pReal ! counted twice in case of grid(1) == 1
call MPI_Allreduce(MPI_IN_PLACE,utilities_divergenceRMS,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD,ierr)
if (ierr /=0) error stop 'MPI error'
utilities_divergenceRMS = sqrt(utilities_divergenceRMS) * wgt ! RMS in real space calculated with Parsevals theorem from Fourier space
@ -630,7 +630,7 @@ real(pReal) function utilities_curlRMS()
-tensorField_fourier(l,1,i,j,k)*xi1st(2,i,j,k)*rescaledGeom(2))
enddo
utilities_curlRMS = utilities_curlRMS &
+2.0_pReal*sum(curl_fourier%re**2.0_pReal+curl_fourier%im**2.0_pReal) ! Has somewhere a conj. complex counterpart. Therefore count it twice.
+2.0_pReal*sum(curl_fourier%re**2+curl_fourier%im**2) ! Has somewhere a conj. complex counterpart. Therefore count it twice.
enddo
do l = 1, 3
curl_fourier = (+tensorField_fourier(l,3,1,j,k)*xi1st(2,1,j,k)*rescaledGeom(2) &
@ -641,7 +641,7 @@ real(pReal) function utilities_curlRMS()
-tensorField_fourier(l,1,1,j,k)*xi1st(2,1,j,k)*rescaledGeom(2))
enddo
utilities_curlRMS = utilities_curlRMS &
+ sum(curl_fourier%re**2.0_pReal + curl_fourier%im**2.0_pReal) ! this layer (DC) does not have a conjugate complex counterpart (if grid(1) /= 1)
+ sum(curl_fourier%re**2 + curl_fourier%im**2) ! this layer (DC) does not have a conjugate complex counterpart (if grid(1) /= 1)
do l = 1, 3
curl_fourier = (+tensorField_fourier(l,3,grid1Red,j,k)*xi1st(2,grid1Red,j,k)*rescaledGeom(2) &
-tensorField_fourier(l,2,grid1Red,j,k)*xi1st(3,grid1Red,j,k)*rescaledGeom(3))
@ -651,13 +651,13 @@ real(pReal) function utilities_curlRMS()
-tensorField_fourier(l,1,grid1Red,j,k)*xi1st(2,grid1Red,j,k)*rescaledGeom(2))
enddo
utilities_curlRMS = utilities_curlRMS &
+ sum(curl_fourier%re**2.0_pReal + curl_fourier%im**2.0_pReal) ! this layer (Nyquist) does not have a conjugate complex counterpart (if grid(1) /= 1)
+ sum(curl_fourier%re**2 + curl_fourier%im**2) ! this layer (Nyquist) does not have a conjugate complex counterpart (if grid(1) /= 1)
enddo; enddo
call MPI_Allreduce(MPI_IN_PLACE,utilities_curlRMS,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD,ierr)
if (ierr /=0) error stop 'MPI error'
utilities_curlRMS = sqrt(utilities_curlRMS) * wgt
if (grid(1) == 1) utilities_curlRMS = utilities_curlRMS * 0.5_pReal ! counted twice in case of grid(1) == 1
if (grid(1) == 1) utilities_curlRMS = utilities_curlRMS * 0.5_pReal ! counted twice in case of grid(1) == 1
end function utilities_curlRMS
@ -836,13 +836,13 @@ subroutine utilities_constitutiveResponse(P,P_av,C_volAvg,C_minmaxAvg,&
dPdF_min = huge(1.0_pReal)
dPdF_norm_min = huge(1.0_pReal)
do i = 1, product(grid(1:2))*grid3
if (dPdF_norm_max < sum(homogenization_dPdF(1:3,1:3,1:3,1:3,i)**2.0_pReal)) then
if (dPdF_norm_max < sum(homogenization_dPdF(1:3,1:3,1:3,1:3,i)**2)) then
dPdF_max = homogenization_dPdF(1:3,1:3,1:3,1:3,i)
dPdF_norm_max = sum(homogenization_dPdF(1:3,1:3,1:3,1:3,i)**2.0_pReal)
dPdF_norm_max = sum(homogenization_dPdF(1:3,1:3,1:3,1:3,i)**2)
endif
if (dPdF_norm_min > sum(homogenization_dPdF(1:3,1:3,1:3,1:3,i)**2.0_pReal)) then
if (dPdF_norm_min > sum(homogenization_dPdF(1:3,1:3,1:3,1:3,i)**2)) then
dPdF_min = homogenization_dPdF(1:3,1:3,1:3,1:3,i)
dPdF_norm_min = sum(homogenization_dPdF(1:3,1:3,1:3,1:3,i)**2.0_pReal)
dPdF_norm_min = sum(homogenization_dPdF(1:3,1:3,1:3,1:3,i)**2)
endif
enddo

View File

@ -546,7 +546,7 @@ module function RGC_updateState(P,F,avgF,dt,dPdF,ce) result(doneAndHappy)
do k = 1,3; do l = 1,3
nDef(i,j) = nDef(i,j) - nVect(k)*gDef(i,l)*math_LeviCivita(j,k,l) ! compute the interface mismatch tensor from the jump of deformation gradient
end do; end do
nDefNorm = nDefNorm + nDef(i,j)**2.0_pReal ! compute the norm of the mismatch tensor
nDefNorm = nDefNorm + nDef(i,j)**2 ! compute the norm of the mismatch tensor
end do; end do
nDefNorm = max(nDefToler,sqrt(nDefNorm)) ! approximation to zero mismatch if mismatch is zero (singularity)
nMis(abs(intFace(1)),iGrain) = nMis(abs(intFace(1)),iGrain) + nDefNorm ! total amount of mismatch experienced by the grain (at all six interfaces)

View File

@ -473,13 +473,13 @@ function lattice_characteristicShear_Twin(Ntwin,lattice,CoverA) result(character
p = sum(HEX_NTWINSYSTEM(1:f-1))+s
select case(HEX_SHEARTWIN(p)) ! from Christian & Mahajan 1995 p.29
case (1) ! <-10.1>{10.2}
characteristicShear(a) = (3.0_pReal-cOverA**2.0_pReal)/sqrt(3.0_pReal)/CoverA
characteristicShear(a) = (3.0_pReal-cOverA**2)/sqrt(3.0_pReal)/CoverA
case (2) ! <11.6>{-1-1.1}
characteristicShear(a) = 1.0_pReal/cOverA
case (3) ! <10.-2>{10.1}
characteristicShear(a) = (4.0_pReal*cOverA**2.0_pReal-9.0_pReal)/sqrt(48.0_pReal)/cOverA
characteristicShear(a) = (4.0_pReal*cOverA**2-9.0_pReal)/sqrt(48.0_pReal)/cOverA
case (4) ! <11.-3>{11.2}
characteristicShear(a) = 2.0_pReal*(cOverA**2.0_pReal-2.0_pReal)/3.0_pReal/cOverA
characteristicShear(a) = 2.0_pReal*(cOverA**2-2.0_pReal)/3.0_pReal/cOverA
end select
case default
call IO_error(137,ext_msg='lattice_characteristicShear_Twin: '//trim(lattice))
@ -558,11 +558,11 @@ function lattice_C66_trans(Ntrans,C_parent66,lattice_target, &
C_bar66(1,4) = (C_parent66(1,1) - C_parent66(1,2) - 2.0_pReal*C_parent66(4,4)) /(3.0_pReal*sqrt(2.0_pReal))
C_target_unrotated66 = 0.0_pReal
C_target_unrotated66(1,1) = C_bar66(1,1) - C_bar66(1,4)**2.0_pReal/C_bar66(4,4)
C_target_unrotated66(1,2) = C_bar66(1,2) + C_bar66(1,4)**2.0_pReal/C_bar66(4,4)
C_target_unrotated66(1,1) = C_bar66(1,1) - C_bar66(1,4)**2/C_bar66(4,4)
C_target_unrotated66(1,2) = C_bar66(1,2) + C_bar66(1,4)**2/C_bar66(4,4)
C_target_unrotated66(1,3) = C_bar66(1,3)
C_target_unrotated66(3,3) = C_bar66(3,3)
C_target_unrotated66(4,4) = C_bar66(4,4) - C_bar66(1,4)**2.0_pReal/(0.5_pReal*(C_bar66(1,1) - C_bar66(1,2)))
C_target_unrotated66(4,4) = C_bar66(4,4) - C_bar66(1,4)**2/(0.5_pReal*(C_bar66(1,1) - C_bar66(1,2)))
C_target_unrotated66 = lattice_symmetrize_C66(C_target_unrotated66,'hP')
elseif (lattice_target == 'cI') then
if (a_bcc <= 0.0_pReal .or. a_fcc <= 0.0_pReal) &

View File

@ -924,7 +924,7 @@ real(pReal) function math_sampleGaussVar(mu, sigma, width)
do
call random_number(rnd)
scatter = width_ * (2.0_pReal * rnd(1) - 1.0_pReal)
if (rnd(2) <= exp(-0.5_pReal * scatter ** 2.0_pReal)) exit ! test if scattered value is drawn
if (rnd(2) <= exp(-0.5_pReal * scatter**2)) exit ! test if scattered value is drawn
enddo
math_sampleGaussVar = scatter * sigma

View File

@ -99,13 +99,13 @@ module subroutine thermalexpansion_LiAndItsTangent(Li, dLi_dTstar, ph,me)
associate(prm => param(kinematics_thermal_expansion_instance(ph)))
Li = dot_T * ( &
prm%A(1:3,1:3,1) & ! constant coefficient
+ prm%A(1:3,1:3,2)*(T - prm%T_ref)**1.0_pReal & ! linear coefficient
+ prm%A(1:3,1:3,3)*(T - prm%T_ref)**2.0_pReal & ! quadratic coefficient
+ prm%A(1:3,1:3,2)*(T - prm%T_ref)**1 & ! linear coefficient
+ prm%A(1:3,1:3,3)*(T - prm%T_ref)**2 & ! quadratic coefficient
) / &
(1.0_pReal &
+ prm%A(1:3,1:3,1)*(T - prm%T_ref)**1.0_pReal / 1.0_pReal &
+ prm%A(1:3,1:3,2)*(T - prm%T_ref)**2.0_pReal / 2.0_pReal &
+ prm%A(1:3,1:3,3)*(T - prm%T_ref)**3.0_pReal / 3.0_pReal &
+ prm%A(1:3,1:3,1)*(T - prm%T_ref)**1 / 1.0_pReal &
+ prm%A(1:3,1:3,2)*(T - prm%T_ref)**2 / 2.0_pReal &
+ prm%A(1:3,1:3,3)*(T - prm%T_ref)**3 / 3.0_pReal &
)
end associate
dLi_dTstar = 0.0_pReal

View File

@ -620,7 +620,7 @@ module subroutine nonlocal_dependentState(ph, en, ip, el)
* spread(( 1.0_pReal - prm%f_F &
+ prm%f_F &
* log(0.35_pReal * prm%b_sl * sqrt(max(stt%rho_forest(:,en),prm%rho_significant))) &
/ log(0.35_pReal * prm%b_sl * 1e6_pReal))** 2.0_pReal,2,prm%sum_N_sl)
/ log(0.35_pReal * prm%b_sl * 1e6_pReal))**2,2,prm%sum_N_sl)
else
myInteractionMatrix = prm%h_sl_sl
end if
@ -646,7 +646,7 @@ module subroutine nonlocal_dependentState(ph, en, ip, el)
rhoExcess(1,:) = rho_edg_delta
rhoExcess(2,:) = rho_scr_delta
FVsize = geom(ph)%V_0(en) ** (1.0_pReal/3.0_pReal)
FVsize = geom(ph)%V_0(en)**(1.0_pReal/3.0_pReal)
!* loop through my neighborhood and get the connection vectors (in lattice frame) and the excess densities
@ -1304,10 +1304,10 @@ function rhoDotFlux(timestep,ph,en,ip,el)
* math_inner(m(1:3,s,t), normal_neighbor2me) * area ! positive line length that wants to enter through this interface
where (compatibility(c,:,s,n,ip,el) > 0.0_pReal) &
rhoDotFlux(:,t) = rhoDotFlux(1:ns,t) &
+ lineLength/IPvolume(ip,el)*compatibility(c,:,s,n,ip,el)**2.0_pReal ! transferring to equally signed mobile dislocation type
+ lineLength/IPvolume(ip,el)*compatibility(c,:,s,n,ip,el)**2 ! transferring to equally signed mobile dislocation type
where (compatibility(c,:,s,n,ip,el) < 0.0_pReal) &
rhoDotFlux(:,topp) = rhoDotFlux(:,topp) &
+ lineLength/IPvolume(ip,el)*compatibility(c,:,s,n,ip,el)**2.0_pReal ! transferring to opposite signed mobile dislocation type
+ lineLength/IPvolume(ip,el)*compatibility(c,:,s,n,ip,el)**2 ! transferring to opposite signed mobile dislocation type
end if
end do
@ -1336,7 +1336,7 @@ function rhoDotFlux(timestep,ph,en,ip,el)
c = (t + 1) / 2
if (v0(s,t) * math_inner(m(1:3,s,t), normal_me2neighbor) > 0.0_pReal ) then ! flux from en to my neighbor == leaving flux for en (might also be a pure flux from my mobile density to dead density if interface not at all transmissive)
if (v0(s,t) * neighbor_v0(s,t) >= 0.0_pReal) then ! no sign change in flux density
transmissivity = sum(compatibility(c,:,s,n,ip,el)**2.0_pReal) ! overall transmissivity from this slip system to my neighbor
transmissivity = sum(compatibility(c,:,s,n,ip,el)**2) ! overall transmissivity from this slip system to my neighbor
else ! sign change in flux density means sign change in stress which does not allow for dislocations to arive at the neighbor
transmissivity = 0.0_pReal
end if
@ -1663,7 +1663,7 @@ pure subroutine kinetics(v, dv_dtau, dv_dtauNS, tau, tauNS, tauThreshold, c, T,
!* Peierls contribution
tauEff = max(0.0_pReal, abs(tauNS(s)) - tauThreshold(s))
lambda_P = prm%b_sl(s)
activationVolume_P = prm%w *prm%b_sl(s)**3.0_pReal
activationVolume_P = prm%w *prm%b_sl(s)**3
criticalStress_P = prm%peierlsStress(s,c)
activationEnergy_P = criticalStress_P * activationVolume_P
tauRel_P = min(1.0_pReal, tauEff / criticalStress_P)
@ -1678,7 +1678,7 @@ pure subroutine kinetics(v, dv_dtau, dv_dtauNS, tau, tauNS, tauThreshold, c, T,
! Contribution from solid solution strengthening
tauEff = abs(tau(s)) - tauThreshold(s)
lambda_S = prm%b_sl(s) / sqrt(prm%c_sol)
activationVolume_S = prm%f_sol * prm%b_sl(s)**3.0_pReal / sqrt(prm%c_sol)
activationVolume_S = prm%f_sol * prm%b_sl(s)**3 / sqrt(prm%c_sol)
criticalStress_S = prm%Q_sol / activationVolume_S
tauRel_S = min(1.0_pReal, tauEff / criticalStress_S)
tSolidSolution = 1.0_pReal / prm%nu_a &
@ -1694,8 +1694,8 @@ pure subroutine kinetics(v, dv_dtau, dv_dtauNS, tau, tauNS, tauThreshold, c, T,
v(s) = sign(1.0_pReal,tau(s)) &
/ (tPeierls / lambda_P + tSolidSolution / lambda_S + prm%B /(prm%b_sl(s) * tauEff))
dv_dtau(s) = v(s)**2.0_pReal * (dtSolidSolution_dtau / lambda_S + prm%B / (prm%b_sl(s) * tauEff**2.0_pReal))
dv_dtauNS(s) = v(s)**2.0_pReal * dtPeierls_dtau / lambda_P
dv_dtau(s) = v(s)**2 * (dtSolidSolution_dtau / lambda_S + prm%B / (prm%b_sl(s) * tauEff**2))
dv_dtauNS(s) = v(s)**2 * dtPeierls_dtau / lambda_P
end if
end do

View File

@ -1099,7 +1099,7 @@ pure function ho2ax(ho) result(ax)
+0.000003953714684212874_pReal, -0.00000036555001439719544_pReal ]
! normalize h and store the magnitude
hmag_squared = sum(ho**2.0_pReal)
hmag_squared = sum(ho**2)
if (dEq0(hmag_squared)) then
ax = [ 0.0_pReal, 0.0_pReal, 1.0_pReal, 0.0_pReal ]
else