Merge branch 'spectral-polish' into 'development'

Spectral polish See merge request damask/DAMASK!524
2022-02-15 12:32:09 +00:00 · 2022-02-15 12:32:09 +00:00 · b0bbb1c286
parent bdeb3e042e c66e2336c2
commit b0bbb1c286
3 changed files with 119 additions and 93 deletions
--- a/src/grid/spectral_utilities.f90
+++ b/src/grid/spectral_utilities.f90
@ -31,7 +31,7 @@ module spectral_utilities
 !--------------------------------------------------------------------------------------------------
 ! grid related information
  real(pReal), protected,  public                :: wgt                                             !< weighting factor 1/Nelems
-  integer,     protected,  public                :: grid1Red                                        !< cells(1)/2
+  integer,     protected,  public                :: cells1Red                                       !< cells(1)/2
  real(pReal), protected,  public,  dimension(3) :: scaledGeomSize                                  !< scaled geometry size for calculation of divergence

 !--------------------------------------------------------------------------------------------------
@ -201,7 +201,7 @@ subroutine spectral_utilities_init
                                num_grid%get_asString('PETSc_options',defaultVal=''),err_PETSc)
  CHKERRQ(err_PETSc)

-  grid1Red = cells(1)/2 + 1
+  cells1Red = cells(1)/2 + 1
  wgt = 1.0/real(product(cells),pReal)

  num%memory_efficient      = num_grid%get_asInt('memory_efficient',      defaultVal=1) > 0         ! ToDo: should be logical in YAML file
@ -265,8 +265,8 @@ subroutine spectral_utilities_init
  gridFFTW = int(cells,C_INTPTR_T)
  alloc_local = fftw_mpi_local_size_3d(gridFFTW(3), gridFFTW(2), gridFFTW(1)/2 +1, &
                                       PETSC_COMM_WORLD, local_K, local_K_offset)
-  allocate (xi1st (3,grid1Red,cells(2),cells3),source = cmplx(0.0_pReal,0.0_pReal,pReal))           ! frequencies for first derivatives, only half the size for first dimension
-  allocate (xi2nd (3,grid1Red,cells(2),cells3),source = cmplx(0.0_pReal,0.0_pReal,pReal))           ! frequencies for second derivatives, only half the size for first dimension
+  allocate (xi1st (3,cells1Red,cells(2),cells3),source = cmplx(0.0_pReal,0.0_pReal,pReal))          ! frequencies for first derivatives, only half the size for first dimension
+  allocate (xi2nd (3,cells1Red,cells(2),cells3),source = cmplx(0.0_pReal,0.0_pReal,pReal))          ! frequencies for second derivatives, only half the size for first dimension

  tensorField = fftw_alloc_complex(tensorSize*alloc_local)
  call c_f_pointer(tensorField, tensorField_real,    [3_C_INTPTR_T,3_C_INTPTR_T, &
@ -333,7 +333,7 @@ subroutine spectral_utilities_init
      do j = 1, cells(2)
        k_s(2) = j - 1
        if (j > cells(2)/2 + 1) k_s(2) = k_s(2) - cells(2)                                          ! running from 0,1,...,N/2,N/2+1,-N/2,-N/2+1,...,-1
-          do i = 1, grid1Red
+          do i = 1, cells1Red
            k_s(1) = i - 1                                                                          ! symmetry, junst running from 0,1,...,N/2,N/2+1
            xi2nd(1:3,i,j,k-cells3Offset) = utilities_getFreqDerivative(k_s)
            where(mod(cells,2)==0 .and. [i,j,k] == cells/2+1 .and. &
@ -347,7 +347,7 @@ subroutine spectral_utilities_init
  if (num%memory_efficient) then                                                                    ! allocate just single fourth order tensor
    allocate (gamma_hat(3,3,3,3,1,1,1), source = cmplx(0.0_pReal,0.0_pReal,pReal))
  else                                                                                              ! precalculation of gamma_hat field
-    allocate (gamma_hat(3,3,3,3,grid1Red,cells(2),cells3), source = cmplx(0.0_pReal,0.0_pReal,pReal))
+    allocate (gamma_hat(3,3,3,3,cells1Red,cells(2),cells3), source = cmplx(0.0_pReal,0.0_pReal,pReal))
  endif

 end subroutine spectral_utilities_init
@ -362,7 +362,7 @@ end subroutine spectral_utilities_init
 subroutine utilities_updateGamma(C)

  real(pReal), intent(in), dimension(3,3,3,3) :: C                                                  !< input stiffness to store as reference stiffness
-  complex(pReal),              dimension(3,3) :: temp33_complex, xiDyad_cmplx
+  complex(pReal),              dimension(3,3) :: temp33_cmplx, xiDyad_cmplx
  real(pReal),                 dimension(6,6) :: A, A_inv
  integer :: &
    i, j, k, &
@ -372,27 +372,40 @@ subroutine utilities_updateGamma(C)
  C_ref = C

  if (.not. num%memory_efficient) then
-    gamma_hat =  cmplx(0.0_pReal,0.0_pReal,pReal)                                                   ! for the singular point and any non invertible A
-    do k = cells3Offset+1, cells3Offset+cells3; do j = 1, cells(2); do i = 1, grid1Red
+    gamma_hat = cmplx(0.0_pReal,0.0_pReal,pReal)                                                    ! for the singular point and any non invertible A
+    !$OMP PARALLEL DO PRIVATE(l,m,n,o,temp33_cmplx,xiDyad_cmplx,A,A_inv,err)
+    do k = cells3Offset+1, cells3Offset+cells3; do j = 1, cells(2); do i = 1, cells1Red
      if (any([i,j,k] /= 1)) then                                                                   ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1
-        do concurrent (l = 1:3, m = 1:3)
+#ifndef __INTEL_COMPILER
+        do concurrent(l = 1:3, m = 1:3)
          xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k-cells3Offset))*xi1st(m,i,j,k-cells3Offset)
        end do
        do concurrent(l = 1:3, m = 1:3)
-          temp33_complex(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx)
+          temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx)
        end do
-        A(1:3,1:3) = temp33_complex%re; A(4:6,4:6) =  temp33_complex%re
-        A(1:3,4:6) = temp33_complex%im; A(4:6,1:3) = -temp33_complex%im
+#else
+        forall(l = 1:3, m = 1:3) &
+          xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k-cells3Offset))*xi1st(m,i,j,k-cells3Offset)
+        forall(l = 1:3, m = 1:3) &
+          temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx)
+#endif
+        A(1:3,1:3) = temp33_cmplx%re; A(4:6,4:6) =  temp33_cmplx%re
+        A(1:3,4:6) = temp33_cmplx%im; A(4:6,1:3) = -temp33_cmplx%im
        if (abs(math_det33(A(1:3,1:3))) > 1e-16) then
          call math_invert(A_inv, err, A)
-          temp33_complex = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal)
+          temp33_cmplx = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal)
+#ifndef __INTEL_COMPILER
          do concurrent(l=1:3, m=1:3, n=1:3, o=1:3)
-            gamma_hat(l,m,n,o,i,j,k-cells3Offset) = temp33_complex(l,n)* &
-                                                    conjg(-xi1st(o,i,j,k-cells3Offset))*xi1st(m,i,j,k-cells3Offset)
+            gamma_hat(l,m,n,o,i,j,k-cells3Offset) = temp33_cmplx(l,n) * xiDyad_cmplx(o,m)
          end do
+#else
+          forall(l=1:3, m=1:3, n=1:3, o=1:3) &
+            gamma_hat(l,m,n,o,i,j,k-cells3Offset) = temp33_cmplx(l,n) * xiDyad_cmplx(o,m)
+#endif
        end if
      end if
    end do; end do; end do
+    !$OMP END PARALLEL DO
  endif

 end subroutine utilities_updateGamma
@ -405,7 +418,7 @@ end subroutine utilities_updateGamma
 !--------------------------------------------------------------------------------------------------
 subroutine utilities_FFTtensorForward

-  tensorField_real(1:3,1:3,cells(1)+1:grid1Red*2,:,:) = 0.0_pReal
+  tensorField_real(1:3,1:3,cells(1)+1:cells1Red*2,:,:) = 0.0_pReal
  call fftw_mpi_execute_dft_r2c(planTensorForth,tensorField_real,tensorField_fourier)

 end subroutine utilities_FFTtensorForward
@ -429,7 +442,7 @@ end subroutine utilities_FFTtensorBackward
 !--------------------------------------------------------------------------------------------------
 subroutine utilities_FFTscalarForward

-  scalarField_real(cells(1)+1:grid1Red*2,:,:) = 0.0_pReal
+  scalarField_real(cells(1)+1:cells1Red*2,:,:) = 0.0_pReal
  call fftw_mpi_execute_dft_r2c(planScalarForth,scalarField_real,scalarField_fourier)

 end subroutine utilities_FFTscalarForward
@ -454,7 +467,7 @@ end subroutine utilities_FFTscalarBackward
 !--------------------------------------------------------------------------------------------------
 subroutine utilities_FFTvectorForward

-  vectorField_real(1:3,cells(1)+1:grid1Red*2,:,:) = 0.0_pReal
+  vectorField_real(1:3,cells(1)+1:cells1Red*2,:,:) = 0.0_pReal
  call fftw_mpi_execute_dft_r2c(planVectorForth,vectorField_real,vectorField_fourier)

 end subroutine utilities_FFTvectorForward
@ -478,7 +491,7 @@ end subroutine utilities_FFTvectorBackward
 subroutine utilities_fourierGammaConvolution(fieldAim)

  real(pReal), intent(in), dimension(3,3) :: fieldAim                                               !< desired average value of the field after convolution
-  complex(pReal),          dimension(3,3) :: temp33_complex, xiDyad_cmplx
+  complex(pReal),          dimension(3,3) :: temp33_cmplx, xiDyad_cmplx
  real(pReal),             dimension(6,6) :: A, A_inv

  integer :: &
@ -493,38 +506,61 @@ subroutine utilities_fourierGammaConvolution(fieldAim)
 !--------------------------------------------------------------------------------------------------
 ! do the actual spectral method calculation (mechanical equilibrium)
  memoryEfficient: if (num%memory_efficient) then
-    do k = 1, cells3; do j = 1, cells(2); do i = 1, grid1Red
+    !$OMP PARALLEL DO PRIVATE(l,m,n,o,temp33_cmplx,xiDyad_cmplx,A,A_inv,err,gamma_hat)
+    do k = 1, cells3; do j = 1, cells(2); do i = 1, cells1Red
      if (any([i,j,k+cells3Offset] /= 1)) then                                                      ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1
+#ifndef __INTEL_COMPILER
        do concurrent(l = 1:3, m = 1:3)
          xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k))*xi1st(m,i,j,k)
        end do
        do concurrent(l = 1:3, m = 1:3)
-          temp33_complex(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx)
+          temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx)
        end do
-        A(1:3,1:3) = temp33_complex%re; A(4:6,4:6) =  temp33_complex%re
-        A(1:3,4:6) = temp33_complex%im; A(4:6,1:3) = -temp33_complex%im
+#else
+        forall(l = 1:3, m = 1:3) &
+          xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k))*xi1st(m,i,j,k)
+        forall(l = 1:3, m = 1:3) &
+          temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx)
+#endif
+        A(1:3,1:3) = temp33_cmplx%re; A(4:6,4:6) =  temp33_cmplx%re
+        A(1:3,4:6) = temp33_cmplx%im; A(4:6,1:3) = -temp33_cmplx%im
        if (abs(math_det33(A(1:3,1:3))) > 1e-16) then
          call math_invert(A_inv, err, A)
-          temp33_complex = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal)
+          temp33_cmplx = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal)
+#ifndef __INTEL_COMPILER
          do concurrent(l=1:3, m=1:3, n=1:3, o=1:3)
-            gamma_hat(l,m,n,o,1,1,1) =  temp33_complex(l,n)*conjg(-xi1st(o,i,j,k))*xi1st(m,i,j,k)
+            gamma_hat(l,m,n,o,1,1,1) = temp33_cmplx(l,n)*xiDyad_cmplx(o,m)
          end do
+          do concurrent(l = 1:3, m = 1:3)
+            temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,1,1,1)*tensorField_fourier(1:3,1:3,i,j,k))
+          end do
+#else
+          forall(l=1:3, m=1:3, n=1:3, o=1:3) &
+            gamma_hat(l,m,n,o,1,1,1) = temp33_cmplx(l,n)*xiDyad_cmplx(o,m)
+          forall(l = 1:3, m = 1:3) &
+            temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,1,1,1)*tensorField_fourier(1:3,1:3,i,j,k))
+#endif
+          tensorField_fourier(1:3,1:3,i,j,k) = temp33_cmplx
        else
-          gamma_hat(1:3,1:3,1:3,1:3,1,1,1) = cmplx(0.0_pReal,0.0_pReal,pReal)
+          tensorField_fourier(1:3,1:3,i,j,k) = cmplx(0.0_pReal,0.0_pReal,pReal)
        end if
-        do concurrent(l = 1:3, m = 1:3)
-          temp33_Complex(l,m) = sum(gamma_hat(l,m,1:3,1:3,1,1,1)*tensorField_fourier(1:3,1:3,i,j,k))
-        end do
-        tensorField_fourier(1:3,1:3,i,j,k) = temp33_Complex
      end if
    end do; end do; end do
+    !$OMP END PARALLEL DO
  else memoryEfficient
-    do k = 1, cells3;  do j = 1, cells(2);  do i = 1,grid1Red
+    !$OMP PARALLEL DO PRIVATE(l,m,temp33_cmplx)
+    do k = 1, cells3;  do j = 1, cells(2);  do i = 1,cells1Red
+#ifndef __INTEL_COMPILER
      do concurrent(l = 1:3, m = 1:3)
-        temp33_Complex(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k) * tensorField_fourier(1:3,1:3,i,j,k))
+        temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k)*tensorField_fourier(1:3,1:3,i,j,k))
      end do
-      tensorField_fourier(1:3,1:3,i,j,k) = temp33_Complex
+#else
+      forall(l = 1:3, m = 1:3) &
+        temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k)*tensorField_fourier(1:3,1:3,i,j,k))
+#endif
+      tensorField_fourier(1:3,1:3,i,j,k) = temp33_cmplx
    end do; end do; end do
+    !$OMP END PARALLEL DO
  end if memoryEfficient

  if (cells3Offset == 0) tensorField_fourier(1:3,1:3,1,1,1) = cmplx(fieldAim/wgt,0.0_pReal,pReal)
@ -544,12 +580,14 @@ subroutine utilities_fourierGreenConvolution(D_ref, mu_ref, Delta_t)

 !--------------------------------------------------------------------------------------------------
 ! do the actual spectral method calculation
-  do k = 1, cells3; do j = 1, cells(2) ;do i = 1, grid1Red
+  !$OMP PARALLEL DO PRIVATE(GreenOp_hat)
+  do k = 1, cells3; do j = 1, cells(2) ;do i = 1, cells1Red
    GreenOp_hat = cmplx(1.0_pReal,0.0_pReal,pReal) &
                / (cmplx(mu_ref,0.0_pReal,pReal) + cmplx(Delta_t,0.0_pReal) &
                   * sum(conjg(xi1st(1:3,i,j,k))* matmul(cmplx(D_ref,0.0_pReal),xi1st(1:3,i,j,k))))
    scalarField_fourier(i,j,k) = scalarField_fourier(i,j,k)*GreenOp_hat
  enddo; enddo; enddo
+  !$OMP END PARALLEL DO

 end subroutine utilities_fourierGreenConvolution

@ -572,7 +610,7 @@ real(pReal) function utilities_divergenceRMS()
 ! calculating RMS divergence criterion in Fourier space
  utilities_divergenceRMS = 0.0_pReal
  do k = 1, cells3; do j = 1, cells(2)
-    do i = 2, grid1Red -1                                                                           ! Has somewhere a conj. complex counterpart. Therefore count it twice.
+    do i = 2, cells1Red -1                                                                          ! Has somewhere a conj. complex counterpart. Therefore count it twice.
      utilities_divergenceRMS = utilities_divergenceRMS &
            + 2.0_pReal*(sum (real(matmul(tensorField_fourier(1:3,1:3,i,j,k), &                     ! (sqrt(real(a)**2 + aimag(a)**2))**2 = real(a)**2 + aimag(a)**2, i.e. do not take square root and square again
                                          conjg(-xi1st(1:3,i,j,k))*rescaledGeom))**2) &             ! --> sum squared L_2 norm of vector
@ -584,10 +622,10 @@ real(pReal) function utilities_divergenceRMS()
                                  conjg(-xi1st(1:3,1,j,k))*rescaledGeom))**2) &
               + sum(aimag(matmul(tensorField_fourier(1:3,1:3,1       ,j,k), &
                                  conjg(-xi1st(1:3,1,j,k))*rescaledGeom))**2) &
-               + sum( real(matmul(tensorField_fourier(1:3,1:3,grid1Red,j,k), &
-                                  conjg(-xi1st(1:3,grid1Red,j,k))*rescaledGeom))**2) &
-               + sum(aimag(matmul(tensorField_fourier(1:3,1:3,grid1Red,j,k), &
-                                  conjg(-xi1st(1:3,grid1Red,j,k))*rescaledGeom))**2)
+               + sum( real(matmul(tensorField_fourier(1:3,1:3,cells1Red,j,k), &
+                                  conjg(-xi1st(1:3,cells1Red,j,k))*rescaledGeom))**2) &
+               + sum(aimag(matmul(tensorField_fourier(1:3,1:3,cells1Red,j,k), &
+                                  conjg(-xi1st(1:3,cells1Red,j,k))*rescaledGeom))**2)
  enddo; enddo
  if (cells(1) == 1) utilities_divergenceRMS = utilities_divergenceRMS * 0.5_pReal                   ! counted twice in case of cells(1) == 1
  call MPI_Allreduce(MPI_IN_PLACE,utilities_divergenceRMS,1_MPI_INTEGER_KIND,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD,err_MPI)
@ -617,7 +655,7 @@ real(pReal) function utilities_curlRMS()
  utilities_curlRMS = 0.0_pReal

  do k = 1, cells3; do j = 1, cells(2);
-    do i = 2, grid1Red - 1
+    do i = 2, cells1Red - 1
      do l = 1, 3
        curl_fourier(l,1) = (+tensorField_fourier(l,3,i,j,k)*xi1st(2,i,j,k)*rescaledGeom(2) &
                             -tensorField_fourier(l,2,i,j,k)*xi1st(3,i,j,k)*rescaledGeom(3))
@ -640,12 +678,12 @@ real(pReal) function utilities_curlRMS()
    utilities_curlRMS = utilities_curlRMS &
                      + sum(curl_fourier%re**2 + curl_fourier%im**2)                                ! this layer (DC) does not have a conjugate complex counterpart (if cells(1) /= 1)
    do l = 1, 3
-      curl_fourier = (+tensorField_fourier(l,3,grid1Red,j,k)*xi1st(2,grid1Red,j,k)*rescaledGeom(2) &
-                      -tensorField_fourier(l,2,grid1Red,j,k)*xi1st(3,grid1Red,j,k)*rescaledGeom(3))
-      curl_fourier = (+tensorField_fourier(l,1,grid1Red,j,k)*xi1st(3,grid1Red,j,k)*rescaledGeom(3) &
-                      -tensorField_fourier(l,3,grid1Red,j,k)*xi1st(1,grid1Red,j,k)*rescaledGeom(1))
-      curl_fourier = (+tensorField_fourier(l,2,grid1Red,j,k)*xi1st(1,grid1Red,j,k)*rescaledGeom(1) &
-                      -tensorField_fourier(l,1,grid1Red,j,k)*xi1st(2,grid1Red,j,k)*rescaledGeom(2))
+      curl_fourier = (+tensorField_fourier(l,3,cells1Red,j,k)*xi1st(2,cells1Red,j,k)*rescaledGeom(2) &
+                      -tensorField_fourier(l,2,cells1Red,j,k)*xi1st(3,cells1Red,j,k)*rescaledGeom(3))
+      curl_fourier = (+tensorField_fourier(l,1,cells1Red,j,k)*xi1st(3,cells1Red,j,k)*rescaledGeom(3) &
+                      -tensorField_fourier(l,3,cells1Red,j,k)*xi1st(1,cells1Red,j,k)*rescaledGeom(1))
+      curl_fourier = (+tensorField_fourier(l,2,cells1Red,j,k)*xi1st(1,cells1Red,j,k)*rescaledGeom(1) &
+                      -tensorField_fourier(l,1,cells1Red,j,k)*xi1st(2,cells1Red,j,k)*rescaledGeom(2))
    enddo
    utilities_curlRMS = utilities_curlRMS &
                      + sum(curl_fourier%re**2 + curl_fourier%im**2)                                ! this layer (Nyquist) does not have a conjugate complex counterpart (if cells(1) /= 1)
@ -736,9 +774,10 @@ subroutine utilities_fourierScalarGradient()

  integer :: i, j, k

-  do k = 1, cells3;  do j = 1, cells(2);  do i = 1,grid1Red
+
+  do k = 1, cells3;  do j = 1, cells(2);  do i = 1,cells1Red
    vectorField_fourier(1:3,i,j,k) = scalarField_fourier(i,j,k)*xi1st(1:3,i,j,k)                    ! ToDo: no -conjg?
-  enddo; enddo; enddo
+  end do; end do; end do

 end subroutine utilities_fourierScalarGradient

@ -748,11 +787,9 @@ end subroutine utilities_fourierScalarGradient
 !--------------------------------------------------------------------------------------------------
 subroutine utilities_fourierVectorDivergence()

-  integer :: i, j, k

-  do k = 1, cells3;  do j = 1, cells(2);  do i = 1,grid1Red
-    scalarField_fourier(i,j,k) = sum(vectorField_fourier(1:3,i,j,k)*conjg(-xi1st(1:3,i,j,k)))
-  enddo; enddo; enddo
+  scalarField_fourier(1:cells1Red,1:cells(2),1:cells3) = sum(vectorField_fourier(1:3,1:cells1Red,1:cells(2),1:cells3) &
+                                                             *conjg(-xi1st),1)

 end subroutine utilities_fourierVectorDivergence

@ -764,11 +801,12 @@ subroutine utilities_fourierVectorGradient()

  integer :: i, j, k, m, n

-  do k = 1, cells3;  do j = 1, cells(2);  do i = 1,grid1Red
+
+  do k = 1, cells3;  do j = 1, cells(2);  do i = 1,cells1Red
    do m = 1, 3; do n = 1, 3
      tensorField_fourier(m,n,i,j,k) = vectorField_fourier(m,i,j,k)*xi1st(n,i,j,k)
-    enddo; enddo
-  enddo; enddo; enddo
+    end do; end do
+  end do; end do; end do

 end subroutine utilities_fourierVectorGradient

@ -780,9 +818,10 @@ subroutine utilities_fourierTensorDivergence()

  integer :: i, j, k

-  do k = 1, cells3;  do j = 1, cells(2);  do i = 1,grid1Red
+
+  do k = 1, cells3;  do j = 1, cells(2);  do i = 1,cells1Red
    vectorField_fourier(:,i,j,k) = matmul(tensorField_fourier(:,:,i,j,k),conjg(-xi1st(:,i,j,k)))
-  enddo; enddo; enddo
+  end do; end do; end do

 end subroutine utilities_fourierTensorDivergence

@ -884,11 +923,10 @@ pure function utilities_calculateRate(heterogeneous,field0,field,dt,avRate)
  real(pReal),             dimension(3,3,cells(1),cells(2),cells3) :: &
    utilities_calculateRate

-  if (heterogeneous) then
-    utilities_calculateRate = (field-field0) / dt
-  else
-    utilities_calculateRate = spread(spread(spread(avRate,3,cells(1)),4,cells(2)),5,cells3)
-  endif
+
+  utilities_calculateRate = merge((field-field0) / dt, &
+                                  spread(spread(spread(avRate,3,cells(1)),4,cells(2)),5,cells3), &
+                                  heterogeneous)

 end function utilities_calculateRate

@ -980,6 +1018,7 @@ end function utilities_getFreqDerivative
 subroutine utilities_updateCoords(F)

  real(pReal),   dimension(3,3,cells(1),cells(2),cells3), intent(in) :: F
+
  real(pReal),   dimension(3,  cells(1),cells(2),cells3)             :: IPcoords
  real(pReal),   dimension(3,  cells(1),cells(2),cells3+2)           :: IPfluct_padded              ! Fluctuations of cell center displacement (padded along z for MPI)
  real(pReal),   dimension(3,  cells(1)+1,cells(2)+1,cells3+1)       :: nodeCoords
@ -1010,20 +1049,23 @@ subroutine utilities_updateCoords(F)
                        1, 1, 1, &
                        0, 1, 1  ], [3,8])

+
  step = geomSize/real(cells, pReal)
 !--------------------------------------------------------------------------------------------------
 ! integration in Fourier space to get fluctuations of cell center discplacements
  tensorField_real(1:3,1:3,1:cells(1),1:cells(2),1:cells3) = F
  call utilities_FFTtensorForward()

-  do k = 1, cells3; do j = 1, cells(2); do i = 1, grid1Red
+  !$OMP PARALLEL DO
+  do k = 1, cells3; do j = 1, cells(2); do i = 1, cells1Red
    if (any([i,j,k+cells3Offset] /= 1)) then
      vectorField_fourier(1:3,i,j,k) = matmul(tensorField_fourier(1:3,1:3,i,j,k),xi2nd(1:3,i,j,k)) &
                                     / sum(conjg(-xi2nd(1:3,i,j,k))*xi2nd(1:3,i,j,k)) * cmplx(wgt,0.0,pReal)
    else
      vectorField_fourier(1:3,i,j,k) = cmplx(0.0,0.0,pReal)
-    endif
-  enddo; enddo; enddo
+    end if
+  end do; end do; end do
+  !$OMP END PARALLEL DO

  call fftw_mpi_execute_dft_c2r(planVectorBack,vectorField_fourier,vectorField_real)

@ -1041,7 +1083,7 @@ subroutine utilities_updateCoords(F)
  rank_b = modulo(worldrank-1_MPI_INTEGER_KIND,worldsize)

  ! send bottom layer to process below
-  call MPI_Isend(IPfluct_padded(:,:,:,2),      c,MPI_DOUBLE,rank_b,0_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(1),err_MPI)
+  call MPI_Isend(IPfluct_padded(:,:,:,2),       c,MPI_DOUBLE,rank_b,0_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(1),err_MPI)
  if (err_MPI /= 0_MPI_INTEGER_KIND) error stop 'MPI error'
  call MPI_Irecv(IPfluct_padded(:,:,:,cells3+2),c,MPI_DOUBLE,rank_t,0_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(2),err_MPI)
  if (err_MPI /= 0_MPI_INTEGER_KIND) error stop 'MPI error'
@ -1049,7 +1091,7 @@ subroutine utilities_updateCoords(F)
  ! send top layer to process above
  call MPI_Isend(IPfluct_padded(:,:,:,cells3+1),c,MPI_DOUBLE,rank_t,1_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(3),err_MPI)
  if (err_MPI /= 0_MPI_INTEGER_KIND) error stop 'MPI error'
-  call MPI_Irecv(IPfluct_padded(:,:,:,1),      c,MPI_DOUBLE,rank_b,1_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(4),err_MPI)
+  call MPI_Irecv(IPfluct_padded(:,:,:,1),       c,MPI_DOUBLE,rank_b,1_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(4),err_MPI)
  if (err_MPI /= 0_MPI_INTEGER_KIND) error stop 'MPI error'

  call MPI_Waitall(4,request,status,err_MPI)
--- a/src/math.f90
+++ b/src/math.f90
@ -262,9 +262,8 @@ pure function math_identity4th()
    math_identity4th(i,j,k,l) = 0.5_pReal*(math_I3(i,k)*math_I3(j,l)+math_I3(i,l)*math_I3(j,k))
  enddo
 #else
-  do i=1,3; do j=1,3; do k=1,3; do l=1,3
+  forall(i=1:3, j=1:3, k=1:3, l=1:3) &
    math_identity4th(i,j,k,l) = 0.5_pReal*(math_I3(i,k)*math_I3(j,l)+math_I3(i,l)*math_I3(j,k))
-  enddo; enddo; enddo; enddo
 #endif

 end function math_identity4th
@ -338,9 +337,7 @@ pure function math_outer(A,B)
    math_outer(i,j) = A(i)*B(j)
  enddo
 #else
-  do i=1,size(A,1); do j=1,size(B,1)
-    math_outer(i,j) = A(i)*B(j)
-  enddo; enddo
+  forall(i=1:size(A,1), j=1:size(B,1)) math_outer(i,j) = A(i)*B(j)
 #endif

 end function math_outer
@ -387,9 +384,7 @@ pure function math_mul3333xx33(A,B)
    math_mul3333xx33(i,j) = sum(A(i,j,1:3,1:3)*B(1:3,1:3))
  enddo
 #else
-  do i=1,3; do j=1,3
-    math_mul3333xx33(i,j) = sum(A(i,j,1:3,1:3)*B(1:3,1:3))
-  enddo; enddo
+  forall (i=1:3, j=1:3) math_mul3333xx33(i,j) = sum(A(i,j,1:3,1:3)*B(1:3,1:3))
 #endif

 end function math_mul3333xx33
@ -411,9 +406,7 @@ pure function math_mul3333xx3333(A,B)
    math_mul3333xx3333(i,j,k,l) = sum(A(i,j,1:3,1:3)*B(1:3,1:3,k,l))
  enddo
 #else
-  do i=1,3; do j=1,3; do k=1,3; do l=1,3
-    math_mul3333xx3333(i,j,k,l) = sum(A(i,j,1:3,1:3)*B(1:3,1:3,k,l))
-  enddo; enddo; enddo; enddo
+  forall(i=1:3, j=1:3, k=1:3, l=1:3) math_mul3333xx3333(i,j,k,l) = sum(A(i,j,1:3,1:3)*B(1:3,1:3,k,l))
 #endif

 end function math_mul3333xx3333
@ -752,9 +745,7 @@ pure function math_3333to99(m3333)
    math_3333to99(i,j) = m3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j))
  enddo
 #else
-  do i=1,9; do j=1,9
-    math_3333to99(i,j) = m3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j))
-  enddo; enddo
+  forall(i=1:9, j=1:9) math_3333to99(i,j) = m3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j))
 #endif

 end function math_3333to99
@ -775,9 +766,7 @@ pure function math_99to3333(m99)
    math_99to3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j)) = m99(i,j)
  enddo
 #else
-  do i=1,9; do j=1,9
-    math_99to3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j)) = m99(i,j)
-  enddo; enddo
+  forall(i=1:9, j=1:9) math_99to3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j)) = m99(i,j)
 #endif

 end function math_99to3333
@ -810,9 +799,7 @@ pure function math_sym3333to66(m3333,weighted)
    math_sym3333to66(i,j) = w(i)*w(j)*m3333(MAPNYE(1,i),MAPNYE(2,i),MAPNYE(1,j),MAPNYE(2,j))
  enddo
 #else
-  do i=1,6; do j=1,6
-    math_sym3333to66(i,j) = w(i)*w(j)*m3333(MAPNYE(1,i),MAPNYE(2,i),MAPNYE(1,j),MAPNYE(2,j))
-  enddo; enddo
+  forall(i=1:6, j=1:6) math_sym3333to66(i,j) = w(i)*w(j)*m3333(MAPNYE(1,i),MAPNYE(2,i),MAPNYE(1,j),MAPNYE(2,j))
 #endif

 end function math_sym3333to66
@ -950,9 +937,7 @@ pure function math_3333toVoigt66_stiffness(C) result(C_tilde)
    C_tilde(i,j) = C(MAPVOIGT(1,i),MAPVOIGT(2,i),MAPVOIGT(1,j),MAPVOIGT(2,j))
  end do
 #else
-  do i=1,6; do j=1,6
-    C_tilde(i,j) = C(MAPVOIGT(1,i),MAPVOIGT(2,i),MAPVOIGT(1,j),MAPVOIGT(2,j))
-  end do; end do
+  forall(i=1:6, j=1:6) C_tilde(i,j) = C(MAPVOIGT(1,i),MAPVOIGT(2,i),MAPVOIGT(1,j),MAPVOIGT(2,j))
 #endif

 end function math_3333toVoigt66_stiffness
--- a/src/phase_mechanical_plastic.f90
+++ b/src/phase_mechanical_plastic.f90
@ -379,10 +379,9 @@ module function plastic_deltaState(ph, en) result(broken)
    en
  logical :: broken

-  real(pReal),               dimension(3,3) :: &
+  real(pReal), dimension(3,3) :: &
    Mp
  integer :: &
-    myOffset, &
    mySize