From ac4beea14b718b5c86ebc59356e614dbba0c1a42 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Sun, 13 Feb 2022 21:00:21 +0100
Subject: [PATCH 1/5] use precalculated dyad

---
 src/grid/spectral_utilities.f90 | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/src/grid/spectral_utilities.f90 b/src/grid/spectral_utilities.f90
index 34a976644..28ff0f8b3 100644
--- a/src/grid/spectral_utilities.f90
+++ b/src/grid/spectral_utilities.f90
@@ -372,7 +372,7 @@ subroutine utilities_updateGamma(C)
   C_ref = C
 
   if (.not. num%memory_efficient) then
-    gamma_hat =  cmplx(0.0_pReal,0.0_pReal,pReal)                                                   ! for the singular point and any non invertible A
+    gamma_hat = cmplx(0.0_pReal,0.0_pReal,pReal)                                                    ! for the singular point and any non invertible A
     do k = cells3Offset+1, cells3Offset+cells3; do j = 1, cells(2); do i = 1, grid1Red
       if (any([i,j,k] /= 1)) then                                                                   ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1
         do concurrent (l = 1:3, m = 1:3)
@@ -387,8 +387,7 @@ subroutine utilities_updateGamma(C)
           call math_invert(A_inv, err, A)
           temp33_complex = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal)
           do concurrent(l=1:3, m=1:3, n=1:3, o=1:3)
-            gamma_hat(l,m,n,o,i,j,k-cells3Offset) = temp33_complex(l,n)* &
-                                                    conjg(-xi1st(o,i,j,k-cells3Offset))*xi1st(m,i,j,k-cells3Offset)
+            gamma_hat(l,m,n,o,i,j,k-cells3Offset) = temp33_complex(l,n) * xiDyad_cmplx(o,m)
           end do
         end if
       end if
@@ -507,7 +506,7 @@ subroutine utilities_fourierGammaConvolution(fieldAim)
           call math_invert(A_inv, err, A)
           temp33_complex = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal)
           do concurrent(l=1:3, m=1:3, n=1:3, o=1:3)
-            gamma_hat(l,m,n,o,1,1,1) =  temp33_complex(l,n)*conjg(-xi1st(o,i,j,k))*xi1st(m,i,j,k)
+            gamma_hat(l,m,n,o,1,1,1) = temp33_complex(l,n)*xiDyad_cmplx(o,m)
           end do
         else
           gamma_hat(1:3,1:3,1:3,1:3,1,1,1) = cmplx(0.0_pReal,0.0_pReal,pReal)
@@ -521,7 +520,7 @@ subroutine utilities_fourierGammaConvolution(fieldAim)
   else memoryEfficient
     do k = 1, cells3;  do j = 1, cells(2);  do i = 1,grid1Red
       do concurrent(l = 1:3, m = 1:3)
-        temp33_Complex(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k) * tensorField_fourier(1:3,1:3,i,j,k))
+        temp33_Complex(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k)*tensorField_fourier(1:3,1:3,i,j,k))
       end do
       tensorField_fourier(1:3,1:3,i,j,k) = temp33_Complex
     end do; end do; end do
@@ -884,11 +883,10 @@ pure function utilities_calculateRate(heterogeneous,field0,field,dt,avRate)
   real(pReal),             dimension(3,3,cells(1),cells(2),cells3) :: &
     utilities_calculateRate
 
-  if (heterogeneous) then
-    utilities_calculateRate = (field-field0) / dt
-  else
-    utilities_calculateRate = spread(spread(spread(avRate,3,cells(1)),4,cells(2)),5,cells3)
-  endif
+
+  utilities_calculateRate = merge((field-field0) / dt, &
+                                  spread(spread(spread(avRate,3,cells(1)),4,cells(2)),5,cells3), &
+                                  heterogeneous)
 
 end function utilities_calculateRate
 
@@ -1041,7 +1039,7 @@ subroutine utilities_updateCoords(F)
   rank_b = modulo(worldrank-1_MPI_INTEGER_KIND,worldsize)
 
   ! send bottom layer to process below
-  call MPI_Isend(IPfluct_padded(:,:,:,2),      c,MPI_DOUBLE,rank_b,0_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(1),err_MPI)
+  call MPI_Isend(IPfluct_padded(:,:,:,2),       c,MPI_DOUBLE,rank_b,0_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(1),err_MPI)
   if (err_MPI /= 0_MPI_INTEGER_KIND) error stop 'MPI error'
   call MPI_Irecv(IPfluct_padded(:,:,:,cells3+2),c,MPI_DOUBLE,rank_t,0_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(2),err_MPI)
   if (err_MPI /= 0_MPI_INTEGER_KIND) error stop 'MPI error'
@@ -1049,7 +1047,7 @@ subroutine utilities_updateCoords(F)
   ! send top layer to process above
   call MPI_Isend(IPfluct_padded(:,:,:,cells3+1),c,MPI_DOUBLE,rank_t,1_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(3),err_MPI)
   if (err_MPI /= 0_MPI_INTEGER_KIND) error stop 'MPI error'
-  call MPI_Irecv(IPfluct_padded(:,:,:,1),      c,MPI_DOUBLE,rank_b,1_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(4),err_MPI)
+  call MPI_Irecv(IPfluct_padded(:,:,:,1),       c,MPI_DOUBLE,rank_b,1_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(4),err_MPI)
   if (err_MPI /= 0_MPI_INTEGER_KIND) error stop 'MPI error'
 
   call MPI_Waitall(4,request,status,err_MPI)

From 0008ad1bf8fe7eea6728ab742bd2480b8072a33e Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Mon, 14 Feb 2022 05:57:48 +0100
Subject: [PATCH 2/5] easier to understand

---
 src/grid/spectral_utilities.f90 | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/grid/spectral_utilities.f90 b/src/grid/spectral_utilities.f90
index 28ff0f8b3..08d983c8a 100644
--- a/src/grid/spectral_utilities.f90
+++ b/src/grid/spectral_utilities.f90
@@ -508,13 +508,13 @@ subroutine utilities_fourierGammaConvolution(fieldAim)
           do concurrent(l=1:3, m=1:3, n=1:3, o=1:3)
             gamma_hat(l,m,n,o,1,1,1) = temp33_complex(l,n)*xiDyad_cmplx(o,m)
           end do
+          do concurrent(l = 1:3, m = 1:3)
+            temp33_Complex(l,m) = sum(gamma_hat(l,m,1:3,1:3,1,1,1)*tensorField_fourier(1:3,1:3,i,j,k))
+          end do
+          tensorField_fourier(1:3,1:3,i,j,k) = temp33_Complex
         else
-          gamma_hat(1:3,1:3,1:3,1:3,1,1,1) = cmplx(0.0_pReal,0.0_pReal,pReal)
+          tensorField_fourier(1:3,1:3,i,j,k) = cmplx(0.0_pReal,0.0_pReal,pReal)
         end if
-        do concurrent(l = 1:3, m = 1:3)
-          temp33_Complex(l,m) = sum(gamma_hat(l,m,1:3,1:3,1,1,1)*tensorField_fourier(1:3,1:3,i,j,k))
-        end do
-        tensorField_fourier(1:3,1:3,i,j,k) = temp33_Complex
       end if
     end do; end do; end do
   else memoryEfficient

From 61e11a0529f8f192b411304a8d382feb59532e71 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Mon, 14 Feb 2022 07:58:15 +0100
Subject: [PATCH 3/5] use OpenMP for operations in Fourier space

---
 src/grid/spectral_utilities.f90 | 65 ++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 26 deletions(-)

diff --git a/src/grid/spectral_utilities.f90 b/src/grid/spectral_utilities.f90
index 08d983c8a..418f83b05 100644
--- a/src/grid/spectral_utilities.f90
+++ b/src/grid/spectral_utilities.f90
@@ -362,7 +362,7 @@ end subroutine spectral_utilities_init
 subroutine utilities_updateGamma(C)
 
   real(pReal), intent(in), dimension(3,3,3,3) :: C                                                  !< input stiffness to store as reference stiffness
-  complex(pReal),              dimension(3,3) :: temp33_complex, xiDyad_cmplx
+  complex(pReal),              dimension(3,3) :: temp33_cmplx, xiDyad_cmplx
   real(pReal),                 dimension(6,6) :: A, A_inv
   integer :: &
     i, j, k, &
@@ -373,25 +373,27 @@ subroutine utilities_updateGamma(C)
 
   if (.not. num%memory_efficient) then
     gamma_hat = cmplx(0.0_pReal,0.0_pReal,pReal)                                                    ! for the singular point and any non invertible A
+    !$OMP PARALLEL DO PRIVATE(l,m,n,o,temp33_cmplx,xiDyad_cmplx,A,A_inv,err)
     do k = cells3Offset+1, cells3Offset+cells3; do j = 1, cells(2); do i = 1, grid1Red
       if (any([i,j,k] /= 1)) then                                                                   ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1
         do concurrent (l = 1:3, m = 1:3)
           xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k-cells3Offset))*xi1st(m,i,j,k-cells3Offset)
         end do
         do concurrent(l = 1:3, m = 1:3)
-          temp33_complex(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx)
+          temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx)
         end do
-        A(1:3,1:3) = temp33_complex%re; A(4:6,4:6) =  temp33_complex%re
-        A(1:3,4:6) = temp33_complex%im; A(4:6,1:3) = -temp33_complex%im
+        A(1:3,1:3) = temp33_cmplx%re; A(4:6,4:6) =  temp33_cmplx%re
+        A(1:3,4:6) = temp33_cmplx%im; A(4:6,1:3) = -temp33_cmplx%im
         if (abs(math_det33(A(1:3,1:3))) > 1e-16) then
           call math_invert(A_inv, err, A)
-          temp33_complex = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal)
+          temp33_cmplx = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal)
           do concurrent(l=1:3, m=1:3, n=1:3, o=1:3)
-            gamma_hat(l,m,n,o,i,j,k-cells3Offset) = temp33_complex(l,n) * xiDyad_cmplx(o,m)
+            gamma_hat(l,m,n,o,i,j,k-cells3Offset) = temp33_cmplx(l,n) * xiDyad_cmplx(o,m)
           end do
         end if
       end if
     end do; end do; end do
+    !$OMP END PARALLEL DO
   endif
 
 end subroutine utilities_updateGamma
@@ -477,7 +479,7 @@ end subroutine utilities_FFTvectorBackward
 subroutine utilities_fourierGammaConvolution(fieldAim)
 
   real(pReal), intent(in), dimension(3,3) :: fieldAim                                               !< desired average value of the field after convolution
-  complex(pReal),          dimension(3,3) :: temp33_complex, xiDyad_cmplx
+  complex(pReal),          dimension(3,3) :: temp33_cmplx, xiDyad_cmplx
   real(pReal),             dimension(6,6) :: A, A_inv
 
   integer :: &
@@ -492,38 +494,42 @@ subroutine utilities_fourierGammaConvolution(fieldAim)
 !--------------------------------------------------------------------------------------------------
 ! do the actual spectral method calculation (mechanical equilibrium)
   memoryEfficient: if (num%memory_efficient) then
+    !$OMP PARALLEL DO PRIVATE(l,m,n,o,temp33_cmplx,xiDyad_cmplx,A,A_inv,err,gamma_hat)
     do k = 1, cells3; do j = 1, cells(2); do i = 1, grid1Red
       if (any([i,j,k+cells3Offset] /= 1)) then                                                      ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1
         do concurrent(l = 1:3, m = 1:3)
           xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k))*xi1st(m,i,j,k)
         end do
         do concurrent(l = 1:3, m = 1:3)
-          temp33_complex(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx)
+          temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx)
         end do
-        A(1:3,1:3) = temp33_complex%re; A(4:6,4:6) =  temp33_complex%re
-        A(1:3,4:6) = temp33_complex%im; A(4:6,1:3) = -temp33_complex%im
+        A(1:3,1:3) = temp33_cmplx%re; A(4:6,4:6) =  temp33_cmplx%re
+        A(1:3,4:6) = temp33_cmplx%im; A(4:6,1:3) = -temp33_cmplx%im
         if (abs(math_det33(A(1:3,1:3))) > 1e-16) then
           call math_invert(A_inv, err, A)
-          temp33_complex = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal)
+          temp33_cmplx = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal)
           do concurrent(l=1:3, m=1:3, n=1:3, o=1:3)
-            gamma_hat(l,m,n,o,1,1,1) = temp33_complex(l,n)*xiDyad_cmplx(o,m)
+            gamma_hat(l,m,n,o,1,1,1) = temp33_cmplx(l,n)*xiDyad_cmplx(o,m)
           end do
           do concurrent(l = 1:3, m = 1:3)
-            temp33_Complex(l,m) = sum(gamma_hat(l,m,1:3,1:3,1,1,1)*tensorField_fourier(1:3,1:3,i,j,k))
+            temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,1,1,1)*tensorField_fourier(1:3,1:3,i,j,k))
           end do
-          tensorField_fourier(1:3,1:3,i,j,k) = temp33_Complex
+          tensorField_fourier(1:3,1:3,i,j,k) = temp33_cmplx
         else
           tensorField_fourier(1:3,1:3,i,j,k) = cmplx(0.0_pReal,0.0_pReal,pReal)
         end if
       end if
     end do; end do; end do
+    !$OMP END PARALLEL DO
   else memoryEfficient
+    !$OMP PARALLEL DO PRIVATE(l,m,temp33_cmplx)
     do k = 1, cells3;  do j = 1, cells(2);  do i = 1,grid1Red
       do concurrent(l = 1:3, m = 1:3)
-        temp33_Complex(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k)*tensorField_fourier(1:3,1:3,i,j,k))
+        temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k)*tensorField_fourier(1:3,1:3,i,j,k))
       end do
-      tensorField_fourier(1:3,1:3,i,j,k) = temp33_Complex
+      tensorField_fourier(1:3,1:3,i,j,k) = temp33_cmplx
     end do; end do; end do
+    !$OMP END PARALLEL DO
   end if memoryEfficient
 
   if (cells3Offset == 0) tensorField_fourier(1:3,1:3,1,1,1) = cmplx(fieldAim/wgt,0.0_pReal,pReal)
@@ -543,12 +549,14 @@ subroutine utilities_fourierGreenConvolution(D_ref, mu_ref, Delta_t)
 
 !--------------------------------------------------------------------------------------------------
 ! do the actual spectral method calculation
+  !$OMP PARALLEL DO PRIVATE(GreenOp_hat)
   do k = 1, cells3; do j = 1, cells(2) ;do i = 1, grid1Red
     GreenOp_hat = cmplx(1.0_pReal,0.0_pReal,pReal) &
                 / (cmplx(mu_ref,0.0_pReal,pReal) + cmplx(Delta_t,0.0_pReal) &
                    * sum(conjg(xi1st(1:3,i,j,k))* matmul(cmplx(D_ref,0.0_pReal),xi1st(1:3,i,j,k))))
     scalarField_fourier(i,j,k) = scalarField_fourier(i,j,k)*GreenOp_hat
   enddo; enddo; enddo
+  !$OMP END PARALLEL DO
 
 end subroutine utilities_fourierGreenConvolution
 
@@ -735,9 +743,10 @@ subroutine utilities_fourierScalarGradient()
 
   integer :: i, j, k
 
+
   do k = 1, cells3;  do j = 1, cells(2);  do i = 1,grid1Red
     vectorField_fourier(1:3,i,j,k) = scalarField_fourier(i,j,k)*xi1st(1:3,i,j,k)                    ! ToDo: no -conjg?
-  enddo; enddo; enddo
+  end do; end do; end do
 
 end subroutine utilities_fourierScalarGradient
 
@@ -747,11 +756,9 @@ end subroutine utilities_fourierScalarGradient
 !--------------------------------------------------------------------------------------------------
 subroutine utilities_fourierVectorDivergence()
 
-  integer :: i, j, k
 
-  do k = 1, cells3;  do j = 1, cells(2);  do i = 1,grid1Red
-    scalarField_fourier(i,j,k) = sum(vectorField_fourier(1:3,i,j,k)*conjg(-xi1st(1:3,i,j,k)))
-  enddo; enddo; enddo
+  scalarField_fourier(1:grid1Red,1:cells(2),1:cells3) = sum(vectorField_fourier(1:3,1:grid1Red,1:cells(2),1:cells3) &
+                                                            *conjg(-xi1st))
 
 end subroutine utilities_fourierVectorDivergence
 
@@ -763,11 +770,12 @@ subroutine utilities_fourierVectorGradient()
 
   integer :: i, j, k, m, n
 
+
   do k = 1, cells3;  do j = 1, cells(2);  do i = 1,grid1Red
     do m = 1, 3; do n = 1, 3
       tensorField_fourier(m,n,i,j,k) = vectorField_fourier(m,i,j,k)*xi1st(n,i,j,k)
-    enddo; enddo
-  enddo; enddo; enddo
+    end do; end do
+  end do; end do; end do
 
 end subroutine utilities_fourierVectorGradient
 
@@ -779,9 +787,10 @@ subroutine utilities_fourierTensorDivergence()
 
   integer :: i, j, k
 
+
   do k = 1, cells3;  do j = 1, cells(2);  do i = 1,grid1Red
     vectorField_fourier(:,i,j,k) = matmul(tensorField_fourier(:,:,i,j,k),conjg(-xi1st(:,i,j,k)))
-  enddo; enddo; enddo
+  end do; end do; end do
 
 end subroutine utilities_fourierTensorDivergence
 
@@ -978,6 +987,7 @@ end function utilities_getFreqDerivative
 subroutine utilities_updateCoords(F)
 
   real(pReal),   dimension(3,3,cells(1),cells(2),cells3), intent(in) :: F
+
   real(pReal),   dimension(3,  cells(1),cells(2),cells3)             :: IPcoords
   real(pReal),   dimension(3,  cells(1),cells(2),cells3+2)           :: IPfluct_padded              ! Fluctuations of cell center displacement (padded along z for MPI)
   real(pReal),   dimension(3,  cells(1)+1,cells(2)+1,cells3+1)       :: nodeCoords
@@ -1008,20 +1018,23 @@ subroutine utilities_updateCoords(F)
                         1, 1, 1, &
                         0, 1, 1  ], [3,8])
 
+
   step = geomSize/real(cells, pReal)
  !--------------------------------------------------------------------------------------------------
  ! integration in Fourier space to get fluctuations of cell center discplacements
   tensorField_real(1:3,1:3,1:cells(1),1:cells(2),1:cells3) = F
   call utilities_FFTtensorForward()
 
+  !$OMP PARALLEL DO
   do k = 1, cells3; do j = 1, cells(2); do i = 1, grid1Red
     if (any([i,j,k+cells3Offset] /= 1)) then
       vectorField_fourier(1:3,i,j,k) = matmul(tensorField_fourier(1:3,1:3,i,j,k),xi2nd(1:3,i,j,k)) &
                                      / sum(conjg(-xi2nd(1:3,i,j,k))*xi2nd(1:3,i,j,k)) * cmplx(wgt,0.0,pReal)
     else
       vectorField_fourier(1:3,i,j,k) = cmplx(0.0,0.0,pReal)
-    endif
-  enddo; enddo; enddo
+    end if
+  end do; end do; end do
+  !$OMP END PARALLEL DO
 
   call fftw_mpi_execute_dft_c2r(planVectorBack,vectorField_fourier,vectorField_real)
 

From 466682e9787453fa28dd47c3794c036913c0e2da Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Mon, 14 Feb 2022 08:32:48 +0100
Subject: [PATCH 4/5] missing rename grid -> cells

---
 src/grid/spectral_utilities.f90 | 62 ++++++++++++++++-----------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/src/grid/spectral_utilities.f90 b/src/grid/spectral_utilities.f90
index 418f83b05..f89485c7a 100644
--- a/src/grid/spectral_utilities.f90
+++ b/src/grid/spectral_utilities.f90
@@ -31,7 +31,7 @@ module spectral_utilities
 !--------------------------------------------------------------------------------------------------
 ! grid related information
   real(pReal), protected,  public                :: wgt                                             !< weighting factor 1/Nelems
-  integer,     protected,  public                :: grid1Red                                        !< cells(1)/2
+  integer,     protected,  public                :: cells1Red                                       !< cells(1)/2
   real(pReal), protected,  public,  dimension(3) :: scaledGeomSize                                  !< scaled geometry size for calculation of divergence
 
 !--------------------------------------------------------------------------------------------------
@@ -201,7 +201,7 @@ subroutine spectral_utilities_init
                                 num_grid%get_asString('PETSc_options',defaultVal=''),err_PETSc)
   CHKERRQ(err_PETSc)
 
-  grid1Red = cells(1)/2 + 1
+  cells1Red = cells(1)/2 + 1
   wgt = 1.0/real(product(cells),pReal)
 
   num%memory_efficient      = num_grid%get_asInt('memory_efficient',      defaultVal=1) > 0         ! ToDo: should be logical in YAML file
@@ -265,8 +265,8 @@ subroutine spectral_utilities_init
   gridFFTW = int(cells,C_INTPTR_T)
   alloc_local = fftw_mpi_local_size_3d(gridFFTW(3), gridFFTW(2), gridFFTW(1)/2 +1, &
                                        PETSC_COMM_WORLD, local_K, local_K_offset)
-  allocate (xi1st (3,grid1Red,cells(2),cells3),source = cmplx(0.0_pReal,0.0_pReal,pReal))           ! frequencies for first derivatives, only half the size for first dimension
-  allocate (xi2nd (3,grid1Red,cells(2),cells3),source = cmplx(0.0_pReal,0.0_pReal,pReal))           ! frequencies for second derivatives, only half the size for first dimension
+  allocate (xi1st (3,cells1Red,cells(2),cells3),source = cmplx(0.0_pReal,0.0_pReal,pReal))          ! frequencies for first derivatives, only half the size for first dimension
+  allocate (xi2nd (3,cells1Red,cells(2),cells3),source = cmplx(0.0_pReal,0.0_pReal,pReal))          ! frequencies for second derivatives, only half the size for first dimension
 
   tensorField = fftw_alloc_complex(tensorSize*alloc_local)
   call c_f_pointer(tensorField, tensorField_real,    [3_C_INTPTR_T,3_C_INTPTR_T, &
@@ -333,7 +333,7 @@ subroutine spectral_utilities_init
       do j = 1, cells(2)
         k_s(2) = j - 1
         if (j > cells(2)/2 + 1) k_s(2) = k_s(2) - cells(2)                                          ! running from 0,1,...,N/2,N/2+1,-N/2,-N/2+1,...,-1
-          do i = 1, grid1Red
+          do i = 1, cells1Red
             k_s(1) = i - 1                                                                          ! symmetry, junst running from 0,1,...,N/2,N/2+1
             xi2nd(1:3,i,j,k-cells3Offset) = utilities_getFreqDerivative(k_s)
             where(mod(cells,2)==0 .and. [i,j,k] == cells/2+1 .and. &
@@ -347,7 +347,7 @@ subroutine spectral_utilities_init
   if (num%memory_efficient) then                                                                    ! allocate just single fourth order tensor
     allocate (gamma_hat(3,3,3,3,1,1,1), source = cmplx(0.0_pReal,0.0_pReal,pReal))
   else                                                                                              ! precalculation of gamma_hat field
-    allocate (gamma_hat(3,3,3,3,grid1Red,cells(2),cells3), source = cmplx(0.0_pReal,0.0_pReal,pReal))
+    allocate (gamma_hat(3,3,3,3,cells1Red,cells(2),cells3), source = cmplx(0.0_pReal,0.0_pReal,pReal))
   endif
 
 end subroutine spectral_utilities_init
@@ -374,7 +374,7 @@ subroutine utilities_updateGamma(C)
   if (.not. num%memory_efficient) then
     gamma_hat = cmplx(0.0_pReal,0.0_pReal,pReal)                                                    ! for the singular point and any non invertible A
     !$OMP PARALLEL DO PRIVATE(l,m,n,o,temp33_cmplx,xiDyad_cmplx,A,A_inv,err)
-    do k = cells3Offset+1, cells3Offset+cells3; do j = 1, cells(2); do i = 1, grid1Red
+    do k = cells3Offset+1, cells3Offset+cells3; do j = 1, cells(2); do i = 1, cells1Red
       if (any([i,j,k] /= 1)) then                                                                   ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1
         do concurrent (l = 1:3, m = 1:3)
           xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k-cells3Offset))*xi1st(m,i,j,k-cells3Offset)
@@ -406,7 +406,7 @@ end subroutine utilities_updateGamma
 !--------------------------------------------------------------------------------------------------
 subroutine utilities_FFTtensorForward
 
-  tensorField_real(1:3,1:3,cells(1)+1:grid1Red*2,:,:) = 0.0_pReal
+  tensorField_real(1:3,1:3,cells(1)+1:cells1Red*2,:,:) = 0.0_pReal
   call fftw_mpi_execute_dft_r2c(planTensorForth,tensorField_real,tensorField_fourier)
 
 end subroutine utilities_FFTtensorForward
@@ -430,7 +430,7 @@ end subroutine utilities_FFTtensorBackward
 !--------------------------------------------------------------------------------------------------
 subroutine utilities_FFTscalarForward
 
-  scalarField_real(cells(1)+1:grid1Red*2,:,:) = 0.0_pReal
+  scalarField_real(cells(1)+1:cells1Red*2,:,:) = 0.0_pReal
   call fftw_mpi_execute_dft_r2c(planScalarForth,scalarField_real,scalarField_fourier)
 
 end subroutine utilities_FFTscalarForward
@@ -455,7 +455,7 @@ end subroutine utilities_FFTscalarBackward
 !--------------------------------------------------------------------------------------------------
 subroutine utilities_FFTvectorForward
 
-  vectorField_real(1:3,cells(1)+1:grid1Red*2,:,:) = 0.0_pReal
+  vectorField_real(1:3,cells(1)+1:cells1Red*2,:,:) = 0.0_pReal
   call fftw_mpi_execute_dft_r2c(planVectorForth,vectorField_real,vectorField_fourier)
 
 end subroutine utilities_FFTvectorForward
@@ -495,7 +495,7 @@ subroutine utilities_fourierGammaConvolution(fieldAim)
 ! do the actual spectral method calculation (mechanical equilibrium)
   memoryEfficient: if (num%memory_efficient) then
     !$OMP PARALLEL DO PRIVATE(l,m,n,o,temp33_cmplx,xiDyad_cmplx,A,A_inv,err,gamma_hat)
-    do k = 1, cells3; do j = 1, cells(2); do i = 1, grid1Red
+    do k = 1, cells3; do j = 1, cells(2); do i = 1, cells1Red
       if (any([i,j,k+cells3Offset] /= 1)) then                                                      ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1
         do concurrent(l = 1:3, m = 1:3)
           xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k))*xi1st(m,i,j,k)
@@ -523,7 +523,7 @@ subroutine utilities_fourierGammaConvolution(fieldAim)
     !$OMP END PARALLEL DO
   else memoryEfficient
     !$OMP PARALLEL DO PRIVATE(l,m,temp33_cmplx)
-    do k = 1, cells3;  do j = 1, cells(2);  do i = 1,grid1Red
+    do k = 1, cells3;  do j = 1, cells(2);  do i = 1,cells1Red
       do concurrent(l = 1:3, m = 1:3)
         temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k)*tensorField_fourier(1:3,1:3,i,j,k))
       end do
@@ -550,7 +550,7 @@ subroutine utilities_fourierGreenConvolution(D_ref, mu_ref, Delta_t)
 !--------------------------------------------------------------------------------------------------
 ! do the actual spectral method calculation
   !$OMP PARALLEL DO PRIVATE(GreenOp_hat)
-  do k = 1, cells3; do j = 1, cells(2) ;do i = 1, grid1Red
+  do k = 1, cells3; do j = 1, cells(2) ;do i = 1, cells1Red
     GreenOp_hat = cmplx(1.0_pReal,0.0_pReal,pReal) &
                 / (cmplx(mu_ref,0.0_pReal,pReal) + cmplx(Delta_t,0.0_pReal) &
                    * sum(conjg(xi1st(1:3,i,j,k))* matmul(cmplx(D_ref,0.0_pReal),xi1st(1:3,i,j,k))))
@@ -579,7 +579,7 @@ real(pReal) function utilities_divergenceRMS()
 ! calculating RMS divergence criterion in Fourier space
   utilities_divergenceRMS = 0.0_pReal
   do k = 1, cells3; do j = 1, cells(2)
-    do i = 2, grid1Red -1                                                                           ! Has somewhere a conj. complex counterpart. Therefore count it twice.
+    do i = 2, cells1Red -1                                                                          ! Has somewhere a conj. complex counterpart. Therefore count it twice.
       utilities_divergenceRMS = utilities_divergenceRMS &
             + 2.0_pReal*(sum (real(matmul(tensorField_fourier(1:3,1:3,i,j,k), &                     ! (sqrt(real(a)**2 + aimag(a)**2))**2 = real(a)**2 + aimag(a)**2, i.e. do not take square root and square again
                                           conjg(-xi1st(1:3,i,j,k))*rescaledGeom))**2) &             ! --> sum squared L_2 norm of vector
@@ -591,10 +591,10 @@ real(pReal) function utilities_divergenceRMS()
                                   conjg(-xi1st(1:3,1,j,k))*rescaledGeom))**2) &
                + sum(aimag(matmul(tensorField_fourier(1:3,1:3,1       ,j,k), &
                                   conjg(-xi1st(1:3,1,j,k))*rescaledGeom))**2) &
-               + sum( real(matmul(tensorField_fourier(1:3,1:3,grid1Red,j,k), &
-                                  conjg(-xi1st(1:3,grid1Red,j,k))*rescaledGeom))**2) &
-               + sum(aimag(matmul(tensorField_fourier(1:3,1:3,grid1Red,j,k), &
-                                  conjg(-xi1st(1:3,grid1Red,j,k))*rescaledGeom))**2)
+               + sum( real(matmul(tensorField_fourier(1:3,1:3,cells1Red,j,k), &
+                                  conjg(-xi1st(1:3,cells1Red,j,k))*rescaledGeom))**2) &
+               + sum(aimag(matmul(tensorField_fourier(1:3,1:3,cells1Red,j,k), &
+                                  conjg(-xi1st(1:3,cells1Red,j,k))*rescaledGeom))**2)
   enddo; enddo
   if (cells(1) == 1) utilities_divergenceRMS = utilities_divergenceRMS * 0.5_pReal                   ! counted twice in case of cells(1) == 1
   call MPI_Allreduce(MPI_IN_PLACE,utilities_divergenceRMS,1_MPI_INTEGER_KIND,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD,err_MPI)
@@ -624,7 +624,7 @@ real(pReal) function utilities_curlRMS()
   utilities_curlRMS = 0.0_pReal
 
   do k = 1, cells3; do j = 1, cells(2);
-    do i = 2, grid1Red - 1
+    do i = 2, cells1Red - 1
       do l = 1, 3
         curl_fourier(l,1) = (+tensorField_fourier(l,3,i,j,k)*xi1st(2,i,j,k)*rescaledGeom(2) &
                              -tensorField_fourier(l,2,i,j,k)*xi1st(3,i,j,k)*rescaledGeom(3))
@@ -647,12 +647,12 @@ real(pReal) function utilities_curlRMS()
     utilities_curlRMS = utilities_curlRMS &
                       + sum(curl_fourier%re**2 + curl_fourier%im**2)                                ! this layer (DC) does not have a conjugate complex counterpart (if cells(1) /= 1)
     do l = 1, 3
-      curl_fourier = (+tensorField_fourier(l,3,grid1Red,j,k)*xi1st(2,grid1Red,j,k)*rescaledGeom(2) &
-                      -tensorField_fourier(l,2,grid1Red,j,k)*xi1st(3,grid1Red,j,k)*rescaledGeom(3))
-      curl_fourier = (+tensorField_fourier(l,1,grid1Red,j,k)*xi1st(3,grid1Red,j,k)*rescaledGeom(3) &
-                      -tensorField_fourier(l,3,grid1Red,j,k)*xi1st(1,grid1Red,j,k)*rescaledGeom(1))
-      curl_fourier = (+tensorField_fourier(l,2,grid1Red,j,k)*xi1st(1,grid1Red,j,k)*rescaledGeom(1) &
-                      -tensorField_fourier(l,1,grid1Red,j,k)*xi1st(2,grid1Red,j,k)*rescaledGeom(2))
+      curl_fourier(l,1) = (+tensorField_fourier(l,3,cells1Red,j,k)*xi1st(2,cells1Red,j,k)*rescaledGeom(2) &
+                           -tensorField_fourier(l,2,cells1Red,j,k)*xi1st(3,cells1Red,j,k)*rescaledGeom(3)) ! component (l,1)
+      curl_fourier(l,2) = (+tensorField_fourier(l,1,cells1Red,j,k)*xi1st(3,cells1Red,j,k)*rescaledGeom(3) &
+                           -tensorField_fourier(l,3,cells1Red,j,k)*xi1st(1,cells1Red,j,k)*rescaledGeom(1)) ! component (l,2)
+      curl_fourier(l,3) = (+tensorField_fourier(l,2,cells1Red,j,k)*xi1st(1,cells1Red,j,k)*rescaledGeom(1) &
+                           -tensorField_fourier(l,1,cells1Red,j,k)*xi1st(2,cells1Red,j,k)*rescaledGeom(2)) ! component (l,3)
     enddo
     utilities_curlRMS = utilities_curlRMS &
                       + sum(curl_fourier%re**2 + curl_fourier%im**2)                                ! this layer (Nyquist) does not have a conjugate complex counterpart (if cells(1) /= 1)
@@ -744,7 +744,7 @@ subroutine utilities_fourierScalarGradient()
   integer :: i, j, k
 
 
-  do k = 1, cells3;  do j = 1, cells(2);  do i = 1,grid1Red
+  do k = 1, cells3;  do j = 1, cells(2);  do i = 1,cells1Red
     vectorField_fourier(1:3,i,j,k) = scalarField_fourier(i,j,k)*xi1st(1:3,i,j,k)                    ! ToDo: no -conjg?
   end do; end do; end do
 
@@ -757,8 +757,8 @@ end subroutine utilities_fourierScalarGradient
 subroutine utilities_fourierVectorDivergence()
 
 
-  scalarField_fourier(1:grid1Red,1:cells(2),1:cells3) = sum(vectorField_fourier(1:3,1:grid1Red,1:cells(2),1:cells3) &
-                                                            *conjg(-xi1st))
+  scalarField_fourier(1:cells1Red,1:cells(2),1:cells3) = sum(vectorField_fourier(1:3,1:cells1Red,1:cells(2),1:cells3) &
+                                                             *conjg(-xi1st),1)                        ! reduce over the 3 vector components only (dim=1), one value per Fourier point
 
 end subroutine utilities_fourierVectorDivergence
 
@@ -771,7 +771,7 @@ subroutine utilities_fourierVectorGradient()
   integer :: i, j, k, m, n
 
 
-  do k = 1, cells3;  do j = 1, cells(2);  do i = 1,grid1Red
+  do k = 1, cells3;  do j = 1, cells(2);  do i = 1,cells1Red
     do m = 1, 3; do n = 1, 3
       tensorField_fourier(m,n,i,j,k) = vectorField_fourier(m,i,j,k)*xi1st(n,i,j,k)
     end do; end do
@@ -788,7 +788,7 @@ subroutine utilities_fourierTensorDivergence()
   integer :: i, j, k
 
 
-  do k = 1, cells3;  do j = 1, cells(2);  do i = 1,grid1Red
+  do k = 1, cells3;  do j = 1, cells(2);  do i = 1,cells1Red
     vectorField_fourier(:,i,j,k) = matmul(tensorField_fourier(:,:,i,j,k),conjg(-xi1st(:,i,j,k)))
   end do; end do; end do
 
@@ -1026,7 +1026,7 @@ subroutine utilities_updateCoords(F)
   call utilities_FFTtensorForward()
 
   !$OMP PARALLEL DO
-  do k = 1, cells3; do j = 1, cells(2); do i = 1, grid1Red
+  do k = 1, cells3; do j = 1, cells(2); do i = 1, cells1Red
     if (any([i,j,k+cells3Offset] /= 1)) then
       vectorField_fourier(1:3,i,j,k) = matmul(tensorField_fourier(1:3,1:3,i,j,k),xi2nd(1:3,i,j,k)) &
                                      / sum(conjg(-xi2nd(1:3,i,j,k))*xi2nd(1:3,i,j,k)) * cmplx(wgt,0.0,pReal)

From c66e2336c25d704a36a60ea121c837c34210a388 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Mon, 14 Feb 2022 08:58:40 +0100
Subject: [PATCH 5/5] some versions of ifort have problems with 'do concurrent'

---
 src/grid/spectral_utilities.f90  | 35 ++++++++++++++++++++++++++++++--
 src/math.f90                     | 31 ++++++++--------------------
 src/phase_mechanical_plastic.f90 |  3 +--
 3 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/src/grid/spectral_utilities.f90 b/src/grid/spectral_utilities.f90
index f89485c7a..2a675c9b6 100644
--- a/src/grid/spectral_utilities.f90
+++ b/src/grid/spectral_utilities.f90
@@ -376,20 +376,32 @@ subroutine utilities_updateGamma(C)
     !$OMP PARALLEL DO PRIVATE(l,m,n,o,temp33_cmplx,xiDyad_cmplx,A,A_inv,err)
     do k = cells3Offset+1, cells3Offset+cells3; do j = 1, cells(2); do i = 1, cells1Red
       if (any([i,j,k] /= 1)) then                                                                   ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1
-        do concurrent (l = 1:3, m = 1:3)
+#ifndef __INTEL_COMPILER
+        do concurrent(l = 1:3, m = 1:3)
           xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k-cells3Offset))*xi1st(m,i,j,k-cells3Offset)
         end do
         do concurrent(l = 1:3, m = 1:3)
           temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx)
         end do
+#else
+        forall(l = 1:3, m = 1:3) &
+          xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k-cells3Offset))*xi1st(m,i,j,k-cells3Offset)
+        forall(l = 1:3, m = 1:3) &
+          temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx)
+#endif
         A(1:3,1:3) = temp33_cmplx%re; A(4:6,4:6) =  temp33_cmplx%re
         A(1:3,4:6) = temp33_cmplx%im; A(4:6,1:3) = -temp33_cmplx%im
         if (abs(math_det33(A(1:3,1:3))) > 1e-16) then
           call math_invert(A_inv, err, A)
           temp33_cmplx = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal)
+#ifndef __INTEL_COMPILER
           do concurrent(l=1:3, m=1:3, n=1:3, o=1:3)
             gamma_hat(l,m,n,o,i,j,k-cells3Offset) = temp33_cmplx(l,n) * xiDyad_cmplx(o,m)
           end do
+#else
+          forall(l=1:3, m=1:3, n=1:3, o=1:3) &
+            gamma_hat(l,m,n,o,i,j,k-cells3Offset) = temp33_cmplx(l,n) * xiDyad_cmplx(o,m)
+#endif
         end if
       end if
     end do; end do; end do
@@ -497,23 +509,37 @@ subroutine utilities_fourierGammaConvolution(fieldAim)
     !$OMP PARALLEL DO PRIVATE(l,m,n,o,temp33_cmplx,xiDyad_cmplx,A,A_inv,err,gamma_hat)
     do k = 1, cells3; do j = 1, cells(2); do i = 1, cells1Red
       if (any([i,j,k+cells3Offset] /= 1)) then                                                      ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1
+#ifndef __INTEL_COMPILER
         do concurrent(l = 1:3, m = 1:3)
           xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k))*xi1st(m,i,j,k)
         end do
         do concurrent(l = 1:3, m = 1:3)
           temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx)
         end do
+#else
+        forall(l = 1:3, m = 1:3) &
+          xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k))*xi1st(m,i,j,k)
+        forall(l = 1:3, m = 1:3) &
+          temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx)
+#endif
         A(1:3,1:3) = temp33_cmplx%re; A(4:6,4:6) =  temp33_cmplx%re
         A(1:3,4:6) = temp33_cmplx%im; A(4:6,1:3) = -temp33_cmplx%im
         if (abs(math_det33(A(1:3,1:3))) > 1e-16) then
           call math_invert(A_inv, err, A)
           temp33_cmplx = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal)
+#ifndef __INTEL_COMPILER
           do concurrent(l=1:3, m=1:3, n=1:3, o=1:3)
             gamma_hat(l,m,n,o,1,1,1) = temp33_cmplx(l,n)*xiDyad_cmplx(o,m)
           end do
           do concurrent(l = 1:3, m = 1:3)
             temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,1,1,1)*tensorField_fourier(1:3,1:3,i,j,k))
           end do
+#else
+          forall(l=1:3, m=1:3, n=1:3, o=1:3) &
+            gamma_hat(l,m,n,o,1,1,1) = temp33_cmplx(l,n)*xiDyad_cmplx(o,m)
+          forall(l = 1:3, m = 1:3) &
+            temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,1,1,1)*tensorField_fourier(1:3,1:3,i,j,k))
+#endif
           tensorField_fourier(1:3,1:3,i,j,k) = temp33_cmplx
         else
           tensorField_fourier(1:3,1:3,i,j,k) = cmplx(0.0_pReal,0.0_pReal,pReal)
@@ -524,9 +550,14 @@ subroutine utilities_fourierGammaConvolution(fieldAim)
   else memoryEfficient
     !$OMP PARALLEL DO PRIVATE(l,m,temp33_cmplx)
     do k = 1, cells3;  do j = 1, cells(2);  do i = 1,cells1Red
+#ifndef __INTEL_COMPILER
       do concurrent(l = 1:3, m = 1:3)
         temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k)*tensorField_fourier(1:3,1:3,i,j,k))
       end do
+#else
+      forall(l = 1:3, m = 1:3) &
+        temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k)*tensorField_fourier(1:3,1:3,i,j,k))
+#endif
       tensorField_fourier(1:3,1:3,i,j,k) = temp33_cmplx
     end do; end do; end do
     !$OMP END PARALLEL DO
@@ -758,7 +789,7 @@ subroutine utilities_fourierVectorDivergence()
 
 
   scalarField_fourier(1:cells1Red,1:cells(2),1:cells3) = sum(vectorField_fourier(1:3,1:cells1Red,1:cells(2),1:cells3) &
-                                                             *conjg(-xi1st))
+                                                             *conjg(-xi1st),1)
 
 end subroutine utilities_fourierVectorDivergence
 
diff --git a/src/math.f90 b/src/math.f90
index dd4690672..75b9ddea2 100644
--- a/src/math.f90
+++ b/src/math.f90
@@ -262,9 +262,8 @@ pure function math_identity4th()
     math_identity4th(i,j,k,l) = 0.5_pReal*(math_I3(i,k)*math_I3(j,l)+math_I3(i,l)*math_I3(j,k))
   enddo
 #else
-  do i=1,3; do j=1,3; do k=1,3; do l=1,3
+  forall(i=1:3, j=1:3, k=1:3, l=1:3) &
     math_identity4th(i,j,k,l) = 0.5_pReal*(math_I3(i,k)*math_I3(j,l)+math_I3(i,l)*math_I3(j,k))
-  enddo; enddo; enddo; enddo
 #endif
 
 end function math_identity4th
@@ -338,9 +337,7 @@ pure function math_outer(A,B)
     math_outer(i,j) = A(i)*B(j)
   enddo
 #else
-  do i=1,size(A,1); do j=1,size(B,1)
-    math_outer(i,j) = A(i)*B(j)
-  enddo; enddo
+  forall(i=1:size(A,1), j=1:size(B,1)) math_outer(i,j) = A(i)*B(j)
 #endif
 
 end function math_outer
@@ -387,9 +384,7 @@ pure function math_mul3333xx33(A,B)
     math_mul3333xx33(i,j) = sum(A(i,j,1:3,1:3)*B(1:3,1:3))
   enddo
 #else
-  do i=1,3; do j=1,3
-    math_mul3333xx33(i,j) = sum(A(i,j,1:3,1:3)*B(1:3,1:3))
-  enddo; enddo
+  forall (i=1:3, j=1:3) math_mul3333xx33(i,j) = sum(A(i,j,1:3,1:3)*B(1:3,1:3))
 #endif
 
 end function math_mul3333xx33
@@ -411,9 +406,7 @@ pure function math_mul3333xx3333(A,B)
     math_mul3333xx3333(i,j,k,l) = sum(A(i,j,1:3,1:3)*B(1:3,1:3,k,l))
   enddo
 #else
-  do i=1,3; do j=1,3; do k=1,3; do l=1,3
-    math_mul3333xx3333(i,j,k,l) = sum(A(i,j,1:3,1:3)*B(1:3,1:3,k,l))
-  enddo; enddo; enddo; enddo
+  forall(i=1:3, j=1:3, k=1:3, l=1:3) math_mul3333xx3333(i,j,k,l) = sum(A(i,j,1:3,1:3)*B(1:3,1:3,k,l))
 #endif
 
 end function math_mul3333xx3333
@@ -752,9 +745,7 @@ pure function math_3333to99(m3333)
     math_3333to99(i,j) = m3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j))
   enddo
 #else
-  do i=1,9; do j=1,9
-    math_3333to99(i,j) = m3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j))
-  enddo; enddo
+  forall(i=1:9, j=1:9) math_3333to99(i,j) = m3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j))
 #endif
 
 end function math_3333to99
@@ -775,9 +766,7 @@ pure function math_99to3333(m99)
     math_99to3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j)) = m99(i,j)
   enddo
 #else
-  do i=1,9; do j=1,9
-    math_99to3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j)) = m99(i,j)
-  enddo; enddo
+  forall(i=1:9, j=1:9) math_99to3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j)) = m99(i,j)
 #endif
 
 end function math_99to3333
@@ -810,9 +799,7 @@ pure function math_sym3333to66(m3333,weighted)
     math_sym3333to66(i,j) = w(i)*w(j)*m3333(MAPNYE(1,i),MAPNYE(2,i),MAPNYE(1,j),MAPNYE(2,j))
   enddo
 #else
-  do i=1,6; do j=1,6
-    math_sym3333to66(i,j) = w(i)*w(j)*m3333(MAPNYE(1,i),MAPNYE(2,i),MAPNYE(1,j),MAPNYE(2,j))
-  enddo; enddo
+  forall(i=1:6, j=1:6) math_sym3333to66(i,j) = w(i)*w(j)*m3333(MAPNYE(1,i),MAPNYE(2,i),MAPNYE(1,j),MAPNYE(2,j))
 #endif
 
 end function math_sym3333to66
@@ -950,9 +937,7 @@ pure function math_3333toVoigt66_stiffness(C) result(C_tilde)
     C_tilde(i,j) = C(MAPVOIGT(1,i),MAPVOIGT(2,i),MAPVOIGT(1,j),MAPVOIGT(2,j))
   end do
 #else
-  do i=1,6; do j=1,6
-    C_tilde(i,j) = C(MAPVOIGT(1,i),MAPVOIGT(2,i),MAPVOIGT(1,j),MAPVOIGT(2,j))
-  end do; end do
+  forall(i=1:6, j=1:6) C_tilde(i,j) = C(MAPVOIGT(1,i),MAPVOIGT(2,i),MAPVOIGT(1,j),MAPVOIGT(2,j))
 #endif
 
 end function math_3333toVoigt66_stiffness
diff --git a/src/phase_mechanical_plastic.f90 b/src/phase_mechanical_plastic.f90
index 72b67ef64..3915c3b2d 100644
--- a/src/phase_mechanical_plastic.f90
+++ b/src/phase_mechanical_plastic.f90
@@ -379,10 +379,9 @@ module function plastic_deltaState(ph, en) result(broken)
     en
   logical :: broken
 
-  real(pReal),               dimension(3,3) :: &
+  real(pReal), dimension(3,3) :: &
     Mp
   integer :: &
-    myOffset, &
     mySize