From ac4beea14b718b5c86ebc59356e614dbba0c1a42 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sun, 13 Feb 2022 21:00:21 +0100 Subject: [PATCH 1/5] use precalculated dyad --- src/grid/spectral_utilities.f90 | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/grid/spectral_utilities.f90 b/src/grid/spectral_utilities.f90 index 34a976644..28ff0f8b3 100644 --- a/src/grid/spectral_utilities.f90 +++ b/src/grid/spectral_utilities.f90 @@ -372,7 +372,7 @@ subroutine utilities_updateGamma(C) C_ref = C if (.not. num%memory_efficient) then - gamma_hat = cmplx(0.0_pReal,0.0_pReal,pReal) ! for the singular point and any non invertible A + gamma_hat = cmplx(0.0_pReal,0.0_pReal,pReal) ! for the singular point and any non invertible A do k = cells3Offset+1, cells3Offset+cells3; do j = 1, cells(2); do i = 1, grid1Red if (any([i,j,k] /= 1)) then ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1 do concurrent (l = 1:3, m = 1:3) @@ -387,8 +387,7 @@ subroutine utilities_updateGamma(C) call math_invert(A_inv, err, A) temp33_complex = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal) do concurrent(l=1:3, m=1:3, n=1:3, o=1:3) - gamma_hat(l,m,n,o,i,j,k-cells3Offset) = temp33_complex(l,n)* & - conjg(-xi1st(o,i,j,k-cells3Offset))*xi1st(m,i,j,k-cells3Offset) + gamma_hat(l,m,n,o,i,j,k-cells3Offset) = temp33_complex(l,n) * xiDyad_cmplx(o,m) end do end if end if @@ -507,7 +506,7 @@ subroutine utilities_fourierGammaConvolution(fieldAim) call math_invert(A_inv, err, A) temp33_complex = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal) do concurrent(l=1:3, m=1:3, n=1:3, o=1:3) - gamma_hat(l,m,n,o,1,1,1) = temp33_complex(l,n)*conjg(-xi1st(o,i,j,k))*xi1st(m,i,j,k) + gamma_hat(l,m,n,o,1,1,1) = temp33_complex(l,n)*xiDyad_cmplx(o,m) end do else gamma_hat(1:3,1:3,1:3,1:3,1,1,1) = cmplx(0.0_pReal,0.0_pReal,pReal) @@ -521,7 +520,7 @@ subroutine utilities_fourierGammaConvolution(fieldAim) else memoryEfficient do k = 1, cells3; do j = 1, cells(2); do i = 1,grid1Red do concurrent(l = 1:3, m = 1:3) - temp33_Complex(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k) * tensorField_fourier(1:3,1:3,i,j,k)) + temp33_Complex(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k)*tensorField_fourier(1:3,1:3,i,j,k)) end do tensorField_fourier(1:3,1:3,i,j,k) = temp33_Complex end do; end do; end do @@ -884,11 +883,10 @@ pure function utilities_calculateRate(heterogeneous,field0,field,dt,avRate) real(pReal), dimension(3,3,cells(1),cells(2),cells3) :: & utilities_calculateRate - if (heterogeneous) then - utilities_calculateRate = (field-field0) / dt - else - utilities_calculateRate = spread(spread(spread(avRate,3,cells(1)),4,cells(2)),5,cells3) - endif + + utilities_calculateRate = merge((field-field0) / dt, & + spread(spread(spread(avRate,3,cells(1)),4,cells(2)),5,cells3), & + heterogeneous) end function utilities_calculateRate @@ -1041,7 +1039,7 @@ subroutine utilities_updateCoords(F) rank_b = modulo(worldrank-1_MPI_INTEGER_KIND,worldsize) ! send bottom layer to process below - call MPI_Isend(IPfluct_padded(:,:,:,2), c,MPI_DOUBLE,rank_b,0_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(1),err_MPI) + call MPI_Isend(IPfluct_padded(:,:,:,2), c,MPI_DOUBLE,rank_b,0_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(1),err_MPI) if (err_MPI /= 0_MPI_INTEGER_KIND) error stop 'MPI error' call MPI_Irecv(IPfluct_padded(:,:,:,cells3+2),c,MPI_DOUBLE,rank_t,0_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(2),err_MPI) if (err_MPI /= 0_MPI_INTEGER_KIND) error stop 'MPI error' @@ -1049,7 +1047,7 @@ subroutine utilities_updateCoords(F) ! send top layer to process above call MPI_Isend(IPfluct_padded(:,:,:,cells3+1),c,MPI_DOUBLE,rank_t,1_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(3),err_MPI) if (err_MPI /= 0_MPI_INTEGER_KIND) error stop 'MPI error' - call MPI_Irecv(IPfluct_padded(:,:,:,1), c,MPI_DOUBLE,rank_b,1_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(4),err_MPI) + call MPI_Irecv(IPfluct_padded(:,:,:,1), c,MPI_DOUBLE,rank_b,1_MPI_INTEGER_KIND,MPI_COMM_WORLD,request(4),err_MPI) if (err_MPI /= 0_MPI_INTEGER_KIND) error stop 'MPI error' call MPI_Waitall(4,request,status,err_MPI) From 0008ad1bf8fe7eea6728ab742bd2480b8072a33e Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Mon, 14 Feb 2022 05:57:48 +0100 Subject: [PATCH 2/5] easier to understand --- src/grid/spectral_utilities.f90 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/grid/spectral_utilities.f90 b/src/grid/spectral_utilities.f90 index 28ff0f8b3..08d983c8a 100644 --- a/src/grid/spectral_utilities.f90 +++ b/src/grid/spectral_utilities.f90 @@ -508,13 +508,13 @@ subroutine utilities_fourierGammaConvolution(fieldAim) do concurrent(l=1:3, m=1:3, n=1:3, o=1:3) gamma_hat(l,m,n,o,1,1,1) = temp33_complex(l,n)*xiDyad_cmplx(o,m) end do + do concurrent(l = 1:3, m = 1:3) + temp33_Complex(l,m) = sum(gamma_hat(l,m,1:3,1:3,1,1,1)*tensorField_fourier(1:3,1:3,i,j,k)) + end do + tensorField_fourier(1:3,1:3,i,j,k) = temp33_Complex else - gamma_hat(1:3,1:3,1:3,1:3,1,1,1) = cmplx(0.0_pReal,0.0_pReal,pReal) + tensorField_fourier(1:3,1:3,i,j,k) = cmplx(0.0_pReal,0.0_pReal,pReal) end if - do concurrent(l = 1:3, m = 1:3) - temp33_Complex(l,m) = sum(gamma_hat(l,m,1:3,1:3,1,1,1)*tensorField_fourier(1:3,1:3,i,j,k)) - end do - tensorField_fourier(1:3,1:3,i,j,k) = temp33_Complex end if end do; end do; end do else memoryEfficient From 61e11a0529f8f192b411304a8d382feb59532e71 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Mon, 14 Feb 2022 07:58:15 +0100 Subject: [PATCH 3/5] use openMP for operations in Fourier space --- src/grid/spectral_utilities.f90 | 65 ++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/src/grid/spectral_utilities.f90 b/src/grid/spectral_utilities.f90 index 08d983c8a..418f83b05 100644 --- a/src/grid/spectral_utilities.f90 +++ b/src/grid/spectral_utilities.f90 @@ -362,7 +362,7 @@ end subroutine spectral_utilities_init subroutine utilities_updateGamma(C) real(pReal), intent(in), dimension(3,3,3,3) :: C !< input stiffness to store as reference stiffness - complex(pReal), dimension(3,3) :: temp33_complex, xiDyad_cmplx + complex(pReal), dimension(3,3) :: temp33_cmplx, xiDyad_cmplx real(pReal), dimension(6,6) :: A, A_inv integer :: & i, j, k, & @@ -373,25 +373,27 @@ subroutine utilities_updateGamma(C) if (.not. num%memory_efficient) then gamma_hat = cmplx(0.0_pReal,0.0_pReal,pReal) ! for the singular point and any non invertible A + !$OMP PARALLEL DO PRIVATE(l,m,n,o,temp33_cmplx,xiDyad_cmplx,A,A_inv,err) do k = cells3Offset+1, cells3Offset+cells3; do j = 1, cells(2); do i = 1, grid1Red if (any([i,j,k] /= 1)) then ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1 do concurrent (l = 1:3, m = 1:3) xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k-cells3Offset))*xi1st(m,i,j,k-cells3Offset) end do do concurrent(l = 1:3, m = 1:3) - temp33_complex(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx) + temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx) end do - A(1:3,1:3) = temp33_complex%re; A(4:6,4:6) = temp33_complex%re - A(1:3,4:6) = temp33_complex%im; A(4:6,1:3) = -temp33_complex%im + A(1:3,1:3) = temp33_cmplx%re; A(4:6,4:6) = temp33_cmplx%re + A(1:3,4:6) = temp33_cmplx%im; A(4:6,1:3) = -temp33_cmplx%im if (abs(math_det33(A(1:3,1:3))) > 1e-16) then call math_invert(A_inv, err, A) - temp33_complex = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal) + temp33_cmplx = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal) do concurrent(l=1:3, m=1:3, n=1:3, o=1:3) - gamma_hat(l,m,n,o,i,j,k-cells3Offset) = temp33_complex(l,n) * xiDyad_cmplx(o,m) + gamma_hat(l,m,n,o,i,j,k-cells3Offset) = temp33_cmplx(l,n) * xiDyad_cmplx(o,m) end do end if end if end do; end do; end do + !$OMP END PARALLEL DO endif end subroutine utilities_updateGamma @@ -477,7 +479,7 @@ end subroutine utilities_FFTvectorBackward subroutine utilities_fourierGammaConvolution(fieldAim) real(pReal), intent(in), dimension(3,3) :: fieldAim !< desired average value of the field after convolution - complex(pReal), dimension(3,3) :: temp33_complex, xiDyad_cmplx + complex(pReal), dimension(3,3) :: temp33_cmplx, xiDyad_cmplx real(pReal), dimension(6,6) :: A, A_inv integer :: & @@ -492,38 +494,42 @@ subroutine utilities_fourierGammaConvolution(fieldAim) !-------------------------------------------------------------------------------------------------- ! do the actual spectral method calculation (mechanical equilibrium) memoryEfficient: if (num%memory_efficient) then + !$OMP PARALLEL DO PRIVATE(l,m,n,o,temp33_cmplx,xiDyad_cmplx,A,A_inv,err,gamma_hat) do k = 1, cells3; do j = 1, cells(2); do i = 1, grid1Red if (any([i,j,k+cells3Offset] /= 1)) then ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1 do concurrent(l = 1:3, m = 1:3) xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k))*xi1st(m,i,j,k) end do do concurrent(l = 1:3, m = 1:3) - temp33_complex(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx) + temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx) end do - A(1:3,1:3) = temp33_complex%re; A(4:6,4:6) = temp33_complex%re - A(1:3,4:6) = temp33_complex%im; A(4:6,1:3) = -temp33_complex%im + A(1:3,1:3) = temp33_cmplx%re; A(4:6,4:6) = temp33_cmplx%re + A(1:3,4:6) = temp33_cmplx%im; A(4:6,1:3) = -temp33_cmplx%im if (abs(math_det33(A(1:3,1:3))) > 1e-16) then call math_invert(A_inv, err, A) - temp33_complex = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal) + temp33_cmplx = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal) do concurrent(l=1:3, m=1:3, n=1:3, o=1:3) - gamma_hat(l,m,n,o,1,1,1) = temp33_complex(l,n)*xiDyad_cmplx(o,m) + gamma_hat(l,m,n,o,1,1,1) = temp33_cmplx(l,n)*xiDyad_cmplx(o,m) end do do concurrent(l = 1:3, m = 1:3) - temp33_Complex(l,m) = sum(gamma_hat(l,m,1:3,1:3,1,1,1)*tensorField_fourier(1:3,1:3,i,j,k)) + temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,1,1,1)*tensorField_fourier(1:3,1:3,i,j,k)) end do - tensorField_fourier(1:3,1:3,i,j,k) = temp33_Complex + tensorField_fourier(1:3,1:3,i,j,k) = temp33_cmplx else tensorField_fourier(1:3,1:3,i,j,k) = cmplx(0.0_pReal,0.0_pReal,pReal) end if end if end do; end do; end do + !$OMP END PARALLEL DO else memoryEfficient + !$OMP PARALLEL DO PRIVATE(l,m,temp33_cmplx) do k = 1, cells3; do j = 1, cells(2); do i = 1,grid1Red do concurrent(l = 1:3, m = 1:3) - temp33_Complex(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k)*tensorField_fourier(1:3,1:3,i,j,k)) + temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k)*tensorField_fourier(1:3,1:3,i,j,k)) end do - tensorField_fourier(1:3,1:3,i,j,k) = temp33_Complex + tensorField_fourier(1:3,1:3,i,j,k) = temp33_cmplx end do; end do; end do + !$OMP END PARALLEL DO end if memoryEfficient if (cells3Offset == 0) tensorField_fourier(1:3,1:3,1,1,1) = cmplx(fieldAim/wgt,0.0_pReal,pReal) @@ -543,12 +549,14 @@ subroutine utilities_fourierGreenConvolution(D_ref, mu_ref, Delta_t) !-------------------------------------------------------------------------------------------------- ! do the actual spectral method calculation + !$OMP PARALLEL DO PRIVATE(GreenOp_hat) do k = 1, cells3; do j = 1, cells(2) ;do i = 1, grid1Red GreenOp_hat = cmplx(1.0_pReal,0.0_pReal,pReal) & / (cmplx(mu_ref,0.0_pReal,pReal) + cmplx(Delta_t,0.0_pReal) & * sum(conjg(xi1st(1:3,i,j,k))* matmul(cmplx(D_ref,0.0_pReal),xi1st(1:3,i,j,k)))) scalarField_fourier(i,j,k) = scalarField_fourier(i,j,k)*GreenOp_hat enddo; enddo; enddo + !$OMP END PARALLEL DO end subroutine utilities_fourierGreenConvolution @@ -735,9 +743,10 @@ subroutine utilities_fourierScalarGradient() integer :: i, j, k + do k = 1, cells3; do j = 1, cells(2); do i = 1,grid1Red vectorField_fourier(1:3,i,j,k) = scalarField_fourier(i,j,k)*xi1st(1:3,i,j,k) ! ToDo: no -conjg? - enddo; enddo; enddo + end do; end do; end do end subroutine utilities_fourierScalarGradient @@ -747,11 +756,9 @@ end subroutine utilities_fourierScalarGradient !-------------------------------------------------------------------------------------------------- subroutine utilities_fourierVectorDivergence() - integer :: i, j, k - do k = 1, cells3; do j = 1, cells(2); do i = 1,grid1Red - scalarField_fourier(i,j,k) = sum(vectorField_fourier(1:3,i,j,k)*conjg(-xi1st(1:3,i,j,k))) - enddo; enddo; enddo + scalarField_fourier(1:grid1Red,1:cells(2),1:cells3) = sum(vectorField_fourier(1:3,1:grid1Red,1:cells(2),1:cells3) & + *conjg(-xi1st)) end subroutine utilities_fourierVectorDivergence @@ -763,11 +770,12 @@ subroutine utilities_fourierVectorGradient() integer :: i, j, k, m, n + do k = 1, cells3; do j = 1, cells(2); do i = 1,grid1Red do m = 1, 3; do n = 1, 3 tensorField_fourier(m,n,i,j,k) = vectorField_fourier(m,i,j,k)*xi1st(n,i,j,k) - enddo; enddo - enddo; enddo; enddo + end do; end do + end do; end do; end do end subroutine utilities_fourierVectorGradient @@ -779,9 +787,10 @@ subroutine utilities_fourierTensorDivergence() integer :: i, j, k + do k = 1, cells3; do j = 1, cells(2); do i = 1,grid1Red vectorField_fourier(:,i,j,k) = matmul(tensorField_fourier(:,:,i,j,k),conjg(-xi1st(:,i,j,k))) - enddo; enddo; enddo + end do; end do; end do end subroutine utilities_fourierTensorDivergence @@ -978,6 +987,7 @@ end function utilities_getFreqDerivative subroutine utilities_updateCoords(F) real(pReal), dimension(3,3,cells(1),cells(2),cells3), intent(in) :: F + real(pReal), dimension(3, cells(1),cells(2),cells3) :: IPcoords real(pReal), dimension(3, cells(1),cells(2),cells3+2) :: IPfluct_padded ! Fluctuations of cell center displacement (padded along z for MPI) real(pReal), dimension(3, cells(1)+1,cells(2)+1,cells3+1) :: nodeCoords @@ -1008,20 +1018,23 @@ subroutine utilities_updateCoords(F) 1, 1, 1, & 0, 1, 1 ], [3,8]) + step = geomSize/real(cells, pReal) !-------------------------------------------------------------------------------------------------- ! integration in Fourier space to get fluctuations of cell center discplacements tensorField_real(1:3,1:3,1:cells(1),1:cells(2),1:cells3) = F call utilities_FFTtensorForward() + !$OMP PARALLEL DO do k = 1, cells3; do j = 1, cells(2); do i = 1, grid1Red if (any([i,j,k+cells3Offset] /= 1)) then vectorField_fourier(1:3,i,j,k) = matmul(tensorField_fourier(1:3,1:3,i,j,k),xi2nd(1:3,i,j,k)) & / sum(conjg(-xi2nd(1:3,i,j,k))*xi2nd(1:3,i,j,k)) * cmplx(wgt,0.0,pReal) else vectorField_fourier(1:3,i,j,k) = cmplx(0.0,0.0,pReal) - endif - enddo; enddo; enddo + end if + end do; end do; end do + !$OMP END PARALLEL DO call fftw_mpi_execute_dft_c2r(planVectorBack,vectorField_fourier,vectorField_real) From 466682e9787453fa28dd47c3794c036913c0e2da Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Mon, 14 Feb 2022 08:32:48 +0100 Subject: [PATCH 4/5] missing rename grid -> cells --- src/grid/spectral_utilities.f90 | 62 ++++++++++++++++----------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/grid/spectral_utilities.f90 b/src/grid/spectral_utilities.f90 index 418f83b05..f89485c7a 100644 --- a/src/grid/spectral_utilities.f90 +++ b/src/grid/spectral_utilities.f90 @@ -31,7 +31,7 @@ module spectral_utilities !-------------------------------------------------------------------------------------------------- ! grid related information real(pReal), protected, public :: wgt !< weighting factor 1/Nelems - integer, protected, public :: grid1Red !< cells(1)/2 + integer, protected, public :: cells1Red !< cells(1)/2 real(pReal), protected, public, dimension(3) :: scaledGeomSize !< scaled geometry size for calculation of divergence !-------------------------------------------------------------------------------------------------- @@ -201,7 +201,7 @@ subroutine spectral_utilities_init num_grid%get_asString('PETSc_options',defaultVal=''),err_PETSc) CHKERRQ(err_PETSc) - grid1Red = cells(1)/2 + 1 + cells1Red = cells(1)/2 + 1 wgt = 1.0/real(product(cells),pReal) num%memory_efficient = num_grid%get_asInt('memory_efficient', defaultVal=1) > 0 ! ToDo: should be logical in YAML file @@ -265,8 +265,8 @@ subroutine spectral_utilities_init gridFFTW = int(cells,C_INTPTR_T) alloc_local = fftw_mpi_local_size_3d(gridFFTW(3), gridFFTW(2), gridFFTW(1)/2 +1, & PETSC_COMM_WORLD, local_K, local_K_offset) - allocate (xi1st (3,grid1Red,cells(2),cells3),source = cmplx(0.0_pReal,0.0_pReal,pReal)) ! frequencies for first derivatives, only half the size for first dimension - allocate (xi2nd (3,grid1Red,cells(2),cells3),source = cmplx(0.0_pReal,0.0_pReal,pReal)) ! frequencies for second derivatives, only half the size for first dimension + allocate (xi1st (3,cells1Red,cells(2),cells3),source = cmplx(0.0_pReal,0.0_pReal,pReal)) ! frequencies for first derivatives, only half the size for first dimension + allocate (xi2nd (3,cells1Red,cells(2),cells3),source = cmplx(0.0_pReal,0.0_pReal,pReal)) ! frequencies for second derivatives, only half the size for first dimension tensorField = fftw_alloc_complex(tensorSize*alloc_local) call c_f_pointer(tensorField, tensorField_real, [3_C_INTPTR_T,3_C_INTPTR_T, & @@ -333,7 +333,7 @@ subroutine spectral_utilities_init do j = 1, cells(2) k_s(2) = j - 1 if (j > cells(2)/2 + 1) k_s(2) = k_s(2) - cells(2) ! running from 0,1,...,N/2,N/2+1,-N/2,-N/2+1,...,-1 - do i = 1, grid1Red + do i = 1, cells1Red k_s(1) = i - 1 ! symmetry, junst running from 0,1,...,N/2,N/2+1 xi2nd(1:3,i,j,k-cells3Offset) = utilities_getFreqDerivative(k_s) where(mod(cells,2)==0 .and. [i,j,k] == cells/2+1 .and. & @@ -347,7 +347,7 @@ subroutine spectral_utilities_init if (num%memory_efficient) then ! allocate just single fourth order tensor allocate (gamma_hat(3,3,3,3,1,1,1), source = cmplx(0.0_pReal,0.0_pReal,pReal)) else ! precalculation of gamma_hat field - allocate (gamma_hat(3,3,3,3,grid1Red,cells(2),cells3), source = cmplx(0.0_pReal,0.0_pReal,pReal)) + allocate (gamma_hat(3,3,3,3,cells1Red,cells(2),cells3), source = cmplx(0.0_pReal,0.0_pReal,pReal)) endif end subroutine spectral_utilities_init @@ -374,7 +374,7 @@ subroutine utilities_updateGamma(C) if (.not. num%memory_efficient) then gamma_hat = cmplx(0.0_pReal,0.0_pReal,pReal) ! for the singular point and any non invertible A !$OMP PARALLEL DO PRIVATE(l,m,n,o,temp33_cmplx,xiDyad_cmplx,A,A_inv,err) - do k = cells3Offset+1, cells3Offset+cells3; do j = 1, cells(2); do i = 1, grid1Red + do k = cells3Offset+1, cells3Offset+cells3; do j = 1, cells(2); do i = 1, cells1Red if (any([i,j,k] /= 1)) then ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1 do concurrent (l = 1:3, m = 1:3) xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k-cells3Offset))*xi1st(m,i,j,k-cells3Offset) @@ -406,7 +406,7 @@ end subroutine utilities_updateGamma !-------------------------------------------------------------------------------------------------- subroutine utilities_FFTtensorForward - tensorField_real(1:3,1:3,cells(1)+1:grid1Red*2,:,:) = 0.0_pReal + tensorField_real(1:3,1:3,cells(1)+1:cells1Red*2,:,:) = 0.0_pReal call fftw_mpi_execute_dft_r2c(planTensorForth,tensorField_real,tensorField_fourier) end subroutine utilities_FFTtensorForward @@ -430,7 +430,7 @@ end subroutine utilities_FFTtensorBackward !-------------------------------------------------------------------------------------------------- subroutine utilities_FFTscalarForward - scalarField_real(cells(1)+1:grid1Red*2,:,:) = 0.0_pReal + scalarField_real(cells(1)+1:cells1Red*2,:,:) = 0.0_pReal call fftw_mpi_execute_dft_r2c(planScalarForth,scalarField_real,scalarField_fourier) end subroutine utilities_FFTscalarForward @@ -455,7 +455,7 @@ end subroutine utilities_FFTscalarBackward !-------------------------------------------------------------------------------------------------- subroutine utilities_FFTvectorForward - vectorField_real(1:3,cells(1)+1:grid1Red*2,:,:) = 0.0_pReal + vectorField_real(1:3,cells(1)+1:cells1Red*2,:,:) = 0.0_pReal call fftw_mpi_execute_dft_r2c(planVectorForth,vectorField_real,vectorField_fourier) end subroutine utilities_FFTvectorForward @@ -495,7 +495,7 @@ subroutine utilities_fourierGammaConvolution(fieldAim) ! do the actual spectral method calculation (mechanical equilibrium) memoryEfficient: if (num%memory_efficient) then !$OMP PARALLEL DO PRIVATE(l,m,n,o,temp33_cmplx,xiDyad_cmplx,A,A_inv,err,gamma_hat) - do k = 1, cells3; do j = 1, cells(2); do i = 1, grid1Red + do k = 1, cells3; do j = 1, cells(2); do i = 1, cells1Red if (any([i,j,k+cells3Offset] /= 1)) then ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1 do concurrent(l = 1:3, m = 1:3) xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k))*xi1st(m,i,j,k) @@ -523,7 +523,7 @@ subroutine utilities_fourierGammaConvolution(fieldAim) !$OMP END PARALLEL DO else memoryEfficient !$OMP PARALLEL DO PRIVATE(l,m,temp33_cmplx) - do k = 1, cells3; do j = 1, cells(2); do i = 1,grid1Red + do k = 1, cells3; do j = 1, cells(2); do i = 1,cells1Red do concurrent(l = 1:3, m = 1:3) temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k)*tensorField_fourier(1:3,1:3,i,j,k)) end do @@ -550,7 +550,7 @@ subroutine utilities_fourierGreenConvolution(D_ref, mu_ref, Delta_t) !-------------------------------------------------------------------------------------------------- ! do the actual spectral method calculation !$OMP PARALLEL DO PRIVATE(GreenOp_hat) - do k = 1, cells3; do j = 1, cells(2) ;do i = 1, grid1Red + do k = 1, cells3; do j = 1, cells(2) ;do i = 1, cells1Red GreenOp_hat = cmplx(1.0_pReal,0.0_pReal,pReal) & / (cmplx(mu_ref,0.0_pReal,pReal) + cmplx(Delta_t,0.0_pReal) & * sum(conjg(xi1st(1:3,i,j,k))* matmul(cmplx(D_ref,0.0_pReal),xi1st(1:3,i,j,k)))) @@ -579,7 +579,7 @@ real(pReal) function utilities_divergenceRMS() ! calculating RMS divergence criterion in Fourier space utilities_divergenceRMS = 0.0_pReal do k = 1, cells3; do j = 1, cells(2) - do i = 2, grid1Red -1 ! Has somewhere a conj. complex counterpart. Therefore count it twice. + do i = 2, cells1Red -1 ! Has somewhere a conj. complex counterpart. Therefore count it twice. utilities_divergenceRMS = utilities_divergenceRMS & + 2.0_pReal*(sum (real(matmul(tensorField_fourier(1:3,1:3,i,j,k), & ! (sqrt(real(a)**2 + aimag(a)**2))**2 = real(a)**2 + aimag(a)**2, i.e. do not take square root and square again conjg(-xi1st(1:3,i,j,k))*rescaledGeom))**2) & ! --> sum squared L_2 norm of vector @@ -591,10 +591,10 @@ real(pReal) function utilities_divergenceRMS() conjg(-xi1st(1:3,1,j,k))*rescaledGeom))**2) & + sum(aimag(matmul(tensorField_fourier(1:3,1:3,1 ,j,k), & conjg(-xi1st(1:3,1,j,k))*rescaledGeom))**2) & - + sum( real(matmul(tensorField_fourier(1:3,1:3,grid1Red,j,k), & - conjg(-xi1st(1:3,grid1Red,j,k))*rescaledGeom))**2) & - + sum(aimag(matmul(tensorField_fourier(1:3,1:3,grid1Red,j,k), & - conjg(-xi1st(1:3,grid1Red,j,k))*rescaledGeom))**2) + + sum( real(matmul(tensorField_fourier(1:3,1:3,cells1Red,j,k), & + conjg(-xi1st(1:3,cells1Red,j,k))*rescaledGeom))**2) & + + sum(aimag(matmul(tensorField_fourier(1:3,1:3,cells1Red,j,k), & + conjg(-xi1st(1:3,cells1Red,j,k))*rescaledGeom))**2) enddo; enddo if (cells(1) == 1) utilities_divergenceRMS = utilities_divergenceRMS * 0.5_pReal ! counted twice in case of cells(1) == 1 call MPI_Allreduce(MPI_IN_PLACE,utilities_divergenceRMS,1_MPI_INTEGER_KIND,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD,err_MPI) @@ -624,7 +624,7 @@ real(pReal) function utilities_curlRMS() utilities_curlRMS = 0.0_pReal do k = 1, cells3; do j = 1, cells(2); - do i = 2, grid1Red - 1 + do i = 2, cells1Red - 1 do l = 1, 3 curl_fourier(l,1) = (+tensorField_fourier(l,3,i,j,k)*xi1st(2,i,j,k)*rescaledGeom(2) & -tensorField_fourier(l,2,i,j,k)*xi1st(3,i,j,k)*rescaledGeom(3)) @@ -647,12 +647,12 @@ real(pReal) function utilities_curlRMS() utilities_curlRMS = utilities_curlRMS & + sum(curl_fourier%re**2 + curl_fourier%im**2) ! this layer (DC) does not have a conjugate complex counterpart (if cells(1) /= 1) do l = 1, 3 - curl_fourier = (+tensorField_fourier(l,3,grid1Red,j,k)*xi1st(2,grid1Red,j,k)*rescaledGeom(2) & - -tensorField_fourier(l,2,grid1Red,j,k)*xi1st(3,grid1Red,j,k)*rescaledGeom(3)) - curl_fourier = (+tensorField_fourier(l,1,grid1Red,j,k)*xi1st(3,grid1Red,j,k)*rescaledGeom(3) & - -tensorField_fourier(l,3,grid1Red,j,k)*xi1st(1,grid1Red,j,k)*rescaledGeom(1)) - curl_fourier = (+tensorField_fourier(l,2,grid1Red,j,k)*xi1st(1,grid1Red,j,k)*rescaledGeom(1) & - -tensorField_fourier(l,1,grid1Red,j,k)*xi1st(2,grid1Red,j,k)*rescaledGeom(2)) + curl_fourier = (+tensorField_fourier(l,3,cells1Red,j,k)*xi1st(2,cells1Red,j,k)*rescaledGeom(2) & + -tensorField_fourier(l,2,cells1Red,j,k)*xi1st(3,cells1Red,j,k)*rescaledGeom(3)) + curl_fourier = (+tensorField_fourier(l,1,cells1Red,j,k)*xi1st(3,cells1Red,j,k)*rescaledGeom(3) & + -tensorField_fourier(l,3,cells1Red,j,k)*xi1st(1,cells1Red,j,k)*rescaledGeom(1)) + curl_fourier = (+tensorField_fourier(l,2,cells1Red,j,k)*xi1st(1,cells1Red,j,k)*rescaledGeom(1) & + -tensorField_fourier(l,1,cells1Red,j,k)*xi1st(2,cells1Red,j,k)*rescaledGeom(2)) enddo utilities_curlRMS = utilities_curlRMS & + sum(curl_fourier%re**2 + curl_fourier%im**2) ! this layer (Nyquist) does not have a conjugate complex counterpart (if cells(1) /= 1) @@ -744,7 +744,7 @@ subroutine utilities_fourierScalarGradient() integer :: i, j, k - do k = 1, cells3; do j = 1, cells(2); do i = 1,grid1Red + do k = 1, cells3; do j = 1, cells(2); do i = 1,cells1Red vectorField_fourier(1:3,i,j,k) = scalarField_fourier(i,j,k)*xi1st(1:3,i,j,k) ! ToDo: no -conjg? end do; end do; end do @@ -757,8 +757,8 @@ end subroutine utilities_fourierScalarGradient subroutine utilities_fourierVectorDivergence() - scalarField_fourier(1:grid1Red,1:cells(2),1:cells3) = sum(vectorField_fourier(1:3,1:grid1Red,1:cells(2),1:cells3) & - *conjg(-xi1st)) + scalarField_fourier(1:cells1Red,1:cells(2),1:cells3) = sum(vectorField_fourier(1:3,1:cells1Red,1:cells(2),1:cells3) & + *conjg(-xi1st)) end subroutine utilities_fourierVectorDivergence @@ -771,7 +771,7 @@ subroutine utilities_fourierVectorGradient() integer :: i, j, k, m, n - do k = 1, cells3; do j = 1, cells(2); do i = 1,grid1Red + do k = 1, cells3; do j = 1, cells(2); do i = 1,cells1Red do m = 1, 3; do n = 1, 3 tensorField_fourier(m,n,i,j,k) = vectorField_fourier(m,i,j,k)*xi1st(n,i,j,k) end do; end do @@ -788,7 +788,7 @@ subroutine utilities_fourierTensorDivergence() integer :: i, j, k - do k = 1, cells3; do j = 1, cells(2); do i = 1,grid1Red + do k = 1, cells3; do j = 1, cells(2); do i = 1,cells1Red vectorField_fourier(:,i,j,k) = matmul(tensorField_fourier(:,:,i,j,k),conjg(-xi1st(:,i,j,k))) end do; end do; end do @@ -1026,7 +1026,7 @@ subroutine utilities_updateCoords(F) call utilities_FFTtensorForward() !$OMP PARALLEL DO - do k = 1, cells3; do j = 1, cells(2); do i = 1, grid1Red + do k = 1, cells3; do j = 1, cells(2); do i = 1, cells1Red if (any([i,j,k+cells3Offset] /= 1)) then vectorField_fourier(1:3,i,j,k) = matmul(tensorField_fourier(1:3,1:3,i,j,k),xi2nd(1:3,i,j,k)) & / sum(conjg(-xi2nd(1:3,i,j,k))*xi2nd(1:3,i,j,k)) * cmplx(wgt,0.0,pReal) From c66e2336c25d704a36a60ea121c837c34210a388 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Mon, 14 Feb 2022 08:58:40 +0100 Subject: [PATCH 5/5] some versions of ifort have problems with 'do concurrent' --- src/grid/spectral_utilities.f90 | 35 ++++++++++++++++++++++++++++++-- src/math.f90 | 31 ++++++++-------------------- src/phase_mechanical_plastic.f90 | 3 +-- 3 files changed, 42 insertions(+), 27 deletions(-) diff --git a/src/grid/spectral_utilities.f90 b/src/grid/spectral_utilities.f90 index f89485c7a..2a675c9b6 100644 --- a/src/grid/spectral_utilities.f90 +++ b/src/grid/spectral_utilities.f90 @@ -376,20 +376,32 @@ subroutine utilities_updateGamma(C) !$OMP PARALLEL DO PRIVATE(l,m,n,o,temp33_cmplx,xiDyad_cmplx,A,A_inv,err) do k = cells3Offset+1, cells3Offset+cells3; do j = 1, cells(2); do i = 1, cells1Red if (any([i,j,k] /= 1)) then ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1 - do concurrent (l = 1:3, m = 1:3) +#ifndef __INTEL_COMPILER + do concurrent(l = 1:3, m = 1:3) xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k-cells3Offset))*xi1st(m,i,j,k-cells3Offset) end do do concurrent(l = 1:3, m = 1:3) temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx) end do +#else + forall(l = 1:3, m = 1:3) & + xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k-cells3Offset))*xi1st(m,i,j,k-cells3Offset) + forall(l = 1:3, m = 1:3) & + temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx) +#endif A(1:3,1:3) = temp33_cmplx%re; A(4:6,4:6) = temp33_cmplx%re A(1:3,4:6) = temp33_cmplx%im; A(4:6,1:3) = -temp33_cmplx%im if (abs(math_det33(A(1:3,1:3))) > 1e-16) then call math_invert(A_inv, err, A) temp33_cmplx = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal) +#ifndef __INTEL_COMPILER do concurrent(l=1:3, m=1:3, n=1:3, o=1:3) gamma_hat(l,m,n,o,i,j,k-cells3Offset) = temp33_cmplx(l,n) * xiDyad_cmplx(o,m) end do +#else + forall(l=1:3, m=1:3, n=1:3, o=1:3) & + gamma_hat(l,m,n,o,i,j,k-cells3Offset) = temp33_cmplx(l,n) * xiDyad_cmplx(o,m) +#endif end if end if end do; end do; end do @@ -497,23 +509,37 @@ subroutine utilities_fourierGammaConvolution(fieldAim) !$OMP PARALLEL DO PRIVATE(l,m,n,o,temp33_cmplx,xiDyad_cmplx,A,A_inv,err,gamma_hat) do k = 1, cells3; do j = 1, cells(2); do i = 1, cells1Red if (any([i,j,k+cells3Offset] /= 1)) then ! singular point at xi=(0.0,0.0,0.0) i.e. i=j=k=1 +#ifndef __INTEL_COMPILER do concurrent(l = 1:3, m = 1:3) xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k))*xi1st(m,i,j,k) end do do concurrent(l = 1:3, m = 1:3) temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx) end do +#else + forall(l = 1:3, m = 1:3) & + xiDyad_cmplx(l,m) = conjg(-xi1st(l,i,j,k))*xi1st(m,i,j,k) + forall(l = 1:3, m = 1:3) & + temp33_cmplx(l,m) = sum(cmplx(C_ref(l,1:3,m,1:3),0.0_pReal)*xiDyad_cmplx) +#endif A(1:3,1:3) = temp33_cmplx%re; A(4:6,4:6) = temp33_cmplx%re A(1:3,4:6) = temp33_cmplx%im; A(4:6,1:3) = -temp33_cmplx%im if (abs(math_det33(A(1:3,1:3))) > 1e-16) then call math_invert(A_inv, err, A) temp33_cmplx = cmplx(A_inv(1:3,1:3),A_inv(1:3,4:6),pReal) +#ifndef __INTEL_COMPILER do concurrent(l=1:3, m=1:3, n=1:3, o=1:3) gamma_hat(l,m,n,o,1,1,1) = temp33_cmplx(l,n)*xiDyad_cmplx(o,m) end do do concurrent(l = 1:3, m = 1:3) temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,1,1,1)*tensorField_fourier(1:3,1:3,i,j,k)) end do +#else + forall(l=1:3, m=1:3, n=1:3, o=1:3) & + gamma_hat(l,m,n,o,1,1,1) = temp33_cmplx(l,n)*xiDyad_cmplx(o,m) + forall(l = 1:3, m = 1:3) & + temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,1,1,1)*tensorField_fourier(1:3,1:3,i,j,k)) +#endif tensorField_fourier(1:3,1:3,i,j,k) = temp33_cmplx else tensorField_fourier(1:3,1:3,i,j,k) = cmplx(0.0_pReal,0.0_pReal,pReal) @@ -524,9 +550,14 @@ subroutine utilities_fourierGammaConvolution(fieldAim) else memoryEfficient !$OMP PARALLEL DO PRIVATE(l,m,temp33_cmplx) do k = 1, cells3; do j = 1, cells(2); do i = 1,cells1Red +#ifndef __INTEL_COMPILER do concurrent(l = 1:3, m = 1:3) temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k)*tensorField_fourier(1:3,1:3,i,j,k)) end do +#else + forall(l = 1:3, m = 1:3) & + temp33_cmplx(l,m) = sum(gamma_hat(l,m,1:3,1:3,i,j,k)*tensorField_fourier(1:3,1:3,i,j,k)) +#endif tensorField_fourier(1:3,1:3,i,j,k) = temp33_cmplx end do; end do; end do !$OMP END PARALLEL DO @@ -758,7 +789,7 @@ subroutine utilities_fourierVectorDivergence() scalarField_fourier(1:cells1Red,1:cells(2),1:cells3) = sum(vectorField_fourier(1:3,1:cells1Red,1:cells(2),1:cells3) & - *conjg(-xi1st)) + *conjg(-xi1st),1) end subroutine utilities_fourierVectorDivergence diff --git a/src/math.f90 b/src/math.f90 index dd4690672..75b9ddea2 100644 --- a/src/math.f90 +++ b/src/math.f90 @@ -262,9 +262,8 @@ pure function math_identity4th() math_identity4th(i,j,k,l) = 0.5_pReal*(math_I3(i,k)*math_I3(j,l)+math_I3(i,l)*math_I3(j,k)) enddo #else - do i=1,3; do j=1,3; do k=1,3; do l=1,3 + forall(i=1:3, j=1:3, k=1:3, l=1:3) & math_identity4th(i,j,k,l) = 0.5_pReal*(math_I3(i,k)*math_I3(j,l)+math_I3(i,l)*math_I3(j,k)) - enddo; enddo; enddo; enddo #endif end function math_identity4th @@ -338,9 +337,7 @@ pure function math_outer(A,B) math_outer(i,j) = A(i)*B(j) enddo #else - do i=1,size(A,1); do j=1,size(B,1) - math_outer(i,j) = A(i)*B(j) - enddo; enddo + forall(i=1:size(A,1), j=1:size(B,1)) math_outer(i,j) = A(i)*B(j) #endif end function math_outer @@ -387,9 +384,7 @@ pure function math_mul3333xx33(A,B) math_mul3333xx33(i,j) = sum(A(i,j,1:3,1:3)*B(1:3,1:3)) enddo #else - do i=1,3; do j=1,3 - math_mul3333xx33(i,j) = sum(A(i,j,1:3,1:3)*B(1:3,1:3)) - enddo; enddo + forall (i=1:3, j=1:3) math_mul3333xx33(i,j) = sum(A(i,j,1:3,1:3)*B(1:3,1:3)) #endif end function math_mul3333xx33 @@ -411,9 +406,7 @@ pure function math_mul3333xx3333(A,B) math_mul3333xx3333(i,j,k,l) = sum(A(i,j,1:3,1:3)*B(1:3,1:3,k,l)) enddo #else - do i=1,3; do j=1,3; do k=1,3; do l=1,3 - math_mul3333xx3333(i,j,k,l) = sum(A(i,j,1:3,1:3)*B(1:3,1:3,k,l)) - enddo; enddo; enddo; enddo + forall(i=1:3, j=1:3, k=1:3, l=1:3) math_mul3333xx3333(i,j,k,l) = sum(A(i,j,1:3,1:3)*B(1:3,1:3,k,l)) #endif end function math_mul3333xx3333 @@ -752,9 +745,7 @@ pure function math_3333to99(m3333) math_3333to99(i,j) = m3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j)) enddo #else - do i=1,9; do j=1,9 - math_3333to99(i,j) = m3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j)) - enddo; enddo + forall(i=1:9, j=1:9) math_3333to99(i,j) = m3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j)) #endif end function math_3333to99 @@ -775,9 +766,7 @@ pure function math_99to3333(m99) math_99to3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j)) = m99(i,j) enddo #else - do i=1,9; do j=1,9 - math_99to3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j)) = m99(i,j) - enddo; enddo + forall(i=1:9, j=1:9) math_99to3333(MAPPLAIN(1,i),MAPPLAIN(2,i),MAPPLAIN(1,j),MAPPLAIN(2,j)) = m99(i,j) #endif end function math_99to3333 @@ -810,9 +799,7 @@ pure function math_sym3333to66(m3333,weighted) math_sym3333to66(i,j) = w(i)*w(j)*m3333(MAPNYE(1,i),MAPNYE(2,i),MAPNYE(1,j),MAPNYE(2,j)) enddo #else - do i=1,6; do j=1,6 - math_sym3333to66(i,j) = w(i)*w(j)*m3333(MAPNYE(1,i),MAPNYE(2,i),MAPNYE(1,j),MAPNYE(2,j)) - enddo; enddo + forall(i=1:6, j=1:6) math_sym3333to66(i,j) = w(i)*w(j)*m3333(MAPNYE(1,i),MAPNYE(2,i),MAPNYE(1,j),MAPNYE(2,j)) #endif end function math_sym3333to66 @@ -950,9 +937,7 @@ pure function math_3333toVoigt66_stiffness(C) result(C_tilde) C_tilde(i,j) = C(MAPVOIGT(1,i),MAPVOIGT(2,i),MAPVOIGT(1,j),MAPVOIGT(2,j)) end do #else - do i=1,6; do j=1,6 - C_tilde(i,j) = C(MAPVOIGT(1,i),MAPVOIGT(2,i),MAPVOIGT(1,j),MAPVOIGT(2,j)) - end do; end do + forall(i=1:6, j=1:6) C_tilde(i,j) = C(MAPVOIGT(1,i),MAPVOIGT(2,i),MAPVOIGT(1,j),MAPVOIGT(2,j)) #endif end function math_3333toVoigt66_stiffness diff --git a/src/phase_mechanical_plastic.f90 b/src/phase_mechanical_plastic.f90 index 72b67ef64..3915c3b2d 100644 --- a/src/phase_mechanical_plastic.f90 +++ b/src/phase_mechanical_plastic.f90 @@ -379,10 +379,9 @@ module function plastic_deltaState(ph, en) result(broken) en logical :: broken - real(pReal), dimension(3,3) :: & + real(pReal), dimension(3,3) :: & Mp integer :: & - myOffset, & mySize