From 6a58efeffe36e1843af85e670ec91468cdc0ce58 Mon Sep 17 00:00:00 2001 From: Vitesh Shah Date: Mon, 14 Dec 2020 09:14:03 +0100 Subject: [PATCH 01/11] Enabled sigterm handling --- src/DAMASK_interface.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DAMASK_interface.f90 b/src/DAMASK_interface.f90 index 41f421eb8..7490cb083 100644 --- a/src/DAMASK_interface.f90 +++ b/src/DAMASK_interface.f90 @@ -205,7 +205,7 @@ subroutine DAMASK_interface_init if (interface_restartInc > 0) & print'(a,i6.6)', ' Restart from increment: ', interface_restartInc - !call signalterm_c(c_funloc(catchSIGTERM)) + call signalterm_c(c_funloc(catchSIGTERM)) call signalusr1_c(c_funloc(catchSIGUSR1)) call signalusr2_c(c_funloc(catchSIGUSR2)) call interface_setSIGTERM(.false.) From 40bd5db361abbdbf71e595c05b149dec6d8de312 Mon Sep 17 00:00:00 2001 From: Vitesh Shah Date: Mon, 14 Dec 2020 09:17:15 +0100 Subject: [PATCH 02/11] signals enabling writing results,restart and termination --- src/grid/DAMASK_grid.f90 | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/grid/DAMASK_grid.f90 b/src/grid/DAMASK_grid.f90 index a8271cffc..f461fbf90 100644 --- a/src/grid/DAMASK_grid.f90 +++ b/src/grid/DAMASK_grid.f90 @@ -442,18 +442,19 @@ program DAMASK_grid print'(/,a,i0,a)', ' increment ', totalIncsCounter, ' NOT converged' endif; flush(IO_STDOUT) - if (mod(inc,loadCases(l)%f_out) == 0) then + if (mod(inc,loadCases(l)%f_out) == 0 .or. interface_SIGUSR1) then print'(1/,a)', ' ... writing results to file ......................................' flush(IO_STDOUT) call CPFEM_results(totalIncsCounter,time) endif - if (mod(inc,loadCases(l)%f_restart) == 0) then + if (mod(inc,loadCases(l)%f_restart) == 0 .or. 
interface_SIGUSR2) then call mech_restartWrite call CPFEM_restartWrite endif + if (interface_SIGTERM) exit loadCaseLooping endif skipping - enddo incLooping + enddo incLooping enddo loadCaseLooping From 768d139768be00bb61b08a80f9c2b63635007276 Mon Sep 17 00:00:00 2001 From: Vitesh Shah Date: Mon, 14 Dec 2020 09:34:35 +0100 Subject: [PATCH 03/11] reset the signal terms to false after task is done --- src/grid/DAMASK_grid.f90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/grid/DAMASK_grid.f90 b/src/grid/DAMASK_grid.f90 index f461fbf90..4b9ee561a 100644 --- a/src/grid/DAMASK_grid.f90 +++ b/src/grid/DAMASK_grid.f90 @@ -446,10 +446,12 @@ program DAMASK_grid print'(1/,a)', ' ... writing results to file ......................................' flush(IO_STDOUT) call CPFEM_results(totalIncsCounter,time) + call interface_setSIGUSR1(.false.) endif if (mod(inc,loadCases(l)%f_restart) == 0 .or. interface_SIGUSR2) then call mech_restartWrite call CPFEM_restartWrite + call interface_setSIGUSR2(.false.) endif if (interface_SIGTERM) exit loadCaseLooping endif skipping From da1b006315c88abe8a19a775b354a8667ebeb927 Mon Sep 17 00:00:00 2001 From: Vitesh Shah Date: Thu, 17 Dec 2020 12:42:17 +0100 Subject: [PATCH 04/11] MPI communication for SIGUSRXXX --- src/grid/DAMASK_grid.f90 | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/grid/DAMASK_grid.f90 b/src/grid/DAMASK_grid.f90 index 4b9ee561a..78e88b9b2 100644 --- a/src/grid/DAMASK_grid.f90 +++ b/src/grid/DAMASK_grid.f90 @@ -61,10 +61,13 @@ program DAMASK_grid logical :: & guess, & !< guess along former trajectory stagIterate, & - cutBack = .false. + cutBack = .false.,& + set_signal,& + set_signal1 integer :: & i, j, m, field, & errorID = 0, & + ierr,& cutBackLevel = 0, & !< cut back level \f$ t = \frac{t_{inc}}{2^l} \f$ stepFraction = 0, & !< fraction of current time interval l = 0, & !< current load case @@ -445,13 +448,17 @@ program DAMASK_grid if (mod(inc,loadCases(l)%f_out) == 0 .or. 
interface_SIGUSR1) then print'(1/,a)', ' ... writing results to file ......................................' flush(IO_STDOUT) + call MPI_ALLREDUCE(interface_SIGUSR1,set_signal,1,MPI_LOGICAL,MPI_LAND,PETSC_COMM_WORLD,ierr) call CPFEM_results(totalIncsCounter,time) call interface_setSIGUSR1(.false.) + call MPI_ALLREDUCE(interface_SIGUSR1,set_signal,1,MPI_LOGICAL,MPI_LAND,PETSC_COMM_WORLD,ierr) endif if (mod(inc,loadCases(l)%f_restart) == 0 .or. interface_SIGUSR2) then + call MPI_ALLREDUCE(interface_SIGUSR2,set_signal1,1,MPI_LOGICAL,MPI_LAND,PETSC_COMM_WORLD,ierr) call mech_restartWrite call CPFEM_restartWrite call interface_setSIGUSR2(.false.) + call MPI_ALLREDUCE(interface_SIGUSR2,set_signal1,1,MPI_LOGICAL,MPI_LAND,PETSC_COMM_WORLD,ierr) endif if (interface_SIGTERM) exit loadCaseLooping endif skipping From d83e8dac597a7d4865a6e3e04fcc478ff639b698 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Thu, 17 Dec 2020 13:20:18 +0100 Subject: [PATCH 05/11] need to share variable before if --- src/grid/DAMASK_grid.f90 | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/grid/DAMASK_grid.f90 b/src/grid/DAMASK_grid.f90 index 78e88b9b2..ef5546f7e 100644 --- a/src/grid/DAMASK_grid.f90 +++ b/src/grid/DAMASK_grid.f90 @@ -62,8 +62,7 @@ program DAMASK_grid guess, & !< guess along former trajectory stagIterate, & cutBack = .false.,& - set_signal,& - set_signal1 + signal integer :: & i, j, m, field, & errorID = 0, & @@ -445,22 +444,21 @@ program DAMASK_grid print'(/,a,i0,a)', ' increment ', totalIncsCounter, ' NOT converged' endif; flush(IO_STDOUT) - if (mod(inc,loadCases(l)%f_out) == 0 .or. interface_SIGUSR1) then + call MPI_ALLREDUCE(interface_SIGUSR1,signal,1,MPI_LOGICAL,MPI_LAND,PETSC_COMM_WORLD,ierr) + call interface_setSIGUSR1(.false.) + if (mod(inc,loadCases(l)%f_out) == 0 .or. signal) then print'(1/,a)', ' ... writing results to file ......................................' 
flush(IO_STDOUT) - call MPI_ALLREDUCE(interface_SIGUSR1,set_signal,1,MPI_LOGICAL,MPI_LAND,PETSC_COMM_WORLD,ierr) call CPFEM_results(totalIncsCounter,time) - call interface_setSIGUSR1(.false.) - call MPI_ALLREDUCE(interface_SIGUSR1,set_signal,1,MPI_LOGICAL,MPI_LAND,PETSC_COMM_WORLD,ierr) endif - if (mod(inc,loadCases(l)%f_restart) == 0 .or. interface_SIGUSR2) then - call MPI_ALLREDUCE(interface_SIGUSR2,set_signal1,1,MPI_LOGICAL,MPI_LAND,PETSC_COMM_WORLD,ierr) + call MPI_ALLREDUCE(interface_SIGUSR2,signal,1,MPI_LOGICAL,MPI_LAND,PETSC_COMM_WORLD,ierr) + call interface_setSIGUSR2(.false.) + if (mod(inc,loadCases(l)%f_restart) == 0 .or. signal) then call mech_restartWrite call CPFEM_restartWrite - call interface_setSIGUSR2(.false.) - call MPI_ALLREDUCE(interface_SIGUSR2,set_signal1,1,MPI_LOGICAL,MPI_LAND,PETSC_COMM_WORLD,ierr) endif - if (interface_SIGTERM) exit loadCaseLooping + call MPI_ALLREDUCE(interface_SIGTERM,signal,1,MPI_LOGICAL,MPI_LAND,PETSC_COMM_WORLD,ierr) + if (signal) exit loadCaseLooping endif skipping enddo incLooping From 0a3d43e0fc6be8b908dc2e6ca9354e73bb09e3a4 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Thu, 17 Dec 2020 15:17:20 +0100 Subject: [PATCH 06/11] only one process gets the signal: use OR, not AND --- src/grid/DAMASK_grid.f90 | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/grid/DAMASK_grid.f90 b/src/grid/DAMASK_grid.f90 index ef5546f7e..d09469322 100644 --- a/src/grid/DAMASK_grid.f90 +++ b/src/grid/DAMASK_grid.f90 @@ -444,20 +444,23 @@ program DAMASK_grid print'(/,a,i0,a)', ' increment ', totalIncsCounter, ' NOT converged' endif; flush(IO_STDOUT) - call MPI_ALLREDUCE(interface_SIGUSR1,signal,1,MPI_LOGICAL,MPI_LAND,PETSC_COMM_WORLD,ierr) + call MPI_Allreduce(interface_SIGUSR1,signal,1,MPI_LOGICAL,MPI_LOR,PETSC_COMM_WORLD,ierr) + if (ierr /= 0) error stop 'MPI error' call interface_setSIGUSR1(.false.) if (mod(inc,loadCases(l)%f_out) == 0 .or. signal) then print'(1/,a)', ' ... 
writing results to file ......................................' flush(IO_STDOUT) call CPFEM_results(totalIncsCounter,time) endif - call MPI_ALLREDUCE(interface_SIGUSR2,signal,1,MPI_LOGICAL,MPI_LAND,PETSC_COMM_WORLD,ierr) + call MPI_Allreduce(interface_SIGUSR2,signal,1,MPI_LOGICAL,MPI_LOR,PETSC_COMM_WORLD,ierr) + if (ierr /= 0) error stop 'MPI error' call interface_setSIGUSR2(.false.) if (mod(inc,loadCases(l)%f_restart) == 0 .or. signal) then call mech_restartWrite call CPFEM_restartWrite endif - call MPI_ALLREDUCE(interface_SIGTERM,signal,1,MPI_LOGICAL,MPI_LAND,PETSC_COMM_WORLD,ierr) + call MPI_Allreduce(interface_SIGTERM,signal,1,MPI_LOGICAL,MPI_LOR,PETSC_COMM_WORLD,ierr) + if (ierr /= 0) error stop 'MPI error' if (signal) exit loadCaseLooping endif skipping From a2b5178b4535028e9aafd9c9f3cefe9954a33ea2 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sun, 17 Jan 2021 21:56:19 +0100 Subject: [PATCH 07/11] polishing --- PRIVATE | 2 +- src/DAMASK_interface.f90 | 68 +++++++++++++++++++++++----------------- src/grid/DAMASK_grid.f90 | 4 +-- 3 files changed, 43 insertions(+), 31 deletions(-) diff --git a/PRIVATE b/PRIVATE index b1a31a79c..047e5f361 160000 --- a/PRIVATE +++ b/PRIVATE @@ -1 +1 @@ -Subproject commit b1a31a79cc90d458494068a96cfd3e9497aa330c +Subproject commit 047e5f3613e284a5ceb1f8b07119e8231bb94d4d diff --git a/src/DAMASK_interface.f90 b/src/DAMASK_interface.f90 index ffb8a0442..ab64dcf01 100644 --- a/src/DAMASK_interface.f90 +++ b/src/DAMASK_interface.f90 @@ -386,24 +386,14 @@ end function makeRelativePath subroutine catchSIGTERM(signal) bind(C) integer(C_INT), value :: signal + + + print'(a,i0)', ' received signal ',signal call interface_setSIGTERM(.true.) - print'(a,i0,a)', ' received signal ',signal, ', set SIGTERM=TRUE' - end subroutine catchSIGTERM -!-------------------------------------------------------------------------------------------------- -!> @brief Set global variable interface_SIGTERM. 
-!-------------------------------------------------------------------------------------------------- -subroutine interface_setSIGTERM(state) - - logical, intent(in) :: state - interface_SIGTERM = state - -end subroutine interface_setSIGTERM - - !-------------------------------------------------------------------------------------------------- !> @brief Set global variable interface_SIGUSR1 to .true. !> @details This function can be registered to catch signals send to the executable. @@ -411,24 +401,14 @@ end subroutine interface_setSIGTERM subroutine catchSIGUSR1(signal) bind(C) integer(C_INT), value :: signal + + + print'(a,i0)', ' received signal ',signal call interface_setSIGUSR1(.true.) - print'(a,i0,a)', ' received signal ',signal, ', set SIGUSR1=TRUE' - end subroutine catchSIGUSR1 -!-------------------------------------------------------------------------------------------------- -!> @brief Set global variable interface_SIGUSR. -!-------------------------------------------------------------------------------------------------- -subroutine interface_setSIGUSR1(state) - - logical, intent(in) :: state - interface_SIGUSR1 = state - -end subroutine interface_setSIGUSR1 - - !-------------------------------------------------------------------------------------------------- !> @brief Set global variable interface_SIGUSR2 to .true. !> @details This function can be registered to catch signals send to the executable. @@ -436,20 +416,52 @@ end subroutine interface_setSIGUSR1 subroutine catchSIGUSR2(signal) bind(C) integer(C_INT), value :: signal + + + print'(a,i0,a)', ' received signal ',signal call interface_setSIGUSR2(.true.) - print'(a,i0,a)', ' received signal ',signal, ', set SIGUSR2=TRUE' - end subroutine catchSIGUSR2 +!-------------------------------------------------------------------------------------------------- +!> @brief Set global variable interface_SIGTERM. 
+!-------------------------------------------------------------------------------------------------- +subroutine interface_setSIGTERM(state) + + logical, intent(in) :: state + + + interface_SIGTERM = state + print*, 'set SIGTERM to',state + +end subroutine interface_setSIGTERM + + +!-------------------------------------------------------------------------------------------------- +!> @brief Set global variable interface_SIGUSR1. +!-------------------------------------------------------------------------------------------------- +subroutine interface_setSIGUSR1(state) + + logical, intent(in) :: state + + + interface_SIGUSR1 = state + print*, 'set SIGUSR1 to',state + +end subroutine interface_setSIGUSR1 + + !-------------------------------------------------------------------------------------------------- !> @brief Set global variable interface_SIGUSR2. !-------------------------------------------------------------------------------------------------- subroutine interface_setSIGUSR2(state) logical, intent(in) :: state + + interface_SIGUSR2 = state + print*, 'set SIGUSR2 to',state end subroutine interface_setSIGUSR2 diff --git a/src/grid/DAMASK_grid.f90 b/src/grid/DAMASK_grid.f90 index 850ccf9e8..02d7b4cc3 100644 --- a/src/grid/DAMASK_grid.f90 +++ b/src/grid/DAMASK_grid.f90 @@ -453,19 +453,19 @@ program DAMASK_grid call MPI_Allreduce(interface_SIGUSR1,signal,1,MPI_LOGICAL,MPI_LOR,PETSC_COMM_WORLD,ierr) if (ierr /= 0) error stop 'MPI error' - call interface_setSIGUSR1(.false.) if (mod(inc,loadCases(l)%f_out) == 0 .or. signal) then print'(1/,a)', ' ... writing results to file ......................................' flush(IO_STDOUT) call CPFEM_results(totalIncsCounter,time) endif + if(signal) call interface_setSIGUSR1(.false.) call MPI_Allreduce(interface_SIGUSR2,signal,1,MPI_LOGICAL,MPI_LOR,PETSC_COMM_WORLD,ierr) if (ierr /= 0) error stop 'MPI error' - call interface_setSIGUSR2(.false.) if (mod(inc,loadCases(l)%f_restart) == 0 .or. 
signal) then call mech_restartWrite call CPFEM_restartWrite endif + if(signal) call interface_setSIGUSR2(.false.) call MPI_Allreduce(interface_SIGTERM,signal,1,MPI_LOGICAL,MPI_LOR,PETSC_COMM_WORLD,ierr) if (ierr /= 0) error stop 'MPI error' if (signal) exit loadCaseLooping From 04f5d8ce9a5948fa7276a2a928c6df490e06540d Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sun, 17 Jan 2021 23:59:19 +0100 Subject: [PATCH 08/11] updated test still unclear why serial signal tests are problematic --- PRIVATE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PRIVATE b/PRIVATE index 047e5f361..a12312c47 160000 --- a/PRIVATE +++ b/PRIVATE @@ -1 +1 @@ -Subproject commit 047e5f3613e284a5ceb1f8b07119e8231bb94d4d +Subproject commit a12312c475159b6490709c5558f8363806b174be From 35f3a914622e08e606968974e3bf778f439e7426 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Mon, 18 Jan 2021 07:18:31 +0100 Subject: [PATCH 09/11] fine tuning test for signal handling --- PRIVATE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PRIVATE b/PRIVATE index a12312c47..b82bf9fe6 160000 --- a/PRIVATE +++ b/PRIVATE @@ -1 +1 @@ -Subproject commit a12312c475159b6490709c5558f8363806b174be +Subproject commit b82bf9fe6412165186a01d58f9c9652be4b55db4 From bcbf63b5fc752bad224bd0bea6a805fdb345f8c2 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Mon, 18 Jan 2021 12:49:25 +0100 Subject: [PATCH 10/11] improved test --- PRIVATE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PRIVATE b/PRIVATE index b82bf9fe6..99454fe2d 160000 --- a/PRIVATE +++ b/PRIVATE @@ -1 +1 @@ -Subproject commit b82bf9fe6412165186a01d58f9c9652be4b55db4 +Subproject commit 99454fe2d3d0f2c3d0ec8c53b6cdd4a9504c1664 From c7f9c0dc301f330dc90e5af75382b11befc1d757 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Tue, 19 Jan 2021 00:49:09 +0100 Subject: [PATCH 11/11] again improving the tests for signal handling --- PRIVATE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PRIVATE 
b/PRIVATE index 17121848b..72661176e 160000 --- a/PRIVATE +++ b/PRIVATE @@ -1 +1 @@ -Subproject commit 17121848b9590fcc8ebbeb2d7c76794843ba2bdf +Subproject commit 72661176e9055cf6ec106bb61c50482c5bc78de8