From 7cf3bcb9c9976c331c0dd1fb657d187a5e71e666 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sat, 7 May 2022 21:39:43 +0200 Subject: [PATCH 01/13] easier to read --- src/polynomials.f90 | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/polynomials.f90 b/src/polynomials.f90 index 155f54bf8..55aadf948 100644 --- a/src/polynomials.f90 +++ b/src/polynomials.f90 @@ -46,14 +46,14 @@ end subroutine polynomials_init !-------------------------------------------------------------------------------------------------- !> @brief Initialize a Polynomial from Coefficients. !-------------------------------------------------------------------------------------------------- -function polynomial_from_coef(coef,x_ref) result(p) +pure function polynomial_from_coef(coef,x_ref) result(p) - real(pReal), dimension(:), intent(in) :: coef + real(pReal), dimension(0:), intent(in) :: coef real(pReal), intent(in) :: x_ref type(tPolynomial) :: p - allocate(p%coef(0:size(coef)-1),source=coef) ! should be zero based + p%coef = coef p%x_ref = x_ref end function polynomial_from_coef @@ -77,9 +77,7 @@ function polynomial_from_dict(dict,y,x) result(p) if (dict%contains(y//','//x)) then x_ref = dict%get_asFloat(x//'_ref') coef = [coef,dict%get_asFloat(y//','//x)] - if (dict%contains(y//','//x//'^2')) then - coef = [coef,dict%get_asFloat(y//','//x//'^2')] - end if + if (dict%contains(y//','//x//'^2')) coef = [coef,dict%get_asFloat(y//','//x//'^2')] else x_ref = huge(0.0_pReal) ! Simplify debugging end if @@ -173,7 +171,6 @@ subroutine selfTest if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval(even)' if (dNeq(p1%der1_at(x_ref+x),-p1%der1_at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval_der(even)' - end subroutine selfTest end module polynomials From 70b9943920d17043e9cbd27eab7d3c2a5834b053 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sat, 7 May 2022 22:47:13 +0200 Subject: [PATCH 02/13] allow to specify only quadratic term in Dict --- src/polynomials.f90 | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/src/polynomials.f90 b/src/polynomials.f90 index 55aadf948..6c11c4598 100644 --- a/src/polynomials.f90 +++ b/src/polynomials.f90 @@ -13,7 +13,7 @@ module polynomials type, public :: tPolynomial real(pReal), dimension(:), allocatable :: coef - real(pReal) :: x_ref + real(pReal) :: x_ref = huge(0.0_pReal) contains procedure, public :: at => eval procedure, public :: der1_at => eval_der1 @@ -70,6 +70,7 @@ function polynomial_from_dict(dict,y,x) result(p) real(pReal), dimension(:), allocatable :: coef real(pReal) :: x_ref + integer :: i allocate(coef(1),source=dict%get_asFloat(y)) @@ -77,9 +78,10 @@ function polynomial_from_dict(dict,y,x) result(p) if (dict%contains(y//','//x)) then x_ref = dict%get_asFloat(x//'_ref') coef = [coef,dict%get_asFloat(y//','//x)] - if (dict%contains(y//','//x//'^2')) coef = [coef,dict%get_asFloat(y//','//x//'^2')] - else - x_ref = huge(0.0_pReal) ! Simplify debugging + end if + if (dict%contains(y//','//x//'^2')) then + x_ref = dict%get_asFloat(x//'_ref') + coef = [coef,[(0.0_pReal,i=size(coef),2-1)],dict%get_asFloat(y//','//x//'^2')] end if p = Polynomial(coef,x_ref) @@ -130,15 +132,17 @@ end function eval_der1 !-------------------------------------------------------------------------------------------------- !> @brief Check correctness of polynomical functionality. !-------------------------------------------------------------------------------------------------- -subroutine selfTest +subroutine selfTest() type(tPolynomial) :: p1, p2 real(pReal), dimension(3) :: coef + integer :: i real(pReal) :: x_ref, x class(tNode), pointer :: dict - character(len=pStringLen), dimension(3) :: coef_s + character(len=pStringLen), dimension(size(coef)) :: coef_s character(len=pStringLen) :: x_ref_s, x_s, YAML_s + call random_number(coef) call random_number(x_ref) call random_number(x) @@ -150,9 +154,9 @@ subroutine selfTest p1 = polynomial(coef,x_ref) if (dNeq(p1%at(x_ref),coef(1))) error stop 'polynomial: @ref' - write(coef_s(1),*) coef(1) - write(coef_s(2),*) coef(2) - write(coef_s(3),*) coef(3) + do i = 1, size(coef_s) + write(coef_s(i),*) coef(i) + end do write(x_ref_s,*) x_ref write(x_s,*) x YAML_s = 'C: '//trim(adjustl(coef_s(1)))//IO_EOL//& @@ -163,11 +167,19 @@ subroutine selfTest p2 = polynomial(dict%asDict(),'C','T') if (dNeq(p1%at(x),p2%at(x),1.0e-10_pReal)) error stop 'polynomials: init' - p1 = polynomial(coef*[0.0_pReal,1.0_pReal,0.0_pReal],x_ref) + YAML_s = 'C: 0.0'//IO_EOL//& + 'C,T: '//trim(adjustl(coef_s(2)))//IO_EOL//& + 'T_ref: '//trim(adjustl(x_ref_s))//IO_EOL + Dict => YAML_parse_str(trim(YAML_s)) + p1 = polynomial(dict%asDict(),'C','T') if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-10_pReal)) error stop 'polynomials: eval(odd)' if (dNeq(p1%der1_at(x),p1%der1_at(5.0_pReal*x),1.0e-10_pReal)) error stop 'polynomials: eval_der(odd)' - p1 = polynomial(coef*[0.0_pReal,0.0_pReal,1.0_pReal],x_ref) + YAML_s = 'C: 0.0'//IO_EOL//& + 'C,T^2: '//trim(adjustl(coef_s(3)))//IO_EOL//& + 'T_ref: '//trim(adjustl(x_ref_s))//IO_EOL + Dict => YAML_parse_str(trim(YAML_s)) + p1 = polynomial(dict%asDict(),'C','T') if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval(even)' if (dNeq(p1%der1_at(x_ref+x),-p1%der1_at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval_der(even)' From 71e4fa222c6938033b03e16783c5f0e64bc4b314 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sat, 7 May 2022 22:53:31 +0200 Subject: [PATCH 03/13] generic code for variable order --- src/polynomials.f90 | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/polynomials.f90 b/src/polynomials.f90 index 6c11c4598..9f30fe5ac 100644 --- a/src/polynomials.f90 +++ b/src/polynomials.f90 @@ -70,7 +70,8 @@ function polynomial_from_dict(dict,y,x) result(p) real(pReal), dimension(:), allocatable :: coef real(pReal) :: x_ref - integer :: i + integer :: i, o + character(len=1) :: o_s allocate(coef(1),source=dict%get_asFloat(y)) @@ -79,10 +80,13 @@ function polynomial_from_dict(dict,y,x) result(p) x_ref = dict%get_asFloat(x//'_ref') coef = [coef,dict%get_asFloat(y//','//x)] end if - if (dict%contains(y//','//x//'^2')) then - x_ref = dict%get_asFloat(x//'_ref') - coef = [coef,[(0.0_pReal,i=size(coef),2-1)],dict%get_asFloat(y//','//x//'^2')] - end if + do o = 2,2 + write(o_s,'(I0.0)') o + if (dict%contains(y//','//x//'^'//o_s)) then + x_ref = dict%get_asFloat(x//'_ref') + coef = [coef,[(0.0_pReal,i=size(coef),o-1)],dict%get_asFloat(y//','//x//'^'//o_s)] + end if + end do p = Polynomial(coef,x_ref) From 9605a5c2decec34d7f133831cbc612eabe9d30f9 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sat, 7 May 2022 23:04:06 +0200 Subject: [PATCH 04/13] polynomial expansion up to order 4 --- src/polynomials.f90 | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/polynomials.f90 b/src/polynomials.f90 index 9f30fe5ac..321c24fb8 100644 --- a/src/polynomials.f90 +++ b/src/polynomials.f90 @@ -80,7 +80,7 @@ function polynomial_from_dict(dict,y,x) result(p) x_ref = dict%get_asFloat(x//'_ref') coef = [coef,dict%get_asFloat(y//','//x)] end if - do o = 2,2 + do o = 2,4 write(o_s,'(I0.0)') o if (dict%contains(y//','//x//'^'//o_s)) then x_ref = dict%get_asFloat(x//'_ref') @@ -139,7 +139,7 @@ end function eval_der1 subroutine selfTest() type(tPolynomial) :: p1, p2 - real(pReal), dimension(3) :: coef + real(pReal), dimension(5) :: coef integer :: i real(pReal) :: x_ref, x class(tNode), pointer :: dict @@ -166,6 +166,8 @@ subroutine selfTest() YAML_s = 'C: '//trim(adjustl(coef_s(1)))//IO_EOL//& 'C,T: '//trim(adjustl(coef_s(2)))//IO_EOL//& 'C,T^2: '//trim(adjustl(coef_s(3)))//IO_EOL//& + 'C,T^3: '//trim(adjustl(coef_s(4)))//IO_EOL//& + 'C,T^4: '//trim(adjustl(coef_s(5)))//IO_EOL//& 'T_ref: '//trim(adjustl(x_ref_s))//IO_EOL Dict => YAML_parse_str(trim(YAML_s)) p2 = polynomial(dict%asDict(),'C','T') @@ -176,16 +178,32 @@ subroutine selfTest() 'T_ref: '//trim(adjustl(x_ref_s))//IO_EOL Dict => YAML_parse_str(trim(YAML_s)) p1 = polynomial(dict%asDict(),'C','T') - if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-10_pReal)) error stop 'polynomials: eval(odd)' - if (dNeq(p1%der1_at(x),p1%der1_at(5.0_pReal*x),1.0e-10_pReal)) error stop 'polynomials: eval_der(odd)' + if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-10_pReal)) error stop 'polynomials: eval(linear)' + if (dNeq(p1%der1_at(x),p1%der1_at(5.0_pReal*x),1.0e-10_pReal)) error stop 'polynomials: eval_der(linear)' YAML_s = 'C: 0.0'//IO_EOL//& 'C,T^2: '//trim(adjustl(coef_s(3)))//IO_EOL//& 'T_ref: '//trim(adjustl(x_ref_s))//IO_EOL Dict => YAML_parse_str(trim(YAML_s)) p1 = polynomial(dict%asDict(),'C','T') - if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval(even)' - if (dNeq(p1%der1_at(x_ref+x),-p1%der1_at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval_der(even)' + if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval(quadratic)' + if (dNeq(p1%der1_at(x_ref+x),-p1%der1_at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval_der(quadratic)' + + YAML_s = 'Y: 0.0'//IO_EOL//& + 'Y,X^3: '//trim(adjustl(coef_s(2)))//IO_EOL//& + 'X_ref: '//trim(adjustl(x_ref_s))//IO_EOL + Dict => YAML_parse_str(trim(YAML_s)) + p1 = polynomial(dict%asDict(),'Y','X') + if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-10_pReal)) error stop 'polynomials: eval(cubic)' + + YAML_s = 'Y: 0.0'//IO_EOL//& + 'Y,X^4: '//trim(adjustl(coef_s(2)))//IO_EOL//& + 'X_ref: '//trim(adjustl(x_ref_s))//IO_EOL + Dict => YAML_parse_str(trim(YAML_s)) + p1 = polynomial(dict%asDict(),'Y','X') + if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1.0e-10_pReal)) error stop 'polynomials: eval(quartic)' + + end subroutine selfTest From ec184cb8feb135a95838bf5ea9c070c8ab23c2c6 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sun, 8 May 2022 08:30:58 +0200 Subject: [PATCH 05/13] need to relax absolute tolerance absolute values for quartic become quite high --- src/polynomials.f90 | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/polynomials.f90 b/src/polynomials.f90 index 321c24fb8..1a023eba1 100644 --- a/src/polynomials.f90 +++ b/src/polynomials.f90 @@ -190,19 +190,18 @@ subroutine selfTest() if (dNeq(p1%der1_at(x_ref+x),-p1%der1_at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval_der(quadratic)' YAML_s = 'Y: 0.0'//IO_EOL//& - 'Y,X^3: '//trim(adjustl(coef_s(2)))//IO_EOL//& + 'Y,X^3: '//trim(adjustl(coef_s(1)))//IO_EOL//& 'X_ref: '//trim(adjustl(x_ref_s))//IO_EOL Dict => YAML_parse_str(trim(YAML_s)) p1 = polynomial(dict%asDict(),'Y','X') - if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-10_pReal)) error stop 'polynomials: eval(cubic)' + if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-8_pReal)) error stop 'polynomials: eval(cubic)' YAML_s = 'Y: 0.0'//IO_EOL//& - 'Y,X^4: '//trim(adjustl(coef_s(2)))//IO_EOL//& + 'Y,X^4: '//trim(adjustl(coef_s(1)))//IO_EOL//& 'X_ref: '//trim(adjustl(x_ref_s))//IO_EOL Dict => YAML_parse_str(trim(YAML_s)) p1 = polynomial(dict%asDict(),'Y','X') - if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1.0e-10_pReal)) error stop 'polynomials: eval(quartic)' - + if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1.0e-6_pReal)) error stop 'polynomials: eval(quartic)' end subroutine selfTest From 72c29f744c3071001df8dd44815bb817574dc5b0 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sun, 8 May 2022 12:12:42 +0200 Subject: [PATCH 06/13] better tests --- src/polynomials.f90 | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/polynomials.f90 b/src/polynomials.f90 index 1a023eba1..2426ba48d 100644 --- a/src/polynomials.f90 +++ b/src/polynomials.f90 @@ -141,7 +141,7 @@ subroutine selfTest() type(tPolynomial) :: p1, p2 real(pReal), dimension(5) :: coef integer :: i - real(pReal) :: x_ref, x + real(pReal) :: x_ref, x, y class(tNode), pointer :: dict character(len=pStringLen), dimension(size(coef)) :: coef_s character(len=pStringLen) :: x_ref_s, x_s, YAML_s @@ -155,6 +155,9 @@ subroutine selfTest() x_ref = x_ref*10_pReal -0.5_pReal x = x*10_pReal -0.5_pReal + p1 = polynomial([coef(1)],x_ref) + if (dNeq(p1%at(x),coef(1))) error stop 'polynomial: eval(constant)' + p1 = polynomial(coef,x_ref) if (dNeq(p1%at(x_ref),coef(1))) error stop 'polynomial: @ref' @@ -172,6 +175,8 @@ subroutine selfTest() Dict => YAML_parse_str(trim(YAML_s)) p2 = polynomial(dict%asDict(),'C','T') if (dNeq(p1%at(x),p2%at(x),1.0e-10_pReal)) error stop 'polynomials: init' + y = coef(1)+coef(2)*(x-x_ref)+coef(3)*(x-x_ref)**2+coef(4)*(x-x_ref)**3+coef(5)*(x-x_ref)**4 + if (dNeq(p1%at(x),y,1.0e-7_pReal)) error stop 'polynomials: eval(full)' YAML_s = 'C: 0.0'//IO_EOL//& 'C,T: '//trim(adjustl(coef_s(2)))//IO_EOL//& @@ -189,15 +194,15 @@ subroutine selfTest() if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval(quadratic)' if (dNeq(p1%der1_at(x_ref+x),-p1%der1_at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval_der(quadratic)' - YAML_s = 'Y: 0.0'//IO_EOL//& - 'Y,X^3: '//trim(adjustl(coef_s(1)))//IO_EOL//& + YAML_s = 'Y: '//trim(adjustl(coef_s(1)))//IO_EOL//& + 'Y,X^3: '//trim(adjustl(coef_s(2)))//IO_EOL//& 'X_ref: '//trim(adjustl(x_ref_s))//IO_EOL Dict => YAML_parse_str(trim(YAML_s)) p1 = polynomial(dict%asDict(),'Y','X') - if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-8_pReal)) error stop 'polynomials: eval(cubic)' + if (dNeq(p1%at(x_ref+x)-coef(1),-(p1%at(x_ref-x)-coef(1)),1.0e-8_pReal)) error stop 'polynomials: eval(cubic)' - YAML_s = 'Y: 0.0'//IO_EOL//& - 'Y,X^4: '//trim(adjustl(coef_s(1)))//IO_EOL//& + YAML_s = 'Y: '//trim(adjustl(coef_s(1)))//IO_EOL//& + 'Y,X^4: '//trim(adjustl(coef_s(2)))//IO_EOL//& 'X_ref: '//trim(adjustl(x_ref_s))//IO_EOL Dict => YAML_parse_str(trim(YAML_s)) p1 = polynomial(dict%asDict(),'Y','X') From 240426402c95b1f9c5d13efd73e3e718d96afc87 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sun, 8 May 2022 13:32:14 +0200 Subject: [PATCH 07/13] using (faster) Horner evaluation https://rosettacode.org/wiki/Horner%27s_rule_for_polynomial_evaluation --- src/polynomials.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/polynomials.f90 b/src/polynomials.f90 index 2426ba48d..d26aa300f 100644 --- a/src/polynomials.f90 +++ b/src/polynomials.f90 @@ -105,9 +105,9 @@ pure function eval(self,x) result(y) integer :: i - y = self%coef(0) - do i = 1, ubound(self%coef,1) - y = y + self%coef(i) * (x-self%x_ref)**i + y = 0.0_pReal + do i = ubound(self%coef,1), 0 , -1 + y = y*(x-self%x_ref) + self%coef(i) enddo end function eval From 8f9fbb30e5a0062160cbccd3f9d83d58bac9ed43 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sun, 8 May 2022 15:02:57 +0200 Subject: [PATCH 08/13] use fused multiply-add where possible only possible for Intel compiler --- src/math.f90 | 28 +++++++++++++++++-------- src/phase_mechanical.f90 | 45 +++++++++++++++++++++++++++++++--------- src/polynomials.f90 | 8 +++++-- 3 files changed, 60 insertions(+), 21 deletions(-) diff --git a/src/math.f90 b/src/math.f90 index 94ad2c48d..135d2b6fd 100644 --- a/src/math.f90 +++ b/src/math.f90 @@ -82,7 +82,7 @@ contains !-------------------------------------------------------------------------------------------------- !> @brief initialization of random seed generator and internal checks !-------------------------------------------------------------------------------------------------- -subroutine math_init +subroutine math_init() real(pReal), dimension(4) :: randTest integer :: randSize @@ -1045,24 +1045,34 @@ pure subroutine math_eigh33(w,v,m) w = math_eigvalsh33(m) - v(1:3,2) = [ m(1, 2) * m(2, 3) - m(1, 3) * m(2, 2), & - m(1, 3) * m(1, 2) - m(2, 3) * m(1, 1), & - m(1, 2)**2] + v(1:3,2) = [ m(1,2) * m(2,3) - m(1,3) * m(2,2), & + m(1,3) * m(1,2) - m(2,3) * m(1,1), & + m(1,2)**2] T = maxval(abs(w)) U = max(T, T**2) threshold = sqrt(5.68e-14_pReal * U**2) - v(1:3,1) = [ v(1,2) + m(1, 3) * w(1), & - v(2,2) + m(2, 3) * w(1), & +#ifndef __INTEL_COMPILER + v(1:3,1) = [m(1,3)*w(1) + v(1,2), & + m(2,3)*w(1) + v(2,2), & +#else + v(1:3,1) = [IEEE_FMA(m(1,3),w(1),v(1,2)), & + IEEE_FMA(m(2,3),w(1),v(2,2)), & +#endif (m(1,1) - w(1)) * (m(2,2) - w(1)) - v(3,2)] norm = norm2(v(1:3, 1)) fallback1: if (norm < threshold) then call math_eigh(w,v,error,m) else fallback1 v(1:3,1) = v(1:3, 1) / norm - v(1:3,2) = [ v(1,2) + m(1, 3) * w(2), & - v(2,2) + m(2, 3) * w(2), & +#ifndef __INTEL_COMPILER + v(1:3,2) = [m(1,3)*w(2) + v(1,2), & + m(2,3)*w(2) + v(2,2), & +#else + v(1:3,2) = [IEEE_FMA(m(1,3),w(2),v(1,2)), & + IEEE_FMA(m(2,3),w(2),v(2,2)), & +#endif (m(1,1) - w(2)) * (m(2,2) - w(2)) - v(3,2)] norm = norm2(v(1:3, 2)) fallback2: if (norm < threshold) then @@ -1300,7 +1310,7 @@ end function math_clip !-------------------------------------------------------------------------------------------------- !> @brief Check correctness of some math functions. !-------------------------------------------------------------------------------------------------- -subroutine selfTest +subroutine selfTest() integer, dimension(2,4) :: & sort_in_ = reshape([+1,+5, +5,+6, -1,-1, +3,-2],[2,4]) diff --git a/src/phase_mechanical.f90 b/src/phase_mechanical.f90 index b13675e38..66b9ff090 100644 --- a/src/phase_mechanical.f90 +++ b/src/phase_mechanical.f90 @@ -680,8 +680,11 @@ function integrateStateEuler(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en) result if (any(IEEE_is_NaN(dotState))) return sizeDotState = plasticState(ph)%sizeDotState - plasticState(ph)%state(1:sizeDotState,en) = subState0 & - + dotState * Delta_t +#ifndef __INTEL_COMPILER + plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t +#else + plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0) +#endif broken = plastic_deltaState(ph,en) if(broken) return @@ -720,8 +723,11 @@ function integrateStateAdaptiveEuler(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en sizeDotState = plasticState(ph)%sizeDotState r = - dotState * 0.5_pReal * Delta_t - plasticState(ph)%state(1:sizeDotState,en) = subState0 & - + dotState * Delta_t +#ifndef __INTEL_COMPILER + plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t +#else + plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0) +#endif broken = plastic_deltaState(ph,en) if(broken) return @@ -842,12 +848,18 @@ function integrateStateRK(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en,A,B,C,DB) dotState = A(1,stage) * plastic_RKdotState(1:sizeDotState,1) do n = 2, stage - dotState = dotState & - + A(n,stage) * plastic_RKdotState(1:sizeDotState,n) +#ifndef __INTEL_COMPILER + dotState = dotState + A(n,stage)*plastic_RKdotState(1:sizeDotState,n) +#else + dotState = IEEE_FMA(A(n,stage),plastic_RKdotState(1:sizeDotState,n),dotState) +#endif enddo - plasticState(ph)%state(1:sizeDotState,en) = subState0 & - + dotState * Delta_t +#ifndef __INTEL_COMPILER + plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t +#else + plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0) +#endif broken = integrateStress(F_0+(F-F_0)*Delta_t*C(stage),subFp0,subFi0,Delta_t*C(stage), ph,en) if(broken) exit @@ -861,8 +873,11 @@ function integrateStateRK(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en,A,B,C,DB) plastic_RKdotState(1:sizeDotState,size(B)) = dotState dotState = matmul(plastic_RKdotState,B) - plasticState(ph)%state(1:sizeDotState,en) = subState0 & - + dotState * Delta_t +#ifndef __INTEL_COMPILER + plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t +#else + plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0) +#endif if(present(DB)) & broken = .not. converged(matmul(plastic_RKdotState(1:sizeDotState,1:size(DB)),DB) * Delta_t, & @@ -1146,12 +1161,18 @@ module function phase_mechanical_dPdF(Delta_t,co,ce) result(dPdF) else lhs_3333 = 0.0_pReal; rhs_3333 = 0.0_pReal do o=1,3; do p=1,3 +#ifndef __INTEL_COMPILER lhs_3333(1:3,1:3,o,p) = lhs_3333(1:3,1:3,o,p) & + matmul(invSubFi0,dLidFi(1:3,1:3,o,p)) * Delta_t lhs_3333(1:3,o,1:3,p) = lhs_3333(1:3,o,1:3,p) & + invFi*invFi(p,o) rhs_3333(1:3,1:3,o,p) = rhs_3333(1:3,1:3,o,p) & - matmul(invSubFi0,dLidS(1:3,1:3,o,p)) * Delta_t +#else + lhs_3333(1:3,1:3,o,p) = IEEE_FMA(matmul(invSubFi0,dLidFi(1:3,1:3,o,p)),Delta_t,lhs_3333(1:3,1:3,o,p)) + lhs_3333(1:3,o,1:3,p) = IEEE_FMA(invFi,invFi(p,o),lhs_3333(1:3,o,1:3,p)) + rhs_3333(1:3,1:3,o,p) = IEEE_FMA(matmul(invSubFi0,dLidS(1:3,1:3,o,p)),-Delta_t,rhs_3333(1:3,1:3,o,p)) +#endif enddo; enddo call math_invert(temp_99,error,math_3333to99(lhs_3333)) if (error) then @@ -1180,8 +1201,12 @@ module function phase_mechanical_dPdF(Delta_t,co,ce) result(dPdF) temp_3333(1:3,1:3,p,o) = matmul(matmul(temp_33_2,dLpdS(1:3,1:3,p,o)), invFi) & + matmul(temp_33_3,dLidS(1:3,1:3,p,o)) enddo; enddo +#ifndef __INTEL_COMPILER lhs_3333 = math_mul3333xx3333(dSdFe,temp_3333) * Delta_t & + math_mul3333xx3333(dSdFi,dFidS) +#else + lhs_3333 = IEEE_FMA(math_mul3333xx3333(dSdFe,temp_3333),Delta_t,math_mul3333xx3333(dSdFi,dFidS)) +#endif call math_invert(temp_99,error,math_eye(9)+math_3333to99(lhs_3333)) if (error) then diff --git a/src/polynomials.f90 b/src/polynomials.f90 index d26aa300f..9b7386ec7 100644 --- a/src/polynomials.f90 +++ b/src/polynomials.f90 @@ -106,8 +106,12 @@ pure function eval(self,x) result(y) y = 0.0_pReal - do i = ubound(self%coef,1), 0 , -1 - y = y*(x-self%x_ref) + self%coef(i) + do i = ubound(self%coef,1), 0, -1 +#ifndef __INTEL_COMPILER + y = y*(x-self%x_ref) +self%coef(i) +#else + y = IEEE_FMA(y,x-self%x_ref,self%coef(i)) +#endif enddo end function eval From b2052cb3c7549ef4d558718c072b50727465ab86 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sun, 8 May 2022 16:41:19 +0200 Subject: [PATCH 09/13] less strict tolerances values can be in the order of 1e5, so 1e-6 precision is not too bad --- src/polynomials.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/polynomials.f90 b/src/polynomials.f90 index 9b7386ec7..ae7d1709c 100644 --- a/src/polynomials.f90 +++ b/src/polynomials.f90 @@ -178,9 +178,9 @@ subroutine selfTest() 'T_ref: '//trim(adjustl(x_ref_s))//IO_EOL Dict => YAML_parse_str(trim(YAML_s)) p2 = polynomial(dict%asDict(),'C','T') - if (dNeq(p1%at(x),p2%at(x),1.0e-10_pReal)) error stop 'polynomials: init' + if (dNeq(p1%at(x),p2%at(x),1.0e-6_pReal)) error stop 'polynomials: init' y = coef(1)+coef(2)*(x-x_ref)+coef(3)*(x-x_ref)**2+coef(4)*(x-x_ref)**3+coef(5)*(x-x_ref)**4 - if (dNeq(p1%at(x),y,1.0e-7_pReal)) error stop 'polynomials: eval(full)' + if (dNeq(p1%at(x),y,1.0e-6_pReal)) error stop 'polynomials: eval(full)' YAML_s = 'C: 0.0'//IO_EOL//& 'C,T: '//trim(adjustl(coef_s(2)))//IO_EOL//& @@ -203,7 +203,7 @@ subroutine selfTest() 'X_ref: '//trim(adjustl(x_ref_s))//IO_EOL Dict => YAML_parse_str(trim(YAML_s)) p1 = polynomial(dict%asDict(),'Y','X') - if (dNeq(p1%at(x_ref+x)-coef(1),-(p1%at(x_ref-x)-coef(1)),1.0e-8_pReal)) error stop 'polynomials: eval(cubic)' + if (dNeq(p1%at(x_ref+x)-coef(1),-(p1%at(x_ref-x)-coef(1)),1.0e-8_pReal)) error stop 'polynomials: eval(cubic)' YAML_s = 'Y: '//trim(adjustl(coef_s(1)))//IO_EOL//& 'Y,X^4: '//trim(adjustl(coef_s(2)))//IO_EOL//& From d713026f7e31e060131c6c50733f9bcd0b7442c6 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sun, 8 May 2022 16:49:30 +0200 Subject: [PATCH 10/13] fast evaluation for (most common) case of constant --- src/polynomials.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/polynomials.f90 b/src/polynomials.f90 index ae7d1709c..ffd70b61c 100644 --- a/src/polynomials.f90 +++ b/src/polynomials.f90 @@ -105,8 +105,8 @@ pure function eval(self,x) result(y) integer :: i - y = 0.0_pReal - do i = ubound(self%coef,1), 0, -1 + y = self%coef(ubound(self%coef,1)) + do i = ubound(self%coef,1)-1, 0, -1 #ifndef __INTEL_COMPILER y = y*(x-self%x_ref) +self%coef(i) #else From 10d8a63cb656aed2a529abd2db63019f235941e2 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sun, 8 May 2022 17:18:15 +0200 Subject: [PATCH 11/13] not used and current code is not good (not using Horner scheme) --- src/polynomials.f90 | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/polynomials.f90 b/src/polynomials.f90 index ffd70b61c..61f20e796 100644 --- a/src/polynomials.f90 +++ b/src/polynomials.f90 @@ -16,7 +16,6 @@ module polynomials real(pReal) :: x_ref = huge(0.0_pReal) contains procedure, public :: at => eval - procedure, public :: der1_at => eval_der1 end type tPolynomial interface polynomial @@ -117,26 +116,6 @@ pure function eval(self,x) result(y) end function eval -!-------------------------------------------------------------------------------------------------- -!> @brief Evaluate a first derivative of Polynomial. -!-------------------------------------------------------------------------------------------------- -pure function eval_der1(self,x) result(y) - - class(tPolynomial), intent(in) :: self - real(pReal), intent(in) :: x - real(pReal) :: y - - integer :: i - - - y = 0.0_pReal - do i = 1, ubound(self%coef,1) - y = y + real(i,pReal)*self%coef(i) * (x-self%x_ref)**(i-1) - enddo - -end function eval_der1 - - !-------------------------------------------------------------------------------------------------- !> @brief Check correctness of polynomical functionality. !-------------------------------------------------------------------------------------------------- @@ -188,7 +167,6 @@ subroutine selfTest() Dict => YAML_parse_str(trim(YAML_s)) p1 = polynomial(dict%asDict(),'C','T') if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-10_pReal)) error stop 'polynomials: eval(linear)' - if (dNeq(p1%der1_at(x),p1%der1_at(5.0_pReal*x),1.0e-10_pReal)) error stop 'polynomials: eval_der(linear)' YAML_s = 'C: 0.0'//IO_EOL//& 'C,T^2: '//trim(adjustl(coef_s(3)))//IO_EOL//& @@ -196,7 +174,6 @@ subroutine selfTest() Dict => YAML_parse_str(trim(YAML_s)) p1 = polynomial(dict%asDict(),'C','T') if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval(quadratic)' - if (dNeq(p1%der1_at(x_ref+x),-p1%der1_at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval_der(quadratic)' YAML_s = 'Y: '//trim(adjustl(coef_s(1)))//IO_EOL//& 'Y,X^3: '//trim(adjustl(coef_s(2)))//IO_EOL//& From b376b10b7a876b0cb9c7fd2de7d4093a45b21a77 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sun, 8 May 2022 17:47:20 +0200 Subject: [PATCH 12/13] classic Intel gives FPE --- src/math.f90 | 4 ++-- src/phase_mechanical.f90 | 14 +++++++------- src/polynomials.f90 | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/math.f90 b/src/math.f90 index 135d2b6fd..25c90ccf4 100644 --- a/src/math.f90 +++ b/src/math.f90 @@ -1053,7 +1053,7 @@ pure subroutine math_eigh33(w,v,m) U = max(T, T**2) threshold = sqrt(5.68e-14_pReal * U**2) -#ifndef __INTEL_COMPILER +#ifndef __INTEL_LLVM_COMPILER v(1:3,1) = [m(1,3)*w(1) + v(1,2), & m(2,3)*w(1) + v(2,2), & #else @@ -1066,7 +1066,7 @@ pure subroutine math_eigh33(w,v,m) call math_eigh(w,v,error,m) else fallback1 v(1:3,1) = v(1:3, 1) / norm -#ifndef __INTEL_COMPILER +#ifndef __INTEL_LLVM_COMPILER v(1:3,2) = [m(1,3)*w(2) + v(1,2), & m(2,3)*w(2) + v(2,2), & #else diff --git a/src/phase_mechanical.f90 b/src/phase_mechanical.f90 index 66b9ff090..fc9e40e02 100644 --- a/src/phase_mechanical.f90 +++ b/src/phase_mechanical.f90 @@ -680,7 +680,7 @@ function integrateStateEuler(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en) result if (any(IEEE_is_NaN(dotState))) return sizeDotState = plasticState(ph)%sizeDotState -#ifndef __INTEL_COMPILER +#ifndef __INTEL_LLVM_COMPILER plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t #else plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0) @@ -723,7 +723,7 @@ function integrateStateAdaptiveEuler(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en sizeDotState = plasticState(ph)%sizeDotState r = - dotState * 0.5_pReal * Delta_t -#ifndef __INTEL_COMPILER +#ifndef __INTEL_LLVM_COMPILER plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t #else plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0) @@ -848,14 +848,14 @@ function integrateStateRK(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en,A,B,C,DB) dotState = A(1,stage) * plastic_RKdotState(1:sizeDotState,1) do n = 2, stage -#ifndef __INTEL_COMPILER +#ifndef __INTEL_LLVM_COMPILER dotState = dotState + A(n,stage)*plastic_RKdotState(1:sizeDotState,n) #else dotState = IEEE_FMA(A(n,stage),plastic_RKdotState(1:sizeDotState,n),dotState) #endif enddo -#ifndef __INTEL_COMPILER +#ifndef __INTEL_LLVM_COMPILER plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t #else plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0) @@ -873,7 +873,7 @@ function integrateStateRK(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en,A,B,C,DB) plastic_RKdotState(1:sizeDotState,size(B)) = dotState dotState = matmul(plastic_RKdotState,B) -#ifndef __INTEL_COMPILER +#ifndef __INTEL_LLVM_COMPILER plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t #else plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0) @@ -1161,7 +1161,7 @@ module function phase_mechanical_dPdF(Delta_t,co,ce) result(dPdF) else lhs_3333 = 0.0_pReal; rhs_3333 = 0.0_pReal do o=1,3; do p=1,3 -#ifndef __INTEL_COMPILER +#ifndef __INTEL_LLVM_COMPILER lhs_3333(1:3,1:3,o,p) = lhs_3333(1:3,1:3,o,p) & + matmul(invSubFi0,dLidFi(1:3,1:3,o,p)) * Delta_t lhs_3333(1:3,o,1:3,p) = lhs_3333(1:3,o,1:3,p) & @@ -1201,7 +1201,7 @@ module function phase_mechanical_dPdF(Delta_t,co,ce) result(dPdF) temp_3333(1:3,1:3,p,o) = matmul(matmul(temp_33_2,dLpdS(1:3,1:3,p,o)), invFi) & + matmul(temp_33_3,dLidS(1:3,1:3,p,o)) enddo; enddo -#ifndef __INTEL_COMPILER +#ifndef __INTEL_LLVM_COMPILER lhs_3333 = math_mul3333xx3333(dSdFe,temp_3333) * Delta_t & + math_mul3333xx3333(dSdFi,dFidS) #else diff --git a/src/polynomials.f90 b/src/polynomials.f90 index 61f20e796..46e338b19 100644 --- a/src/polynomials.f90 +++ b/src/polynomials.f90 @@ -106,7 +106,7 @@ pure function eval(self,x) result(y) y = self%coef(ubound(self%coef,1)) do i = ubound(self%coef,1)-1, 0, -1 -#ifndef __INTEL_COMPILER +#ifndef __INTEL_LLVM_COMPILER y = y*(x-self%x_ref) +self%coef(i) #else y = IEEE_FMA(y,x-self%x_ref,self%coef(i)) From 53796fce7a6ea194981bb9165b22df95b5724eb7 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Sun, 8 May 2022 19:57:25 +0200 Subject: [PATCH 13/13] trustworthy reference --- src/polynomials.f90 | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/polynomials.f90 b/src/polynomials.f90 index 46e338b19..c883528ca 100644 --- a/src/polynomials.f90 +++ b/src/polynomials.f90 @@ -94,6 +94,7 @@ end function polynomial_from_dict !-------------------------------------------------------------------------------------------------- !> @brief Evaluate a Polynomial. +!> @details https://nvlpubs.nist.gov/nistpubs/jres/71b/jresv71bn1p11_a1b.pdf (eq. 1.2) !-------------------------------------------------------------------------------------------------- pure function eval(self,x) result(y) @@ -101,15 +102,15 @@ pure function eval(self,x) result(y) real(pReal), intent(in) :: x real(pReal) :: y - integer :: i + integer :: o y = self%coef(ubound(self%coef,1)) - do i = ubound(self%coef,1)-1, 0, -1 + do o = ubound(self%coef,1)-1, 0, -1 #ifndef __INTEL_LLVM_COMPILER - y = y*(x-self%x_ref) +self%coef(i) + y = y*(x-self%x_ref) +self%coef(o) #else - y = IEEE_FMA(y,x-self%x_ref,self%coef(i)) + y = IEEE_FMA(y,x-self%x_ref,self%coef(o)) #endif enddo