From 7cf3bcb9c9976c331c0dd1fb657d187a5e71e666 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Sat, 7 May 2022 21:39:43 +0200
Subject: [PATCH 01/13] easier to read

---
 src/polynomials.f90 | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/polynomials.f90 b/src/polynomials.f90
index 155f54bf8..55aadf948 100644
--- a/src/polynomials.f90
+++ b/src/polynomials.f90
@@ -46,14 +46,14 @@ end subroutine polynomials_init
 !--------------------------------------------------------------------------------------------------
 !> @brief Initialize a Polynomial from Coefficients.
 !--------------------------------------------------------------------------------------------------
-function polynomial_from_coef(coef,x_ref) result(p)
+pure function polynomial_from_coef(coef,x_ref) result(p)
 
-  real(pReal), dimension(:), intent(in) :: coef
+  real(pReal), dimension(0:), intent(in) :: coef
   real(pReal), intent(in) :: x_ref
   type(tPolynomial) :: p
 
 
-  allocate(p%coef(0:size(coef)-1),source=coef)                                                      ! should be zero based
+  p%coef = coef
   p%x_ref = x_ref
 
 end function polynomial_from_coef
@@ -77,9 +77,7 @@ function polynomial_from_dict(dict,y,x) result(p)
   if (dict%contains(y//','//x)) then
     x_ref = dict%get_asFloat(x//'_ref')
     coef = [coef,dict%get_asFloat(y//','//x)]
-    if (dict%contains(y//','//x//'^2')) then
-      coef = [coef,dict%get_asFloat(y//','//x//'^2')]
-    end if
+    if (dict%contains(y//','//x//'^2')) coef = [coef,dict%get_asFloat(y//','//x//'^2')]
   else
     x_ref = huge(0.0_pReal)                                                                         ! Simplify debugging
   end if
@@ -173,7 +171,6 @@ subroutine selfTest
   if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1e-10_pReal))            error stop 'polynomials: eval(even)'
   if (dNeq(p1%der1_at(x_ref+x),-p1%der1_at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval_der(even)'
 
-
 end subroutine selfTest
 
 end module polynomials

From 70b9943920d17043e9cbd27eab7d3c2a5834b053 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Sat, 7 May 2022 22:47:13 +0200
Subject: [PATCH 02/13] allow to specify only quadratic term in Dict

---
 src/polynomials.f90 | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/src/polynomials.f90 b/src/polynomials.f90
index 55aadf948..6c11c4598 100644
--- a/src/polynomials.f90
+++ b/src/polynomials.f90
@@ -13,7 +13,7 @@ module polynomials
 
   type, public :: tPolynomial
     real(pReal), dimension(:), allocatable :: coef
-    real(pReal) :: x_ref
+    real(pReal) :: x_ref = huge(0.0_pReal)
     contains
     procedure, public :: at => eval
     procedure, public :: der1_at => eval_der1
@@ -70,6 +70,7 @@ function polynomial_from_dict(dict,y,x) result(p)
 
   real(pReal), dimension(:), allocatable :: coef
   real(pReal) :: x_ref
+  integer :: i
 
 
   allocate(coef(1),source=dict%get_asFloat(y))
@@ -77,9 +78,10 @@ function polynomial_from_dict(dict,y,x) result(p)
   if (dict%contains(y//','//x)) then
     x_ref = dict%get_asFloat(x//'_ref')
     coef = [coef,dict%get_asFloat(y//','//x)]
-    if (dict%contains(y//','//x//'^2')) coef = [coef,dict%get_asFloat(y//','//x//'^2')]
-  else
-    x_ref = huge(0.0_pReal)                                                                         ! Simplify debugging
+  end if
+  if (dict%contains(y//','//x//'^2')) then
+    x_ref = dict%get_asFloat(x//'_ref')
+    coef = [coef,[(0.0_pReal,i=size(coef),2-1)],dict%get_asFloat(y//','//x//'^2')]
   end if
 
   p = Polynomial(coef,x_ref)
@@ -130,15 +132,17 @@ end function eval_der1
 !--------------------------------------------------------------------------------------------------
 !> @brief Check correctness of polynomical functionality.
 !--------------------------------------------------------------------------------------------------
-subroutine selfTest
+subroutine selfTest()
 
   type(tPolynomial) :: p1, p2
   real(pReal), dimension(3) :: coef
+  integer :: i
   real(pReal) :: x_ref, x
   class(tNode), pointer :: dict
-  character(len=pStringLen), dimension(3) :: coef_s
+  character(len=pStringLen), dimension(size(coef)) :: coef_s
   character(len=pStringLen) :: x_ref_s, x_s, YAML_s
 
+
   call random_number(coef)
   call random_number(x_ref)
   call random_number(x)
@@ -150,9 +154,9 @@ subroutine selfTest
   p1 = polynomial(coef,x_ref)
   if (dNeq(p1%at(x_ref),coef(1)))  error stop 'polynomial: @ref'
 
-  write(coef_s(1),*) coef(1)
-  write(coef_s(2),*) coef(2)
-  write(coef_s(3),*) coef(3)
+  do i = 1, size(coef_s)
+    write(coef_s(i),*) coef(i)
+  end do
   write(x_ref_s,*) x_ref
   write(x_s,*) x
   YAML_s = 'C: '//trim(adjustl(coef_s(1)))//IO_EOL//&
@@ -163,11 +167,19 @@ subroutine selfTest
   p2 = polynomial(dict%asDict(),'C','T')
   if (dNeq(p1%at(x),p2%at(x),1.0e-10_pReal))                      error stop 'polynomials: init'
 
-  p1 = polynomial(coef*[0.0_pReal,1.0_pReal,0.0_pReal],x_ref)
+  YAML_s = 'C: 0.0'//IO_EOL//&
+           'C,T: '//trim(adjustl(coef_s(2)))//IO_EOL//&
+           'T_ref: '//trim(adjustl(x_ref_s))//IO_EOL
+  Dict => YAML_parse_str(trim(YAML_s))
+  p1 = polynomial(dict%asDict(),'C','T')
   if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-10_pReal))         error stop 'polynomials: eval(odd)'
   if (dNeq(p1%der1_at(x),p1%der1_at(5.0_pReal*x),1.0e-10_pReal))  error stop 'polynomials: eval_der(odd)'
 
-  p1 = polynomial(coef*[0.0_pReal,0.0_pReal,1.0_pReal],x_ref)
+  YAML_s = 'C: 0.0'//IO_EOL//&
+           'C,T^2: '//trim(adjustl(coef_s(3)))//IO_EOL//&
+           'T_ref: '//trim(adjustl(x_ref_s))//IO_EOL
+  Dict => YAML_parse_str(trim(YAML_s))
+  p1 = polynomial(dict%asDict(),'C','T')
   if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1e-10_pReal))            error stop 'polynomials: eval(even)'
   if (dNeq(p1%der1_at(x_ref+x),-p1%der1_at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval_der(even)'
 

From 71e4fa222c6938033b03e16783c5f0e64bc4b314 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Sat, 7 May 2022 22:53:31 +0200
Subject: [PATCH 03/13] generic code for variable order

---
 src/polynomials.f90 | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/polynomials.f90 b/src/polynomials.f90
index 6c11c4598..9f30fe5ac 100644
--- a/src/polynomials.f90
+++ b/src/polynomials.f90
@@ -70,7 +70,8 @@ function polynomial_from_dict(dict,y,x) result(p)
 
   real(pReal), dimension(:), allocatable :: coef
   real(pReal) :: x_ref
-  integer :: i
+  integer :: i, o
+  character(len=1) :: o_s
 
 
   allocate(coef(1),source=dict%get_asFloat(y))
@@ -79,10 +80,13 @@ function polynomial_from_dict(dict,y,x) result(p)
     x_ref = dict%get_asFloat(x//'_ref')
     coef = [coef,dict%get_asFloat(y//','//x)]
   end if
-  if (dict%contains(y//','//x//'^2')) then
-    x_ref = dict%get_asFloat(x//'_ref')
-    coef = [coef,[(0.0_pReal,i=size(coef),2-1)],dict%get_asFloat(y//','//x//'^2')]
-  end if
+  do o = 2,2
+    write(o_s,'(I0.0)') o
+    if (dict%contains(y//','//x//'^'//o_s)) then
+      x_ref = dict%get_asFloat(x//'_ref')
+      coef = [coef,[(0.0_pReal,i=size(coef),o-1)],dict%get_asFloat(y//','//x//'^'//o_s)]
+    end if
+  end do
 
   p = Polynomial(coef,x_ref)
 

From 9605a5c2decec34d7f133831cbc612eabe9d30f9 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Sat, 7 May 2022 23:04:06 +0200
Subject: [PATCH 04/13] polynomial expansion up to order 4

---
 src/polynomials.f90 | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/src/polynomials.f90 b/src/polynomials.f90
index 9f30fe5ac..321c24fb8 100644
--- a/src/polynomials.f90
+++ b/src/polynomials.f90
@@ -80,7 +80,7 @@ function polynomial_from_dict(dict,y,x) result(p)
     x_ref = dict%get_asFloat(x//'_ref')
     coef = [coef,dict%get_asFloat(y//','//x)]
   end if
-  do o = 2,2
+  do o = 2,4
     write(o_s,'(I0.0)') o
     if (dict%contains(y//','//x//'^'//o_s)) then
       x_ref = dict%get_asFloat(x//'_ref')
@@ -139,7 +139,7 @@ end function eval_der1
 subroutine selfTest()
 
   type(tPolynomial) :: p1, p2
-  real(pReal), dimension(3) :: coef
+  real(pReal), dimension(5) :: coef
   integer :: i
   real(pReal) :: x_ref, x
   class(tNode), pointer :: dict
@@ -166,6 +166,8 @@ subroutine selfTest()
   YAML_s = 'C: '//trim(adjustl(coef_s(1)))//IO_EOL//&
            'C,T: '//trim(adjustl(coef_s(2)))//IO_EOL//&
            'C,T^2: '//trim(adjustl(coef_s(3)))//IO_EOL//&
+           'C,T^3: '//trim(adjustl(coef_s(4)))//IO_EOL//&
+           'C,T^4: '//trim(adjustl(coef_s(5)))//IO_EOL//&
            'T_ref: '//trim(adjustl(x_ref_s))//IO_EOL
   Dict => YAML_parse_str(trim(YAML_s))
   p2 = polynomial(dict%asDict(),'C','T')
@@ -176,16 +178,32 @@ subroutine selfTest()
            'T_ref: '//trim(adjustl(x_ref_s))//IO_EOL
   Dict => YAML_parse_str(trim(YAML_s))
   p1 = polynomial(dict%asDict(),'C','T')
-  if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-10_pReal))         error stop 'polynomials: eval(odd)'
-  if (dNeq(p1%der1_at(x),p1%der1_at(5.0_pReal*x),1.0e-10_pReal))  error stop 'polynomials: eval_der(odd)'
+  if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-10_pReal))         error stop 'polynomials: eval(linear)'
+  if (dNeq(p1%der1_at(x),p1%der1_at(5.0_pReal*x),1.0e-10_pReal))  error stop 'polynomials: eval_der(linear)'
 
   YAML_s = 'C: 0.0'//IO_EOL//&
            'C,T^2: '//trim(adjustl(coef_s(3)))//IO_EOL//&
            'T_ref: '//trim(adjustl(x_ref_s))//IO_EOL
   Dict => YAML_parse_str(trim(YAML_s))
   p1 = polynomial(dict%asDict(),'C','T')
-  if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1e-10_pReal))            error stop 'polynomials: eval(even)'
-  if (dNeq(p1%der1_at(x_ref+x),-p1%der1_at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval_der(even)'
+  if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1e-10_pReal))            error stop 'polynomials: eval(quadratic)'
+  if (dNeq(p1%der1_at(x_ref+x),-p1%der1_at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval_der(quadratic)'
+
+  YAML_s = 'Y: 0.0'//IO_EOL//&
+           'Y,X^3: '//trim(adjustl(coef_s(2)))//IO_EOL//&
+           'X_ref: '//trim(adjustl(x_ref_s))//IO_EOL
+  Dict => YAML_parse_str(trim(YAML_s))
+  p1 = polynomial(dict%asDict(),'Y','X')
+  if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-10_pReal))         error stop 'polynomials: eval(cubic)'
+
+  YAML_s = 'Y: 0.0'//IO_EOL//&
+           'Y,X^4: '//trim(adjustl(coef_s(2)))//IO_EOL//&
+           'X_ref: '//trim(adjustl(x_ref_s))//IO_EOL
+  Dict => YAML_parse_str(trim(YAML_s))
+  p1 = polynomial(dict%asDict(),'Y','X')
+  if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1.0e-10_pReal))          error stop 'polynomials: eval(quartic)'
+
+
 
 end subroutine selfTest
 

From ec184cb8feb135a95838bf5ea9c070c8ab23c2c6 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Sun, 8 May 2022 08:30:58 +0200
Subject: [PATCH 05/13] need to relax absolute tolerance

absolute values for quartic become quite high
---
 src/polynomials.f90 | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/polynomials.f90 b/src/polynomials.f90
index 321c24fb8..1a023eba1 100644
--- a/src/polynomials.f90
+++ b/src/polynomials.f90
@@ -190,19 +190,18 @@ subroutine selfTest()
   if (dNeq(p1%der1_at(x_ref+x),-p1%der1_at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval_der(quadratic)'
 
   YAML_s = 'Y: 0.0'//IO_EOL//&
-           'Y,X^3: '//trim(adjustl(coef_s(2)))//IO_EOL//&
+           'Y,X^3: '//trim(adjustl(coef_s(1)))//IO_EOL//&
            'X_ref: '//trim(adjustl(x_ref_s))//IO_EOL
   Dict => YAML_parse_str(trim(YAML_s))
   p1 = polynomial(dict%asDict(),'Y','X')
-  if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-10_pReal))         error stop 'polynomials: eval(cubic)'
+  if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-8_pReal))          error stop 'polynomials: eval(cubic)'
 
   YAML_s = 'Y: 0.0'//IO_EOL//&
-           'Y,X^4: '//trim(adjustl(coef_s(2)))//IO_EOL//&
+           'Y,X^4: '//trim(adjustl(coef_s(1)))//IO_EOL//&
            'X_ref: '//trim(adjustl(x_ref_s))//IO_EOL
   Dict => YAML_parse_str(trim(YAML_s))
   p1 = polynomial(dict%asDict(),'Y','X')
-  if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1.0e-10_pReal))          error stop 'polynomials: eval(quartic)'
-
+  if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1.0e-6_pReal))           error stop 'polynomials: eval(quartic)'
 
 
 end subroutine selfTest

From 72c29f744c3071001df8dd44815bb817574dc5b0 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Sun, 8 May 2022 12:12:42 +0200
Subject: [PATCH 06/13] better tests

---
 src/polynomials.f90 | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/polynomials.f90 b/src/polynomials.f90
index 1a023eba1..2426ba48d 100644
--- a/src/polynomials.f90
+++ b/src/polynomials.f90
@@ -141,7 +141,7 @@ subroutine selfTest()
   type(tPolynomial) :: p1, p2
   real(pReal), dimension(5) :: coef
   integer :: i
-  real(pReal) :: x_ref, x
+  real(pReal) :: x_ref, x, y
   class(tNode), pointer :: dict
   character(len=pStringLen), dimension(size(coef)) :: coef_s
   character(len=pStringLen) :: x_ref_s, x_s, YAML_s
@@ -155,6 +155,9 @@ subroutine selfTest()
   x_ref = x_ref*10_pReal -0.5_pReal
   x = x*10_pReal -0.5_pReal
 
+  p1 = polynomial([coef(1)],x_ref)
+  if (dNeq(p1%at(x),coef(1)))      error stop 'polynomial: eval(constant)'
+
   p1 = polynomial(coef,x_ref)
   if (dNeq(p1%at(x_ref),coef(1)))  error stop 'polynomial: @ref'
 
@@ -172,6 +175,8 @@ subroutine selfTest()
   Dict => YAML_parse_str(trim(YAML_s))
   p2 = polynomial(dict%asDict(),'C','T')
   if (dNeq(p1%at(x),p2%at(x),1.0e-10_pReal))                      error stop 'polynomials: init'
+  y = coef(1)+coef(2)*(x-x_ref)+coef(3)*(x-x_ref)**2+coef(4)*(x-x_ref)**3+coef(5)*(x-x_ref)**4
+  if (dNeq(p1%at(x),y,1.0e-7_pReal))                             error stop 'polynomials: eval(full)'
 
   YAML_s = 'C: 0.0'//IO_EOL//&
            'C,T: '//trim(adjustl(coef_s(2)))//IO_EOL//&
@@ -189,15 +194,15 @@ subroutine selfTest()
   if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1e-10_pReal))            error stop 'polynomials: eval(quadratic)'
   if (dNeq(p1%der1_at(x_ref+x),-p1%der1_at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval_der(quadratic)'
 
-  YAML_s = 'Y: 0.0'//IO_EOL//&
-           'Y,X^3: '//trim(adjustl(coef_s(1)))//IO_EOL//&
+  YAML_s = 'Y: '//trim(adjustl(coef_s(1)))//IO_EOL//&
+           'Y,X^3: '//trim(adjustl(coef_s(2)))//IO_EOL//&
            'X_ref: '//trim(adjustl(x_ref_s))//IO_EOL
   Dict => YAML_parse_str(trim(YAML_s))
   p1 = polynomial(dict%asDict(),'Y','X')
-  if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-8_pReal))          error stop 'polynomials: eval(cubic)'
+  if (dNeq(p1%at(x_ref+x)-coef(1),-(p1%at(x_ref-x)-coef(1)),1.0e-8_pReal))          error stop 'polynomials: eval(cubic)'
 
-  YAML_s = 'Y: 0.0'//IO_EOL//&
-           'Y,X^4: '//trim(adjustl(coef_s(1)))//IO_EOL//&
+  YAML_s = 'Y: '//trim(adjustl(coef_s(1)))//IO_EOL//&
+           'Y,X^4: '//trim(adjustl(coef_s(2)))//IO_EOL//&
            'X_ref: '//trim(adjustl(x_ref_s))//IO_EOL
   Dict => YAML_parse_str(trim(YAML_s))
   p1 = polynomial(dict%asDict(),'Y','X')

From 240426402c95b1f9c5d13efd73e3e718d96afc87 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Sun, 8 May 2022 13:32:14 +0200
Subject: [PATCH 07/13] using (faster) Horner evaluation

https://rosettacode.org/wiki/Horner%27s_rule_for_polynomial_evaluation
---
 src/polynomials.f90 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/polynomials.f90 b/src/polynomials.f90
index 2426ba48d..d26aa300f 100644
--- a/src/polynomials.f90
+++ b/src/polynomials.f90
@@ -105,9 +105,9 @@ pure function eval(self,x) result(y)
   integer :: i
 
 
-  y = self%coef(0)
-  do i = 1, ubound(self%coef,1)
-    y = y + self%coef(i) * (x-self%x_ref)**i
+  y = 0.0_pReal
+  do i = ubound(self%coef,1), 0 , -1
+    y = y*(x-self%x_ref) + self%coef(i)
   enddo
 
 end function eval

From 8f9fbb30e5a0062160cbccd3f9d83d58bac9ed43 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Sun, 8 May 2022 15:02:57 +0200
Subject: [PATCH 08/13] use fused multiply-add where possible

only possible for Intel compiler
---
 src/math.f90             | 28 +++++++++++++++++--------
 src/phase_mechanical.f90 | 45 +++++++++++++++++++++++++++++++---------
 src/polynomials.f90      |  8 +++++--
 3 files changed, 60 insertions(+), 21 deletions(-)

diff --git a/src/math.f90 b/src/math.f90
index 94ad2c48d..135d2b6fd 100644
--- a/src/math.f90
+++ b/src/math.f90
@@ -82,7 +82,7 @@ contains
 !--------------------------------------------------------------------------------------------------
 !> @brief initialization of random seed generator and internal checks
 !--------------------------------------------------------------------------------------------------
-subroutine math_init
+subroutine math_init()
 
   real(pReal), dimension(4) :: randTest
   integer :: randSize
@@ -1045,24 +1045,34 @@ pure subroutine math_eigh33(w,v,m)
 
   w = math_eigvalsh33(m)
 
-  v(1:3,2) = [ m(1, 2) * m(2, 3) - m(1, 3) * m(2, 2), &
-               m(1, 3) * m(1, 2) - m(2, 3) * m(1, 1), &
-               m(1, 2)**2]
+  v(1:3,2) = [ m(1,2) * m(2,3) - m(1,3) * m(2,2), &
+               m(1,3) * m(1,2) - m(2,3) * m(1,1), &
+               m(1,2)**2]
 
   T = maxval(abs(w))
   U = max(T, T**2)
   threshold = sqrt(5.68e-14_pReal * U**2)
 
-  v(1:3,1) = [ v(1,2) + m(1, 3) * w(1), &
-               v(2,2) + m(2, 3) * w(1), &
+#ifndef __INTEL_COMPILER
+  v(1:3,1) = [m(1,3)*w(1) + v(1,2), &
+              m(2,3)*w(1) + v(2,2), &
+#else
+  v(1:3,1) = [IEEE_FMA(m(1,3),w(1),v(1,2)), &
+              IEEE_FMA(m(2,3),w(1),v(2,2)), &
+#endif
               (m(1,1) - w(1)) * (m(2,2) - w(1)) - v(3,2)]
   norm = norm2(v(1:3, 1))
   fallback1: if (norm < threshold) then
     call math_eigh(w,v,error,m)
   else fallback1
     v(1:3,1) = v(1:3, 1) / norm
-    v(1:3,2) = [ v(1,2) + m(1, 3) * w(2), &
-                 v(2,2) + m(2, 3) * w(2), &
+#ifndef __INTEL_COMPILER
+    v(1:3,2) = [m(1,3)*w(2) + v(1,2), &
+                m(2,3)*w(2) + v(2,2), &
+#else
+    v(1:3,2) = [IEEE_FMA(m(1,3),w(2),v(1,2)), &
+                IEEE_FMA(m(2,3),w(2),v(2,2)), &
+#endif
                 (m(1,1) - w(2)) * (m(2,2) - w(2)) - v(3,2)]
     norm = norm2(v(1:3, 2))
     fallback2: if (norm < threshold) then
@@ -1300,7 +1310,7 @@ end function math_clip
 !--------------------------------------------------------------------------------------------------
 !> @brief Check correctness of some math functions.
 !--------------------------------------------------------------------------------------------------
-subroutine selfTest
+subroutine selfTest()
 
   integer, dimension(2,4) :: &
     sort_in_   = reshape([+1,+5,  +5,+6,  -1,-1,  +3,-2],[2,4])
diff --git a/src/phase_mechanical.f90 b/src/phase_mechanical.f90
index b13675e38..66b9ff090 100644
--- a/src/phase_mechanical.f90
+++ b/src/phase_mechanical.f90
@@ -680,8 +680,11 @@ function integrateStateEuler(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en) result
   if (any(IEEE_is_NaN(dotState))) return
 
   sizeDotState = plasticState(ph)%sizeDotState
-  plasticState(ph)%state(1:sizeDotState,en) = subState0 &
-                                            + dotState * Delta_t
+#ifndef __INTEL_COMPILER
+  plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t
+#else
+  plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0)
+#endif
 
   broken = plastic_deltaState(ph,en)
   if(broken) return
@@ -720,8 +723,11 @@ function integrateStateAdaptiveEuler(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en
   sizeDotState = plasticState(ph)%sizeDotState
 
   r = - dotState * 0.5_pReal * Delta_t
-  plasticState(ph)%state(1:sizeDotState,en) = subState0 &
-                                            + dotState * Delta_t
+#ifndef __INTEL_COMPILER
+  plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t
+#else
+  plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0)
+#endif
 
   broken = plastic_deltaState(ph,en)
   if(broken) return
@@ -842,12 +848,18 @@ function integrateStateRK(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en,A,B,C,DB)
     dotState = A(1,stage) * plastic_RKdotState(1:sizeDotState,1)
 
     do n = 2, stage
-      dotState = dotState &
-               + A(n,stage) * plastic_RKdotState(1:sizeDotState,n)
+#ifndef __INTEL_COMPILER
+      dotState = dotState + A(n,stage)*plastic_RKdotState(1:sizeDotState,n)
+#else
+      dotState = IEEE_FMA(A(n,stage),plastic_RKdotState(1:sizeDotState,n),dotState)
+#endif
     enddo
 
-    plasticState(ph)%state(1:sizeDotState,en) = subState0 &
-                                              + dotState * Delta_t
+#ifndef __INTEL_COMPILER
+    plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t
+#else
+    plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0)
+#endif
 
     broken = integrateStress(F_0+(F-F_0)*Delta_t*C(stage),subFp0,subFi0,Delta_t*C(stage), ph,en)
     if(broken) exit
@@ -861,8 +873,11 @@ function integrateStateRK(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en,A,B,C,DB)
 
   plastic_RKdotState(1:sizeDotState,size(B)) = dotState
   dotState = matmul(plastic_RKdotState,B)
-  plasticState(ph)%state(1:sizeDotState,en) = subState0 &
-                                            + dotState * Delta_t
+#ifndef __INTEL_COMPILER
+  plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t
+#else
+  plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0)
+#endif
 
   if(present(DB)) &
     broken = .not. converged(matmul(plastic_RKdotState(1:sizeDotState,1:size(DB)),DB) * Delta_t, &
@@ -1146,12 +1161,18 @@ module function phase_mechanical_dPdF(Delta_t,co,ce) result(dPdF)
   else
     lhs_3333 = 0.0_pReal; rhs_3333 = 0.0_pReal
     do o=1,3; do p=1,3
+#ifndef __INTEL_COMPILER
       lhs_3333(1:3,1:3,o,p) = lhs_3333(1:3,1:3,o,p) &
                             + matmul(invSubFi0,dLidFi(1:3,1:3,o,p)) * Delta_t
       lhs_3333(1:3,o,1:3,p) = lhs_3333(1:3,o,1:3,p) &
                             + invFi*invFi(p,o)
       rhs_3333(1:3,1:3,o,p) = rhs_3333(1:3,1:3,o,p) &
                             - matmul(invSubFi0,dLidS(1:3,1:3,o,p)) * Delta_t
+#else
+      lhs_3333(1:3,1:3,o,p) = IEEE_FMA(matmul(invSubFi0,dLidFi(1:3,1:3,o,p)),Delta_t,lhs_3333(1:3,1:3,o,p))
+      lhs_3333(1:3,o,1:3,p) = IEEE_FMA(invFi,invFi(p,o),lhs_3333(1:3,o,1:3,p))
+      rhs_3333(1:3,1:3,o,p) = IEEE_FMA(matmul(invSubFi0,dLidS(1:3,1:3,o,p)),-Delta_t,rhs_3333(1:3,1:3,o,p))
+#endif
     enddo; enddo
     call math_invert(temp_99,error,math_3333to99(lhs_3333))
     if (error) then
@@ -1180,8 +1201,12 @@ module function phase_mechanical_dPdF(Delta_t,co,ce) result(dPdF)
     temp_3333(1:3,1:3,p,o) = matmul(matmul(temp_33_2,dLpdS(1:3,1:3,p,o)), invFi) &
                            + matmul(temp_33_3,dLidS(1:3,1:3,p,o))
   enddo; enddo
+#ifndef __INTEL_COMPILER
   lhs_3333 = math_mul3333xx3333(dSdFe,temp_3333) * Delta_t &
            + math_mul3333xx3333(dSdFi,dFidS)
+#else
+  lhs_3333 = IEEE_FMA(math_mul3333xx3333(dSdFe,temp_3333),Delta_t,math_mul3333xx3333(dSdFi,dFidS))
+#endif
 
   call math_invert(temp_99,error,math_eye(9)+math_3333to99(lhs_3333))
   if (error) then
diff --git a/src/polynomials.f90 b/src/polynomials.f90
index d26aa300f..9b7386ec7 100644
--- a/src/polynomials.f90
+++ b/src/polynomials.f90
@@ -106,8 +106,12 @@ pure function eval(self,x) result(y)
 
 
   y = 0.0_pReal
-  do i = ubound(self%coef,1), 0 , -1
-    y = y*(x-self%x_ref) + self%coef(i)
+  do i = ubound(self%coef,1), 0, -1
+#ifndef __INTEL_COMPILER
+    y = y*(x-self%x_ref) +self%coef(i)
+#else
+    y = IEEE_FMA(y,x-self%x_ref,self%coef(i))
+#endif
   enddo
 
 end function eval

From b2052cb3c7549ef4d558718c072b50727465ab86 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Sun, 8 May 2022 16:41:19 +0200
Subject: [PATCH 09/13] less strict tolerances

values can be in the order of 1e5, so 1e-6 precision is not too bad
---
 src/polynomials.f90 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/polynomials.f90 b/src/polynomials.f90
index 9b7386ec7..ae7d1709c 100644
--- a/src/polynomials.f90
+++ b/src/polynomials.f90
@@ -178,9 +178,9 @@ subroutine selfTest()
            'T_ref: '//trim(adjustl(x_ref_s))//IO_EOL
   Dict => YAML_parse_str(trim(YAML_s))
   p2 = polynomial(dict%asDict(),'C','T')
-  if (dNeq(p1%at(x),p2%at(x),1.0e-10_pReal))                      error stop 'polynomials: init'
+  if (dNeq(p1%at(x),p2%at(x),1.0e-6_pReal))                      error stop 'polynomials: init'
   y = coef(1)+coef(2)*(x-x_ref)+coef(3)*(x-x_ref)**2+coef(4)*(x-x_ref)**3+coef(5)*(x-x_ref)**4
-  if (dNeq(p1%at(x),y,1.0e-7_pReal))                             error stop 'polynomials: eval(full)'
+  if (dNeq(p1%at(x),y,1.0e-6_pReal))                             error stop 'polynomials: eval(full)'
 
   YAML_s = 'C: 0.0'//IO_EOL//&
            'C,T: '//trim(adjustl(coef_s(2)))//IO_EOL//&
@@ -203,7 +203,7 @@ subroutine selfTest()
            'X_ref: '//trim(adjustl(x_ref_s))//IO_EOL
   Dict => YAML_parse_str(trim(YAML_s))
   p1 = polynomial(dict%asDict(),'Y','X')
-  if (dNeq(p1%at(x_ref+x)-coef(1),-(p1%at(x_ref-x)-coef(1)),1.0e-8_pReal))          error stop 'polynomials: eval(cubic)'
+  if (dNeq(p1%at(x_ref+x)-coef(1),-(p1%at(x_ref-x)-coef(1)),1.0e-8_pReal)) error stop 'polynomials: eval(cubic)'
 
   YAML_s = 'Y: '//trim(adjustl(coef_s(1)))//IO_EOL//&
            'Y,X^4: '//trim(adjustl(coef_s(2)))//IO_EOL//&

From d713026f7e31e060131c6c50733f9bcd0b7442c6 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Sun, 8 May 2022 16:49:30 +0200
Subject: [PATCH 10/13] fast evaluation for (most common) case of constant

---
 src/polynomials.f90 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/polynomials.f90 b/src/polynomials.f90
index ae7d1709c..ffd70b61c 100644
--- a/src/polynomials.f90
+++ b/src/polynomials.f90
@@ -105,8 +105,8 @@ pure function eval(self,x) result(y)
   integer :: i
 
 
-  y = 0.0_pReal
-  do i = ubound(self%coef,1), 0, -1
+  y = self%coef(ubound(self%coef,1))
+  do i = ubound(self%coef,1)-1, 0, -1
 #ifndef __INTEL_COMPILER
     y = y*(x-self%x_ref) +self%coef(i)
 #else

From 10d8a63cb656aed2a529abd2db63019f235941e2 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Sun, 8 May 2022 17:18:15 +0200
Subject: [PATCH 11/13] not used

and current code is not good (not using Horner scheme)
---
 src/polynomials.f90 | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/src/polynomials.f90 b/src/polynomials.f90
index ffd70b61c..61f20e796 100644
--- a/src/polynomials.f90
+++ b/src/polynomials.f90
@@ -16,7 +16,6 @@ module polynomials
     real(pReal) :: x_ref = huge(0.0_pReal)
     contains
     procedure, public :: at => eval
-    procedure, public :: der1_at => eval_der1
   end type tPolynomial
 
   interface polynomial
@@ -117,26 +116,6 @@ pure function eval(self,x) result(y)
 end function eval
 
 
-!--------------------------------------------------------------------------------------------------
-!> @brief Evaluate a first derivative of Polynomial.
-!--------------------------------------------------------------------------------------------------
-pure function eval_der1(self,x) result(y)
-
-  class(tPolynomial), intent(in) :: self
-  real(pReal), intent(in) :: x
-  real(pReal) :: y
-
-  integer :: i
-
-
-  y = 0.0_pReal
-  do i = 1, ubound(self%coef,1)
-    y = y + real(i,pReal)*self%coef(i) * (x-self%x_ref)**(i-1)
-  enddo
-
-end function eval_der1
-
-
 !--------------------------------------------------------------------------------------------------
 !> @brief Check correctness of polynomical functionality.
 !--------------------------------------------------------------------------------------------------
@@ -188,7 +167,6 @@ subroutine selfTest()
   Dict => YAML_parse_str(trim(YAML_s))
   p1 = polynomial(dict%asDict(),'C','T')
   if (dNeq(p1%at(x_ref+x),-p1%at(x_ref-x),1.0e-10_pReal))         error stop 'polynomials: eval(linear)'
-  if (dNeq(p1%der1_at(x),p1%der1_at(5.0_pReal*x),1.0e-10_pReal))  error stop 'polynomials: eval_der(linear)'
 
   YAML_s = 'C: 0.0'//IO_EOL//&
            'C,T^2: '//trim(adjustl(coef_s(3)))//IO_EOL//&
@@ -196,7 +174,6 @@ subroutine selfTest()
   Dict => YAML_parse_str(trim(YAML_s))
   p1 = polynomial(dict%asDict(),'C','T')
   if (dNeq(p1%at(x_ref+x),p1%at(x_ref-x),1e-10_pReal))            error stop 'polynomials: eval(quadratic)'
-  if (dNeq(p1%der1_at(x_ref+x),-p1%der1_at(x_ref-x),1e-10_pReal)) error stop 'polynomials: eval_der(quadratic)'
 
   YAML_s = 'Y: '//trim(adjustl(coef_s(1)))//IO_EOL//&
            'Y,X^3: '//trim(adjustl(coef_s(2)))//IO_EOL//&

From b376b10b7a876b0cb9c7fd2de7d4093a45b21a77 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Sun, 8 May 2022 17:47:20 +0200
Subject: [PATCH 12/13] classic Intel gives FPE

---
 src/math.f90             |  4 ++--
 src/phase_mechanical.f90 | 14 +++++++-------
 src/polynomials.f90      |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/math.f90 b/src/math.f90
index 135d2b6fd..25c90ccf4 100644
--- a/src/math.f90
+++ b/src/math.f90
@@ -1053,7 +1053,7 @@ pure subroutine math_eigh33(w,v,m)
   U = max(T, T**2)
   threshold = sqrt(5.68e-14_pReal * U**2)
 
-#ifndef __INTEL_COMPILER
+#ifndef __INTEL_LLVM_COMPILER
   v(1:3,1) = [m(1,3)*w(1) + v(1,2), &
               m(2,3)*w(1) + v(2,2), &
 #else
@@ -1066,7 +1066,7 @@ pure subroutine math_eigh33(w,v,m)
     call math_eigh(w,v,error,m)
   else fallback1
     v(1:3,1) = v(1:3, 1) / norm
-#ifndef __INTEL_COMPILER
+#ifndef __INTEL_LLVM_COMPILER
     v(1:3,2) = [m(1,3)*w(2) + v(1,2), &
                 m(2,3)*w(2) + v(2,2), &
 #else
diff --git a/src/phase_mechanical.f90 b/src/phase_mechanical.f90
index 66b9ff090..fc9e40e02 100644
--- a/src/phase_mechanical.f90
+++ b/src/phase_mechanical.f90
@@ -680,7 +680,7 @@ function integrateStateEuler(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en) result
   if (any(IEEE_is_NaN(dotState))) return
 
   sizeDotState = plasticState(ph)%sizeDotState
-#ifndef __INTEL_COMPILER
+#ifndef __INTEL_LLVM_COMPILER
   plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t
 #else
   plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0)
@@ -723,7 +723,7 @@ function integrateStateAdaptiveEuler(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en
   sizeDotState = plasticState(ph)%sizeDotState
 
   r = - dotState * 0.5_pReal * Delta_t
-#ifndef __INTEL_COMPILER
+#ifndef __INTEL_LLVM_COMPILER
   plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t
 #else
   plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0)
@@ -848,14 +848,14 @@ function integrateStateRK(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en,A,B,C,DB)
     dotState = A(1,stage) * plastic_RKdotState(1:sizeDotState,1)
 
     do n = 2, stage
-#ifndef __INTEL_COMPILER
+#ifndef __INTEL_LLVM_COMPILER
       dotState = dotState + A(n,stage)*plastic_RKdotState(1:sizeDotState,n)
 #else
       dotState = IEEE_FMA(A(n,stage),plastic_RKdotState(1:sizeDotState,n),dotState)
 #endif
     enddo
 
-#ifndef __INTEL_COMPILER
+#ifndef __INTEL_LLVM_COMPILER
     plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t
 #else
     plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0)
@@ -873,7 +873,7 @@ function integrateStateRK(F_0,F,subFp0,subFi0,subState0,Delta_t,ph,en,A,B,C,DB)
 
   plastic_RKdotState(1:sizeDotState,size(B)) = dotState
   dotState = matmul(plastic_RKdotState,B)
-#ifndef __INTEL_COMPILER
+#ifndef __INTEL_LLVM_COMPILER
   plasticState(ph)%state(1:sizeDotState,en) = subState0 + dotState*Delta_t
 #else
   plasticState(ph)%state(1:sizeDotState,en) = IEEE_FMA(dotState,Delta_t,subState0)
@@ -1161,7 +1161,7 @@ module function phase_mechanical_dPdF(Delta_t,co,ce) result(dPdF)
   else
     lhs_3333 = 0.0_pReal; rhs_3333 = 0.0_pReal
     do o=1,3; do p=1,3
-#ifndef __INTEL_COMPILER
+#ifndef __INTEL_LLVM_COMPILER
       lhs_3333(1:3,1:3,o,p) = lhs_3333(1:3,1:3,o,p) &
                             + matmul(invSubFi0,dLidFi(1:3,1:3,o,p)) * Delta_t
       lhs_3333(1:3,o,1:3,p) = lhs_3333(1:3,o,1:3,p) &
@@ -1201,7 +1201,7 @@ module function phase_mechanical_dPdF(Delta_t,co,ce) result(dPdF)
     temp_3333(1:3,1:3,p,o) = matmul(matmul(temp_33_2,dLpdS(1:3,1:3,p,o)), invFi) &
                            + matmul(temp_33_3,dLidS(1:3,1:3,p,o))
   enddo; enddo
-#ifndef __INTEL_COMPILER
+#ifndef __INTEL_LLVM_COMPILER
   lhs_3333 = math_mul3333xx3333(dSdFe,temp_3333) * Delta_t &
            + math_mul3333xx3333(dSdFi,dFidS)
 #else
diff --git a/src/polynomials.f90 b/src/polynomials.f90
index 61f20e796..46e338b19 100644
--- a/src/polynomials.f90
+++ b/src/polynomials.f90
@@ -106,7 +106,7 @@ pure function eval(self,x) result(y)
 
   y = self%coef(ubound(self%coef,1))
   do i = ubound(self%coef,1)-1, 0, -1
-#ifndef __INTEL_COMPILER
+#ifndef __INTEL_LLVM_COMPILER
     y = y*(x-self%x_ref) +self%coef(i)
 #else
     y = IEEE_FMA(y,x-self%x_ref,self%coef(i))

From 53796fce7a6ea194981bb9165b22df95b5724eb7 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Sun, 8 May 2022 19:57:25 +0200
Subject: [PATCH 13/13] trustworthy reference

---
 src/polynomials.f90 | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/polynomials.f90 b/src/polynomials.f90
index 46e338b19..c883528ca 100644
--- a/src/polynomials.f90
+++ b/src/polynomials.f90
@@ -94,6 +94,7 @@ end function polynomial_from_dict
 
 !--------------------------------------------------------------------------------------------------
 !> @brief Evaluate a Polynomial.
+!> @details https://nvlpubs.nist.gov/nistpubs/jres/71b/jresv71bn1p11_a1b.pdf (eq. 1.2)
 !--------------------------------------------------------------------------------------------------
 pure function eval(self,x) result(y)
 
@@ -101,15 +102,15 @@ pure function eval(self,x) result(y)
   real(pReal), intent(in) :: x
   real(pReal) :: y
 
-  integer :: i
+  integer :: o
 
 
   y = self%coef(ubound(self%coef,1))
-  do i = ubound(self%coef,1)-1, 0, -1
+  do o = ubound(self%coef,1)-1, 0, -1
 #ifndef __INTEL_LLVM_COMPILER
-    y = y*(x-self%x_ref) +self%coef(i)
+    y = y*(x-self%x_ref) +self%coef(o)
 #else
-    y = IEEE_FMA(y,x-self%x_ref,self%coef(i))
+    y = IEEE_FMA(y,x-self%x_ref,self%coef(o))
 #endif
   enddo