added single precision libraries for FFTW

First try of implement single precision crystal plasticity, not working yet. polishing text about geometry construction. polishing postResults, still having problems concerning machines without MSC installation
2011-02-24 18:39:57 +00:00 · 2011-02-24 18:39:57 +00:00 · f525c02ded
parent e49e5e13af
commit f525c02ded
8 changed files with 3592 additions and 5 deletions
--- a/code/libfftw3f.a
+++ b/code/libfftw3f.a
--- a/code/libfftw3f_threads.a
+++ b/code/libfftw3f_threads.a
--- a/code/makefile_single
+++ b/code/makefile_single
@ -0,0 +1,86 @@
+# Makefile to compile the Material subroutine for BVP solution using spectral method with single precision
+#
+# BE SURE THAT ALL DOUBLE FILES ARE REMOVE FROM THE FOLDER
+# Uses openmp to parallelise the material subroutines (set number of cores with "export MPIE_NUM_THREADS=n" to n)
+# Uses linux threads to parallelise fftw3 (should also be possible with openmp)
+# Besides of the f90 files written at MPIE, the two library files of fftw3 "libfftw3_threads.a" "libfftw3.a" are also needed
+# Install fftw3 (v3.2.2 is tested) with "./configure --enable-threads --enable-float"  and "make", "make install" is not needed
+# as long as the two library files are copied to the source code directory.
+
+cpspectral_single.exe: mpie_spectral.o CPFEM.a
+	ifort -openmp -o cpspectral_single.exe mpie_spectral.o CPFEM.a libfftw3f_threads.a libfftw3f.a constitutive.a advanced.a basics.a -lpthread
+	rm *.mod
+
+mpie_spectral.o: mpie_spectral.f90 CPFEM.o 
+	ifort -openmp -c -O3 -heap-arrays 500000000 mpie_spectral.f90 
+
+CPFEM.a: CPFEM.o
+	ar rc CPFEM.a homogenization.o homogenization_RGC.o homogenization_isostrain.o crystallite.o CPFEM.o constitutive.o
+
+CPFEM.o: CPFEM.f90 homogenization.o
+	ifort -openmp -c -O3 -heap-arrays 500000000 CPFEM.f90
+homogenization.o: homogenization.f90 homogenization_isostrain.o homogenization_RGC.o crystallite.o
+	ifort -openmp -c -O3 -heap-arrays 500000000 homogenization.f90
+homogenization_RGC.o: homogenization_RGC.f90 constitutive.a
+	ifort -openmp -c -O3 -heap-arrays 500000000 homogenization_RGC.f90
+homogenization_isostrain.o: homogenization_isostrain.f90 basics.a advanced.a
+	ifort -openmp -c -O3 -heap-arrays 500000000 homogenization_isostrain.f90
+crystallite.o: crystallite.f90 constitutive.a
+	ifort -openmp -c -O3 -heap-arrays 500000000 crystallite.f90
+
+
+
+constitutive.a: constitutive.o
+	ar rc constitutive.a constitutive.o constitutive_titanmod.o constitutive_nonlocal.o constitutive_dislotwin.o constitutive_j2.o constitutive_phenopowerlaw.o basics.a advanced.a
+
+constitutive.o: constitutive.f90 constitutive_titanmod.o constitutive_nonlocal.o constitutive_dislotwin.o constitutive_j2.o constitutive_phenopowerlaw.o
+	ifort -openmp -c -O3 -heap-arrays 500000000 constitutive.f90
+
+constitutive_titanmod.o: constitutive_titanmod.f90 basics.a advanced.a
+	ifort -openmp -c -O3 -heap-arrays 500000000 constitutive_titanmod.f90
+
+constitutive_nonlocal.o: constitutive_nonlocal.f90 basics.a advanced.a
+	ifort -openmp -c -O3 -heap-arrays 500000000 constitutive_nonlocal.f90
+
+constitutive_dislotwin.o: constitutive_dislotwin.f90 basics.a advanced.a
+	ifort -openmp -c -O3 -heap-arrays 500000000 constitutive_dislotwin.f90
+
+constitutive_j2.o: constitutive_j2.f90 basics.a advanced.a
+	ifort -openmp -c -O3 -heap-arrays 500000000 constitutive_j2.f90
+
+constitutive_phenopowerlaw.o: constitutive_phenopowerlaw.f90 basics.a advanced.a
+	ifort -openmp -c -O3 -heap-arrays 500000000 constitutive_phenopowerlaw.f90
+
+
+
+advanced.a: lattice.o
+	ar rc advanced.a FEsolving.o mesh.o material.o lattice.o
+
+lattice.o: lattice.f90 material.o
+	ifort -openmp -c -O3 -heap-arrays 500000000 lattice.f90
+material.o: material.f90 mesh.o
+	ifort -openmp -c -O3 -heap-arrays 500000000 material.f90
+mesh.o: mesh.f90 FEsolving.o
+	ifort -openmp -c -O3 -heap-arrays 500000000 mesh.f90
+FEsolving.o: FEsolving.f90 basics.a
+	ifort -openmp -c -O3 -heap-arrays 500000000 FEsolving.f90
+
+
+
+basics.a: debug.o math_single.o
+	ar rc basics.a debug.o math_single.o numerics.o IO.o mpie_spectral_interface.o prec_single.o
+
+debug.o: debug.f90 numerics.o
+	ifort -openmp -c -O3 debug.f90
+
+math_single.o: math_single.f90 numerics.o
+	ifort -openmp -c -O3 math_single.f90
+
+numerics.o: numerics.f90 IO.o
+	ifort -openmp -c -O3 numerics.f90
+IO.o: IO.f90 mpie_spectral_interface.o
+	ifort -openmp -c -O3 IO.f90
+mpie_spectral_interface.o: mpie_spectral_interface.f90 prec_single.o
+	ifort -openmp -c -O3 mpie_spectral_interface.f90
+prec_single.o: prec_single.f90
+	ifort -openmp -c -O3 prec_single.f90
--- a/code/math_single.f90
+++ b/code/math_single.f90
--- a/code/mpie_spectral_single.f90
+++ b/code/mpie_spectral_single.f90
@ -0,0 +1,579 @@
+!* $Id: mpie_spectral.f90 769 2011-02-21 14:37:38Z MPIE\m.diehl $
+!********************************************************************
+! Material subroutine for BVP solution using spectral method
+!
+! written by P. Eisenlohr,
+!            F. Roters,
+!            L. Hantcherli,
+!            W.A. Counts,
+!            D.D. Tjahjanto,
+!            C. Kords,
+!            M. Diehl,
+!            R. Lebensohn
+!
+! MPI fuer Eisenforschung, Duesseldorf
+!
+!********************************************************************
+!     Usage:
+!             - start program with mpie_spectral PathToGeomFile/NameOfGeom.geom
+!               PathToLoadFile/NameOfLoadFile.load
+!             - PathToGeomFile will be the working directory
+!             - make sure the file "material.config" exists in the working
+!               directory. For further configuration use "numerics.config"
+!********************************************************************
+program mpie_spectral
+!********************************************************************
+
+ use mpie_interface
+ use prec, only: pInt, pReal
+ use IO
+ use math
+ use CPFEM, only: CPFEM_general, CPFEM_initAll 
+ use numerics, only: err_div_tol, err_stress_tol, err_stress_tolrel, err_defgrad_tol,&
+                     itmax, memory_efficient, mpieNumThreadsInt       
+ use homogenization, only: materialpoint_sizeResults, materialpoint_results
+!$ use OMP_LIB                                                          ! the openMP function library
+
+ implicit none
+ include 'fftw3.f' !header file for fftw3 (declaring variables). Library files are also needed
+! compile FFTW 3.2.2 with ./configure --enable-threads
+! variables to read from loadcase and geom file
+ real(pReal), dimension(9) ::                      valuevector           ! stores information temporarily from loadcase file
+ integer(pInt), parameter ::                       maxNchunksInput = 24  ! 4 identifiers, 18 values for the matrices and 2 scalars
+ integer(pInt), dimension (1+maxNchunksInput*2) :: posInput
+ integer(pInt), parameter ::                       maxNchunksGeom = 7    ! 4 identifiers, 3 values
+ integer(pInt), dimension (1+2*maxNchunksGeom) ::  posGeom
+ integer(pInt) unit, N_l, N_s, N_t, N_n                                  ! numbers of identifiers
+ character(len=1024) path, line
+ logical gotResolution,gotDimension,gotHomogenization
+ logical, dimension(9) :: bc_maskvector
+
+! variables storing information from loadcase file
+ real(pReal)                                    timeinc
+ real(pReal), dimension (:,:,:), allocatable :: bc_velocityGrad, &
+                                                bc_stress             ! velocity gradient and stress BC
+ real(pReal), dimension(:), allocatable ::      bc_timeIncrement      ! length of increment
+ integer(pInt)                                  N_Loadcases, steps
+ integer(pInt), dimension(:), allocatable ::    bc_steps              ! number of steps
+ logical, dimension(:,:,:,:), allocatable ::    bc_mask               ! mask of boundary conditions
+
+! variables storing information from geom file
+ real(pReal) wgt
+ real(pReal), dimension(3) ::  geomdimension
+ integer(pInt) homog
+ integer(pInt), dimension(3) :: resolution
+
+! stress etc.
+ real(pReal), dimension(3,3) ::                         ones, zeroes, temp33_Real, damper,&
+                                                        pstress, pstress_av, cstress_av, defgrad_av,&
+                                                        defgradAim, defgradAimOld, defgradAimCorr, defgradAimCorrPrev,&
+                                                        mask_stress, mask_defgrad                          
+ real(pReal), dimension(3,3,3,3) ::                     dPdF, c0, s0
+ real(pReal), dimension(6) ::                           cstress                                ! cauchy stress in Mandel notation
+ real(pReal), dimension(6,6) ::                         dsde, c066, s066                       ! Mandel notation of 4th order tensors
+ real(pReal), dimension(:,:,:,:,:), allocatable ::      workfft, defgrad, defgradold
+ 
+! variables storing information for spectral method
+ complex(pReal) ::                                      img
+ complex(pReal), dimension(3,3) ::                      temp33_Complex
+ real(pReal), dimension(3,3) ::                         xinormdyad
+ real(pReal), dimension(:,:,:,:,:,:,:), allocatable ::  gamma_hat
+ real(pReal), dimension(3) ::                           xi, xi_central
+ integer(pInt), dimension(3) ::                         k_s
+ integer*8, dimension(2) ::                             plan_fft
+ 
+! loop variables, convergence etc.
+ real(pReal) guessmode, err_div, err_stress, err_defgrad, sigma0        
+ integer(pInt)  i, j, k, l, m, n, p
+ integer(pInt)  loadcase, ielem, iter, calcmode, CPFEM_mode, ierr
+ logical errmatinv
+ 
+ real(pReal) temperature                               ! not used, but needed for call to CPFEM_general
+
+!Initializing
+!$ call omp_set_num_threads(mpieNumThreadsInt)         ! set number of threads for parallel execution set by MPIE_NUM_THREADS
+
+ bc_maskvector = ''
+ unit = 234_pInt
+ ones   = 1.0_pReal; zeroes = 0.0_pReal
+ img = cmplx(0.0,1.0)
+ 
+ N_l = 0_pInt; N_s = 0_pInt
+ N_t = 0_pInt; N_n = 0_pInt
+ gotResolution =.false.; gotDimension =.false.; gotHomogenization = .false.
+ resolution = 1_pInt; geomdimension = 0.0_pReal
+ 
+ temperature = 300.0_pReal
+
+ if (IargC() /= 2) call IO_error(102)                     ! check for correct number of given arguments
+
+! Reading the loadcase file and assign variables
+ path = getLoadcaseName()
+ print '(a,/,a)', 'Loadcase:      ',trim(path)
+ print '(a,/,a)', 'Workingdir:    ',trim(getSolverWorkingDirectoryName())
+ print '(a,/,a)', 'SolverJobName: ',trim(getSolverJobName())
+
+ if (.not. IO_open_file(unit,path)) call IO_error(45,ext_msg = path)
+ 
+ rewind(unit)
+ do
+   read(unit,'(a1024)',END = 101) line
+   if (IO_isBlank(line)) cycle                            ! skip empty lines
+   posInput = IO_stringPos(line,maxNchunksInput)
+   do i = 1, maxNchunksInput, 1
+       select case (IO_lc(IO_stringValue(line,posInput,i)))
+            case('l','velocitygrad')
+                 N_l = N_l+1
+            case('s','stress')
+                 N_s = N_s+1
+            case('t','time','delta')
+                 N_t = N_t+1
+            case('n','incs','increments','steps')
+                 N_n = N_n+1
+        end select
+   enddo                                                  ! count all identifiers to allocate memory and do sanity check
+ enddo
+
+101 N_Loadcases = N_l
+
+! allocate memory depending on lines in input file
+ allocate (bc_velocityGrad(3,3,N_Loadcases));       bc_velocityGrad = 0.0_pReal
+ allocate (bc_stress(3,3,N_Loadcases));             bc_stress = 0.0_pReal
+ allocate (bc_mask(3,3,2,N_Loadcases));             bc_mask = .false.
+ allocate (bc_timeIncrement(N_Loadcases));          bc_timeIncrement = 0.0_pReal
+ allocate (bc_steps(N_Loadcases));                  bc_steps = 0_pInt
+
+ rewind(unit)
+ i = 0_pInt
+ do
+   read(unit,'(a1024)',END = 200) line
+   if (IO_isBlank(line)) cycle                           ! skip empty lines
+   i = i + 1
+   posInput = IO_stringPos(line,maxNchunksInput)
+   do j = 1,maxNchunksInput,2
+     select case (IO_lc(IO_stringValue(line,posInput,j)))
+       case('l','velocitygrad')
+         valuevector = 0.0_pReal
+         forall (k = 1:9) bc_maskvector(k) = IO_stringValue(line,posInput,j+k) /= '#'
+         do k = 1,9
+           if (bc_maskvector(k)) valuevector(k) = IO_floatValue(line,posInput,j+k) ! assign values for the velocity gradient matrix
+         enddo
+         bc_mask(:,:,1,i) = transpose(reshape(bc_maskvector,(/3,3/)))
+         bc_velocityGrad(:,:,i) = math_transpose3x3(reshape(valuevector,(/3,3/)))
+       case('s','stress')
+         valuevector = 0.0_pReal
+         forall (k = 1:9) bc_maskvector(k) = IO_stringValue(line,posInput,j+k) /= '#'
+         do k = 1,9
+           if (bc_maskvector(k)) valuevector(k) = IO_floatValue(line,posInput,j+k)  ! assign values for the bc_stress matrix
+         enddo
+         bc_mask(:,:,2,i) = transpose(reshape(bc_maskvector,(/3,3/)))
+         bc_stress(:,:,i) = math_transpose3x3(reshape(valuevector,(/3,3/)))
+       case('t','time','delta')                                            ! increment time
+           bc_timeIncrement(i) = IO_floatValue(line,posInput,j+1)
+       case('n','incs','increments','steps')                               ! bc_steps
+           bc_steps(i) = IO_intValue(line,posInput,j+1)
+     end select
+ enddo; enddo
+
+200 close(unit)
+
+ do i = 1, N_Loadcases
+   if (any(bc_mask(:,:,1,i) == bc_mask(:,:,2,i))) call IO_error(46,i)     ! bc_mask consistency
+   if (bc_timeIncrement(i) < 0.0_pReal) call IO_error(47,i)                 ! negative time increment forbidden
+   if (bc_steps(i) < 1_pInt) call IO_error(48,i)                            ! non-positive increments requested
+   print '(a,/,3(3(f12.6,x)/))','L:'        ,math_transpose3x3(bc_velocityGrad(:,:,i))
+   print '(a,/,3(3(f12.6,x)/))','bc_stress:',math_transpose3x3(bc_stress(:,:,i))
+   print '(a,/,3(3(l,x)/))',    'bc_mask for velocitygrad:',transpose(bc_mask(:,:,1,i))
+   print '(a,/,3(3(l,x)/))',    'bc_mask for stress:'      ,transpose(bc_mask(:,:,2,i))
+   print '(a,f12.6)','time: ',bc_timeIncrement(i)
+   print '(a,i5)','incs: ',bc_steps(i)
+   print *, ''
+ enddo
+
+!read header of geom file to get the information needed before the complete geom file is intepretated by mesh.f90
+ path = getModelName()
+ print '(a,a)', 'GeomName: ',trim(path)
+ if (.not. IO_open_file(unit,trim(path)//InputFileExtension)) call IO_error(101,ext_msg = trim(path)//InputFileExtension)
+
+ rewind(unit)
+ do
+   read(unit,'(a1024)',END = 100) line
+   if (IO_isBlank(line)) cycle                            ! skip empty lines
+   posGeom = IO_stringPos(line,maxNchunksGeom)             
+
+   select case ( IO_lc(IO_StringValue(line,posGeom,1)) )
+     case ('dimension')
+       gotDimension = .true.
+       do i = 2,6,2
+         select case (IO_lc(IO_stringValue(line,posGeom,i)))
+           case('x')
+              geomdimension(1) = IO_floatValue(line,posGeom,i+1)
+           case('y')
+              geomdimension(2) = IO_floatValue(line,posGeom,i+1)
+           case('z')
+              geomdimension(3) = IO_floatValue(line,posGeom,i+1)
+         end select
+       enddo
+     case ('homogenization')
+       gotHomogenization = .true.
+       homog = IO_intValue(line,posGeom,2)
+     case ('resolution')
+       gotResolution = .true.
+       do i = 2,6,2
+         select case (IO_lc(IO_stringValue(line,posGeom,i)))
+           case('a')
+             resolution(1) = IO_intValue(line,posGeom,i+1)
+           case('b')
+             resolution(2) = IO_intValue(line,posGeom,i+1)
+           case('c')
+             resolution(3) = IO_intValue(line,posGeom,i+1)
+         end select
+       enddo
+   end select
+   if (gotDimension .and. gotHomogenization .and. gotResolution) exit
+ enddo
+ 100 close(unit)
+ 
+ if(mod(resolution(1),2)/=0 .or. mod(resolution(2),2)/=0 .or. mod(resolution(3),2)/=0)  call IO_error(103)
+ 
+ print '(a,/,i4,i4,i4)','resolution a b c:', resolution
+ print '(a,/,f6.1,f6.1,f6.1)','dimension x y z:', geomdimension
+ print '(a,i4)','homogenization: ',homog
+ 
+ allocate (defgrad   (resolution(1),resolution(2),resolution(3),3,3)); defgrad    = 0.0_pReal
+ allocate (defgradold(resolution(1),resolution(2),resolution(3),3,3)); defgradold = 0.0_pReal
+ 
+ wgt = 1.0_pReal/real(resolution(1)*resolution(2)*resolution(3), pReal)
+ defgradAim = math_I3
+ defgradAimOld = math_I3
+ defgrad_av = math_I3
+ 
+! Initialization of CPFEM_general (= constitutive law) and of deformation gradient field
+ call CPFEM_initAll(temperature,1_pInt,1_pInt)
+ ielem = 0_pInt
+ c066 = 0.0_pReal 
+ do k = 1, resolution(3); do j = 1, resolution(2); do i = 1, resolution(1)
+   defgradold(i,j,k,:,:) = math_I3                    !no deformation at the beginning
+   defgrad(i,j,k,:,:) = math_I3 
+   ielem = ielem +1 
+   call CPFEM_general(2,math_I3,math_I3,temperature,0.0_pReal,ielem,1_pInt,cstress,dsde,pstress,dPdF)
+   c066 = c066 + dsde
+ enddo; enddo; enddo
+ c066 = c066 * wgt
+ c0 = math_mandel66to3333(c066)
+ call math_invert(6, c066, s066,i, errmatinv)
+ if(errmatinv) call IO_error(800)                     !Matrix inversion error
+ s0 = math_mandel66to3333(s066)
+ 
+ if(memory_efficient) then ! allocate just single fourth order tensor
+   allocate (gamma_hat(1,1,1,3,3,3,3)); gamma_hat = 0.0_pReal
+ else                      ! precalculation of gamma_hat field
+   allocate (gamma_hat(resolution(1)/2+1,resolution(2),resolution(3),3,3,3,3)); gamma_hat = 0.0_pReal
+   do k = 1, resolution(3)
+     k_s(3) = k-1
+     if(k > resolution(3)/2+1) k_s(3) = k_s(3)-resolution(3)
+     do j = 1, resolution(2)
+       k_s(2) = j-1
+       if(j > resolution(2)/2+1) k_s(2) = k_s(2)-resolution(2)     
+       do i = 1, resolution(1)/2+1
+         k_s(1) = i-1
+         xi(3) = 0.0_pReal     !for the 2D case
+         if(resolution(3) > 1) xi(3) = real(k_s(3), pReal)/geomdimension(3) !3D case
+                               xi(2) = real(k_s(2), pReal)/geomdimension(2)
+                               xi(1) = real(k_s(1), pReal)/geomdimension(1)
+         if (any(xi /= 0.0_pReal)) then     
+           do l = 1,3; do m = 1,3
+             xinormdyad(l,m) = xi(l)*xi(m)/sum(xi**2)
+           enddo; enddo
+           temp33_Real = math_inv3x3(math_mul3333xx33(c0, xinormdyad)) 
+         else
+           xinormdyad  = 0.0_pReal
+           temp33_Real = 0.0_pReal
+         endif 
+         do l=1,3; do m=1,3; do n=1,3; do p=1,3
+           gamma_hat(i,j,k, l,m,n,p) = - 0.25*(temp33_Real(l,n)+temp33_Real(n,l)) *&
+                                              (xinormdyad(m,p)+xinormdyad(p,m))
+         enddo; enddo; enddo; enddo         
+   enddo; enddo; enddo
+ endif
+ 
+! calculate xi for the calculation of divergence in Fourier space (central frequency)
+ xi_central(3) = 0.0_pReal !2D case
+ if(resolution(3) > 1) xi_central(3) = real(resolution(3)/2, pReal)/geomdimension(3) !3D case
+                       xi_central(2) = real(resolution(2)/2, pReal)/geomdimension(2)
+                       xi_central(1) = real(resolution(1)/2, pReal)/geomdimension(1)
+ 
+ allocate (workfft(resolution(1)+2,resolution(2),resolution(3),3,3)); workfft = 0.0_pReal
+
+! Initialization of fftw (see manual on fftw.org for more details) 
+ call sfftw_init_threads(ierr)
+ if(ierr == 0) call IO_error(104,ierr)
+ call sfftw_plan_with_nthreads(mpieNumThreadsInt) 
+ call sfftw_plan_many_dft_r2c(plan_fft(1),3,(/resolution(1),resolution(2),resolution(3)/),9,&
+   workfft,(/resolution(1)  +2,resolution(2),resolution(3)/),1,(resolution(1)  +2)*resolution(2)*resolution(3),&
+   workfft,(/resolution(1)/2+1,resolution(2),resolution(3)/),1,(resolution(1)/2+1)*resolution(2)*resolution(3),FFTW_PATIENT)   
+ call sfftw_plan_many_dft_c2r(plan_fft(2),3,(/resolution(1),resolution(2),resolution(3)/),9,&
+   workfft,(/resolution(1)/2+1,resolution(2),resolution(3)/),1,(resolution(1)/2+1)*resolution(2)*resolution(3),&
+   workfft,(/resolution(1)  +2,resolution(2),resolution(3)/),1,(resolution(1)  +2)*resolution(2)*resolution(3),FFTW_PATIENT)
+
+! write header of output file
+ open(538,file=trim(getSolverWorkingDirectoryName())//trim(getSolverJobName())&
+                                                    //'.spectralOut',form='UNFORMATTED')       
+ write(538), 'load',trim(getLoadcaseName())
+ write(538), 'workingdir',trim(getSolverWorkingDirectoryName())
+ write(538), 'geometry',trim(getSolverJobName())//InputFileExtension
+ write(538), 'resolution',resolution
+ write(538), 'dimension',geomdimension
+ write(538), 'materialpoint_sizeResults', materialpoint_sizeResults
+ write(538), 'increments', sum(bc_steps)
+ write(538), 'eoh'
+ write(538)  materialpoint_results(:,1,:) 
+ write(538)  materialpoint_results(:,1,:) !to be conform with t16 Marc format
+! Initialization done
+
+!*************************************************************
+!Loop over loadcases defined in the loadcase file
+ do loadcase = 1, N_Loadcases
+!*************************************************************
+
+   timeinc = bc_timeIncrement(loadcase)/bc_steps(loadcase)
+   guessmode = 0.0_pReal                                   ! change of load case, homogeneous guess for the first step
+  
+   mask_defgrad =  merge(ones,zeroes,bc_mask(:,:,1,loadcase))
+   mask_stress =  merge(ones,zeroes,bc_mask(:,:,2,loadcase))
+   damper = ones/10
+!*************************************************************
+! loop oper steps defined in input file for current loadcase
+   do steps = 1, bc_steps(loadcase)
+!*************************************************************
+     temp33_Real = defgradAim
+     defgradAim = defgradAim &                        ! update macroscopic displacement gradient (defgrad BC)
+                  + guessmode * mask_stress * (defgradAim - defgradAimOld) &
+                  + math_mul33x33(bc_velocityGrad(:,:,loadcase), defgradAim)*timeinc
+     defgradAimOld = temp33_Real
+     
+     do k = 1, resolution(3); do j = 1, resolution(2); do i = 1, resolution(1)
+       temp33_Real = defgrad(i,j,k,:,:)
+       defgrad(i,j,k,:,:) = defgrad(i,j,k,:,:)&         ! old fluctuations as guess for new step, no fluctuations for new loadcase
+                          + guessmode * (defgrad(i,j,k,:,:) - defgradold(i,j,k,:,:))&                                           
+                          + (1.0_pReal-guessmode) * math_mul33x33(bc_velocityGrad(:,:,loadcase),defgradold(i,j,k,:,:))*timeinc
+       defgradold(i,j,k,:,:) = temp33_Real                                                                  
+     enddo; enddo; enddo
+
+     guessmode = 1.0_pReal                              ! keep guessing along former trajectory during same loadcase
+     if(all(bc_mask(:,:,1,loadcase))) then
+       calcmode = 1_pInt                                ! if no stress BC is given (calmode 0 is not needed)
+     else
+       calcmode = 0_pInt                                ! start calculation of BC fulfillment
+     endif
+     CPFEM_mode = 1_pInt                                ! winding forward
+     iter = 0_pInt
+     err_div= 2_pReal * err_div_tol                     ! go into loop 
+     defgradAimCorr = 0.0_pReal                         ! reset damping calculation
+     damper = damper * 0.9_pReal
+    
+!*************************************************************
+! convergence loop
+     do while(iter < itmax .and. &     
+             (err_div > err_div_tol .or. &
+              err_stress > err_stress_tol .or. &
+              err_defgrad > err_defgrad_tol))    
+       iter = iter + 1_pInt
+       print*, ' '
+       print '(3(A,I5.5,tr2))', ' Loadcase = ',loadcase, ' Step = ',steps,'Iteration = ',iter
+       cstress_av = 0.0_pReal
+       workfft = 0.0_pReal !needed because of the padding for FFTW
+!*************************************************************
+       
+! adjust defgrad to fulfill BCs 
+       select case (calcmode)       
+       case (0)                                                                      
+         print *, 'Update Stress Field (constitutive evaluation P(F))'  
+         ielem = 0_pInt
+         do k = 1, resolution(3); do j = 1, resolution(2); do i = 1, resolution(1)
+           ielem = ielem + 1
+           call CPFEM_general(3, defgradold(i,j,k,:,:), defgrad(i,j,k,:,:),&
+                              temperature,timeinc,ielem,1_pInt,&
+                              cstress,dsde, pstress, dPdF)
+         enddo; enddo; enddo
+         
+         ielem = 0_pInt       
+         do k = 1, resolution(3); do j = 1, resolution(2); do i = 1, resolution(1)
+           ielem = ielem + 1_pInt
+           call CPFEM_general(CPFEM_mode,&                                  ! first element in first iteration retains CPFEM_mode 1, 
+                              defgradold(i,j,k,:,:), defgrad(i,j,k,:,:),&   ! others get 2 (saves winding forward effort)
+                              temperature,timeinc,ielem,1_pInt,&
+                              cstress,dsde, pstress, dPdF)
+           CPFEM_mode = 2_pInt
+           workfft(i,j,k,:,:) = pstress
+           cstress_av = cstress_av + math_mandel6to33(cstress)          
+         enddo; enddo; enddo
+         
+         cstress_av = cstress_av * wgt
+         do m = 1,3; do n = 1,3
+           pstress_av(m,n) = sum(workfft(1:resolution(1),:,:,m,n)) * wgt   
+           defgrad_av(m,n) = sum(defgrad(:,:,:,m,n)) * wgt
+         enddo; enddo
+
+         err_stress = maxval(abs(mask_stress * (pstress_av - bc_stress(:,:,loadcase))))
+         err_stress_tol = maxval(abs(pstress_av))*err_stress_tolrel                               
+         
+         print*, 'Correcting deformation gradient to fullfill BCs'
+         defgradAimCorrPrev = defgradAimCorr
+         defgradAimCorr     = -mask_stress * math_mul3333xx33(s0, (mask_stress*(pstress_av - bc_stress(:,:,loadcase))))
+
+         do m=1,3; do n =1,3                                        ! calculate damper (correction is far to strong)
+           if ( sign(1.0_pReal,defgradAimCorr(m,n))/=sign(1.0_pReal,defgradAimCorrPrev(m,n))) then
+             damper(m,n) = max(0.01_pReal,damper(m,n)*0.8)
+           else
+             damper(m,n) = min(1.0_pReal,damper(m,n) *1.2)
+           endif
+         enddo; enddo
+         defgradAimCorr = mask_Stress*(damper * defgradAimCorr)
+         defgradAim = defgradAim + defgradAimCorr                                                                  
+
+         do m = 1,3; do n = 1,3                        
+           defgrad(:,:,:,m,n) = defgrad(:,:,:,m,n) + (defgradAim(m,n) - defgrad_av(m,n)) !anticipated target minus current state
+         enddo; enddo
+         err_div = 2 * err_div_tol 
+         err_defgrad = maxval(abs(mask_defgrad * (defgrad_av - defgradAim)))         
+         print '(a,/,3(3(f12.7,x)/))', ' Deformation Gradient:',math_transpose3x3(defgrad_av)                           
+         print '(a,/,3(3(f10.4,x)/))', ' Cauchy Stress / MPa: '   ,math_transpose3x3(cstress_av)/1.e6        
+         print '(2(a,E8.2))', ' error stress:               ',err_stress, '  Tol. = ', err_stress_tol 
+         print '(2(a,E8.2))', ' error deformation gradient: ',err_defgrad,'  Tol. = ', err_defgrad_tol*0.8  
+         if(err_stress < err_stress_tol*0.8) then 
+           calcmode = 1_pInt
+         endif
+             
+! Using the spectral method to calculate the change of deformation gradient, check divergence of stress field in fourier space        
+       case (1)
+         print *, 'Update Stress Field (constitutive evaluation P(F))'
+         ielem = 0_pInt
+         do k = 1, resolution(3); do j = 1, resolution(2); do i = 1, resolution(1)
+           ielem = ielem + 1_pInt
+           call CPFEM_general(3, defgradold(i,j,k,:,:), defgrad(i,j,k,:,:),&
+                              temperature,timeinc,ielem,1_pInt,&
+                              cstress,dsde, pstress, dPdF)
+         enddo; enddo; enddo
+         ielem = 0_pInt                 
+         do k = 1, resolution(3); do j = 1, resolution(2); do i = 1, resolution(1)
+           ielem = ielem + 1_pInt
+           call CPFEM_general(CPFEM_mode,&                                  ! first element in first iteration retains CPFEM_mode 1,
+                              defgradold(i,j,k,:,:), defgrad(i,j,k,:,:),&
+                              temperature,timeinc,ielem,1_pInt,&
+                              cstress,dsde, pstress, dPdF)
+           CPFEM_mode = 2_pInt
+           workfft(i,j,k,:,:) = pstress 
+           cstress_av = cstress_av + math_mandel6to33(cstress)          
+         enddo; enddo; enddo       
+         cstress_av = cstress_av * wgt
+         do m = 1,3; do n = 1,3
+           pstress_av(m,n) = sum(workfft(1:resolution(1),:,:,m,n))*wgt
+         enddo; enddo
+         
+         print *, 'Calculating equilibrium using spectral method'
+         err_div = 0.0_pReal; sigma0 = 0.0_pReal
+         call sfftw_execute_dft_r2c(plan_fft(1),workfft,workfft)       ! FFT of pstress
+         do m = 1,3 ! L infinity Norm of stress tensor 
+           sigma0 = max(sigma0, sum(abs(workfft(1,1,1,m,:) + (workfft(2,1,1,m,:))*img)))     
+         enddo
+         err_div = (maxval(abs(math_mul33x3_complex(workfft(resolution(1)+1,resolution(2)/2+1,resolution(3)/2+1,:,:)+& ! L infinity Norm of div(stress)
+                                                    workfft(resolution(1)+2,resolution(2)/2+1,resolution(3)/2+1,:,:)*img,xi_central))))
+         err_div = err_div/sigma0     !weighting of error
+
+         if(memory_efficient) then     ! memory saving version, in-time calculation of gamma_hat
+         do k = 1, resolution(3)
+           k_s(3) = k-1
+           if(k > resolution(3)/2+1) k_s(3) = k_s(3)-resolution(3)
+           do j = 1, resolution(2)
+             k_s(2) = j-1
+             if(j > resolution(2)/2+1) k_s(2) = k_s(2)-resolution(2)
+             do i = 1, resolution(1)/2+1
+               k_s(1) = i-1
+               xi(3) = 0.0_pReal     !for the 2D case
+               if(resolution(3) > 1) xi(3) = real(k_s(3), pReal)/geomdimension(3) !3D case
+                                     xi(2) = real(k_s(2), pReal)/geomdimension(2)
+                                     xi(1) = real(k_s(1), pReal)/geomdimension(1)                                 
+               if (any(xi(:) /= 0.0_pReal)) then     
+                 do l = 1,3; do m = 1,3
+                    xinormdyad(l,m) = xi(l)*xi(m)/sum(xi**2)
+                 enddo; enddo
+                 temp33_Real = math_inv3x3(math_mul3333xx33(c0, xinormdyad)) 
+               else
+                 xinormdyad = 0.0_pReal
+                 temp33_Real = 0.0_pReal
+               endif 
+               do l=1,3; do m=1,3; do n=1,3; do p=1,3
+                gamma_hat(1,1,1, l,m,n,p) = - 0.25_pReal*(temp33_Real(l,n)+temp33_Real(n,l))*&
+                                                         (xinormdyad(m,p) +xinormdyad(p,m))
+               enddo; enddo; enddo; enddo   
+               do m = 1,3; do n = 1,3
+                 temp33_Complex(m,n) = sum(gamma_hat(1,1,1,m,n,:,:) *(workfft(i*2-1,j,k,:,:)&                 
+                                                                     +workfft(i*2  ,j,k,:,:)*img))
+               enddo; enddo
+               workfft(i*2-1,j,k,:,:) = real (temp33_Complex) 
+               workfft(i*2  ,j,k,:,:) = aimag(temp33_Complex)
+           enddo; enddo; enddo
+         else                         !use precalculated gamma-operator
+         do k = 1, resolution(3); do j = 1, resolution(2); do i = 1, resolution(1)/2+1
+           do m = 1,3; do n = 1,3
+             temp33_Complex(m,n) = sum(gamma_hat(i,j,k, m,n,:,:) *(workfft(i*2-1,j,k,:,:)&                 
+                                                                 + workfft(i*2  ,j,k,:,:)*img))
+           enddo; enddo
+           workfft(i*2-1,j,k,:,:) = real (temp33_Complex) 
+           workfft(i*2  ,j,k,:,:) = aimag(temp33_Complex) 
+          enddo; enddo; enddo
+         endif
+         
+         workfft(1,1,1,:,:) = defgrad_av - math_I3 !zero frequency (real part)
+         workfft(2,1,1,:,:) = 0.0_pReal            !zero frequency (imaginary part)
+         
+         call sfftw_execute_dft_c2r(plan_fft(2),workfft,workfft)
+         defgrad = defgrad + workfft(1:resolution(1),:,:,:,:)*wgt
+         do m = 1,3; do n = 1,3
+           defgrad_av(m,n) = sum(defgrad(:,:,:,m,n))*wgt
+           defgrad(:,:,:,m,n) = defgrad(:,:,:,m,n) + (defgradAim(m,n) - defgrad_av(m,n)) !anticipated target minus current state
+         enddo; enddo
+         
+         err_stress = maxval(abs(mask_stress * (pstress_av - bc_stress(:,:,loadcase))))
+         err_stress_tol = maxval(abs(pstress_av))*err_stress_tolrel                             !accecpt relativ error specified
+         err_defgrad = maxval(abs(mask_defgrad * (defgrad_av - defgradAim)))  
+         
+         print '(2(a,E8.2))', ' error divergence:           ',err_div,    '  Tol. = ', err_div_tol
+         print '(2(a,E8.2))', ' error stress:               ',err_stress, '  Tol. = ', err_stress_tol 
+         print '(2(a,E8.2))', ' error deformation gradient: ',err_defgrad,'  Tol. = ', err_defgrad_tol 
+
+         if((err_stress > err_stress_tol .or. err_defgrad > err_defgrad_tol) .and. err_div < err_div_tol) then  ! change to calculation of BCs, reset damper etc.
+           calcmode = 0_pInt
+           defgradAimCorr = 0.0_pReal
+           damper = damper * 0.9_pReal
+         endif     
+       end select 
+     enddo    ! end looping when convergency is achieved 
+     
+     write(538)  materialpoint_results(:,1,:) !write to output file
+     
+     print '(a,x,f12.7)'         , ' Determinant of Deformation Aim: ', math_det3x3(defgradAim)
+     print '(a,/,3(3(f12.7,x)/))', ' Deformation Aim:     ',math_transpose3x3(defgradAim)
+     print '(a,/,3(3(f12.7,x)/))', ' Deformation Gradient:',math_transpose3x3(defgrad_av) 
+     print '(a,/,3(3(f10.4,x)/))', ' Cauchy Stress / MPa: ',math_transpose3x3(cstress_av)/1.e6
+     print '(A)', '************************************************************'
+   enddo  ! end looping over steps in current loadcase
+ enddo    ! end looping over loadcases
+ close(538)
+ call dfftw_destroy_plan(plan_fft(1)); call dfftw_destroy_plan(plan_fft(2))
+    
+end program mpie_spectral
+
+!********************************************************************
+! quit subroutine to satisfy IO_error
+!
+!********************************************************************
+subroutine quit(id)
+ use prec
+ implicit none
+
+ integer(pInt) id
+
+ stop
+end subroutine
--- a/code/prec_single.f90
+++ b/code/prec_single.f90
@ -0,0 +1,28 @@
+!* $Id: prec.f90 407 2009-08-31 15:09:15Z MPIE\f.roters $
+!##############################################################
+ MODULE prec
+!##############################################################
+
+ implicit none
+ 
+!    *** Precision of real and integer variables ***
+ integer, parameter :: pReal = selected_real_kind(6,37)       ! 6 significant digits, up to 1e+-37
+ integer, parameter :: pInt  = selected_int_kind(9)           ! up to +- 1e9
+ integer, parameter :: pLongInt  = 8                          ! should be 64bit
+
+ type :: p_vec
+     real(pReal), dimension(:), pointer :: p
+ end type p_vec
+
+CONTAINS
+
+subroutine prec_init
+ write(6,*)
+ write(6,*) '<<<+-  prec init  -+>>>'
+ write(6,*) '$Id: prec.f90 407 2009-08-31 15:09:15Z MPIE\f.roters $'
+ write(6,*)
+ return
+end subroutine
+
+
+ END MODULE prec
--- a/documentation/Visualization/Geometry_Reconstruction.tex
+++ b/documentation/Visualization/Geometry_Reconstruction.tex
@ -94,7 +94,7 @@ It is defined as:
 \end{equation}
 with \vctr y\ are the coordinates in current configuration and \vctr x\ are the coordinates in reference configuration.

-The three-dimensional field of second order tensors is transformed to the Fourier space, giving three-dimensional field of second order tensors that depend on the three dimensional wave vector and not on the vector of spatial coordinates:
+The three-dimensional field of second order tensors is transformed to the Fourier space, giving three-dimensional field of second order tensors that depend on the three dimensional wave vector instead on the vector of spatial coordinates:
 \begin{equation}
 \mathcal F \left( \F(\vctr x) \right)=  \hat{\F}(\vctr k)
 \end{equation}
@ -110,15 +110,21 @@ The locally fluctuating part is integrated in Fourier space, while the integrati

 The fluctuation field of the position vector in deformed configuration in Fourier space reads as:
 \begin{equation}
-\hat{\tilde{y}}_{\rm j}(\vctr k) = \hat{F_{\rm ji}}(\vctr k) \left(k_{\rm i} i 2 \pi \right)^{-1} \forall k_i \neq 0
+\hat{\tilde{y}}_{\rm j}(\vctr k) = \hat{F_{\rm ji}}(\vctr k) \left(k_{\rm i}(\vctr k) i 2 \pi \right)^{-1} \forall k_i \neq 0
+\end{equation}
+The average part ($k_i=0$) is set to zero in Fourier space:
+
+\begin{equation}
+\hat{\tilde{y}}_{\rm j}(\vctr k) = \hat{F_{\rm ji}}(\vctr k) \left(k_{\rm i}(\vctr k) 0 \right) \forall k_i = 0
 \end{equation}
-The average part is set to zero in Fourier space.

 The inverse Fourier transform gives the locally fluctuating part of each position in current configuration:
 \begin{equation}
 \mathcal{F}^{-1}\left(\hat{\tilde{\vctr y}}(\vctr k) \right) = \tilde{\vctr y}(\vctr x)
 \end{equation}
-and the position vector in undeformed configuration is given as:
+As the average part is set to zero, the same integration scheme works if $\tilde{\tnsr F}$ is used instead of ${\tnsr F}$.
+
+The position vector in undeformed configuration is given as:
 \begin{equation}
 \vctr y(\vctr x) = \overline{\F}: \vctr x + \tilde{\vctr y}(\vctr x)
 \end{equation}
--- a/processing/post/postResults
+++ b/processing/post/postResults
@ -381,8 +381,8 @@ def OpenPostfile(name,type):
 # -----------------------------

    p = {\
-           'marc':     post_open,\
           'spectral': MPIEspectral_result,\
+           'marc':     post_open,\
        }[type.lower()]\
                        (name+
                              {\