diff --git a/code/DAMASK_spectral.f90 b/code/DAMASK_spectral.f90 index 3bf9ad6e3..93fea07e3 100644 --- a/code/DAMASK_spectral.f90 +++ b/code/DAMASK_spectral.f90 @@ -21,6 +21,8 @@ !******************************************************************** ! Material subroutine for BVP solution using spectral method ! +! Run 'DAMASK_spectral.exe --help' to get usage hints +! ! written by P. Eisenlohr, ! F. Roters, ! L. Hantcherli, @@ -32,16 +34,6 @@ ! ! MPI fuer Eisenforschung, Duesseldorf ! -!******************************************************************** -! Usage: -! - start program with DAMASK_spectral -! -g (--geom, --geometry) PathToGeomFile/NameOfGeom.geom -! -l (--load, --loadcase) PathToLoadFile/NameOfLoadFile.load -! - PathToGeomFile will be the working directory -! - make sure the file "material.config" exists in the working -! directory. For further configuration use "numerics.config" and -! "numerics.config" -!******************************************************************** program DAMASK_spectral !******************************************************************** @@ -50,6 +42,7 @@ program DAMASK_spectral use IO use debug, only: spectral_debug_verbosity use math + use kdtree2_module use mesh, only: mesh_ipCenterOfGravity use CPFEM, only: CPFEM_general, CPFEM_initAll use FEsolving, only: restartWrite, restartReadStep @@ -126,6 +119,13 @@ program DAMASK_spectral integer(pInt), dimension(3) :: k_s integer*8, dimension(3) :: fftw_plan ! plans for fftw (forward and backward) +! variables for regriding + real(pReal), dimension(:,:,:,:) ,allocatable :: deformed_small + real(pReal), dimension(:,:) ,allocatable :: deformed_large + real(pReal), dimension(:,:,:,:) ,allocatable :: new_coordinates + type(kdtree2), pointer :: tree + real(pReal), dimension(3) :: shift + ! loop variables, convergence etc. real(pReal) :: time = 0.0_pReal, time0 = 0.0_pReal, timeinc ! elapsed time, begin of interval, time interval real(pReal) :: guessmode, err_div, err_stress, err_stress_tol, p_hat_avg @@ -392,14 +392,14 @@ program DAMASK_spectral if (any(bc(loadcase)%maskStress .eqv. bc(loadcase)%maskDeformation)) errorID = 31 ! exclusive or masking only if (any(bc(loadcase)%maskStress .and. transpose(bc(loadcase)%maskStress) .and. & reshape((/.false.,.true.,.true.,.true.,.false.,.true.,.true.,.true.,.false./),(/3,3/)))) & - errorID = 38_pInt ! no rotation is allowed by stress BC + errorID = 38_pInt ! no rotation is allowed by stress BC if (any(abs(math_mul33x33(bc(loadcase)%rotation,math_transpose3x3(bc(loadcase)%rotation))-math_I3)& > reshape(spread(rotation_tol,1,9),(/3,3/)))& .or. abs(math_det3x3(bc(loadcase)%rotation)) > 1.0_pReal + rotation_tol) & - errorID = 46_pInt ! given rotation matrix contains strain - if (bc(loadcase)%timeIncrement < 0.0_pReal) errorID = 34_pInt ! negative time increment - if (bc(loadcase)%steps < 1_pInt) errorID = 35_pInt ! non-positive increment count - if (bc(loadcase)%outputfrequency < 1_pInt) errorID = 36_pInt ! non-positive result frequency + errorID = 46_pInt ! given rotation matrix contains strain + if (bc(loadcase)%timeIncrement < 0.0_pReal) errorID = 34_pInt ! negative time increment + if (bc(loadcase)%steps < 1_pInt) errorID = 35_pInt ! non-positive increment count + if (bc(loadcase)%outputfrequency < 1_pInt) errorID = 36_pInt ! non-positive result frequency if (errorID > 0_pInt) call IO_error(error_ID = errorID, ext_msg = loadcase_string) enddo @@ -444,7 +444,7 @@ program DAMASK_spectral else ! not-1st step of 1st loadcase of loglinear scale timeinc = bc(1)%timeIncrement*(2.0_pReal**real(step-1_pInt-bc(1)%steps ,pReal)) endif - else ! not-1st loadcase of loglinear scale + else ! not-1st loadcase of loglinear scale timeinc = time0 *( (1.0_pReal + bc(loadcase)%timeIncrement/time0 )**real( step/bc(loadcase)%steps ,pReal) & -(1.0_pReal + bc(loadcase)%timeIncrement/time0 )**real( (step-1_pInt)/bc(loadcase)%steps ,pReal) ) endif @@ -456,9 +456,10 @@ program DAMASK_spectral ! Initialization Start !************************************************************* - if(totalStepsCounter == restartReadStep) then ! Initialize values - if (regrid .eqv. .true. ) then ! 'DeInitialize' the values changing in case of regridding + if(totalStepsCounter == restartReadStep) then ! Initialize values + if (regrid .eqv. .true. ) then ! 'DeInitialize' the values changing in case of regridding regrid = .false. + call dfftw_destroy_plan(fftw_plan(1)); call dfftw_destroy_plan(fftw_plan(2)) if(debugDivergence) call dfftw_destroy_plan(fftw_plan(3)) deallocate (defgrad) @@ -492,9 +493,7 @@ program DAMASK_spectral divergence,(/res(1)/2_pInt+1_pInt,res(2),res(3)/),1,(res(1)/2_pInt+1_pInt)*res(2)*res(3),& divergence,(/res(1) +2_pInt,res(2),res(3)/),1,(res(1) +2_pInt)*res(2)*res(3),fftw_planner_flag) if (debugGeneral) then - !$OMP CRITICAL (write2out) write (6,*) 'FFTW initialized' - !$OMP END CRITICAL (write2out) endif if (restartReadStep==1_pInt) then ! not restarting, no deformation at the beginning @@ -519,7 +518,7 @@ program DAMASK_spectral ielem = 0_pInt do k = 1_pInt, res(3); do j = 1_pInt, res(2); do i = 1_pInt, res(1) ielem = ielem + 1_pInt - coordinates(1:3,i,j,k) = mesh_ipCenterOfGravity(1:3,1,ielem) ! set to initial coordinates ToDo: SHOULD BE UPDATED TO CURRENT POSITION IN FUTURE REVISIONS!!! But do we know them? I don't think so. Otherwise we don't need geometry reconstruction + coordinates(1:3,i,j,k) = mesh_ipCenterOfGravity(1:3,1,ielem) ! set to initial coordinates ToDo: SHOULD BE UPDATED TO CURRENT POSITION IN FUTURE REVISIONS!!! But do we know them? I don't think so. Otherwise we don't need geometry reconstruction call CPFEM_general(2_pInt,coordinates(1:3,i,j,k),math_I3,math_I3,temperature(i,j,k),& 0.0_pReal,ielem,1_pInt,cstress,dsde,pstress,dPdF) c_current = c_current + dPdF @@ -527,9 +526,7 @@ program DAMASK_spectral c0_reference = c_current * wgt ! linear reference material stiffness if (debugGeneral) then - !$OMP CRITICAL (write2out) write (6,*) 'first call to CPFEM_general finished' - !$OMP END CRITICAL (write2out) endif do k = 1_pInt, res(3) ! calculation of discrete angular frequencies, ordered as in FFTW (wrap around) @@ -613,7 +610,7 @@ program DAMASK_spectral if(totalStepsCounter >= restartReadStep) then ! Do calculations (otherwise just forwarding) if(bc(loadcase)%restartFrequency>0_pInt) & - restartWrite = ( mod(step - 1_pInt,bc(loadcase)%restartFrequency)==0_pInt) ! at frequency of writing restart information + restartWrite = ( mod(step - 1_pInt,bc(loadcase)%restartFrequency)==0_pInt) ! at frequency of writing restart information ! setting restart parameter for FEsolving (first call to CPFEM_general will write ToDo: true?) if (bc(loadcase)%velGradApplied) & ! calculate fDot from given L and current F fDot = math_mul33x33(bc(loadcase)%deformation, defgradAim) @@ -751,33 +748,35 @@ program DAMASK_spectral p_hat_avg = sqrt(maxval (math_eigenvalues3x3(math_mul33x33(workfft(1,1,1,1:3,1:3),& ! L_2 norm of average stress (freq 0,0,0) in fourier space, math_transpose3x3(workfft(1,1,1,1:3,1:3)))))) ! ignore imaginary part as it is always zero for real only input err_div_avg_complex = 0.0_pReal - err_div_max = 0.0_pReal ! only if debugDivergence == .true. of importance - divergence = 0.0_pReal ! - '' - + err_div_max = 0.0_pReal ! only important if debugDivergence == .true. + divergence = 0.0_pReal ! - '' - do k = 1_pInt, res(3); do j = 1_pInt, res(2); do i = 1_pInt, res(1)/2_pInt+1_pInt divergence_complex = math_mul33x3_complex(workfft(i*2_pInt-1_pInt,j,k,1:3,1:3)& ! calculate divergence out of the real and imaginary part of the stress + workfft(i*2_pInt ,j,k,1:3,1:3)*img,xi(1:3,i,j,k)) + if(debugDivergence) then ! need divergence NOT squared + divergence(i*2_pInt-1_pInt,j,k,1:3) = -aimag(divergence_complex) ! real part at i*2-1, imaginary part at i*2, multiply by i + divergence(i*2_pInt ,j,k,1:3) = real(divergence_complex) ! ==> switch order and change sign of img part + endif + divergence_complex = divergence_complex**2.0_pReal ! all criteria need divergence squared if(i==1_pInt .or. i == res(1)/2_pInt + 1_pInt) then ! We are on one of the two slides without conjg. complex counterpart - err_div_avg_complex = err_div_avg_complex + sum(divergence_complex**2.0_pReal) ! Avg of L_2 norm of div(stress) in fourier space (Suquet small strain) + err_div_avg_complex = err_div_avg_complex + sum(divergence_complex) ! RMS of L_2 norm of div(stress) in fourier space (Suquet small strain) else ! Has somewhere a conj. complex counterpart. Therefore count it twice. - err_div_avg_complex = err_div_avg_complex +2.0*real(sum(divergence_complex**2.0_pReal)) ! Ignore img part (conjg. complex sum will end up 0). This and the different order + err_div_avg_complex = err_div_avg_complex +2.0*real(sum(divergence_complex)) ! Ignore img part (conjg. complex sum will end up 0). This and the different order endif ! compared to c2c transform results in slight numerical deviations. if(debugDivergence) then - err_div_max = max(err_div_max,abs(sqrt(sum(divergence_complex**2.0_pReal)))) ! maximum of L two norm of div(stress) in fourier space (Suquet large strain) - divergence(i*2_pInt-1_pInt,j,k,1:3) = -aimag(divergence_complex)*pi*2.0_pReal ! real part at i*2-1, imaginary part at i*2, multiply by i - divergence(i*2_pInt ,j,k,1:3) = real(divergence_complex)*pi*2.0_pReal ! ==> switch order and change sign of img part - endif + err_div_max = max(err_div_max,abs(sqrt(sum(divergence_complex)))) ! maximum of L two norm of div(stress) in fourier space (Suquet large strain) + endif enddo; enddo; enddo - err_div = abs(sqrt (err_div_avg_complex*wgt)) ! weighting by and taking square root. abs(...) because result is a complex number + err_div = abs(sqrt (err_div_avg_complex*wgt)) ! weighting by and taking square root (RMS). abs(...) because result is a complex number err_div = err_div *correctionFactor/p_hat_avg ! weighting by average stress and multiplying with correction factor err_div_max = err_div_max*correctionFactor/p_hat_avg ! - '' - only if debugDivergence == .true. of importance ! calculate additional divergence criteria and report ------------- if(debugDivergence) then call dfftw_execute_dft_c2r(fftw_plan(3),divergence,divergence) - divergence = divergence * wgt - + divergence = divergence *pi*2.0_pReal* wgt !pointwise factor 2*pi from differentation and weighting from FT err_real_div_avg = 0.0_pReal err_real_div_max = 0.0_pReal do k = 1_pInt, res(3); do j = 1_pInt, res(2); do i = 1_pInt, res(1) @@ -786,8 +785,8 @@ program DAMASK_spectral enddo; enddo; enddo p_real_avg = sqrt(maxval (math_eigenvalues3x3(math_mul33x33(pstress_av_lab,& ! L_2 norm of average stress in real space, math_transpose3x3(pstress_av_lab))))) - err_real_div_avg = sqrt(wgt*err_real_div_avg)*correctionFactor/p_hat_avg - err_real_div_max = err_real_div_max *correctionFactor/p_hat_avg + err_real_div_avg = sqrt(wgt*err_real_div_avg)*correctionFactor/p_real_avg ! RMS in real space + err_real_div_max = err_real_div_max *correctionFactor/p_real_avg print '(a,es10.4,a,f6.2)', 'error divergence FT avg = ',err_div, ', ', err_div/err_div_tol print '(a,es10.4)', 'error divergence FT max = ',err_div_max @@ -797,7 +796,7 @@ program DAMASK_spectral print '(a,es10.4,a,f6.2)', 'error divergence = ',err_div, ', ', err_div/err_div_tol endif ! -------------------------- - if(memory_efficient) then ! memory saving version, on-the-fly calculation of gamma_hat + if(memory_efficient) then ! memory saving version, on-the-fly calculation of gamma_hat do k = 1_pInt, res(3); do j = 1_pInt, res(2) ;do i = 1_pInt, res(1)/2_pInt+1_pInt if (any(xi(1:3,i,j,k) /= 0.0_pReal)) then do l = 1_pInt,3_pInt; do m = 1_pInt,3_pInt @@ -847,7 +846,7 @@ program DAMASK_spectral if(size_reduced > 0_pInt) then ! calculate stress BC if applied err_stress = maxval(abs(mask_stress * (pstress_av - bc(loadcase)%stress))) ! maximum deviaton (tensor norm not applicable) - err_stress_tol = maxval(abs(mask_defgrad * pstress_av)) * err_stress_tolrel ! don't use any tensor norm because the comparison should be coherent + err_stress_tol = maxval(abs(pstress_av)) * err_stress_tolrel ! don't use any tensor norm because the comparison should be coherent print '(a)', '' print '(a,es10.4,a,f6.2)', 'error stress = ',err_stress, ', ', err_stress/err_stress_tol print '(a)', '... correcting deformation gradient to fulfill BCs ..........' @@ -883,7 +882,7 @@ program DAMASK_spectral ! ------------------------------ enddo ! end looping when convergency is achieved - + !$OMP CRITICAL (write2out) print '(a)', '' print '(a)', '=============================================================' @@ -899,6 +898,47 @@ program DAMASK_spectral write(538), materialpoint_results(1_pInt:materialpoint_sizeResults,1,1_pInt:Npoints) ! write result to file endif !$OMP END CRITICAL (write2out) + +! ################################################## +! # test of regridding + ! allocate(deformed_small(res(1) ,res(2) ,res(3) ,3)); deformed_small = 0.0_pReal + ! allocate(deformed_large(3,Npoints*27_pInt)); deformed_large = 0.0_pReal !ToDo: make it smaller (small corona only) + + ! call deformed_fft(res,geomdimension,defgrad_av_lab,1.0_pReal,defgrad,deformed_small) ! calculate deformed coordinates + ! shift = math_mul33x3(defgrad_av_lab,geomdimension) + + ! print*, 'defgrad' + ! do k = 1_pInt, res(3); do j = 1_pInt, res(2); do i = 1_pInt, res(1) + ! print*, defgrad(i,j,k,1:3,1:3) + ! enddo; enddo; enddo + + ! print*, 'deformed_small' + ! do k = 1_pInt, res(3); do j = 1_pInt, res(2); do i = 1_pInt, res(1) + ! print*, deformed_small(i,j,k,1:3) + ! enddo; enddo; enddo + + ! print*, 'shift', shift + ! ielem = 0_pInt + ! do k = 1_pInt, res(3); do j = 1_pInt, res(2); do i = 1_pInt, res(1) + ! do n = -1, 1 + ! do m = -1, 1 + ! do l = -1, 1 + ! ielem = ielem +1_pInt + ! deformed_large(1:3,ielem) = deformed_small(i,j,k,1:3)+ real((/l,m,n/),pReal)*shift + ! enddo; enddo; enddo + ! enddo; enddo; enddo + ! print*, 'deformed_large' + ! print*, deformed_large + ! tree => kdtree2_create(deformed_large,sort=.true.,rearrange=.true.) + + ! allocate(new_coordinates(res(1),res(2),res(3),3)); new_coordinates = 0.0_pReal !fluctuation free new coordinates + ! do k = 1_pInt, res(3); do j = 1_pInt, res(2); do i = 1_pInt, res(1) + ! new_coordinates(i,j,k,1:3) = math_mul33x3(defgrad_av_lab,coordinates(1:3,i,j,k)) + ! enddo; enddo; enddo + ! pause +! ################################################## +! # end test of regridding + endif ! end calculation/forwarding enddo ! end looping over steps in current loadcase deallocate(c_reduced) diff --git a/code/kdtree2.f90 b/code/kdtree2.f90 new file mode 100644 index 000000000..422cef447 --- /dev/null +++ b/code/kdtree2.f90 @@ -0,0 +1,1885 @@ +! +!(c) Matthew Kennel, Institute for Nonlinear Science (2004) +! +! Licensed under the Academic Free License version 1.1 found in file LICENSE +! with additional provisions found in that same file. +! +!####################################################### +! modifications: changed precision according to prec.f90 +! k.komerla, m.diehl +!####################################################### + +module kdtree2_priority_queue_module + use prec + ! + ! maintain a priority queue (PQ) of data, pairs of 'priority/payload', + ! implemented with a binary heap. This is the type, and the 'dis' field + ! is the priority. + ! + type kdtree2_result + ! a pair of distances, indexes + real(pReal) :: dis !=0.0 + integer(pInt) :: idx !=-1 Initializers cause some bugs in compilers. + end type kdtree2_result + ! + ! A heap-based priority queue lets one efficiently implement the following + ! operations, each in log(N) time, as opposed to linear time. + ! + ! 1) add a datum (push a datum onto the queue, increasing its length) + ! 2) return the priority value of the maximum priority element + ! 3) pop-off (and delete) the element with the maximum priority, decreasing + ! the size of the queue. + ! 4) replace the datum with the maximum priority with a supplied datum + ! (of either higher or lower priority), maintaining the size of the + ! queue. + ! + ! + ! In the k-d tree case, the 'priority' is the square distance of a point in + ! the data set to a reference point. The goal is to keep the smallest M + ! distances to a reference point. The tree algorithm searches terminal + ! nodes to decide whether to add points under consideration. + ! + ! A priority queue is useful here because it lets one quickly return the + ! largest distance currently existing in the list. If a new candidate + ! distance is smaller than this, then the new candidate ought to replace + ! the old candidate. In priority queue terms, this means removing the + ! highest priority element, and inserting the new one. + ! + ! Algorithms based on Cormen, Leiserson, Rivest, _Introduction + ! to Algorithms_, 1990, with further optimization by the author. + ! + ! Originally informed by a C implementation by Sriranga Veeraraghavan. + ! + ! This module is not written in the most clear way, but is implemented such + ! for speed, as it its operations will be called many times during searches + ! of large numbers of neighbors. + ! + type pq + ! + ! The priority queue consists of elements + ! priority(1:heap_size), with associated payload(:). + ! + ! There are heap_size active elements. + ! Assumes the allocation is always sufficient. Will NOT increase it + ! to match. + integer(pInt) :: heap_size = 0 + type(kdtree2_result), pointer :: elems(:) + end type pq + + public :: kdtree2_result + + public :: pq + public :: pq_create + public :: pq_delete, pq_insert + public :: pq_extract_max, pq_max, pq_replace_max, pq_maxpri + private + +contains + + + function pq_create(results_in) result(res) + ! + ! Create a priority queue from ALREADY allocated + ! array pointers for storage. NOTE! It will NOT + ! add any alements to the heap, i.e. any existing + ! data in the input arrays will NOT be used and may + ! be overwritten. + ! + ! usage: + ! real(pReal), pointer :: x(:) + ! integer(pInt), pointer :: k(:) + ! allocate(x(1000),k(1000)) + ! pq => pq_create(x,k) + ! + type(kdtree2_result), target:: results_in(:) + type(pq) :: res + ! + ! + integer(pInt) :: nalloc + + nalloc = size(results_in,1) + if (nalloc .lt. 1) then + write (*,*) 'PQ_CREATE: error, input arrays must be allocated.' + end if + res%elems => results_in + res%heap_size = 0 + return + end function pq_create + + ! + ! operations for getting parents and left + right children + ! of elements in a binary heap. + ! + +! +! These are written inline for speed. +! +! integer(pInt) function parent(i) +! integer(pInt), intent(in) :: i +! parent = (i/2) +! return +! end function parent + +! integer(pInt) function left(i) +! integer(pInt), intent(in) ::i +! left = (2*i) +! return +! end function left + +! integer(pInt) function right(i) +! integer(pInt), intent(in) :: i +! right = (2*i)+1 +! return +! end function right + +! logical function compare_priority(p1,p2) +! real(pReal), intent(in) :: p1, p2 +! +! compare_priority = (p1 .gt. p2) +! return +! end function compare_priority + + subroutine heapify(a,i_in) + ! + ! take a heap rooted at 'i' and force it to be in the + ! heap canonical form. This is performance critical + ! and has been tweaked a little to reflect this. + ! + type(pq),pointer :: a + integer(pInt), intent(in) :: i_in + ! + integer(pInt) :: i, l, r, largest + + real(pReal) :: pri_i, pri_l, pri_r, pri_largest + + + type(kdtree2_result) :: temp + + i = i_in + +bigloop: do + l = 2*i ! left(i) + r = l+1 ! right(i) + ! + ! set 'largest' to the index of either i, l, r + ! depending on whose priority is largest. + ! + ! note that l or r can be larger than the heap size + ! in which case they do not count. + + + ! does left child have higher priority? + if (l .gt. a%heap_size) then + ! we know that i is the largest as both l and r are invalid. + exit + else + pri_i = a%elems(i)%dis + pri_l = a%elems(l)%dis + if (pri_l .gt. pri_i) then + largest = l + pri_largest = pri_l + else + largest = i + pri_largest = pri_i + endif + + ! + ! between i and l we have a winner + ! now choose between that and r. + ! + if (r .le. a%heap_size) then + pri_r = a%elems(r)%dis + if (pri_r .gt. pri_largest) then + largest = r + endif + endif + endif + + if (largest .ne. i) then + ! swap data in nodes largest and i, then heapify + + temp = a%elems(i) + a%elems(i) = a%elems(largest) + a%elems(largest) = temp + ! + ! Canonical heapify() algorithm has tail-ecursive call: + ! + ! call heapify(a,largest) + ! we will simulate with cycle + ! + i = largest + cycle bigloop ! continue the loop + else + return ! break from the loop + end if + enddo bigloop + return + end subroutine heapify + + subroutine pq_max(a,e) + ! + ! return the priority and its payload of the maximum priority element + ! on the queue, which should be the first one, if it is + ! in heapified form. + ! + type(pq),pointer :: a + type(kdtree2_result),intent(out) :: e + + if (a%heap_size .gt. 0) then + e = a%elems(1) + else + write (*,*) 'PQ_MAX: ERROR, heap_size < 1' + stop + endif + return + end subroutine pq_max + + real(pReal) function pq_maxpri(a) + type(pq), pointer :: a + + if (a%heap_size .gt. 0) then + pq_maxpri = a%elems(1)%dis + else + write (*,*) 'PQ_MAX_PRI: ERROR, heapsize < 1' + stop + endif + return + end function pq_maxpri + + subroutine pq_extract_max(a,e) + ! + ! return the priority and payload of maximum priority + ! element, and remove it from the queue. + ! (equivalent to 'pop()' on a stack) + ! + type(pq),pointer :: a + type(kdtree2_result), intent(out) :: e + + if (a%heap_size .ge. 1) then + ! + ! return max as first element + ! + e = a%elems(1) + + ! + ! move last element to first + ! + a%elems(1) = a%elems(a%heap_size) + a%heap_size = a%heap_size-1 + call heapify(a,1) + return + else + write (*,*) 'PQ_EXTRACT_MAX: error, attempted to pop non-positive PQ' + stop + end if + + end subroutine pq_extract_max + + + real(pReal) function pq_insert(a,dis,idx) + ! + ! Insert a new element and return the new maximum priority, + ! which may or may not be the same as the old maximum priority. + ! + type(pq),pointer :: a + real(pReal), intent(in) :: dis + integer(pInt), intent(in) :: idx + ! type(kdtree2_result), intent(in) :: e + ! + integer(pInt) :: i, isparent + real(pReal) :: parentdis + ! + + ! if (a%heap_size .ge. a%max_elems) then + ! write (*,*) 'PQ_INSERT: error, attempt made to insert element on full PQ' + ! stop + ! else + a%heap_size = a%heap_size + 1 + i = a%heap_size + + do while (i .gt. 1) + isparent = int(i/2) + parentdis = a%elems(isparent)%dis + if (dis .gt. parentdis) then + ! move what was in i's parent into i. + a%elems(i)%dis = parentdis + a%elems(i)%idx = a%elems(isparent)%idx + i = isparent + else + exit + endif + end do + + ! insert the element at the determined position + a%elems(i)%dis = dis + a%elems(i)%idx = idx + + pq_insert = a%elems(1)%dis + return + ! end if + + end function pq_insert + + subroutine pq_adjust_heap(a,i) + type(pq),pointer :: a + integer(pInt), intent(in) :: i + ! + ! nominally arguments (a,i), but specialize for a=1 + ! + ! This routine assumes that the trees with roots 2 and 3 are already heaps, i.e. + ! the children of '1' are heaps. When the procedure is completed, the + ! tree rooted at 1 is a heap. + real(pReal) :: prichild + integer(pInt) :: parent, child, N + + type(kdtree2_result) :: e + + e = a%elems(i) + + parent = i + child = 2*i + N = a%heap_size + + do while (child .le. N) + if (child .lt. N) then + if (a%elems(child)%dis .lt. a%elems(child+1)%dis) then + child = child+1 + endif + endif + prichild = a%elems(child)%dis + if (e%dis .ge. prichild) then + exit + else + ! move child into parent. + a%elems(parent) = a%elems(child) + parent = child + child = 2*parent + end if + end do + a%elems(parent) = e + return + end subroutine pq_adjust_heap + + + real(pReal) function pq_replace_max(a,dis,idx) + ! + ! Replace the extant maximum priority element + ! in the PQ with (dis,idx). Return + ! the new maximum priority, which may be larger + ! or smaller than the old one. + ! + type(pq),pointer :: a + real(pReal), intent(in) :: dis + integer(pInt), intent(in) :: idx +! type(kdtree2_result), intent(in) :: e + ! not tested as well! + + integer(pInt) :: parent, child, N + real(pReal) :: prichild, prichildp1 + + type(kdtree2_result) :: etmp + + if (.true.) then + N=a%heap_size + if (N .ge. 1) then + parent =1 + child=2 + + loop: do while (child .le. N) + prichild = a%elems(child)%dis + + ! + ! posibly child+1 has higher priority, and if + ! so, get it, and increment child. + ! + + if (child .lt. N) then + prichildp1 = a%elems(child+1)%dis + if (prichild .lt. prichildp1) then + child = child+1 + prichild = prichildp1 + endif + endif + + if (dis .ge. prichild) then + exit loop + ! we have a proper place for our new element, + ! bigger than either children's priority. + else + ! move child into parent. + a%elems(parent) = a%elems(child) + parent = child + child = 2*parent + end if + end do loop + a%elems(parent)%dis = dis + a%elems(parent)%idx = idx + pq_replace_max = a%elems(1)%dis + else + a%elems(1)%dis = dis + a%elems(1)%idx = idx + pq_replace_max = dis + endif + else + ! + ! slower version using elementary pop and push operations. + ! + call pq_extract_max(a,etmp) + etmp%dis = dis + etmp%idx = idx + pq_replace_max = pq_insert(a,dis,idx) + endif + return + end function pq_replace_max + + subroutine pq_delete(a,i) + ! + ! delete item with index 'i' + ! + type(pq),pointer :: a + integer(pInt) :: i + + if ((i .lt. 1) .or. (i .gt. a%heap_size)) then + write (*,*) 'PQ_DELETE: error, attempt to remove out of bounds element.' + stop + endif + + ! swap the item to be deleted with the last element + ! and shorten heap by one. + a%elems(i) = a%elems(a%heap_size) + a%heap_size = a%heap_size - 1 + + call heapify(a,i) + + end subroutine pq_delete + +end module kdtree2_priority_queue_module + + +module kdtree2_module + use prec + use kdtree2_priority_queue_module + ! K-D tree routines in Fortran 90 by Matt Kennel. + ! Original program was written in Sather by Steve Omohundro and + ! Matt Kennel. Only the Euclidean metric is supported. + ! + ! + ! This module is identical to 'kd_tree', except that the order + ! of subscripts is reversed in the data file. + ! In otherwords for an embedding of N D-dimensional vectors, the + ! data file is here, in natural Fortran order data(1:D, 1:N) + ! because Fortran lays out columns first, + ! + ! whereas conventionally (C-style) it is data(1:N,1:D) + ! as in the original kd_tree module. + ! + !-------------DATA TYPE, CREATION, DELETION--------------------- + public :: pReal + public :: kdtree2, kdtree2_result, tree_node, kdtree2_create, kdtree2_destroy + !--------------------------------------------------------------- + !-------------------SEARCH ROUTINES----------------------------- + public :: kdtree2_n_nearest,kdtree2_n_nearest_around_point + ! Return fixed number of nearest neighbors around arbitrary vector, + ! or extant point in dataset, with decorrelation window. + ! + public :: kdtree2_r_nearest, kdtree2_r_nearest_around_point + ! Return points within a fixed ball of arb vector/extant point + ! + public :: kdtree2_sort_results + ! Sort, in order of increasing distance, rseults from above. + ! + public :: kdtree2_r_count, kdtree2_r_count_around_point + ! Count points within a fixed ball of arb vector/extant point + ! + public :: kdtree2_n_nearest_brute_force, kdtree2_r_nearest_brute_force + ! brute force of kdtree2_[n|r]_nearest + !---------------------------------------------------------------- + + + integer(pInt), parameter :: bucket_size = 12 + ! The maximum number of points to keep in a terminal node. + + type interval + real(pReal) :: lower,upper + end type interval + + type :: tree_node + ! an internal tree node + private + integer(pInt) :: cut_dim + ! the dimension to cut + real(pReal) :: cut_val + ! where to cut the dimension + real(pReal) :: cut_val_left, cut_val_right + ! improved cutoffs knowing the spread in child boxes. + integer(pInt) :: l, u + type (tree_node), pointer :: left, right + type(interval), pointer :: box(:) => null() + ! child pointers + ! Points included in this node are indexes[k] with k \in [l,u] + + + end type tree_node + + type :: kdtree2 + ! Global information about the tree, one per tree + integer(pInt) :: dimen=0, n=0 + ! dimensionality and total # of points + real(pReal), pointer :: the_data(:,:) => null() + ! pointer to the actual data array + ! + ! IMPORTANT NOTE: IT IS DIMENSIONED the_data(1:d,1:N) + ! which may be opposite of what may be conventional. + ! This is, because in Fortran, the memory layout is such that + ! the first dimension is in sequential order. Hence, with + ! (1:d,1:N), all components of the vector will be in consecutive + ! memory locations. The search time is dominated by the + ! evaluation of distances in the terminal nodes. Putting all + ! vector components in consecutive memory location improves + ! memory cache locality, and hence search speed, and may enable + ! vectorization on some processors and compilers. + + integer(pInt), pointer :: ind(:) => null() + ! permuted index into the data, so that indexes[l..u] of some + ! bucket represent the indexes of the actual points in that + ! bucket. + logical :: sort = .false. + ! do we always sort output results? + logical :: rearrange = .false. + real(pReal), pointer :: rearranged_data(:,:) => null() + ! if (rearrange .eqv. .true.) then rearranged_data has been + ! created so that rearranged_data(:,i) = the_data(:,ind(i)), + ! permitting search to use more cache-friendly rearranged_data, at + ! some initial computation and storage cost. + type (tree_node), pointer :: root => null() + ! root pointer of the tree + end type kdtree2 + + + type :: tree_search_record + ! + ! One of these is created for each search. + ! + private + ! + ! Many fields are copied from the tree structure, in order to + ! speed up the search. + ! + integer(pInt) :: dimen + integer(pInt) :: nn, nfound + real(pReal) :: ballsize + integer(pInt) :: centeridx=999, correltime=9999 + ! exclude points within 'correltime' of 'centeridx', iff centeridx >= 0 + integer(pInt) :: nalloc ! how much allocated for results(:)? + logical :: rearrange ! are the data rearranged or original? + ! did the # of points found overflow the storage provided? + logical :: overflow + real(pReal), pointer :: qv(:) ! query vector + type(kdtree2_result), pointer :: results(:) ! results + type(pq) :: pq + real(pReal), pointer :: data(:,:) ! temp pointer to data + integer(pInt), pointer :: ind(:) ! temp pointer to indexes + end type tree_search_record + + private + ! everything else is private. + + type(tree_search_record), save, target :: sr ! A GLOBAL VARIABLE for search + +contains + + function kdtree2_create(input_data,dim,sort,rearrange) result (mr) + ! + ! create the actual tree structure, given an input array of data. + ! + ! Note, input data is input_data(1:d,1:N), NOT the other way around. + ! THIS IS THE REVERSE OF THE PREVIOUS VERSION OF THIS MODULE. + ! The reason for it is cache friendliness, improving performance. + ! + ! Optional arguments: If 'dim' is specified, then the tree + ! will only search the first 'dim' components + ! of input_data, otherwise, dim is inferred + ! from SIZE(input_data,1). + ! + ! if sort .eqv. .true. then output results + ! will be sorted by increasing distance. + ! default=.false., as it is faster to not sort. + ! + ! if rearrange .eqv. .true. then an internal + ! copy of the data, rearranged by terminal node, + ! will be made for cache friendliness. + ! default=.true., as it speeds searches, but + ! building takes longer, and extra memory is used. + ! + ! .. Function Return Cut_value .. + type (kdtree2), pointer :: mr + integer(pInt), intent(in), optional :: dim + logical, intent(in), optional :: sort + logical, intent(in), optional :: rearrange + ! .. + ! .. Array Arguments .. + real(pReal), target :: input_data(:,:) + ! + integer(pInt) :: i + ! .. + allocate (mr) + mr%the_data => input_data + ! pointer assignment + + if (present(dim)) then + mr%dimen = dim + else + mr%dimen = size(input_data,1) + end if + mr%n = size(input_data,2) + + if (mr%dimen > mr%n) then + ! unlikely to be correct + write (*,*) 'KD_TREE_TRANS: likely user error.' + write (*,*) 'KD_TREE_TRANS: You passed in matrix with D=',mr%dimen + write (*,*) 'KD_TREE_TRANS: and N=',mr%n + write (*,*) 'KD_TREE_TRANS: note, that new format is data(1:D,1:N)' + write (*,*) 'KD_TREE_TRANS: with usually N >> D. If N =approx= D, then a k-d tree' + write (*,*) 'KD_TREE_TRANS: is not an appropriate data structure.' + stop + end if + + call build_tree(mr) + + if (present(sort)) then + mr%sort = sort + else + mr%sort = .false. + endif + + if (present(rearrange)) then + mr%rearrange = rearrange + else + mr%rearrange = .true. + endif + + if (mr%rearrange) then + allocate(mr%rearranged_data(mr%dimen,mr%n)) + do i=1,mr%n + mr%rearranged_data(:,i) = mr%the_data(:, & + mr%ind(i)) + enddo + else + nullify(mr%rearranged_data) + endif + + end function kdtree2_create + + subroutine build_tree(tp) + type (kdtree2), pointer :: tp + ! .. + integer(pInt) :: j + type(tree_node), pointer :: dummy => null() + ! .. + allocate (tp%ind(tp%n)) + forall (j=1:tp%n) + tp%ind(j) = j + end forall + tp%root => build_tree_for_range(tp,1,tp%n, dummy) + end subroutine build_tree + + recursive function build_tree_for_range(tp,l,u,parent) result (res) + ! .. Function Return Cut_value .. + type (tree_node), pointer :: res + ! .. + ! .. Structure Arguments .. + type (kdtree2), pointer :: tp + type (tree_node),pointer :: parent + ! .. + ! .. Scalar Arguments .. + integer(pInt), intent (In) :: l, u + ! .. + ! .. Local Scalars .. + integer(pInt) :: i, c, m, dimen + logical :: recompute + real(pReal) :: average + +!!$ If (.False.) Then +!!$ If ((l .Lt. 1) .Or. (l .Gt. tp%n)) Then +!!$ Stop 'illegal L value in build_tree_for_range' +!!$ End If +!!$ If ((u .Lt. 1) .Or. (u .Gt. tp%n)) Then +!!$ Stop 'illegal u value in build_tree_for_range' +!!$ End If +!!$ If (u .Lt. l) Then +!!$ Stop 'U is less than L, thats illegal.' +!!$ End If +!!$ Endif +!!$ + ! first compute min and max + dimen = tp%dimen + allocate (res) + allocate(res%box(dimen)) + + ! First, compute an APPROXIMATE bounding box of all points associated with this node. + if ( u < l ) then + ! no points in this box + nullify(res) + return + end if + + if ((u-l)<=bucket_size) then + ! + ! always compute true bounding box for terminal nodes. + ! + do i=1,dimen + call spread_in_coordinate(tp,i,l,u,res%box(i)) + end do + res%cut_dim = 0 + res%cut_val = 0.0 + res%l = l + res%u = u + res%left =>null() + res%right => null() + else + ! + ! modify approximate bounding box. This will be an + ! overestimate of the true bounding box, as we are only recomputing + ! the bounding box for the dimension that the parent split on. + ! + ! Going to a true bounding box computation would significantly + ! increase the time necessary to build the tree, and usually + ! has only a very small difference. This box is not used + ! for searching but only for deciding which coordinate to split on. + ! + do i=1,dimen + recompute=.true. + if (associated(parent)) then + if (i .ne. parent%cut_dim) then + recompute=.false. + end if + endif + if (recompute) then + call spread_in_coordinate(tp,i,l,u,res%box(i)) + else + res%box(i) = parent%box(i) + endif + end do + + + c = maxloc(res%box(1:dimen)%upper-res%box(1:dimen)%lower,1) + ! + ! c is the identity of which coordinate has the greatest spread. + ! + + if (.false.) then + ! select exact median to have fully balanced tree. + m = (l+u)/2 + call select_on_coordinate(tp%the_data,tp%ind,c,m,l,u) + else + ! + ! select point halfway between min and max, as per A. Moore, + ! who says this helps in some degenerate cases, or + ! actual arithmetic average. + ! + if (.true.) then + ! actually compute average + average = sum(tp%the_data(c,tp%ind(l:u))) / real(u-l+1,pReal) + else + average = (res%box(c)%upper + res%box(c)%lower)/2.0 + endif + + res%cut_val = average + m = select_on_coordinate_value(tp%the_data,tp%ind,c,average,l,u) + endif + + ! moves indexes around + res%cut_dim = c + res%l = l + res%u = u +! res%cut_val = tp%the_data(c,tp%ind(m)) + + res%left => build_tree_for_range(tp,l,m,res) + res%right => build_tree_for_range(tp,m+1,u,res) + + if (associated(res%right) .eqv. .false.) then + res%box = res%left%box + res%cut_val_left = res%left%box(c)%upper + res%cut_val = res%cut_val_left + elseif (associated(res%left) .eqv. .false.) then + res%box = res%right%box + res%cut_val_right = res%right%box(c)%lower + res%cut_val = res%cut_val_right + else + res%cut_val_right = res%right%box(c)%lower + res%cut_val_left = res%left%box(c)%upper + res%cut_val = (res%cut_val_left + res%cut_val_right)/2 + + + ! now remake the true bounding box for self. + ! Since we are taking unions (in effect) of a tree structure, + ! this is much faster than doing an exhaustive + ! search over all points + res%box%upper = max(res%left%box%upper,res%right%box%upper) + res%box%lower = min(res%left%box%lower,res%right%box%lower) + endif + end if + end function build_tree_for_range + + integer(pInt) function select_on_coordinate_value(v,ind,c,alpha,li,ui) & + result(res) + ! Move elts of ind around between l and u, so that all points + ! <= than alpha (in c cooordinate) are first, and then + ! all points > alpha are second. + + ! + ! Algorithm (matt kennel). + ! + ! Consider the list as having three parts: on the left, + ! the points known to be <= alpha. On the right, the points + ! known to be > alpha, and in the middle, the currently unknown + ! points. The algorithm is to scan the unknown points, starting + ! from the left, and swapping them so that they are added to + ! the left stack or the right stack, as appropriate. + ! + ! The algorithm finishes when the unknown stack is empty. + ! + ! .. Scalar Arguments .. + integer(pInt), intent (In) :: c, li, ui + real(pReal), intent(in) :: alpha + ! .. + real(pReal) :: v(1:,1:) + integer(pInt) :: ind(1:) + integer(pInt) :: tmp + ! .. + integer(pInt) :: lb, rb + ! + ! The points known to be <= alpha are in + ! [l,lb-1] + ! + ! The points known to be > alpha are in + ! [rb+1,u]. + ! + ! Therefore we add new points into lb or + ! rb as appropriate. When lb=rb + ! we are done. We return the location of the last point <= alpha. + ! + ! + lb = li; rb = ui + + do while (lb < rb) + if ( v(c,ind(lb)) <= alpha ) then + ! it is good where it is. + lb = lb+1 + else + ! swap it with rb. + tmp = ind(lb); ind(lb) = ind(rb); ind(rb) = tmp + rb = rb-1 + endif + end do + + ! now lb .eq. ub + if (v(c,ind(lb)) <= alpha) then + res = lb + else + res = lb-1 + endif + + end function select_on_coordinate_value + + subroutine select_on_coordinate(v,ind,c,k,li,ui) + ! Move elts of ind around between l and u, so that the kth + ! element + ! is >= those below, <= those above, in the coordinate c. + ! .. Scalar Arguments .. + integer(pInt), intent (In) :: c, k, li, ui + ! .. + integer(pInt) :: i, l, m, s, t, u + ! .. + real(pReal) :: v(:,:) + integer(pInt) :: ind(:) + ! .. + l = li + u = ui + do while (l=k) u = m - 1 + end do + end subroutine select_on_coordinate + + subroutine spread_in_coordinate(tp,c,l,u,interv) + ! the spread in coordinate 'c', between l and u. + ! + ! Return lower bound in 'smin', and upper in 'smax', + ! .. + ! .. Structure Arguments .. + type (kdtree2), pointer :: tp + type(interval), intent(out) :: interv + ! .. + ! .. Scalar Arguments .. + integer(pInt), intent (In) :: c, l, u + ! .. + ! .. Local Scalars .. + real(pReal) :: last, lmax, lmin, t, smin,smax + integer(pInt) :: i, ulocal + ! .. + ! .. Local Arrays .. + real(pReal), pointer :: v(:,:) + integer(pInt), pointer :: ind(:) + ! .. + v => tp%the_data(1:,1:) + ind => tp%ind(1:) + smin = v(c,ind(l)) + smax = smin + + ulocal = u + + do i = l + 2, ulocal, 2 + lmin = v(c,ind(i-1)) + lmax = v(c,ind(i)) + if (lmin>lmax) then + t = lmin + lmin = lmax + lmax = t + end if + if (smin>lmin) smin = lmin + if (smaxlast) smin = last + if (smax qv + sr%nn = nn + sr%nfound = 0 + sr%centeridx = -1 + sr%correltime = 0 + sr%overflow = .false. + + sr%results => results + + sr%nalloc = nn ! will be checked + + sr%ind => tp%ind + sr%rearrange = tp%rearrange + if (tp%rearrange) then + sr%Data => tp%rearranged_data + else + sr%Data => tp%the_data + endif + sr%dimen = tp%dimen + + call validate_query_storage(nn) + sr%pq = pq_create(results) + + call search(tp%root) + + if (tp%sort) then + call kdtree2_sort_results(nn, results) + endif +! deallocate(sr%pqp) + return + end subroutine kdtree2_n_nearest + + subroutine kdtree2_n_nearest_around_point(tp,idxin,correltime,nn,results) + ! Find the 'nn' vectors in the tree nearest to point 'idxin', + ! with correlation window 'correltime', returing results in + ! results(:), which must be pre-allocated upon entry. + type (kdtree2), pointer :: tp + integer(pInt), intent (In) :: idxin, correltime, nn + type(kdtree2_result), target :: results(:) + + allocate (sr%qv(tp%dimen)) + sr%qv = tp%the_data(:,idxin) ! copy the vector + sr%ballsize = huge(1.0) ! the largest real(pReal) number + sr%centeridx = idxin + sr%correltime = correltime + + sr%nn = nn + sr%nfound = 0 + + sr%dimen = tp%dimen + sr%nalloc = nn + + sr%results => results + + sr%ind => tp%ind + sr%rearrange = tp%rearrange + + if (sr%rearrange) then + sr%Data => tp%rearranged_data + else + sr%Data => tp%the_data + endif + + call validate_query_storage(nn) + sr%pq = pq_create(results) + + call search(tp%root) + + if (tp%sort) then + call kdtree2_sort_results(nn, results) + endif + deallocate (sr%qv) + return + end subroutine kdtree2_n_nearest_around_point + + subroutine kdtree2_r_nearest(tp,qv,r2,nfound,nalloc,results) + ! find the nearest neighbors to point 'idxin', within SQUARED + ! Euclidean distance 'r2'. Upon ENTRY, nalloc must be the + ! size of memory allocated for results(1:nalloc). Upon + ! EXIT, nfound is the number actually found within the ball. + ! + ! Note that if nfound .gt. nalloc then more neighbors were found + ! than there were storage to store. The resulting list is NOT + ! the smallest ball inside norm r^2 + ! + ! Results are NOT sorted unless tree was created with sort option. + type (kdtree2), pointer :: tp + real(pReal), target, intent (In) :: qv(:) + real(pReal), intent(in) :: r2 + integer(pInt), intent(out) :: nfound + integer(pInt), intent (In) :: nalloc + type(kdtree2_result), target :: results(:) + + ! + sr%qv => qv + sr%ballsize = r2 + sr%nn = 0 ! flag for fixed ball search + sr%nfound = 0 + sr%centeridx = -1 + sr%correltime = 0 + + sr%results => results + + call validate_query_storage(nalloc) + sr%nalloc = nalloc + sr%overflow = .false. + sr%ind => tp%ind + sr%rearrange= tp%rearrange + + if (tp%rearrange) then + sr%Data => tp%rearranged_data + else + sr%Data => tp%the_data + endif + sr%dimen = tp%dimen + + ! + !sr%dsl = Huge(sr%dsl) ! set to huge positive values + !sr%il = -1 ! set to invalid indexes + ! + + call search(tp%root) + nfound = sr%nfound + if (tp%sort) then + call kdtree2_sort_results(nfound, results) + endif + + if (sr%overflow) then + write (*,*) 'KD_TREE_TRANS: warning! return from kdtree2_r_nearest found more neighbors' + write (*,*) 'KD_TREE_TRANS: than storage was provided for. Answer is NOT smallest ball' + write (*,*) 'KD_TREE_TRANS: with that number of neighbors! I.e. it is wrong.' + endif + + return + end subroutine kdtree2_r_nearest + + subroutine kdtree2_r_nearest_around_point(tp,idxin,correltime,r2,& + nfound,nalloc,results) + ! + ! Like kdtree2_r_nearest, but around a point 'idxin' already existing + ! in the data set. + ! + ! Results are NOT sorted unless tree was created with sort option. + ! + type (kdtree2), pointer :: tp + integer(pInt), intent (In) :: idxin, correltime, nalloc + real(pReal), intent(in) :: r2 + integer(pInt), intent(out) :: nfound + type(kdtree2_result), target :: results(:) + ! .. + ! .. Intrinsic Functions .. + intrinsic HUGE + ! .. + allocate (sr%qv(tp%dimen)) + sr%qv = tp%the_data(:,idxin) ! copy the vector + sr%ballsize = r2 + sr%nn = 0 ! flag for fixed r search + sr%nfound = 0 + sr%centeridx = idxin + sr%correltime = correltime + + sr%results => results + + sr%nalloc = nalloc + sr%overflow = .false. + + call validate_query_storage(nalloc) + + ! sr%dsl = HUGE(sr%dsl) ! set to huge positive values + ! sr%il = -1 ! set to invalid indexes + + sr%ind => tp%ind + sr%rearrange = tp%rearrange + + if (tp%rearrange) then + sr%Data => tp%rearranged_data + else + sr%Data => tp%the_data + endif + sr%rearrange = tp%rearrange + sr%dimen = tp%dimen + + ! + !sr%dsl = Huge(sr%dsl) ! set to huge positive values + !sr%il = -1 ! set to invalid indexes + ! + + call search(tp%root) + nfound = sr%nfound + if (tp%sort) then + call kdtree2_sort_results(nfound,results) + endif + + if (sr%overflow) then + write (*,*) 'KD_TREE_TRANS: warning! return from kdtree2_r_nearest found more neighbors' + write (*,*) 'KD_TREE_TRANS: than storage was provided for. Answer is NOT smallest ball' + write (*,*) 'KD_TREE_TRANS: with that number of neighbors! I.e. it is wrong.' + endif + + deallocate (sr%qv) + return + end subroutine kdtree2_r_nearest_around_point + + function kdtree2_r_count(tp,qv,r2) result(nfound) + ! Count the number of neighbors within square distance 'r2'. + type (kdtree2), pointer :: tp + real(pReal), target, intent (In) :: qv(:) + real(pReal), intent(in) :: r2 + integer(pInt) :: nfound + ! .. + ! .. Intrinsic Functions .. + intrinsic HUGE + ! .. + sr%qv => qv + sr%ballsize = r2 + + sr%nn = 0 ! flag for fixed r search + sr%nfound = 0 + sr%centeridx = -1 + sr%correltime = 0 + + nullify(sr%results) ! for some reason, FTN 95 chokes on '=> null()' + + sr%nalloc = 0 ! we do not allocate any storage but that's OK + ! for counting. + sr%ind => tp%ind + sr%rearrange = tp%rearrange + if (tp%rearrange) then + sr%Data => tp%rearranged_data + else + sr%Data => tp%the_data + endif + sr%dimen = tp%dimen + + ! + !sr%dsl = Huge(sr%dsl) ! set to huge positive values + !sr%il = -1 ! set to invalid indexes + ! + sr%overflow = .false. + + call search(tp%root) + + nfound = sr%nfound + + return + end function kdtree2_r_count + + function kdtree2_r_count_around_point(tp,idxin,correltime,r2) & + result(nfound) + ! Count the number of neighbors within square distance 'r2' around + ! point 'idxin' with decorrelation time 'correltime'. + ! + type (kdtree2), pointer :: tp + integer(pInt), intent (In) :: correltime, idxin + real(pReal), intent(in) :: r2 + integer(pInt) :: nfound + ! .. + ! .. + ! .. Intrinsic Functions .. + intrinsic HUGE + ! .. + allocate (sr%qv(tp%dimen)) + sr%qv = tp%the_data(:,idxin) + sr%ballsize = r2 + + sr%nn = 0 ! flag for fixed r search + sr%nfound = 0 + sr%centeridx = idxin + sr%correltime = correltime + nullify(sr%results) + + sr%nalloc = 0 ! we do not allocate any storage but that's OK + ! for counting. + + sr%ind => tp%ind + sr%rearrange = tp%rearrange + + if (sr%rearrange) then + sr%Data => tp%rearranged_data + else + sr%Data => tp%the_data + endif + sr%dimen = tp%dimen + + ! + !sr%dsl = Huge(sr%dsl) ! set to huge positive values + !sr%il = -1 ! set to invalid indexes + ! + sr%overflow = .false. + + call search(tp%root) + + nfound = sr%nfound + + return + end function kdtree2_r_count_around_point + + + subroutine validate_query_storage(n) + ! + ! make sure we have enough storage for n + ! + integer(pInt), intent(in) :: n + + if (size(sr%results,1) .lt. n) then + write (*,*) 'KD_TREE_TRANS: you did not provide enough storage for results(1:n)' + stop + return + endif + + return + end subroutine validate_query_storage + + function square_distance(d, iv,qv) result (res) + ! distance between iv[1:n] and qv[1:n] + ! .. Function Return Value .. + ! re-implemented to improve vectorization. + real(pReal) :: res + ! .. + ! .. + ! .. Scalar Arguments .. + integer(pInt) :: d + ! .. + ! .. Array Arguments .. + real(pReal) :: iv(:),qv(:) + ! .. + ! .. + res = sum( (iv(1:d)-qv(1:d))**2 ) + end function square_distance + + recursive subroutine search(node) + ! + ! This is the innermost core routine of the kd-tree search. Along + ! with "process_terminal_node", it is the performance bottleneck. + ! + ! This version uses a logically complete secondary search of + ! "box in bounds", whether the sear + ! + type (Tree_node), pointer :: node + ! .. + type(tree_node),pointer :: ncloser, nfarther + ! + integer(pInt) :: cut_dim, i + ! .. + real(pReal) :: qval, dis + real(pReal) :: ballsize + real(pReal), pointer :: qv(:) + type(interval), pointer :: box(:) + + if ((associated(node%left) .and. associated(node%right)) .eqv. .false.) then + ! we are on a terminal node + if (sr%nn .eq. 0) then + call process_terminal_node_fixedball(node) + else + call process_terminal_node(node) + endif + else + ! we are not on a terminal node + qv => sr%qv(1:) + cut_dim = node%cut_dim + qval = qv(cut_dim) + + if (qval < node%cut_val) then + ncloser => node%left + nfarther => node%right + dis = (node%cut_val_right - qval)**2 +! extra = node%cut_val - qval + else + ncloser => node%right + nfarther => node%left + dis = (node%cut_val_left - qval)**2 +! extra = qval- node%cut_val_left + endif + + if (associated(ncloser)) call search(ncloser) + + ! we may need to search the second node. + if (associated(nfarther)) then + ballsize = sr%ballsize +! dis=extra**2 + if (dis <= ballsize) then + ! + ! we do this separately as going on the first cut dimen is often + ! a good idea. + ! note that if extra**2 < sr%ballsize, then the next + ! check will also be false. + ! + box => node%box(1:) + do i=1,sr%dimen + if (i .ne. cut_dim) then + dis = dis + dis2_from_bnd(qv(i),box(i)%lower,box(i)%upper) + if (dis > ballsize) then + return + endif + endif + end do + + ! + ! if we are still here then we need to search mroe. + ! + call search(nfarther) + endif + endif + end if + end subroutine search + + + real(pReal) function dis2_from_bnd(x,amin,amax) result (res) + real(pReal), intent(in) :: x, amin,amax + + if (x > amax) then + res = (x-amax)**2; + return + else + if (x < amin) then + res = (amin-x)**2; + return + else + res = 0.0 + return + endif + endif + return + end function dis2_from_bnd + + logical function box_in_search_range(node, sr) result(res) + ! + ! Return the distance from 'qv' to the CLOSEST corner of node's + ! bounding box + ! for all coordinates outside the box. Coordinates inside the box + ! contribute nothing to the distance. + ! + type (tree_node), pointer :: node + type (tree_search_record), pointer :: sr + + integer(pInt) :: dimen, i + real(pReal) :: dis, ballsize + real(pReal) :: l, u + + dimen = sr%dimen + ballsize = sr%ballsize + dis = 0.0 + res = .true. + do i=1,dimen + l = node%box(i)%lower + u = node%box(i)%upper + dis = dis + (dis2_from_bnd(sr%qv(i),l,u)) + if (dis > ballsize) then + res = .false. + return + endif + end do + res = .true. + return + end function box_in_search_range + + + subroutine process_terminal_node(node) + ! + ! Look for actual near neighbors in 'node', and update + ! the search results on the sr data structure. + ! + type (tree_node), pointer :: node + ! + real(pReal), pointer :: qv(:) + integer(pInt), pointer :: ind(:) + real(pReal), pointer :: data(:,:) + ! + integer(pInt) :: dimen, i, indexofi, k, centeridx, correltime + real(pReal) :: ballsize, sd, newpri + logical :: rearrange + type(pq), pointer :: pqp + ! + ! copy values from sr to local variables + ! + ! + ! Notice, making local pointers with an EXPLICIT lower bound + ! seems to generate faster code. + ! why? I don't know. + qv => sr%qv(1:) + pqp => sr%pq + dimen = sr%dimen + ballsize = sr%ballsize + rearrange = sr%rearrange + ind => sr%ind(1:) + data => sr%Data(1:,1:) + centeridx = sr%centeridx + correltime = sr%correltime + + ! doing_correl = (centeridx >= 0) ! Do we have a decorrelation window? + ! include_point = .true. ! by default include all points + ! search through terminal bucket. + + mainloop: do i = node%l, node%u + if (rearrange) then + sd = 0.0 + do k = 1,dimen + sd = sd + (data(k,i) - qv(k))**2 + if (sd>ballsize) cycle mainloop + end do + indexofi = ind(i) ! only read it if we have not broken out + else + indexofi = ind(i) + sd = 0.0 + do k = 1,dimen + sd = sd + (data(k,indexofi) - qv(k))**2 + if (sd>ballsize) cycle mainloop + end do + endif + + if (centeridx > 0) then ! doing correlation interval? + if (abs(indexofi-centeridx) < correltime) cycle mainloop + endif + + + ! + ! two choices for any point. The list so far is either undersized, + ! or it is not. + ! + ! If it is undersized, then add the point and its distance + ! unconditionally. If the point added fills up the working + ! list then set the sr%ballsize, maximum distance bound (largest distance on + ! list) to be that distance, instead of the initialized +infinity. + ! + ! If the running list is full size, then compute the + ! distance but break out immediately if it is larger + ! than sr%ballsize, "best squared distance" (of the largest element), + ! as it cannot be a good neighbor. + ! + ! Once computed, compare to best_square distance. + ! if it is smaller, then delete the previous largest + ! element and add the new one. + + if (sr%nfound .lt. sr%nn) then + ! + ! add this point unconditionally to fill list. + ! + sr%nfound = sr%nfound +1 + newpri = pq_insert(pqp,sd,indexofi) + if (sr%nfound .eq. sr%nn) ballsize = newpri + ! we have just filled the working list. + ! put the best square distance to the maximum value + ! on the list, which is extractable from the PQ. + + else + ! + ! now, if we get here, + ! we know that the current node has a squared + ! distance smaller than the largest one on the list, and + ! belongs on the list. + ! Hence we replace that with the current one. + ! + ballsize = pq_replace_max(pqp,sd,indexofi) + endif + end do mainloop + ! + ! Reset sr variables which may have changed during loop + ! + sr%ballsize = ballsize + + end subroutine process_terminal_node + + subroutine process_terminal_node_fixedball(node) + ! + ! Look for actual near neighbors in 'node', and update + ! the search results on the sr data structure, i.e. + ! save all within a fixed ball. + ! + type (tree_node), pointer :: node + ! + real(pReal), pointer :: qv(:) + integer(pInt), pointer :: ind(:) + real(pReal), pointer :: data(:,:) + ! + integer(pInt) :: nfound + integer(pInt) :: dimen, i, indexofi, k + integer(pInt) :: centeridx, correltime, nn + real(pReal) :: ballsize, sd + logical :: rearrange + + ! + ! copy values from sr to local variables + ! + qv => sr%qv(1:) + dimen = sr%dimen + ballsize = sr%ballsize + rearrange = sr%rearrange + ind => sr%ind(1:) + data => sr%Data(1:,1:) + centeridx = sr%centeridx + correltime = sr%correltime + nn = sr%nn ! number to search for + nfound = sr%nfound + + ! search through terminal bucket. + mainloop: do i = node%l, node%u + + ! + ! two choices for any point. The list so far is either undersized, + ! or it is not. + ! + ! If it is undersized, then add the point and its distance + ! unconditionally. If the point added fills up the working + ! list then set the sr%ballsize, maximum distance bound (largest distance on + ! list) to be that distance, instead of the initialized +infinity. + ! + ! If the running list is full size, then compute the + ! distance but break out immediately if it is larger + ! than sr%ballsize, "best squared distance" (of the largest element), + ! as it cannot be a good neighbor. + ! + ! Once computed, compare to best_square distance. + ! if it is smaller, then delete the previous largest + ! element and add the new one. + + ! which index to the point do we use? + + if (rearrange) then + sd = 0.0 + do k = 1,dimen + sd = sd + (data(k,i) - qv(k))**2 + if (sd>ballsize) cycle mainloop + end do + indexofi = ind(i) ! only read it if we have not broken out + else + indexofi = ind(i) + sd = 0.0 + do k = 1,dimen + sd = sd + (data(k,indexofi) - qv(k))**2 + if (sd>ballsize) cycle mainloop + end do + endif + + if (centeridx > 0) then ! doing correlation interval? + if (abs(indexofi-centeridx) 1)then + ileft=ileft-1 + value=a(ileft); ivalue=ind(ileft) + else + value=a(iright); ivalue=ind(iright) + a(iright)=a(1); ind(iright)=ind(1) + iright=iright-1 + if (iright == 1) then + a(1)=value;ind(1)=ivalue + return + endif + endif + i=ileft + j=2*ileft + do while (j <= iright) + if(j < iright) then + if(a(j) < a(j+1)) j=j+1 + endif + if(value < a(j)) then + a(i)=a(j); ind(i)=ind(j) + i=j + j=j+j + else + j=iright+1 + endif + end do + a(i)=value; ind(i)=ivalue + end do + end subroutine heapsort + + subroutine heapsort_struct(a,n) + ! + ! Sort a(1:n) in ascending order + ! + ! + integer(pInt),intent(in) :: n + type(kdtree2_result),intent(inout) :: a(:) + + ! + ! + type(kdtree2_result) :: value ! temporary value + + integer(pInt) :: i,j + integer(pInt) :: ileft,iright + + ileft=n/2+1 + iright=n + + ! do i=1,n + ! ind(i)=i + ! Generate initial idum array + ! end do + + if(n.eq.1) return + + do + if(ileft > 1)then + ileft=ileft-1 + value=a(ileft) + else + value=a(iright) + a(iright)=a(1) + iright=iright-1 + if (iright == 1) then + a(1) = value + return + endif + endif + i=ileft + j=2*ileft + do while (j <= iright) + if(j < iright) then + if(a(j)%dis < a(j+1)%dis) j=j+1 + endif + if(value%dis < a(j)%dis) then + a(i)=a(j); + i=j + j=j+j + else + j=iright+1 + endif + end do + a(i)=value + end do + end subroutine heapsort_struct + +end module kdtree2_module + diff --git a/code/makefile b/code/makefile index 8b0340b60..ea00386ac 100644 --- a/code/makefile +++ b/code/makefile @@ -58,9 +58,9 @@ DEBUG5 =-stand std03/std95 ######################################################################################## #auto values will be set by setup_code.py -FFTWROOT := +FFTWROOT := /nethome/m.diehl/DAMASK/lib/fftw IKMLROOT := -ACMLROOT := +ACMLROOT := /opt/acml4.4.0 LAPACKROOT := F90 ?= ifort @@ -206,15 +206,18 @@ FEsolving.o: FEsolving.f90 basics.a -basics.a: debug.o math.o - ar rc basics.a debug.o math.o numerics.o IO.o DAMASK_spectral_interface.o prec.o +basics.a: kdtree2.o + ar rc basics.a kdtree2.o math.o debug.o numerics.o IO.o DAMASK_spectral_interface.o prec.o +kdtree2.o: kdtree2.f90 math.o + $(PREFIX) $(COMPILERNAME) $(COMPILE) kdtree2.f90 $(SUFFIX) + +math.o: math.f90 debug.o + $(PREFIX) $(COMPILERNAME) $(COMPILE) math.f90 $(SUFFIX) + debug.o: debug.f90 numerics.o $(PREFIX) $(COMPILERNAME) $(COMPILE) debug.f90 $(SUFFIX) -math.o: math.f90 numerics.o - $(PREFIX) $(COMPILERNAME) $(COMPILE) math.f90 $(SUFFIX) - numerics.o: numerics.f90 IO.o $(PREFIX) $(COMPILERNAME) $(COMPILE) numerics.f90 $(SUFFIX)