From c54600fd1f0b099e63a07394cf980bbcff501653 Mon Sep 17 00:00:00 2001 From: Martin Diehl Date: Wed, 4 Jan 2012 17:43:26 +0000 Subject: [PATCH] added kdtree2 source and changed makefile to compile it. started to implement the nearest neighbor search for regridding corrected calculation of divergence in real space. corrected handling of maximum stress deviation (removed mask) --- code/DAMASK_spectral.f90 | 120 ++- code/kdtree2.f90 | 1885 ++++++++++++++++++++++++++++++++++++++ code/makefile | 17 +- 3 files changed, 1975 insertions(+), 47 deletions(-) create mode 100644 code/kdtree2.f90 diff --git a/code/DAMASK_spectral.f90 b/code/DAMASK_spectral.f90 index 3bf9ad6e3..93fea07e3 100644 --- a/code/DAMASK_spectral.f90 +++ b/code/DAMASK_spectral.f90 @@ -21,6 +21,8 @@ !******************************************************************** ! Material subroutine for BVP solution using spectral method ! +! Run 'DAMASK_spectral.exe --help' to get usage hints +! ! written by P. Eisenlohr, ! F. Roters, ! L. Hantcherli, @@ -32,16 +34,6 @@ ! ! MPI fuer Eisenforschung, Duesseldorf ! -!******************************************************************** -! Usage: -! - start program with DAMASK_spectral -! -g (--geom, --geometry) PathToGeomFile/NameOfGeom.geom -! -l (--load, --loadcase) PathToLoadFile/NameOfLoadFile.load -! - PathToGeomFile will be the working directory -! - make sure the file "material.config" exists in the working -! directory. For further configuration use "numerics.config" and -! "numerics.config" -!******************************************************************** program DAMASK_spectral !******************************************************************** @@ -50,6 +42,7 @@ program DAMASK_spectral use IO use debug, only: spectral_debug_verbosity use math + use kdtree2_module use mesh, only: mesh_ipCenterOfGravity use CPFEM, only: CPFEM_general, CPFEM_initAll use FEsolving, only: restartWrite, restartReadStep @@ -126,6 +119,13 @@ program DAMASK_spectral integer(pInt), dimension(3) :: k_s integer*8, dimension(3) :: fftw_plan ! plans for fftw (forward and backward) +! variables for regriding + real(pReal), dimension(:,:,:,:) ,allocatable :: deformed_small + real(pReal), dimension(:,:) ,allocatable :: deformed_large + real(pReal), dimension(:,:,:,:) ,allocatable :: new_coordinates + type(kdtree2), pointer :: tree + real(pReal), dimension(3) :: shift + ! loop variables, convergence etc. real(pReal) :: time = 0.0_pReal, time0 = 0.0_pReal, timeinc ! elapsed time, begin of interval, time interval real(pReal) :: guessmode, err_div, err_stress, err_stress_tol, p_hat_avg @@ -392,14 +392,14 @@ program DAMASK_spectral if (any(bc(loadcase)%maskStress .eqv. bc(loadcase)%maskDeformation)) errorID = 31 ! exclusive or masking only if (any(bc(loadcase)%maskStress .and. transpose(bc(loadcase)%maskStress) .and. & reshape((/.false.,.true.,.true.,.true.,.false.,.true.,.true.,.true.,.false./),(/3,3/)))) & - errorID = 38_pInt ! no rotation is allowed by stress BC + errorID = 38_pInt ! no rotation is allowed by stress BC if (any(abs(math_mul33x33(bc(loadcase)%rotation,math_transpose3x3(bc(loadcase)%rotation))-math_I3)& > reshape(spread(rotation_tol,1,9),(/3,3/)))& .or. abs(math_det3x3(bc(loadcase)%rotation)) > 1.0_pReal + rotation_tol) & - errorID = 46_pInt ! given rotation matrix contains strain - if (bc(loadcase)%timeIncrement < 0.0_pReal) errorID = 34_pInt ! negative time increment - if (bc(loadcase)%steps < 1_pInt) errorID = 35_pInt ! non-positive increment count - if (bc(loadcase)%outputfrequency < 1_pInt) errorID = 36_pInt ! non-positive result frequency + errorID = 46_pInt ! given rotation matrix contains strain + if (bc(loadcase)%timeIncrement < 0.0_pReal) errorID = 34_pInt ! negative time increment + if (bc(loadcase)%steps < 1_pInt) errorID = 35_pInt ! non-positive increment count + if (bc(loadcase)%outputfrequency < 1_pInt) errorID = 36_pInt ! non-positive result frequency if (errorID > 0_pInt) call IO_error(error_ID = errorID, ext_msg = loadcase_string) enddo @@ -444,7 +444,7 @@ program DAMASK_spectral else ! not-1st step of 1st loadcase of loglinear scale timeinc = bc(1)%timeIncrement*(2.0_pReal**real(step-1_pInt-bc(1)%steps ,pReal)) endif - else ! not-1st loadcase of loglinear scale + else ! not-1st loadcase of loglinear scale timeinc = time0 *( (1.0_pReal + bc(loadcase)%timeIncrement/time0 )**real( step/bc(loadcase)%steps ,pReal) & -(1.0_pReal + bc(loadcase)%timeIncrement/time0 )**real( (step-1_pInt)/bc(loadcase)%steps ,pReal) ) endif @@ -456,9 +456,10 @@ program DAMASK_spectral ! Initialization Start !************************************************************* - if(totalStepsCounter == restartReadStep) then ! Initialize values - if (regrid .eqv. .true. ) then ! 'DeInitialize' the values changing in case of regridding + if(totalStepsCounter == restartReadStep) then ! Initialize values + if (regrid .eqv. .true. ) then ! 'DeInitialize' the values changing in case of regridding regrid = .false. + call dfftw_destroy_plan(fftw_plan(1)); call dfftw_destroy_plan(fftw_plan(2)) if(debugDivergence) call dfftw_destroy_plan(fftw_plan(3)) deallocate (defgrad) @@ -492,9 +493,7 @@ program DAMASK_spectral divergence,(/res(1)/2_pInt+1_pInt,res(2),res(3)/),1,(res(1)/2_pInt+1_pInt)*res(2)*res(3),& divergence,(/res(1) +2_pInt,res(2),res(3)/),1,(res(1) +2_pInt)*res(2)*res(3),fftw_planner_flag) if (debugGeneral) then - !$OMP CRITICAL (write2out) write (6,*) 'FFTW initialized' - !$OMP END CRITICAL (write2out) endif if (restartReadStep==1_pInt) then ! not restarting, no deformation at the beginning @@ -519,7 +518,7 @@ program DAMASK_spectral ielem = 0_pInt do k = 1_pInt, res(3); do j = 1_pInt, res(2); do i = 1_pInt, res(1) ielem = ielem + 1_pInt - coordinates(1:3,i,j,k) = mesh_ipCenterOfGravity(1:3,1,ielem) ! set to initial coordinates ToDo: SHOULD BE UPDATED TO CURRENT POSITION IN FUTURE REVISIONS!!! But do we know them? I don't think so. Otherwise we don't need geometry reconstruction + coordinates(1:3,i,j,k) = mesh_ipCenterOfGravity(1:3,1,ielem) ! set to initial coordinates ToDo: SHOULD BE UPDATED TO CURRENT POSITION IN FUTURE REVISIONS!!! But do we know them? I don't think so. Otherwise we don't need geometry reconstruction call CPFEM_general(2_pInt,coordinates(1:3,i,j,k),math_I3,math_I3,temperature(i,j,k),& 0.0_pReal,ielem,1_pInt,cstress,dsde,pstress,dPdF) c_current = c_current + dPdF @@ -527,9 +526,7 @@ program DAMASK_spectral c0_reference = c_current * wgt ! linear reference material stiffness if (debugGeneral) then - !$OMP CRITICAL (write2out) write (6,*) 'first call to CPFEM_general finished' - !$OMP END CRITICAL (write2out) endif do k = 1_pInt, res(3) ! calculation of discrete angular frequencies, ordered as in FFTW (wrap around) @@ -613,7 +610,7 @@ program DAMASK_spectral if(totalStepsCounter >= restartReadStep) then ! Do calculations (otherwise just forwarding) if(bc(loadcase)%restartFrequency>0_pInt) & - restartWrite = ( mod(step - 1_pInt,bc(loadcase)%restartFrequency)==0_pInt) ! at frequency of writing restart information + restartWrite = ( mod(step - 1_pInt,bc(loadcase)%restartFrequency)==0_pInt) ! at frequency of writing restart information ! setting restart parameter for FEsolving (first call to CPFEM_general will write ToDo: true?) if (bc(loadcase)%velGradApplied) & ! calculate fDot from given L and current F fDot = math_mul33x33(bc(loadcase)%deformation, defgradAim) @@ -751,33 +748,35 @@ program DAMASK_spectral p_hat_avg = sqrt(maxval (math_eigenvalues3x3(math_mul33x33(workfft(1,1,1,1:3,1:3),& ! L_2 norm of average stress (freq 0,0,0) in fourier space, math_transpose3x3(workfft(1,1,1,1:3,1:3)))))) ! ignore imaginary part as it is always zero for real only input err_div_avg_complex = 0.0_pReal - err_div_max = 0.0_pReal ! only if debugDivergence == .true. of importance - divergence = 0.0_pReal ! - '' - + err_div_max = 0.0_pReal ! only important if debugDivergence == .true. + divergence = 0.0_pReal ! - '' - do k = 1_pInt, res(3); do j = 1_pInt, res(2); do i = 1_pInt, res(1)/2_pInt+1_pInt divergence_complex = math_mul33x3_complex(workfft(i*2_pInt-1_pInt,j,k,1:3,1:3)& ! calculate divergence out of the real and imaginary part of the stress + workfft(i*2_pInt ,j,k,1:3,1:3)*img,xi(1:3,i,j,k)) + if(debugDivergence) then ! need divergence NOT squared + divergence(i*2_pInt-1_pInt,j,k,1:3) = -aimag(divergence_complex) ! real part at i*2-1, imaginary part at i*2, multiply by i + divergence(i*2_pInt ,j,k,1:3) = real(divergence_complex) ! ==> switch order and change sign of img part + endif + divergence_complex = divergence_complex**2.0_pReal ! all criteria need divergence squared if(i==1_pInt .or. i == res(1)/2_pInt + 1_pInt) then ! We are on one of the two slides without conjg. complex counterpart - err_div_avg_complex = err_div_avg_complex + sum(divergence_complex**2.0_pReal) ! Avg of L_2 norm of div(stress) in fourier space (Suquet small strain) + err_div_avg_complex = err_div_avg_complex + sum(divergence_complex) ! RMS of L_2 norm of div(stress) in fourier space (Suquet small strain) else ! Has somewhere a conj. complex counterpart. Therefore count it twice. - err_div_avg_complex = err_div_avg_complex +2.0*real(sum(divergence_complex**2.0_pReal)) ! Ignore img part (conjg. complex sum will end up 0). This and the different order + err_div_avg_complex = err_div_avg_complex +2.0*real(sum(divergence_complex)) ! Ignore img part (conjg. complex sum will end up 0). This and the different order endif ! compared to c2c transform results in slight numerical deviations. if(debugDivergence) then - err_div_max = max(err_div_max,abs(sqrt(sum(divergence_complex**2.0_pReal)))) ! maximum of L two norm of div(stress) in fourier space (Suquet large strain) - divergence(i*2_pInt-1_pInt,j,k,1:3) = -aimag(divergence_complex)*pi*2.0_pReal ! real part at i*2-1, imaginary part at i*2, multiply by i - divergence(i*2_pInt ,j,k,1:3) = real(divergence_complex)*pi*2.0_pReal ! ==> switch order and change sign of img part - endif + err_div_max = max(err_div_max,abs(sqrt(sum(divergence_complex)))) ! maximum of L two norm of div(stress) in fourier space (Suquet large strain) + endif enddo; enddo; enddo - err_div = abs(sqrt (err_div_avg_complex*wgt)) ! weighting by and taking square root. abs(...) because result is a complex number + err_div = abs(sqrt (err_div_avg_complex*wgt)) ! weighting by and taking square root (RMS). abs(...) because result is a complex number err_div = err_div *correctionFactor/p_hat_avg ! weighting by average stress and multiplying with correction factor err_div_max = err_div_max*correctionFactor/p_hat_avg ! - '' - only if debugDivergence == .true. of importance ! calculate additional divergence criteria and report ------------- if(debugDivergence) then call dfftw_execute_dft_c2r(fftw_plan(3),divergence,divergence) - divergence = divergence * wgt - + divergence = divergence *pi*2.0_pReal* wgt !pointwise factor 2*pi from differentation and weighting from FT err_real_div_avg = 0.0_pReal err_real_div_max = 0.0_pReal do k = 1_pInt, res(3); do j = 1_pInt, res(2); do i = 1_pInt, res(1) @@ -786,8 +785,8 @@ program DAMASK_spectral enddo; enddo; enddo p_real_avg = sqrt(maxval (math_eigenvalues3x3(math_mul33x33(pstress_av_lab,& ! L_2 norm of average stress in real space, math_transpose3x3(pstress_av_lab))))) - err_real_div_avg = sqrt(wgt*err_real_div_avg)*correctionFactor/p_hat_avg - err_real_div_max = err_real_div_max *correctionFactor/p_hat_avg + err_real_div_avg = sqrt(wgt*err_real_div_avg)*correctionFactor/p_real_avg ! RMS in real space + err_real_div_max = err_real_div_max *correctionFactor/p_real_avg print '(a,es10.4,a,f6.2)', 'error divergence FT avg = ',err_div, ', ', err_div/err_div_tol print '(a,es10.4)', 'error divergence FT max = ',err_div_max @@ -797,7 +796,7 @@ program DAMASK_spectral print '(a,es10.4,a,f6.2)', 'error divergence = ',err_div, ', ', err_div/err_div_tol endif ! -------------------------- - if(memory_efficient) then ! memory saving version, on-the-fly calculation of gamma_hat + if(memory_efficient) then ! memory saving version, on-the-fly calculation of gamma_hat do k = 1_pInt, res(3); do j = 1_pInt, res(2) ;do i = 1_pInt, res(1)/2_pInt+1_pInt if (any(xi(1:3,i,j,k) /= 0.0_pReal)) then do l = 1_pInt,3_pInt; do m = 1_pInt,3_pInt @@ -847,7 +846,7 @@ program DAMASK_spectral if(size_reduced > 0_pInt) then ! calculate stress BC if applied err_stress = maxval(abs(mask_stress * (pstress_av - bc(loadcase)%stress))) ! maximum deviaton (tensor norm not applicable) - err_stress_tol = maxval(abs(mask_defgrad * pstress_av)) * err_stress_tolrel ! don't use any tensor norm because the comparison should be coherent + err_stress_tol = maxval(abs(pstress_av)) * err_stress_tolrel ! don't use any tensor norm because the comparison should be coherent print '(a)', '' print '(a,es10.4,a,f6.2)', 'error stress = ',err_stress, ', ', err_stress/err_stress_tol print '(a)', '... correcting deformation gradient to fulfill BCs ..........' @@ -883,7 +882,7 @@ program DAMASK_spectral ! ------------------------------ enddo ! end looping when convergency is achieved - + !$OMP CRITICAL (write2out) print '(a)', '' print '(a)', '=============================================================' @@ -899,6 +898,47 @@ program DAMASK_spectral write(538), materialpoint_results(1_pInt:materialpoint_sizeResults,1,1_pInt:Npoints) ! write result to file endif !$OMP END CRITICAL (write2out) + +! ################################################## +! # test of regridding + ! allocate(deformed_small(res(1) ,res(2) ,res(3) ,3)); deformed_small = 0.0_pReal + ! allocate(deformed_large(3,Npoints*27_pInt)); deformed_large = 0.0_pReal !ToDo: make it smaller (small corona only) + + ! call deformed_fft(res,geomdimension,defgrad_av_lab,1.0_pReal,defgrad,deformed_small) ! calculate deformed coordinates + ! shift = math_mul33x3(defgrad_av_lab,geomdimension) + + ! print*, 'defgrad' + ! do k = 1_pInt, res(3); do j = 1_pInt, res(2); do i = 1_pInt, res(1) + ! print*, defgrad(i,j,k,1:3,1:3) + ! enddo; enddo; enddo + + ! print*, 'deformed_small' + ! do k = 1_pInt, res(3); do j = 1_pInt, res(2); do i = 1_pInt, res(1) + ! print*, deformed_small(i,j,k,1:3) + ! enddo; enddo; enddo + + ! print*, 'shift', shift + ! ielem = 0_pInt + ! do k = 1_pInt, res(3); do j = 1_pInt, res(2); do i = 1_pInt, res(1) + ! do n = -1, 1 + ! do m = -1, 1 + ! do l = -1, 1 + ! ielem = ielem +1_pInt + ! deformed_large(1:3,ielem) = deformed_small(i,j,k,1:3)+ real((/l,m,n/),pReal)*shift + ! enddo; enddo; enddo + ! enddo; enddo; enddo + ! print*, 'deformed_large' + ! print*, deformed_large + ! tree => kdtree2_create(deformed_large,sort=.true.,rearrange=.true.) + + ! allocate(new_coordinates(res(1),res(2),res(3),3)); new_coordinates = 0.0_pReal !fluctuation free new coordinates + ! do k = 1_pInt, res(3); do j = 1_pInt, res(2); do i = 1_pInt, res(1) + ! new_coordinates(i,j,k,1:3) = math_mul33x3(defgrad_av_lab,coordinates(1:3,i,j,k)) + ! enddo; enddo; enddo + ! pause +! ################################################## +! # end test of regridding + endif ! end calculation/forwarding enddo ! end looping over steps in current loadcase deallocate(c_reduced) diff --git a/code/kdtree2.f90 b/code/kdtree2.f90 new file mode 100644 index 000000000..422cef447 --- /dev/null +++ b/code/kdtree2.f90 @@ -0,0 +1,1885 @@ +! +!(c) Matthew Kennel, Institute for Nonlinear Science (2004) +! +! Licensed under the Academic Free License version 1.1 found in file LICENSE +! with additional provisions found in that same file. +! +!####################################################### +! modifications: changed precision according to prec.f90 +! k.komerla, m.diehl +!####################################################### + +module kdtree2_priority_queue_module + use prec + ! + ! maintain a priority queue (PQ) of data, pairs of 'priority/payload', + ! implemented with a binary heap. This is the type, and the 'dis' field + ! is the priority. + ! + type kdtree2_result + ! a pair of distances, indexes + real(pReal) :: dis !=0.0 + integer(pInt) :: idx !=-1 Initializers cause some bugs in compilers. + end type kdtree2_result + ! + ! A heap-based priority queue lets one efficiently implement the following + ! operations, each in log(N) time, as opposed to linear time. + ! + ! 1) add a datum (push a datum onto the queue, increasing its length) + ! 2) return the priority value of the maximum priority element + ! 3) pop-off (and delete) the element with the maximum priority, decreasing + ! the size of the queue. + ! 4) replace the datum with the maximum priority with a supplied datum + ! (of either higher or lower priority), maintaining the size of the + ! queue. + ! + ! + ! In the k-d tree case, the 'priority' is the square distance of a point in + ! the data set to a reference point. The goal is to keep the smallest M + ! distances to a reference point. The tree algorithm searches terminal + ! nodes to decide whether to add points under consideration. + ! + ! A priority queue is useful here because it lets one quickly return the + ! largest distance currently existing in the list. If a new candidate + ! distance is smaller than this, then the new candidate ought to replace + ! the old candidate. In priority queue terms, this means removing the + ! highest priority element, and inserting the new one. + ! + ! Algorithms based on Cormen, Leiserson, Rivest, _Introduction + ! to Algorithms_, 1990, with further optimization by the author. + ! + ! Originally informed by a C implementation by Sriranga Veeraraghavan. + ! + ! This module is not written in the most clear way, but is implemented such + ! for speed, as it its operations will be called many times during searches + ! of large numbers of neighbors. + ! + type pq + ! + ! The priority queue consists of elements + ! priority(1:heap_size), with associated payload(:). + ! + ! There are heap_size active elements. + ! Assumes the allocation is always sufficient. Will NOT increase it + ! to match. + integer(pInt) :: heap_size = 0 + type(kdtree2_result), pointer :: elems(:) + end type pq + + public :: kdtree2_result + + public :: pq + public :: pq_create + public :: pq_delete, pq_insert + public :: pq_extract_max, pq_max, pq_replace_max, pq_maxpri + private + +contains + + + function pq_create(results_in) result(res) + ! + ! Create a priority queue from ALREADY allocated + ! array pointers for storage. NOTE! It will NOT + ! add any alements to the heap, i.e. any existing + ! data in the input arrays will NOT be used and may + ! be overwritten. + ! + ! usage: + ! real(pReal), pointer :: x(:) + ! integer(pInt), pointer :: k(:) + ! allocate(x(1000),k(1000)) + ! pq => pq_create(x,k) + ! + type(kdtree2_result), target:: results_in(:) + type(pq) :: res + ! + ! + integer(pInt) :: nalloc + + nalloc = size(results_in,1) + if (nalloc .lt. 1) then + write (*,*) 'PQ_CREATE: error, input arrays must be allocated.' + end if + res%elems => results_in + res%heap_size = 0 + return + end function pq_create + + ! + ! operations for getting parents and left + right children + ! of elements in a binary heap. + ! + +! +! These are written inline for speed. +! +! integer(pInt) function parent(i) +! integer(pInt), intent(in) :: i +! parent = (i/2) +! return +! end function parent + +! integer(pInt) function left(i) +! integer(pInt), intent(in) ::i +! left = (2*i) +! return +! end function left + +! integer(pInt) function right(i) +! integer(pInt), intent(in) :: i +! right = (2*i)+1 +! return +! end function right + +! logical function compare_priority(p1,p2) +! real(pReal), intent(in) :: p1, p2 +! +! compare_priority = (p1 .gt. p2) +! return +! end function compare_priority + + subroutine heapify(a,i_in) + ! + ! take a heap rooted at 'i' and force it to be in the + ! heap canonical form. This is performance critical + ! and has been tweaked a little to reflect this. + ! + type(pq),pointer :: a + integer(pInt), intent(in) :: i_in + ! + integer(pInt) :: i, l, r, largest + + real(pReal) :: pri_i, pri_l, pri_r, pri_largest + + + type(kdtree2_result) :: temp + + i = i_in + +bigloop: do + l = 2*i ! left(i) + r = l+1 ! right(i) + ! + ! set 'largest' to the index of either i, l, r + ! depending on whose priority is largest. + ! + ! note that l or r can be larger than the heap size + ! in which case they do not count. + + + ! does left child have higher priority? + if (l .gt. a%heap_size) then + ! we know that i is the largest as both l and r are invalid. + exit + else + pri_i = a%elems(i)%dis + pri_l = a%elems(l)%dis + if (pri_l .gt. pri_i) then + largest = l + pri_largest = pri_l + else + largest = i + pri_largest = pri_i + endif + + ! + ! between i and l we have a winner + ! now choose between that and r. + ! + if (r .le. a%heap_size) then + pri_r = a%elems(r)%dis + if (pri_r .gt. pri_largest) then + largest = r + endif + endif + endif + + if (largest .ne. i) then + ! swap data in nodes largest and i, then heapify + + temp = a%elems(i) + a%elems(i) = a%elems(largest) + a%elems(largest) = temp + ! + ! Canonical heapify() algorithm has tail-ecursive call: + ! + ! call heapify(a,largest) + ! we will simulate with cycle + ! + i = largest + cycle bigloop ! continue the loop + else + return ! break from the loop + end if + enddo bigloop + return + end subroutine heapify + + subroutine pq_max(a,e) + ! + ! return the priority and its payload of the maximum priority element + ! on the queue, which should be the first one, if it is + ! in heapified form. + ! + type(pq),pointer :: a + type(kdtree2_result),intent(out) :: e + + if (a%heap_size .gt. 0) then + e = a%elems(1) + else + write (*,*) 'PQ_MAX: ERROR, heap_size < 1' + stop + endif + return + end subroutine pq_max + + real(pReal) function pq_maxpri(a) + type(pq), pointer :: a + + if (a%heap_size .gt. 0) then + pq_maxpri = a%elems(1)%dis + else + write (*,*) 'PQ_MAX_PRI: ERROR, heapsize < 1' + stop + endif + return + end function pq_maxpri + + subroutine pq_extract_max(a,e) + ! + ! return the priority and payload of maximum priority + ! element, and remove it from the queue. + ! (equivalent to 'pop()' on a stack) + ! + type(pq),pointer :: a + type(kdtree2_result), intent(out) :: e + + if (a%heap_size .ge. 1) then + ! + ! return max as first element + ! + e = a%elems(1) + + ! + ! move last element to first + ! + a%elems(1) = a%elems(a%heap_size) + a%heap_size = a%heap_size-1 + call heapify(a,1) + return + else + write (*,*) 'PQ_EXTRACT_MAX: error, attempted to pop non-positive PQ' + stop + end if + + end subroutine pq_extract_max + + + real(pReal) function pq_insert(a,dis,idx) + ! + ! Insert a new element and return the new maximum priority, + ! which may or may not be the same as the old maximum priority. + ! + type(pq),pointer :: a + real(pReal), intent(in) :: dis + integer(pInt), intent(in) :: idx + ! type(kdtree2_result), intent(in) :: e + ! + integer(pInt) :: i, isparent + real(pReal) :: parentdis + ! + + ! if (a%heap_size .ge. a%max_elems) then + ! write (*,*) 'PQ_INSERT: error, attempt made to insert element on full PQ' + ! stop + ! else + a%heap_size = a%heap_size + 1 + i = a%heap_size + + do while (i .gt. 1) + isparent = int(i/2) + parentdis = a%elems(isparent)%dis + if (dis .gt. parentdis) then + ! move what was in i's parent into i. + a%elems(i)%dis = parentdis + a%elems(i)%idx = a%elems(isparent)%idx + i = isparent + else + exit + endif + end do + + ! insert the element at the determined position + a%elems(i)%dis = dis + a%elems(i)%idx = idx + + pq_insert = a%elems(1)%dis + return + ! end if + + end function pq_insert + + subroutine pq_adjust_heap(a,i) + type(pq),pointer :: a + integer(pInt), intent(in) :: i + ! + ! nominally arguments (a,i), but specialize for a=1 + ! + ! This routine assumes that the trees with roots 2 and 3 are already heaps, i.e. + ! the children of '1' are heaps. When the procedure is completed, the + ! tree rooted at 1 is a heap. + real(pReal) :: prichild + integer(pInt) :: parent, child, N + + type(kdtree2_result) :: e + + e = a%elems(i) + + parent = i + child = 2*i + N = a%heap_size + + do while (child .le. N) + if (child .lt. N) then + if (a%elems(child)%dis .lt. a%elems(child+1)%dis) then + child = child+1 + endif + endif + prichild = a%elems(child)%dis + if (e%dis .ge. prichild) then + exit + else + ! move child into parent. + a%elems(parent) = a%elems(child) + parent = child + child = 2*parent + end if + end do + a%elems(parent) = e + return + end subroutine pq_adjust_heap + + + real(pReal) function pq_replace_max(a,dis,idx) + ! + ! Replace the extant maximum priority element + ! in the PQ with (dis,idx). Return + ! the new maximum priority, which may be larger + ! or smaller than the old one. + ! + type(pq),pointer :: a + real(pReal), intent(in) :: dis + integer(pInt), intent(in) :: idx +! type(kdtree2_result), intent(in) :: e + ! not tested as well! + + integer(pInt) :: parent, child, N + real(pReal) :: prichild, prichildp1 + + type(kdtree2_result) :: etmp + + if (.true.) then + N=a%heap_size + if (N .ge. 1) then + parent =1 + child=2 + + loop: do while (child .le. N) + prichild = a%elems(child)%dis + + ! + ! posibly child+1 has higher priority, and if + ! so, get it, and increment child. + ! + + if (child .lt. N) then + prichildp1 = a%elems(child+1)%dis + if (prichild .lt. prichildp1) then + child = child+1 + prichild = prichildp1 + endif + endif + + if (dis .ge. prichild) then + exit loop + ! we have a proper place for our new element, + ! bigger than either children's priority. + else + ! move child into parent. + a%elems(parent) = a%elems(child) + parent = child + child = 2*parent + end if + end do loop + a%elems(parent)%dis = dis + a%elems(parent)%idx = idx + pq_replace_max = a%elems(1)%dis + else + a%elems(1)%dis = dis + a%elems(1)%idx = idx + pq_replace_max = dis + endif + else + ! + ! slower version using elementary pop and push operations. + ! + call pq_extract_max(a,etmp) + etmp%dis = dis + etmp%idx = idx + pq_replace_max = pq_insert(a,dis,idx) + endif + return + end function pq_replace_max + + subroutine pq_delete(a,i) + ! + ! delete item with index 'i' + ! + type(pq),pointer :: a + integer(pInt) :: i + + if ((i .lt. 1) .or. (i .gt. a%heap_size)) then + write (*,*) 'PQ_DELETE: error, attempt to remove out of bounds element.' + stop + endif + + ! swap the item to be deleted with the last element + ! and shorten heap by one. + a%elems(i) = a%elems(a%heap_size) + a%heap_size = a%heap_size - 1 + + call heapify(a,i) + + end subroutine pq_delete + +end module kdtree2_priority_queue_module + + +module kdtree2_module + use prec + use kdtree2_priority_queue_module + ! K-D tree routines in Fortran 90 by Matt Kennel. + ! Original program was written in Sather by Steve Omohundro and + ! Matt Kennel. Only the Euclidean metric is supported. + ! + ! + ! This module is identical to 'kd_tree', except that the order + ! of subscripts is reversed in the data file. + ! In otherwords for an embedding of N D-dimensional vectors, the + ! data file is here, in natural Fortran order data(1:D, 1:N) + ! because Fortran lays out columns first, + ! + ! whereas conventionally (C-style) it is data(1:N,1:D) + ! as in the original kd_tree module. + ! + !-------------DATA TYPE, CREATION, DELETION--------------------- + public :: pReal + public :: kdtree2, kdtree2_result, tree_node, kdtree2_create, kdtree2_destroy + !--------------------------------------------------------------- + !-------------------SEARCH ROUTINES----------------------------- + public :: kdtree2_n_nearest,kdtree2_n_nearest_around_point + ! Return fixed number of nearest neighbors around arbitrary vector, + ! or extant point in dataset, with decorrelation window. + ! + public :: kdtree2_r_nearest, kdtree2_r_nearest_around_point + ! Return points within a fixed ball of arb vector/extant point + ! + public :: kdtree2_sort_results + ! Sort, in order of increasing distance, rseults from above. + ! + public :: kdtree2_r_count, kdtree2_r_count_around_point + ! Count points within a fixed ball of arb vector/extant point + ! + public :: kdtree2_n_nearest_brute_force, kdtree2_r_nearest_brute_force + ! brute force of kdtree2_[n|r]_nearest + !---------------------------------------------------------------- + + + integer(pInt), parameter :: bucket_size = 12 + ! The maximum number of points to keep in a terminal node. + + type interval + real(pReal) :: lower,upper + end type interval + + type :: tree_node + ! an internal tree node + private + integer(pInt) :: cut_dim + ! the dimension to cut + real(pReal) :: cut_val + ! where to cut the dimension + real(pReal) :: cut_val_left, cut_val_right + ! improved cutoffs knowing the spread in child boxes. + integer(pInt) :: l, u + type (tree_node), pointer :: left, right + type(interval), pointer :: box(:) => null() + ! child pointers + ! Points included in this node are indexes[k] with k \in [l,u] + + + end type tree_node + + type :: kdtree2 + ! Global information about the tree, one per tree + integer(pInt) :: dimen=0, n=0 + ! dimensionality and total # of points + real(pReal), pointer :: the_data(:,:) => null() + ! pointer to the actual data array + ! + ! IMPORTANT NOTE: IT IS DIMENSIONED the_data(1:d,1:N) + ! which may be opposite of what may be conventional. + ! This is, because in Fortran, the memory layout is such that + ! the first dimension is in sequential order. Hence, with + ! (1:d,1:N), all components of the vector will be in consecutive + ! memory locations. The search time is dominated by the + ! evaluation of distances in the terminal nodes. Putting all + ! vector components in consecutive memory location improves + ! memory cache locality, and hence search speed, and may enable + ! vectorization on some processors and compilers. + + integer(pInt), pointer :: ind(:) => null() + ! permuted index into the data, so that indexes[l..u] of some + ! bucket represent the indexes of the actual points in that + ! bucket. + logical :: sort = .false. + ! do we always sort output results? + logical :: rearrange = .false. + real(pReal), pointer :: rearranged_data(:,:) => null() + ! if (rearrange .eqv. .true.) then rearranged_data has been + ! created so that rearranged_data(:,i) = the_data(:,ind(i)), + ! permitting search to use more cache-friendly rearranged_data, at + ! some initial computation and storage cost. + type (tree_node), pointer :: root => null() + ! root pointer of the tree + end type kdtree2 + + + type :: tree_search_record + ! + ! One of these is created for each search. + ! + private + ! + ! Many fields are copied from the tree structure, in order to + ! speed up the search. + ! + integer(pInt) :: dimen + integer(pInt) :: nn, nfound + real(pReal) :: ballsize + integer(pInt) :: centeridx=999, correltime=9999 + ! exclude points within 'correltime' of 'centeridx', iff centeridx >= 0 + integer(pInt) :: nalloc ! how much allocated for results(:)? + logical :: rearrange ! are the data rearranged or original? + ! did the # of points found overflow the storage provided? + logical :: overflow + real(pReal), pointer :: qv(:) ! query vector + type(kdtree2_result), pointer :: results(:) ! results + type(pq) :: pq + real(pReal), pointer :: data(:,:) ! temp pointer to data + integer(pInt), pointer :: ind(:) ! temp pointer to indexes + end type tree_search_record + + private + ! everything else is private. + + type(tree_search_record), save, target :: sr ! A GLOBAL VARIABLE for search + +contains + + function kdtree2_create(input_data,dim,sort,rearrange) result (mr) + ! + ! create the actual tree structure, given an input array of data. + ! + ! Note, input data is input_data(1:d,1:N), NOT the other way around. + ! THIS IS THE REVERSE OF THE PREVIOUS VERSION OF THIS MODULE. + ! The reason for it is cache friendliness, improving performance. + ! + ! Optional arguments: If 'dim' is specified, then the tree + ! will only search the first 'dim' components + ! of input_data, otherwise, dim is inferred + ! from SIZE(input_data,1). + ! + ! if sort .eqv. .true. then output results + ! will be sorted by increasing distance. + ! default=.false., as it is faster to not sort. + ! + ! if rearrange .eqv. .true. then an internal + ! copy of the data, rearranged by terminal node, + ! will be made for cache friendliness. + ! default=.true., as it speeds searches, but + ! building takes longer, and extra memory is used. + ! + ! .. Function Return Cut_value .. + type (kdtree2), pointer :: mr + integer(pInt), intent(in), optional :: dim + logical, intent(in), optional :: sort + logical, intent(in), optional :: rearrange + ! .. + ! .. Array Arguments .. + real(pReal), target :: input_data(:,:) + ! + integer(pInt) :: i + ! .. + allocate (mr) + mr%the_data => input_data + ! pointer assignment + + if (present(dim)) then + mr%dimen = dim + else + mr%dimen = size(input_data,1) + end if + mr%n = size(input_data,2) + + if (mr%dimen > mr%n) then + ! unlikely to be correct + write (*,*) 'KD_TREE_TRANS: likely user error.' + write (*,*) 'KD_TREE_TRANS: You passed in matrix with D=',mr%dimen + write (*,*) 'KD_TREE_TRANS: and N=',mr%n + write (*,*) 'KD_TREE_TRANS: note, that new format is data(1:D,1:N)' + write (*,*) 'KD_TREE_TRANS: with usually N >> D. If N =approx= D, then a k-d tree' + write (*,*) 'KD_TREE_TRANS: is not an appropriate data structure.' + stop + end if + + call build_tree(mr) + + if (present(sort)) then + mr%sort = sort + else + mr%sort = .false. + endif + + if (present(rearrange)) then + mr%rearrange = rearrange + else + mr%rearrange = .true. + endif + + if (mr%rearrange) then + allocate(mr%rearranged_data(mr%dimen,mr%n)) + do i=1,mr%n + mr%rearranged_data(:,i) = mr%the_data(:, & + mr%ind(i)) + enddo + else + nullify(mr%rearranged_data) + endif + + end function kdtree2_create + + subroutine build_tree(tp) + type (kdtree2), pointer :: tp + ! .. + integer(pInt) :: j + type(tree_node), pointer :: dummy => null() + ! .. + allocate (tp%ind(tp%n)) + forall (j=1:tp%n) + tp%ind(j) = j + end forall + tp%root => build_tree_for_range(tp,1,tp%n, dummy) + end subroutine build_tree + + recursive function build_tree_for_range(tp,l,u,parent) result (res) + ! .. Function Return Cut_value .. + type (tree_node), pointer :: res + ! .. + ! .. Structure Arguments .. + type (kdtree2), pointer :: tp + type (tree_node),pointer :: parent + ! .. + ! .. Scalar Arguments .. + integer(pInt), intent (In) :: l, u + ! .. + ! .. Local Scalars .. + integer(pInt) :: i, c, m, dimen + logical :: recompute + real(pReal) :: average + +!!$ If (.False.) Then +!!$ If ((l .Lt. 1) .Or. (l .Gt. tp%n)) Then +!!$ Stop 'illegal L value in build_tree_for_range' +!!$ End If +!!$ If ((u .Lt. 1) .Or. (u .Gt. tp%n)) Then +!!$ Stop 'illegal u value in build_tree_for_range' +!!$ End If +!!$ If (u .Lt. l) Then +!!$ Stop 'U is less than L, thats illegal.' +!!$ End If +!!$ Endif +!!$ + ! first compute min and max + dimen = tp%dimen + allocate (res) + allocate(res%box(dimen)) + + ! First, compute an APPROXIMATE bounding box of all points associated with this node. + if ( u < l ) then + ! no points in this box + nullify(res) + return + end if + + if ((u-l)<=bucket_size) then + ! + ! always compute true bounding box for terminal nodes. + ! + do i=1,dimen + call spread_in_coordinate(tp,i,l,u,res%box(i)) + end do + res%cut_dim = 0 + res%cut_val = 0.0 + res%l = l + res%u = u + res%left =>null() + res%right => null() + else + ! + ! modify approximate bounding box. This will be an + ! overestimate of the true bounding box, as we are only recomputing + ! the bounding box for the dimension that the parent split on. + ! + ! Going to a true bounding box computation would significantly + ! increase the time necessary to build the tree, and usually + ! has only a very small difference. This box is not used + ! for searching but only for deciding which coordinate to split on. + ! + do i=1,dimen + recompute=.true. + if (associated(parent)) then + if (i .ne. parent%cut_dim) then + recompute=.false. + end if + endif + if (recompute) then + call spread_in_coordinate(tp,i,l,u,res%box(i)) + else + res%box(i) = parent%box(i) + endif + end do + + + c = maxloc(res%box(1:dimen)%upper-res%box(1:dimen)%lower,1) + ! + ! c is the identity of which coordinate has the greatest spread. + ! + + if (.false.) then + ! select exact median to have fully balanced tree. + m = (l+u)/2 + call select_on_coordinate(tp%the_data,tp%ind,c,m,l,u) + else + ! + ! select point halfway between min and max, as per A. Moore, + ! who says this helps in some degenerate cases, or + ! actual arithmetic average. + ! + if (.true.) then + ! actually compute average + average = sum(tp%the_data(c,tp%ind(l:u))) / real(u-l+1,pReal) + else + average = (res%box(c)%upper + res%box(c)%lower)/2.0 + endif + + res%cut_val = average + m = select_on_coordinate_value(tp%the_data,tp%ind,c,average,l,u) + endif + + ! moves indexes around + res%cut_dim = c + res%l = l + res%u = u +! res%cut_val = tp%the_data(c,tp%ind(m)) + + res%left => build_tree_for_range(tp,l,m,res) + res%right => build_tree_for_range(tp,m+1,u,res) + + if (associated(res%right) .eqv. .false.) then + res%box = res%left%box + res%cut_val_left = res%left%box(c)%upper + res%cut_val = res%cut_val_left + elseif (associated(res%left) .eqv. .false.) then + res%box = res%right%box + res%cut_val_right = res%right%box(c)%lower + res%cut_val = res%cut_val_right + else + res%cut_val_right = res%right%box(c)%lower + res%cut_val_left = res%left%box(c)%upper + res%cut_val = (res%cut_val_left + res%cut_val_right)/2 + + + ! now remake the true bounding box for self. + ! Since we are taking unions (in effect) of a tree structure, + ! this is much faster than doing an exhaustive + ! search over all points + res%box%upper = max(res%left%box%upper,res%right%box%upper) + res%box%lower = min(res%left%box%lower,res%right%box%lower) + endif + end if + end function build_tree_for_range + + integer(pInt) function select_on_coordinate_value(v,ind,c,alpha,li,ui) & + result(res) + ! Move elts of ind around between l and u, so that all points + ! <= than alpha (in c cooordinate) are first, and then + ! all points > alpha are second. + + ! + ! Algorithm (matt kennel). + ! + ! Consider the list as having three parts: on the left, + ! the points known to be <= alpha. On the right, the points + ! known to be > alpha, and in the middle, the currently unknown + ! points. The algorithm is to scan the unknown points, starting + ! from the left, and swapping them so that they are added to + ! the left stack or the right stack, as appropriate. + ! + ! The algorithm finishes when the unknown stack is empty. + ! + ! .. Scalar Arguments .. + integer(pInt), intent (In) :: c, li, ui + real(pReal), intent(in) :: alpha + ! .. + real(pReal) :: v(1:,1:) + integer(pInt) :: ind(1:) + integer(pInt) :: tmp + ! .. + integer(pInt) :: lb, rb + ! + ! The points known to be <= alpha are in + ! [l,lb-1] + ! + ! The points known to be > alpha are in + ! [rb+1,u]. + ! + ! Therefore we add new points into lb or + ! rb as appropriate. When lb=rb + ! we are done. We return the location of the last point <= alpha. + ! + ! + lb = li; rb = ui + + do while (lb < rb) + if ( v(c,ind(lb)) <= alpha ) then + ! it is good where it is. + lb = lb+1 + else + ! swap it with rb. + tmp = ind(lb); ind(lb) = ind(rb); ind(rb) = tmp + rb = rb-1 + endif + end do + + ! now lb .eq. ub + if (v(c,ind(lb)) <= alpha) then + res = lb + else + res = lb-1 + endif + + end function select_on_coordinate_value + + subroutine select_on_coordinate(v,ind,c,k,li,ui) + ! Move elts of ind around between l and u, so that the kth + ! element + ! is >= those below, <= those above, in the coordinate c. + ! .. Scalar Arguments .. + integer(pInt), intent (In) :: c, k, li, ui + ! .. + integer(pInt) :: i, l, m, s, t, u + ! .. + real(pReal) :: v(:,:) + integer(pInt) :: ind(:) + ! .. + l = li + u = ui + do while (l=k) u = m - 1 + end do + end subroutine select_on_coordinate + + subroutine spread_in_coordinate(tp,c,l,u,interv) + ! the spread in coordinate 'c', between l and u. + ! + ! Return lower bound in 'smin', and upper in 'smax', + ! .. + ! .. Structure Arguments .. + type (kdtree2), pointer :: tp + type(interval), intent(out) :: interv + ! .. + ! .. Scalar Arguments .. + integer(pInt), intent (In) :: c, l, u + ! .. + ! .. Local Scalars .. + real(pReal) :: last, lmax, lmin, t, smin,smax + integer(pInt) :: i, ulocal + ! .. + ! .. Local Arrays .. + real(pReal), pointer :: v(:,:) + integer(pInt), pointer :: ind(:) + ! .. + v => tp%the_data(1:,1:) + ind => tp%ind(1:) + smin = v(c,ind(l)) + smax = smin + + ulocal = u + + do i = l + 2, ulocal, 2 + lmin = v(c,ind(i-1)) + lmax = v(c,ind(i)) + if (lmin>lmax) then + t = lmin + lmin = lmax + lmax = t + end if + if (smin>lmin) smin = lmin + if (smaxlast) smin = last + if (smax qv + sr%nn = nn + sr%nfound = 0 + sr%centeridx = -1 + sr%correltime = 0 + sr%overflow = .false. + + sr%results => results + + sr%nalloc = nn ! will be checked + + sr%ind => tp%ind + sr%rearrange = tp%rearrange + if (tp%rearrange) then + sr%Data => tp%rearranged_data + else + sr%Data => tp%the_data + endif + sr%dimen = tp%dimen + + call validate_query_storage(nn) + sr%pq = pq_create(results) + + call search(tp%root) + + if (tp%sort) then + call kdtree2_sort_results(nn, results) + endif +! deallocate(sr%pqp) + return + end subroutine kdtree2_n_nearest + + subroutine kdtree2_n_nearest_around_point(tp,idxin,correltime,nn,results) + ! Find the 'nn' vectors in the tree nearest to point 'idxin', + ! with correlation window 'correltime', returing results in + ! results(:), which must be pre-allocated upon entry. + type (kdtree2), pointer :: tp + integer(pInt), intent (In) :: idxin, correltime, nn + type(kdtree2_result), target :: results(:) + + allocate (sr%qv(tp%dimen)) + sr%qv = tp%the_data(:,idxin) ! copy the vector + sr%ballsize = huge(1.0) ! the largest real(pReal) number + sr%centeridx = idxin + sr%correltime = correltime + + sr%nn = nn + sr%nfound = 0 + + sr%dimen = tp%dimen + sr%nalloc = nn + + sr%results => results + + sr%ind => tp%ind + sr%rearrange = tp%rearrange + + if (sr%rearrange) then + sr%Data => tp%rearranged_data + else + sr%Data => tp%the_data + endif + + call validate_query_storage(nn) + sr%pq = pq_create(results) + + call search(tp%root) + + if (tp%sort) then + call kdtree2_sort_results(nn, results) + endif + deallocate (sr%qv) + return + end subroutine kdtree2_n_nearest_around_point + + subroutine kdtree2_r_nearest(tp,qv,r2,nfound,nalloc,results) + ! find the nearest neighbors to point 'idxin', within SQUARED + ! Euclidean distance 'r2'. Upon ENTRY, nalloc must be the + ! size of memory allocated for results(1:nalloc). Upon + ! EXIT, nfound is the number actually found within the ball. + ! + ! Note that if nfound .gt. nalloc then more neighbors were found + ! than there were storage to store. The resulting list is NOT + ! the smallest ball inside norm r^2 + ! + ! Results are NOT sorted unless tree was created with sort option. + type (kdtree2), pointer :: tp + real(pReal), target, intent (In) :: qv(:) + real(pReal), intent(in) :: r2 + integer(pInt), intent(out) :: nfound + integer(pInt), intent (In) :: nalloc + type(kdtree2_result), target :: results(:) + + ! + sr%qv => qv + sr%ballsize = r2 + sr%nn = 0 ! flag for fixed ball search + sr%nfound = 0 + sr%centeridx = -1 + sr%correltime = 0 + + sr%results => results + + call validate_query_storage(nalloc) + sr%nalloc = nalloc + sr%overflow = .false. + sr%ind => tp%ind + sr%rearrange= tp%rearrange + + if (tp%rearrange) then + sr%Data => tp%rearranged_data + else + sr%Data => tp%the_data + endif + sr%dimen = tp%dimen + + ! + !sr%dsl = Huge(sr%dsl) ! set to huge positive values + !sr%il = -1 ! set to invalid indexes + ! + + call search(tp%root) + nfound = sr%nfound + if (tp%sort) then + call kdtree2_sort_results(nfound, results) + endif + + if (sr%overflow) then + write (*,*) 'KD_TREE_TRANS: warning! return from kdtree2_r_nearest found more neighbors' + write (*,*) 'KD_TREE_TRANS: than storage was provided for. Answer is NOT smallest ball' + write (*,*) 'KD_TREE_TRANS: with that number of neighbors! I.e. it is wrong.' + endif + + return + end subroutine kdtree2_r_nearest + + subroutine kdtree2_r_nearest_around_point(tp,idxin,correltime,r2,& + nfound,nalloc,results) + ! + ! Like kdtree2_r_nearest, but around a point 'idxin' already existing + ! in the data set. + ! + ! Results are NOT sorted unless tree was created with sort option. + ! + type (kdtree2), pointer :: tp + integer(pInt), intent (In) :: idxin, correltime, nalloc + real(pReal), intent(in) :: r2 + integer(pInt), intent(out) :: nfound + type(kdtree2_result), target :: results(:) + ! .. + ! .. Intrinsic Functions .. + intrinsic HUGE + ! .. + allocate (sr%qv(tp%dimen)) + sr%qv = tp%the_data(:,idxin) ! copy the vector + sr%ballsize = r2 + sr%nn = 0 ! flag for fixed r search + sr%nfound = 0 + sr%centeridx = idxin + sr%correltime = correltime + + sr%results => results + + sr%nalloc = nalloc + sr%overflow = .false. + + call validate_query_storage(nalloc) + + ! sr%dsl = HUGE(sr%dsl) ! set to huge positive values + ! sr%il = -1 ! set to invalid indexes + + sr%ind => tp%ind + sr%rearrange = tp%rearrange + + if (tp%rearrange) then + sr%Data => tp%rearranged_data + else + sr%Data => tp%the_data + endif + sr%rearrange = tp%rearrange + sr%dimen = tp%dimen + + ! + !sr%dsl = Huge(sr%dsl) ! set to huge positive values + !sr%il = -1 ! set to invalid indexes + ! + + call search(tp%root) + nfound = sr%nfound + if (tp%sort) then + call kdtree2_sort_results(nfound,results) + endif + + if (sr%overflow) then + write (*,*) 'KD_TREE_TRANS: warning! return from kdtree2_r_nearest found more neighbors' + write (*,*) 'KD_TREE_TRANS: than storage was provided for. Answer is NOT smallest ball' + write (*,*) 'KD_TREE_TRANS: with that number of neighbors! I.e. it is wrong.' + endif + + deallocate (sr%qv) + return + end subroutine kdtree2_r_nearest_around_point + + function kdtree2_r_count(tp,qv,r2) result(nfound) + ! Count the number of neighbors within square distance 'r2'. + type (kdtree2), pointer :: tp + real(pReal), target, intent (In) :: qv(:) + real(pReal), intent(in) :: r2 + integer(pInt) :: nfound + ! .. + ! .. Intrinsic Functions .. + intrinsic HUGE + ! .. + sr%qv => qv + sr%ballsize = r2 + + sr%nn = 0 ! flag for fixed r search + sr%nfound = 0 + sr%centeridx = -1 + sr%correltime = 0 + + nullify(sr%results) ! for some reason, FTN 95 chokes on '=> null()' + + sr%nalloc = 0 ! we do not allocate any storage but that's OK + ! for counting. + sr%ind => tp%ind + sr%rearrange = tp%rearrange + if (tp%rearrange) then + sr%Data => tp%rearranged_data + else + sr%Data => tp%the_data + endif + sr%dimen = tp%dimen + + ! + !sr%dsl = Huge(sr%dsl) ! set to huge positive values + !sr%il = -1 ! set to invalid indexes + ! + sr%overflow = .false. + + call search(tp%root) + + nfound = sr%nfound + + return + end function kdtree2_r_count + + function kdtree2_r_count_around_point(tp,idxin,correltime,r2) & + result(nfound) + ! Count the number of neighbors within square distance 'r2' around + ! point 'idxin' with decorrelation time 'correltime'. + ! + type (kdtree2), pointer :: tp + integer(pInt), intent (In) :: correltime, idxin + real(pReal), intent(in) :: r2 + integer(pInt) :: nfound + ! .. + ! .. + ! .. Intrinsic Functions .. + intrinsic HUGE + ! .. + allocate (sr%qv(tp%dimen)) + sr%qv = tp%the_data(:,idxin) + sr%ballsize = r2 + + sr%nn = 0 ! flag for fixed r search + sr%nfound = 0 + sr%centeridx = idxin + sr%correltime = correltime + nullify(sr%results) + + sr%nalloc = 0 ! we do not allocate any storage but that's OK + ! for counting. + + sr%ind => tp%ind + sr%rearrange = tp%rearrange + + if (sr%rearrange) then + sr%Data => tp%rearranged_data + else + sr%Data => tp%the_data + endif + sr%dimen = tp%dimen + + ! + !sr%dsl = Huge(sr%dsl) ! set to huge positive values + !sr%il = -1 ! set to invalid indexes + ! + sr%overflow = .false. + + call search(tp%root) + + nfound = sr%nfound + + return + end function kdtree2_r_count_around_point + + + subroutine validate_query_storage(n) + ! + ! make sure we have enough storage for n + ! + integer(pInt), intent(in) :: n + + if (size(sr%results,1) .lt. n) then + write (*,*) 'KD_TREE_TRANS: you did not provide enough storage for results(1:n)' + stop + return + endif + + return + end subroutine validate_query_storage + + function square_distance(d, iv,qv) result (res) + ! distance between iv[1:n] and qv[1:n] + ! .. Function Return Value .. + ! re-implemented to improve vectorization. + real(pReal) :: res + ! .. + ! .. + ! .. Scalar Arguments .. + integer(pInt) :: d + ! .. + ! .. Array Arguments .. + real(pReal) :: iv(:),qv(:) + ! .. + ! .. + res = sum( (iv(1:d)-qv(1:d))**2 ) + end function square_distance + + recursive subroutine search(node) + ! + ! This is the innermost core routine of the kd-tree search. Along + ! with "process_terminal_node", it is the performance bottleneck. + ! + ! This version uses a logically complete secondary search of + ! "box in bounds", whether the sear + ! + type (Tree_node), pointer :: node + ! .. + type(tree_node),pointer :: ncloser, nfarther + ! + integer(pInt) :: cut_dim, i + ! .. + real(pReal) :: qval, dis + real(pReal) :: ballsize + real(pReal), pointer :: qv(:) + type(interval), pointer :: box(:) + + if ((associated(node%left) .and. associated(node%right)) .eqv. .false.) then + ! we are on a terminal node + if (sr%nn .eq. 0) then + call process_terminal_node_fixedball(node) + else + call process_terminal_node(node) + endif + else + ! we are not on a terminal node + qv => sr%qv(1:) + cut_dim = node%cut_dim + qval = qv(cut_dim) + + if (qval < node%cut_val) then + ncloser => node%left + nfarther => node%right + dis = (node%cut_val_right - qval)**2 +! extra = node%cut_val - qval + else + ncloser => node%right + nfarther => node%left + dis = (node%cut_val_left - qval)**2 +! extra = qval- node%cut_val_left + endif + + if (associated(ncloser)) call search(ncloser) + + ! we may need to search the second node. + if (associated(nfarther)) then + ballsize = sr%ballsize +! dis=extra**2 + if (dis <= ballsize) then + ! + ! we do this separately as going on the first cut dimen is often + ! a good idea. + ! note that if extra**2 < sr%ballsize, then the next + ! check will also be false. + ! + box => node%box(1:) + do i=1,sr%dimen + if (i .ne. cut_dim) then + dis = dis + dis2_from_bnd(qv(i),box(i)%lower,box(i)%upper) + if (dis > ballsize) then + return + endif + endif + end do + + ! + ! if we are still here then we need to search mroe. + ! + call search(nfarther) + endif + endif + end if + end subroutine search + + + real(pReal) function dis2_from_bnd(x,amin,amax) result (res) + real(pReal), intent(in) :: x, amin,amax + + if (x > amax) then + res = (x-amax)**2; + return + else + if (x < amin) then + res = (amin-x)**2; + return + else + res = 0.0 + return + endif + endif + return + end function dis2_from_bnd + + logical function box_in_search_range(node, sr) result(res) + ! + ! Return the distance from 'qv' to the CLOSEST corner of node's + ! bounding box + ! for all coordinates outside the box. Coordinates inside the box + ! contribute nothing to the distance. + ! + type (tree_node), pointer :: node + type (tree_search_record), pointer :: sr + + integer(pInt) :: dimen, i + real(pReal) :: dis, ballsize + real(pReal) :: l, u + + dimen = sr%dimen + ballsize = sr%ballsize + dis = 0.0 + res = .true. + do i=1,dimen + l = node%box(i)%lower + u = node%box(i)%upper + dis = dis + (dis2_from_bnd(sr%qv(i),l,u)) + if (dis > ballsize) then + res = .false. + return + endif + end do + res = .true. + return + end function box_in_search_range + + + subroutine process_terminal_node(node) + ! + ! Look for actual near neighbors in 'node', and update + ! the search results on the sr data structure. + ! + type (tree_node), pointer :: node + ! + real(pReal), pointer :: qv(:) + integer(pInt), pointer :: ind(:) + real(pReal), pointer :: data(:,:) + ! + integer(pInt) :: dimen, i, indexofi, k, centeridx, correltime + real(pReal) :: ballsize, sd, newpri + logical :: rearrange + type(pq), pointer :: pqp + ! + ! copy values from sr to local variables + ! + ! + ! Notice, making local pointers with an EXPLICIT lower bound + ! seems to generate faster code. + ! why? I don't know. + qv => sr%qv(1:) + pqp => sr%pq + dimen = sr%dimen + ballsize = sr%ballsize + rearrange = sr%rearrange + ind => sr%ind(1:) + data => sr%Data(1:,1:) + centeridx = sr%centeridx + correltime = sr%correltime + + ! doing_correl = (centeridx >= 0) ! Do we have a decorrelation window? + ! include_point = .true. ! by default include all points + ! search through terminal bucket. + + mainloop: do i = node%l, node%u + if (rearrange) then + sd = 0.0 + do k = 1,dimen + sd = sd + (data(k,i) - qv(k))**2 + if (sd>ballsize) cycle mainloop + end do + indexofi = ind(i) ! only read it if we have not broken out + else + indexofi = ind(i) + sd = 0.0 + do k = 1,dimen + sd = sd + (data(k,indexofi) - qv(k))**2 + if (sd>ballsize) cycle mainloop + end do + endif + + if (centeridx > 0) then ! doing correlation interval? + if (abs(indexofi-centeridx) < correltime) cycle mainloop + endif + + + ! + ! two choices for any point. The list so far is either undersized, + ! or it is not. + ! + ! If it is undersized, then add the point and its distance + ! unconditionally. If the point added fills up the working + ! list then set the sr%ballsize, maximum distance bound (largest distance on + ! list) to be that distance, instead of the initialized +infinity. + ! + ! If the running list is full size, then compute the + ! distance but break out immediately if it is larger + ! than sr%ballsize, "best squared distance" (of the largest element), + ! as it cannot be a good neighbor. + ! + ! Once computed, compare to best_square distance. + ! if it is smaller, then delete the previous largest + ! element and add the new one. + + if (sr%nfound .lt. sr%nn) then + ! + ! add this point unconditionally to fill list. + ! + sr%nfound = sr%nfound +1 + newpri = pq_insert(pqp,sd,indexofi) + if (sr%nfound .eq. sr%nn) ballsize = newpri + ! we have just filled the working list. + ! put the best square distance to the maximum value + ! on the list, which is extractable from the PQ. + + else + ! + ! now, if we get here, + ! we know that the current node has a squared + ! distance smaller than the largest one on the list, and + ! belongs on the list. + ! Hence we replace that with the current one. + ! + ballsize = pq_replace_max(pqp,sd,indexofi) + endif + end do mainloop + ! + ! Reset sr variables which may have changed during loop + ! + sr%ballsize = ballsize + + end subroutine process_terminal_node + + subroutine process_terminal_node_fixedball(node) + ! + ! Look for actual near neighbors in 'node', and update + ! the search results on the sr data structure, i.e. + ! save all within a fixed ball. + ! + type (tree_node), pointer :: node + ! + real(pReal), pointer :: qv(:) + integer(pInt), pointer :: ind(:) + real(pReal), pointer :: data(:,:) + ! + integer(pInt) :: nfound + integer(pInt) :: dimen, i, indexofi, k + integer(pInt) :: centeridx, correltime, nn + real(pReal) :: ballsize, sd + logical :: rearrange + + ! + ! copy values from sr to local variables + ! + qv => sr%qv(1:) + dimen = sr%dimen + ballsize = sr%ballsize + rearrange = sr%rearrange + ind => sr%ind(1:) + data => sr%Data(1:,1:) + centeridx = sr%centeridx + correltime = sr%correltime + nn = sr%nn ! number to search for + nfound = sr%nfound + + ! search through terminal bucket. + mainloop: do i = node%l, node%u + + ! + ! two choices for any point. The list so far is either undersized, + ! or it is not. + ! + ! If it is undersized, then add the point and its distance + ! unconditionally. If the point added fills up the working + ! list then set the sr%ballsize, maximum distance bound (largest distance on + ! list) to be that distance, instead of the initialized +infinity. + ! + ! If the running list is full size, then compute the + ! distance but break out immediately if it is larger + ! than sr%ballsize, "best squared distance" (of the largest element), + ! as it cannot be a good neighbor. + ! + ! Once computed, compare to best_square distance. + ! if it is smaller, then delete the previous largest + ! element and add the new one. + + ! which index to the point do we use? + + if (rearrange) then + sd = 0.0 + do k = 1,dimen + sd = sd + (data(k,i) - qv(k))**2 + if (sd>ballsize) cycle mainloop + end do + indexofi = ind(i) ! only read it if we have not broken out + else + indexofi = ind(i) + sd = 0.0 + do k = 1,dimen + sd = sd + (data(k,indexofi) - qv(k))**2 + if (sd>ballsize) cycle mainloop + end do + endif + + if (centeridx > 0) then ! doing correlation interval? + if (abs(indexofi-centeridx) 1)then + ileft=ileft-1 + value=a(ileft); ivalue=ind(ileft) + else + value=a(iright); ivalue=ind(iright) + a(iright)=a(1); ind(iright)=ind(1) + iright=iright-1 + if (iright == 1) then + a(1)=value;ind(1)=ivalue + return + endif + endif + i=ileft + j=2*ileft + do while (j <= iright) + if(j < iright) then + if(a(j) < a(j+1)) j=j+1 + endif + if(value < a(j)) then + a(i)=a(j); ind(i)=ind(j) + i=j + j=j+j + else + j=iright+1 + endif + end do + a(i)=value; ind(i)=ivalue + end do + end subroutine heapsort + + subroutine heapsort_struct(a,n) + ! + ! Sort a(1:n) in ascending order + ! + ! + integer(pInt),intent(in) :: n + type(kdtree2_result),intent(inout) :: a(:) + + ! + ! + type(kdtree2_result) :: value ! temporary value + + integer(pInt) :: i,j + integer(pInt) :: ileft,iright + + ileft=n/2+1 + iright=n + + ! do i=1,n + ! ind(i)=i + ! Generate initial idum array + ! end do + + if(n.eq.1) return + + do + if(ileft > 1)then + ileft=ileft-1 + value=a(ileft) + else + value=a(iright) + a(iright)=a(1) + iright=iright-1 + if (iright == 1) then + a(1) = value + return + endif + endif + i=ileft + j=2*ileft + do while (j <= iright) + if(j < iright) then + if(a(j)%dis < a(j+1)%dis) j=j+1 + endif + if(value%dis < a(j)%dis) then + a(i)=a(j); + i=j + j=j+j + else + j=iright+1 + endif + end do + a(i)=value + end do + end subroutine heapsort_struct + +end module kdtree2_module + diff --git a/code/makefile b/code/makefile index 8b0340b60..ea00386ac 100644 --- a/code/makefile +++ b/code/makefile @@ -58,9 +58,9 @@ DEBUG5 =-stand std03/std95 ######################################################################################## #auto values will be set by setup_code.py -FFTWROOT := +FFTWROOT := /nethome/m.diehl/DAMASK/lib/fftw IKMLROOT := -ACMLROOT := +ACMLROOT := /opt/acml4.4.0 LAPACKROOT := F90 ?= ifort @@ -206,15 +206,18 @@ FEsolving.o: FEsolving.f90 basics.a -basics.a: debug.o math.o - ar rc basics.a debug.o math.o numerics.o IO.o DAMASK_spectral_interface.o prec.o +basics.a: kdtree2.o + ar rc basics.a kdtree2.o math.o debug.o numerics.o IO.o DAMASK_spectral_interface.o prec.o +kdtree2.o: kdtree2.f90 math.o + $(PREFIX) $(COMPILERNAME) $(COMPILE) kdtree2.f90 $(SUFFIX) + +math.o: math.f90 debug.o + $(PREFIX) $(COMPILERNAME) $(COMPILE) math.f90 $(SUFFIX) + debug.o: debug.f90 numerics.o $(PREFIX) $(COMPILERNAME) $(COMPILE) debug.f90 $(SUFFIX) -math.o: math.f90 numerics.o - $(PREFIX) $(COMPILERNAME) $(COMPILE) math.f90 $(SUFFIX) - numerics.o: numerics.f90 IO.o $(PREFIX) $(COMPILERNAME) $(COMPILE) numerics.f90 $(SUFFIX)