!#############################################################################################################################
! BEGIN KDTREE2
!#############################################################################################################################
!(c) Matthew Kennel, Institute for Nonlinear Science (2004)
!
! Licensed under the Academic Free License version 1.1 found in file LICENSE
! with additional provisions found in that same file.
!
!#######################################################
! modifications: changed precision according to prec.f90
! k.komerla, m.diehl
!#######################################################

module kdtree2_priority_queue_module
 use prec, only: pInt, &
                 pReal
 !
 ! maintain a priority queue (PQ) of data, pairs of 'priority/payload', 
 ! implemented with a binary heap.  The type kdtree2_result below holds one
 ! such pair; its 'dis' field is the priority.
 !
 implicit none
 private
 type kdtree2_result
     ! a pair of distances, indexes
     real(pReal)    :: dis = 0.0_pReal
     integer(pInt) :: idx = -1_pInt    !Initializers cause some bugs in compilers.
 end type kdtree2_result
 !
 ! A heap-based priority queue lets one efficiently implement the following
 ! operations, each in log(N) time, as opposed to linear time.
 !
 ! 1)  add a datum (push a datum onto the queue, increasing its length) 
 ! 2)  return the priority value of the maximum priority element 
 ! 3)  pop-off (and delete) the element with the maximum priority, decreasing
 !     the size of the queue. 
 ! 4)  replace the datum with the maximum priority with a supplied datum
 !     (of either higher or lower priority), maintaining the size of the
 !     queue. 
 !
 !
 ! In the k-d tree case, the 'priority' is the square distance of a point in
 ! the data set to a reference point.   The goal is to keep the smallest M
 ! distances to a reference point.  The tree algorithm searches terminal
 ! nodes to decide whether to add points under consideration.
 !
 ! A priority queue is useful here because it lets one quickly return the
 ! largest distance currently existing in the list.  If a new candidate
 ! distance is smaller than this, then the new candidate ought to replace
 ! the old candidate.  In priority queue terms, this means removing the
 ! highest priority element, and inserting the new one.
 !
 ! Algorithms based on Cormen, Leiserson, Rivest, _Introduction
 ! to Algorithms_, 1990, with further optimization by the author.
 !
 ! Originally informed by a C implementation by Sriranga Veeraraghavan.
 !
 ! This module is not written in the clearest way, but is implemented this
 ! way for speed, as its operations will be called many times during searches
 ! over large numbers of neighbors.
 !
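 !
 ! A hedged usage sketch of this queue during a fixed-count neighbor search
 ! (comment only; 'buf', 'qp', 'dis' and 'idx' are illustrative names, not
 ! part of this module):
 !
 !    type(kdtree2_result), target :: buf(50)
 !    type(pq), target             :: queue
 !    type(pq), pointer            :: qp
 !    real(pReal)                  :: worst
 !    queue = pq_create(buf)
 !    qp => queue
 !    ! while fewer than 50 candidates have been accepted:
 !    worst = pq_insert(qp, dis, idx)
 !    ! once the queue is full, keep only closer candidates:
 !    if (dis < worst) worst = pq_replace_max(qp, dis, idx)
 !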
 type pq
     !
     ! The priority queue consists of elements
     ! priority(1:heap_size), with associated payload(:).
     !
     ! There are heap_size active elements. 
     ! Assumes the allocation is always sufficient.  Will NOT increase it
     ! to match.
     integer(pInt) :: heap_size = 0_pInt
     type(kdtree2_result), pointer :: elems(:) 
 end type pq

 public :: kdtree2_result

 public :: pq
 public :: pq_create
 public :: pq_delete, pq_insert
 public :: pq_extract_max, pq_max, pq_replace_max, pq_maxpri

contains

function pq_create(results_in) result(res)
 !
 ! Create a priority queue from an ALREADY allocated
 ! array of kdtree2_result for storage.  NOTE! It will NOT
 ! add any elements to the heap, i.e. any existing
 ! data in the input array will NOT be used and may
 ! be overwritten.
 ! 
 ! usage:
 !    type(kdtree2_result), pointer :: results(:)
 !    type(pq) :: a
 !    allocate(results(1000))
 !    a = pq_create(results)
 !
 implicit none
 type(kdtree2_result), target:: results_in(:) 
 type(pq) :: res
 !
 !
 integer :: nalloc

 nalloc = size(results_in,1)
 if (nalloc .lt. 1) then
    write (*,*) 'PQ_CREATE: error, input array must contain at least one element.'
 end if
 res%elems => results_in
 res%heap_size = 0_pInt
 return
end function pq_create

 !
 ! operations for getting parents and left + right children
 ! of elements in a binary heap.
 !

!
! These are written inline for speed.
!    
!  integer(pInt) function parent(i)
!    integer(pInt), intent(in) :: i
!    parent = (i/2)
!    return
!  end function parent

!  integer(pInt) function left(i)
!    integer(pInt), intent(in) ::i
!    left = (2*i)
!    return
!  end function left

!  integer(pInt) function right(i)
!    integer(pInt), intent(in) :: i
!    right = (2*i)+1
!    return
!  end function right

!  logical function compare_priority(p1,p2)
!    real(pReal), intent(in) :: p1, p2
!
!    compare_priority = (p1 .gt. p2)
!    return
!  end function compare_priority

subroutine heapify(a,i_in)
 !
 ! take the subtree rooted at 'i_in' and restore the max-heap
 ! property by sifting down.   This is performance critical 
 ! and has been tweaked a little to reflect this.
 !
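 ! A small worked trace (comment only): with priorities
 !   elems(1:4)%dis = (/ 1.0, 4.0, 7.0, 2.0 /)  and  heap_size = 4,
 ! heapify(a,1) compares node 1 (1.0) with children 2 (4.0) and 3 (7.0),
 ! finds node 3 largest and swaps, giving (/ 7.0, 4.0, 1.0, 2.0 /);
 ! node 3 then has no children inside the heap, so the loop exits.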
 implicit none
 type(pq),pointer   :: a
 integer(pInt), intent(in) :: i_in
 !
 integer(pInt) :: i, l, r, largest

 real(pReal)    :: pri_i, pri_l, pri_r, pri_largest


 type(kdtree2_result) :: temp

 i = i_in

 bigloop:  do
       l = 2_pInt*i ! left(i)
       r = l+1_pInt ! right(i)
       ! 
       ! set 'largest' to the index of either i, l, r
       ! depending on whose priority is largest.
       !
       ! note that l or r can be larger than the heap size
       ! in which case they do not count.


       ! does left child have higher priority? 
       if (l .gt. a%heap_size) then
          ! we know that i is the largest as both l and r are invalid.
          exit 
       else
          pri_i = a%elems(i)%dis
          pri_l = a%elems(l)%dis 
          if (pri_l .gt. pri_i) then
             largest = l
             pri_largest = pri_l
          else
             largest = i
             pri_largest = pri_i
          endif

          !
          ! between i and l we have a winner
          ! now choose between that and r.
          !
          if (r .le. a%heap_size) then
             pri_r = a%elems(r)%dis
             if (pri_r .gt. pri_largest) then
                largest = r
             endif
          endif
       endif

       if (largest .ne. i) then
          ! swap data in nodes largest and i, then heapify

          temp = a%elems(i)
          a%elems(i) = a%elems(largest)
          a%elems(largest) = temp 
          ! 
          ! Canonical heapify() algorithm has tail-recursive call: 
          !
          !        call heapify(a,largest)   
          ! we will simulate with cycle
          !
          i = largest
          cycle bigloop ! continue the loop 
       else
          return   ! break from the loop
       end if
  enddo bigloop
  return
end subroutine heapify

subroutine pq_max(a,e) 
 !
 ! return the priority and payload of the maximum-priority element
 ! of the queue, which is the first element when the heap is in
 ! canonical (heapified) form.
 !
 use IO, only: IO_error
 
 implicit none
 type(pq),pointer :: a
 type(kdtree2_result),intent(out)  :: e

 if (a%heap_size .gt. 0) then
    e = a%elems(1) 
 else
    call IO_error (460_pInt, ext_msg='PQ_MAX: heap_size < 1')
 endif
 return
end subroutine pq_max
  
real(pReal) function pq_maxpri(a)
 use IO, only: IO_error
 
 implicit none
 type(pq), pointer :: a

 if (a%heap_size .gt. 0) then
    pq_maxpri = a%elems(1)%dis
 else
    call IO_error (460_pInt,ext_msg='PQ_MAXPRI: heap_size < 1')
 endif
 return
end function pq_maxpri

subroutine pq_extract_max(a,e)
 !
 ! return the priority and payload of maximum priority
 ! element, and remove it from the queue.
 ! (equivalent to 'pop()' on a stack)
 !
 use IO, only: IO_error

 implicit none
 type(pq),pointer :: a
 type(kdtree2_result), intent(out) :: e
 
 if (a%heap_size .ge. 1) then
    !
    ! return max as first element
    !
    e = a%elems(1) 
    
    !
    ! move last element to first
    !
    a%elems(1) = a%elems(a%heap_size) 
    a%heap_size = a%heap_size-1_pInt
    call heapify(a,1_pInt)
    return
 else
    call IO_error (460_pInt,ext_msg='PQ_EXTRACT_MAX: attempted to pop an empty PQ')
 end if
end subroutine pq_extract_max


real(pReal) function pq_insert(a,dis,idx) 
 !
 ! Insert a new element and return the new maximum priority,
 ! which may or may not be the same as the old maximum priority.
 !
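 ! Sift-up sketch (comment only): with priorities (7,4,1,2) and a new element
 ! of priority 5, the new element conceptually starts in slot 5; since 5 > 4
 ! (its parent in slot 2), 4 is moved down to slot 5; since 5 < 7 (slot 1),
 ! the climb stops and 5 is stored in slot 2, giving (7,5,1,2,4).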
 implicit none
 type(pq),pointer  :: a
 real(pReal), intent(in) :: dis
 integer(pInt), intent(in) :: idx
 !    type(kdtree2_result), intent(in) :: e
 !
 integer(pInt) :: i, isparent
 real(pReal)    :: parentdis
 !

 !    if (a%heap_size .ge. a%max_elems) then
 !       write (*,*) 'PQ_INSERT: error, attempt made to insert element on full PQ'
 !       stop
 !    else
 a%heap_size = a%heap_size + 1_pInt
 i = a%heap_size

 do while (i .gt. 1)
    isparent = i/2_pInt               ! parent index in the binary heap (integer division)
    parentdis = a%elems(isparent)%dis
    if (dis .gt. parentdis) then
       ! move what was in i's parent into i.
       a%elems(i)%dis = parentdis
       a%elems(i)%idx = a%elems(isparent)%idx
       i = isparent
    else
       exit
    endif
 end do

 ! insert the element at the determined position
 a%elems(i)%dis = dis
 a%elems(i)%idx = idx

 pq_insert = a%elems(1)%dis 
 return
 !    end if

end function pq_insert

subroutine pq_adjust_heap(a,i)
 implicit none
 type(pq), pointer  :: a
 integer(pInt), intent(in) :: i
 !
 ! nominally arguments (a,i); the description below assumes i=1
 !
 ! This routine assumes that the trees with roots 2 and 3 are already heaps, i.e.
 ! the children of '1' are heaps.  When the procedure is completed, the
 ! tree rooted at 1 is a heap.
 real(pReal) :: prichild
 integer(pInt) :: parent, child, N

 type(kdtree2_result) :: e

 e = a%elems(i) 

 parent = i
 child = 2_pInt*i
 N = a%heap_size
 
 do while (child .le. N)
    if (child .lt. N) then
       if (a%elems(child)%dis .lt. a%elems(child+1)%dis) then
          child = child+1_pInt
       endif
    endif
    prichild = a%elems(child)%dis
    if (e%dis .ge. prichild) then
       exit 
    else
       ! move child into parent.
       a%elems(parent) = a%elems(child) 
       parent = child
       child = 2_pInt*parent
    end if
 end do
 a%elems(parent) = e
 return
end subroutine pq_adjust_heap
    

real(pReal) function pq_replace_max(a,dis,idx) 
 !
 ! Replace the extant maximum priority element
 ! in the PQ with (dis,idx).  Return
 ! the new maximum priority, which may be larger
 ! or smaller than the old one.
 !
 implicit none
 type(pq),pointer         :: a
 real(pReal), intent(in) :: dis
 integer(pInt), intent(in) :: idx
 !type(kdtree2_result), intent(in) :: e
 ! note: this routine is not as well tested as the others.

 integer(pInt) :: parent, child, N
 real(pReal)    :: prichild, prichildp1

 type(kdtree2_result) :: etmp
 
 if (.true.) then
    N=a%heap_size
    if (N .ge. 1) then
       parent =1_pInt
       child=2_pInt

       loop: do while (child .le. N)
          prichild = a%elems(child)%dis

          !
           ! possibly child+1 has higher priority, and if
           ! so, take it and increment child.
          !

          if (child .lt. N) then
             prichildp1 = a%elems(child+1_pInt)%dis
             if (prichild .lt. prichildp1) then
                child = child+1_pInt
                prichild = prichildp1
             endif
          endif

          if (dis .ge. prichild) then
             exit loop  
              ! we have found the proper place for the new element, 
              ! larger than either child's priority.
          else
             ! move child into parent.
             a%elems(parent) = a%elems(child) 
             parent = child
             child = 2_pInt*parent
          end if
       end do loop
       a%elems(parent)%dis = dis
       a%elems(parent)%idx = idx
       pq_replace_max = a%elems(1)%dis
    else
       a%elems(1)%dis = dis
       a%elems(1)%idx = idx
       pq_replace_max = dis
    endif
 else
    !
    ! slower version using elementary pop and push operations.
    !
    call pq_extract_max(a,etmp) 
    etmp%dis = dis
    etmp%idx = idx
    pq_replace_max = pq_insert(a,dis,idx)
 endif
 return
end function pq_replace_max

subroutine pq_delete(a,i)
 ! 
 ! delete the element at heap position 'i'
 !
 use IO, only: IO_error

 implicit none
 type(pq),pointer :: a
 integer(pInt)           :: i

 if ((i .lt. 1) .or. (i .gt. a%heap_size)) then
    call IO_error (460_pInt,ext_msg='PQ_DELETE: attempt to remove out of bounds element')
 endif

 ! swap the item to be deleted with the last element
 ! and shorten heap by one.
 a%elems(i) = a%elems(a%heap_size) 
 a%heap_size = a%heap_size - 1_pInt

 call heapify(a,i)

end subroutine pq_delete

end module kdtree2_priority_queue_module


module kdtree2_module
 use prec
 use kdtree2_priority_queue_module
 
 implicit none
 private
 ! K-D tree routines in Fortran 90 by Matt Kennel.
 ! Original program was written in Sather by Steve Omohundro and
 ! Matt Kennel.  Only the Euclidean metric is supported. 
 !
 !
 ! This module is identical to 'kd_tree', except that the order
 ! of subscripts is reversed in the data array.
 ! In other words, for an embedding of N D-dimensional vectors, the
 ! data array is here, in natural Fortran order, myData(1:D,1:N),
 ! because Fortran lays out columns first,
 !
 ! whereas conventionally (C-style) it is myData(1:N,1:D)
 ! as in the original kd_tree module. 
 !
 !-------------DATA TYPE, CREATION, DELETION---------------------
 public :: pReal
 public :: kdtree2, kdtree2_result, tree_node, kdtree2_create, kdtree2_destroy
 !---------------------------------------------------------------
 !-------------------SEARCH ROUTINES-----------------------------
 public :: kdtree2_n_nearest,kdtree2_n_nearest_around_point
 ! Return fixed number of nearest neighbors around arbitrary vector,
 ! or extant point in dataset, with decorrelation window. 
 !
 public :: kdtree2_r_nearest, kdtree2_r_nearest_around_point
 ! Return points within a fixed ball around an arbitrary vector/extant point 
 !
 public :: kdtree2_sort_results
 ! Sort, in order of increasing distance, results from above.
 !
 public :: kdtree2_r_count, kdtree2_r_count_around_point 
 ! Count points within a fixed ball around an arbitrary vector/extant point 
 !
 public :: kdtree2_n_nearest_brute_force, kdtree2_r_nearest_brute_force
 ! brute-force versions of kdtree2_[n|r]_nearest
 !----------------------------------------------------------------
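 !
 ! Illustrative end-to-end sketch (comment only; array sizes and the names
 ! 'coords', 'query', 'answers' are examples, not part of this module):
 !
 !    real(pReal), target    :: coords(3,10000)   ! D=3 components of N=10000 points
 !    real(pReal), target    :: query(3)
 !    type(kdtree2), pointer :: tree
 !    type(kdtree2_result)   :: answers(5)
 !    ! ... fill coords(1:3,1:10000) and query(1:3) ...
 !    tree => kdtree2_create(coords, sort=.true., rearrange=.true.)
 !    call kdtree2_n_nearest(tp=tree, qv=query, nn=5_pInt, results=answers)
 !    ! answers(:)%idx are column indexes into coords, answers(:)%dis squared distances
 !    call kdtree2_destroy(tree)
 !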


 integer(pInt), parameter :: bucket_size = 12_pInt
 ! The maximum number of points to keep in a terminal node.

 type interval
     real(pReal) :: lower,upper
 end type interval

 type :: tree_node
     ! an internal tree node
     private
     integer(pInt) :: cut_dim
     ! the dimension to cut
     real(pReal) :: cut_val
     ! where to cut the dimension
     real(pReal) :: cut_val_left, cut_val_right  
     ! improved cutoffs knowing the spread in child boxes.
      integer(pInt) :: l, u
      ! points included in this node are indexes(k) with k in [l,u]
      type (tree_node), pointer :: left, right
      ! child pointers
      type(interval), pointer :: box(:) => null()
      ! bounding box of the points in this node

 end type tree_node

 type :: kdtree2
     ! Global information about the tree, one per tree
     integer(pInt) :: dimen=0_pInt, n=0_pInt
     ! dimensionality and total # of points
     real(pReal), pointer :: the_data(:,:) => null()
     ! pointer to the actual data array 
     ! 
     !  IMPORTANT NOTE:  IT IS DIMENSIONED   the_data(1:d,1:N)
     !  which may be opposite of what may be conventional.
      !  This is because, in Fortran, the memory layout is such that
     !  the first dimension is in sequential order.  Hence, with
     !  (1:d,1:N), all components of the vector will be in consecutive
     !  memory locations.  The search time is dominated by the
     !  evaluation of distances in the terminal nodes.  Putting all
     !  vector components in consecutive memory location improves
     !  memory cache locality, and hence search speed, and may enable 
     !  vectorization on some processors and compilers. 

     integer(pInt), pointer :: ind(:) => null()
     ! permuted index into the data, so that indexes[l..u] of some
     ! bucket represent the indexes of the actual points in that
     ! bucket.
     logical       :: sort = .false.
     ! do we always sort output results?
     logical       :: rearrange = .false. 
     real(pReal), pointer :: rearranged_data(:,:) => null()
     ! if (rearrange .eqv. .true.) then rearranged_data has been
     ! created so that rearranged_data(:,i) = the_data(:,ind(i)),
     ! permitting search to use more cache-friendly rearranged_data, at
     ! some initial computation and storage cost.
     type (tree_node), pointer :: root => null()
     ! root pointer of the tree
 end type kdtree2


 type :: tree_search_record
     !
     ! One of these is created for each search.
     !
     private
     ! 
     ! Many fields are copied from the tree structure, in order to
     ! speed up the search.
     !
     integer(pInt)           :: dimen   
     integer(pInt)           :: nn, nfound
     real(pReal)             :: ballsize
     integer(pInt)           :: centeridx=999_pInt, correltime=9999_pInt
     ! exclude points within 'correltime' of 'centeridx', iff centeridx >= 0
     integer(pInt)           :: nalloc  ! how much allocated for results(:)?
     logical                 :: rearrange  ! are the data rearranged or original? 
     ! did the # of points found overflow the storage provided?
     logical                 :: overflow
     real(pReal), pointer    :: qv(:)  ! query vector
     type(kdtree2_result), pointer :: results(:) ! results
     type(pq) :: pq
     real(pReal), pointer    :: myData(:,:)  ! temp pointer to data
     integer(pInt), pointer  :: ind(:)     ! temp pointer to indexes
 end type tree_search_record

 type(tree_search_record), save, target :: sr   ! A GLOBAL VARIABLE for search

contains

function kdtree2_create(input_data,myDim,sort,rearrange) result (mr)
 !
 ! create the actual tree structure, given an input array of data.
 !
 ! Note, input data is input_data(1:d,1:N), NOT the other way around.
 ! THIS IS THE REVERSE OF THE PREVIOUS VERSION OF THIS MODULE.
 ! The reason for it is cache friendliness, improving performance.
 !
 ! Optional arguments:  If 'myDim' is specified, then the tree
 !                      will only search the first 'myDim' components
 !                      of input_data, otherwise, myDim is inferred
 !                      from SIZE(input_data,1).
 !
 !                      if sort .eqv. .true. then output results
 !                      will be sorted by increasing distance.
 !                      default=.false., as it is faster to not sort.
 !                      
 !                      if rearrange .eqv. .true. then an internal
 !                      copy of the data, rearranged by terminal node,
 !                      will be made for cache friendliness. 
 !                      default=.true., as it speeds searches, but
 !                      building takes longer, and extra memory is used.
 !
 ! .. Function Return Value ..
 use IO, only: IO_error
 
 implicit none
 type (kdtree2), pointer              :: mr
 integer(pInt), intent(in), optional  :: myDim
 logical, intent(in), optional        :: sort
 logical, intent(in), optional        :: rearrange
 ! ..
 ! .. Array Arguments ..
 real(pReal), target                  :: input_data(:,:)
 !
 integer(pInt)                        :: i
 ! ..
 allocate (mr)
 mr%the_data => input_data
 ! pointer assignment

 if (present(myDim)) then
    mr%dimen = myDim
 else
    mr%dimen = int(size(input_data,1), pInt) ! size returns default integer
 end if
 mr%n = int(size(input_data,2), pInt) ! size returns default integer

 if (mr%dimen > mr%n) then
    !  unlikely to be correct
    write (*,*) 'KD_TREE_TRANS: likely user error.'
    write (*,*) 'KD_TREE_TRANS: You passed in matrix with D=',mr%dimen
    write (*,*) 'KD_TREE_TRANS: and N=',mr%n
     write (*,*) 'KD_TREE_TRANS: note that the new format is myData(1:D,1:N)'
     write (*,*) 'KD_TREE_TRANS: with usually N >> D.   If N is approximately equal to D, then a k-d tree'
    write (*,*) 'KD_TREE_TRANS: is not an appropriate data structure.'
    call IO_error (460_pInt)
 end if

 call build_tree(mr)

 if (present(sort)) then
    mr%sort = sort
 else
    mr%sort = .false.
 endif

 if (present(rearrange)) then
    mr%rearrange = rearrange
 else
    mr%rearrange = .true.
 endif

 if (mr%rearrange) then
    allocate(mr%rearranged_data(mr%dimen,mr%n))
    do i=1_pInt,mr%n
       mr%rearranged_data(:,i) = mr%the_data(:, &
        mr%ind(i))
    enddo
 else
    nullify(mr%rearranged_data)
 endif

end function kdtree2_create

subroutine build_tree(tp)
 implicit none
 type (kdtree2), pointer :: tp
 ! ..
 integer(pInt) :: j
 type(tree_node), pointer :: dummy => null()
 ! ..
 allocate (tp%ind(tp%n))
 forall (j=1_pInt:tp%n)
    tp%ind(j) = j
 end forall
 tp%root => build_tree_for_range(tp,1_pInt,tp%n, dummy)
end subroutine build_tree

recursive function build_tree_for_range(tp,l,u,parent) result (res)
 ! .. Function Return Value ..
 implicit none
 type (tree_node), pointer :: res
 ! ..
 ! .. Structure Arguments ..
 type (kdtree2), pointer :: tp
 type (tree_node),pointer           :: parent
 ! ..
 ! .. Scalar Arguments ..
 integer(pInt), intent (In) :: l, u
 ! ..
 ! .. Local Scalars ..
 integer(pInt) :: i, c, m, dimen
 logical :: recompute
 real(pReal)    :: average

!!$      If (.False.) Then 
!!$         If ((l .Lt. 1) .Or. (l .Gt. tp%n)) Then
!!$            Stop 'illegal L value in build_tree_for_range'
!!$         End If
!!$         If ((u .Lt. 1) .Or. (u .Gt. tp%n)) Then
!!$            Stop 'illegal u value in build_tree_for_range'
!!$         End If
!!$         If (u .Lt. l) Then
!!$            Stop 'U is less than L, thats illegal.'
!!$         End If
!!$      Endif
!!$      
 dimen = tp%dimen

 if ( u < l ) then
    ! no points in this range: return a disassociated node
    ! (checked before allocating, so nothing is leaked)
    nullify(res)
    return
 end if

 allocate (res)
 allocate (res%box(dimen))

 ! First, compute an APPROXIMATE bounding box of all points associated with this node.

 if ((u-l)<=bucket_size) then
    !
    ! always compute true bounding box for terminal nodes.
    !
    do i=1_pInt,dimen
       call spread_in_coordinate(tp,i,l,u,res%box(i))
    end do
    res%cut_dim = 0_pInt
    res%cut_val = 0.0_pReal
    res%l = l
    res%u = u
    res%left =>null()
    res%right => null() 
 else
    ! 
    ! modify approximate bounding box.  This will be an
    ! overestimate of the true bounding box, as we are only recomputing 
    ! the bounding box for the dimension that the parent split on.
    !
    ! Going to a true bounding box computation would significantly
    ! increase the time necessary to build the tree, and usually
    ! has only a very small difference.  This box is not used
    ! for searching but only for deciding which coordinate to split on.
    !
    do i=1_pInt,dimen
       recompute=.true.
       if (associated(parent)) then
          if (i .ne. parent%cut_dim) then
             recompute=.false.
          end if
       endif
       if (recompute) then
          call spread_in_coordinate(tp,i,l,u,res%box(i))
       else
          res%box(i) = parent%box(i)
       endif
    end do
    

    c = int(maxloc(res%box(1:dimen)%upper-res%box(1:dimen)%lower,1), pInt)
    !
    ! c is the identity of which coordinate has the greatest spread.
    !
    
    if (.false.) then
       ! select exact median to have fully balanced tree.
       m = (l+u)/2_pInt
       call select_on_coordinate(tp%the_data,tp%ind,c,m,l,u)
    else
       !
       ! select point halfway between min and max, as per A. Moore,
       ! who says this helps in some degenerate cases, or 
       ! actual arithmetic average. 
       !
       if (.true.) then
          ! actually compute average
          average = sum(tp%the_data(c,tp%ind(l:u))) / real(u-l+1_pInt,pReal)
       else
          average = (res%box(c)%upper + res%box(c)%lower)/2.0_pReal
       endif
          
       res%cut_val = average
       m = select_on_coordinate_value(tp%the_data,tp%ind,c,average,l,u)
    endif
       
    ! moves indexes around
    res%cut_dim = c
    res%l = l
    res%u = u
!   res%cut_val = tp%the_data(c,tp%ind(m))

    res%left => build_tree_for_range(tp,l,m,res)
    res%right => build_tree_for_range(tp,m+1_pInt,u,res)

    if (associated(res%right) .eqv. .false.) then
       res%box = res%left%box
       res%cut_val_left = res%left%box(c)%upper
       res%cut_val = res%cut_val_left
    elseif (associated(res%left) .eqv. .false.) then
       res%box = res%right%box
       res%cut_val_right = res%right%box(c)%lower
       res%cut_val = res%cut_val_right
    else
       res%cut_val_right = res%right%box(c)%lower
       res%cut_val_left = res%left%box(c)%upper
        res%cut_val = (res%cut_val_left + res%cut_val_right)/2.0_pReal


       ! now remake the true bounding box for self.  
       ! Since we are taking unions (in effect) of a tree structure,
       ! this is much faster than doing an exhaustive
       ! search over all points
       res%box%upper = max(res%left%box%upper,res%right%box%upper)
       res%box%lower = min(res%left%box%lower,res%right%box%lower) 
    endif
 end if
end function build_tree_for_range

integer(pInt) function select_on_coordinate_value(v,ind,c,alpha,li,ui) result(res)
 ! Move elements of ind around between li and ui, so that all points
 ! <= alpha (in coordinate c) come first, and then
 ! all points > alpha come second. 

 !
 ! Algorithm (matt kennel). 
 !
 ! Consider the list as having three parts: on the left,
 ! the points known to be <= alpha.  On the right, the points
 ! known to be > alpha, and in the middle, the currently unknown
 ! points.   The algorithm is to scan the unknown points, starting
 ! from the left, and swapping them so that they are added to
 ! the left stack or the right stack, as appropriate.
 ! 
 ! The algorithm finishes when the unknown stack is empty. 
 !
 ! .. Scalar Arguments ..
 implicit none
 integer(pInt), intent (In) :: c, li, ui
 real(pReal), intent(in) :: alpha
 ! ..
 real(pReal) :: v(1:,1:)
 integer(pInt) :: ind(1:)
 integer(pInt) :: tmp  
 ! ..
 integer(pInt) :: lb, rb
 !
 ! The points known to be <= alpha are in
 ! [l,lb-1]
 !
 ! The points known to be > alpha are in
 ! [rb+1,u].  
 !
 ! Therefore we add new points into lb or
 ! rb as appropriate.  When lb=rb
 ! we are done.  We return the location of the last point <= alpha.
 !
 ! 
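 ! Tiny worked example (comment only): with values v(c,ind(li:ui)) = (5,1,4,2)
 ! and alpha = 3, the scan swaps 5 with the trailing 2 to give (2,1,4,5),
 ! then accepts 2 and 1 on the left; it stops with lb = rb at the value 4,
 ! which is > alpha, so res points at the second element (the last one <= alpha).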
 lb = li; rb = ui

 do while (lb < rb)
    if ( v(c,ind(lb)) <= alpha ) then
       ! it is good where it is.
       lb = lb+1_pInt
    else
       ! swap it with rb.
       tmp = ind(lb); ind(lb) = ind(rb); ind(rb) = tmp
       rb = rb-1_pInt
    endif
 end do
 
 ! now lb .eq. rb 
 if (v(c,ind(lb)) <= alpha) then
    res = lb
 else
    res = lb-1_pInt
 endif
 
end function select_on_coordinate_value

subroutine select_on_coordinate(v,ind,c,k,li,ui)
 ! Move elements of ind around between li and ui, so that the kth
 ! element is >= those below and <= those above, in coordinate c.
 ! .. Scalar Arguments ..
 implicit none
 integer(pInt), intent (In) :: c, k, li, ui
 ! ..
 integer(pInt) :: i, l, m, s, t, u
 ! ..
 real(pReal) :: v(:,:)
 integer(pInt) :: ind(:)
 ! ..
 l = li
 u = ui
 do while (l<u)
    t = ind(l)
    m = l
    do i = l + 1_pInt, u
       if (v(c,ind(i))<v(c,t)) then
          m = m + 1_pInt
          s = ind(m)
          ind(m) = ind(i)
          ind(i) = s
       end if
    end do
    s = ind(l)
    ind(l) = ind(m)
    ind(m) = s
    if (m<=k) l = m + 1_pInt
    if (m>=k) u = m - 1_pInt
 end do
end subroutine select_on_coordinate

subroutine spread_in_coordinate(tp,c,l,u,interv) 
 ! Compute the spread (min and max) of coordinate 'c' over the points
 ! indexed by l..u,
 ! returning the lower and upper bounds in 'interv'.
 ! ..
 ! .. Structure Arguments ..
 implicit none
 type (kdtree2), pointer :: tp
 type(interval), intent(out) :: interv
 ! ..
 ! .. Scalar Arguments ..
 integer(pInt), intent (In) :: c, l, u
 ! ..
 ! .. Local Scalars ..
 real(pReal) :: last, lmax, lmin, t, smin,smax
 integer(pInt) :: i, ulocal
 ! ..
 ! .. Local Arrays ..
 real(pReal), pointer :: v(:,:)
 integer(pInt), pointer :: ind(:)
 ! ..
 v => tp%the_data(1:,1:)
 ind => tp%ind(1:)
 smin = v(c,ind(l))
 smax = smin

 ulocal = u
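 ! Scan the remaining points two at a time: one comparison orders each pair,
 ! then only the smaller is tested against the running minimum and only the
 ! larger against the running maximum (about 3 comparisons per 2 points
 ! instead of 4).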

 do i = l + 2_pInt, ulocal, 2_pInt
    lmin = v(c,ind(i-1_pInt))
    lmax = v(c,ind(i))
    if (lmin>lmax) then
       t = lmin
       lmin = lmax
       lmax = t
    end if
    if (smin>lmin) smin = lmin
    if (smax<lmax) smax = lmax
 end do
 if (i==ulocal+1) then
    last = v(c,ind(ulocal))
    if (smin>last) smin = last
    if (smax<last) smax = last
 end if

 interv%lower = smin
 interv%upper = smax

end subroutine spread_in_coordinate


subroutine kdtree2_destroy(tp)
 ! Deallocates all memory for the tree, except input data matrix
 ! .. Structure Arguments ..
 implicit none
 type (kdtree2), pointer :: tp
 ! ..
 call destroy_node(tp%root)

 deallocate (tp%ind)
 nullify (tp%ind)

 if (tp%rearrange) then
    deallocate(tp%rearranged_data)
    nullify(tp%rearranged_data)
 endif

 deallocate(tp)
 return

 contains
   recursive subroutine destroy_node(np)
     ! .. Structure Arguments ..
     implicit none
     type (tree_node), pointer :: np
     ! ..
     ! .. Intrinsic Functions ..
     intrinsic ASSOCIATED
     ! ..
     if (associated(np%left)) then
        call destroy_node(np%left)
        nullify (np%left)
     end if
     if (associated(np%right)) then
        call destroy_node(np%right)
        nullify (np%right)
     end if
     if (associated(np%box)) deallocate(np%box)
     deallocate(np)
     return
     
    end subroutine destroy_node

end subroutine kdtree2_destroy

subroutine kdtree2_n_nearest(tp,qv,nn,results)
 ! Find the 'nn' vectors in the tree nearest to 'qv' in the Euclidean norm,
 ! returning their indexes and squared distances in 'results(:)',
 ! which must already be allocated by the caller.
 implicit none
 type (kdtree2), pointer      :: tp
 real(pReal), target, intent (In)    :: qv(:)
 integer(pInt), intent (In)         :: nn
 type(kdtree2_result), target :: results(:)


 sr%ballsize = huge(1.0_pReal)
 sr%qv => qv
 sr%nn = nn
 sr%nfound = 0_pInt
 sr%centeridx = -1_pInt
 sr%correltime = 0_pInt
 sr%overflow = .false. 

 sr%results => results

 sr%nalloc = nn   ! will be checked

 sr%ind => tp%ind
 sr%rearrange = tp%rearrange
 if (tp%rearrange) then
    sr%myData => tp%rearranged_data
 else
    sr%myData => tp%the_data
 endif
 sr%dimen = tp%dimen

 call validate_query_storage(nn) 
 sr%pq = pq_create(results)

 call search(tp%root)

 if (tp%sort) then
    call kdtree2_sort_results(nn, results)
 endif
!    deallocate(sr%pqp)
  return
end subroutine kdtree2_n_nearest

subroutine kdtree2_n_nearest_around_point(tp,idxin,correltime,nn,results)
 ! Find the 'nn' vectors in the tree nearest to point 'idxin',
 ! with correlation window 'correltime', returning results in
 ! results(:), which must be pre-allocated upon entry.
 implicit none
 type (kdtree2), pointer        :: tp
 integer(pInt), intent (In)           :: idxin, correltime, nn
 type(kdtree2_result), target   :: results(:)

 allocate (sr%qv(tp%dimen))
 sr%qv = tp%the_data(:,idxin) ! copy the vector
 sr%ballsize = huge(1.0_pReal)       ! the largest real(pReal) number
 sr%centeridx = idxin
 sr%correltime = correltime

 sr%nn = nn
 sr%nfound = 0_pInt

 sr%dimen = tp%dimen
 sr%nalloc = nn

 sr%results => results

 sr%ind => tp%ind
 sr%rearrange = tp%rearrange

 if (sr%rearrange) then
    sr%myData => tp%rearranged_data
 else
    sr%myData => tp%the_data
 endif

 call validate_query_storage(nn)
 sr%pq = pq_create(results)

 call search(tp%root)

 if (tp%sort) then
    call kdtree2_sort_results(nn, results)
 endif
 deallocate (sr%qv)
 return
end subroutine kdtree2_n_nearest_around_point

subroutine kdtree2_r_nearest(tp,qv,r2,nfound,nalloc,results) 
 ! find all neighbors of 'qv' within SQUARED
 ! Euclidean distance 'r2'.   Upon ENTRY, nalloc must be the
 ! size of memory allocated for results(1:nalloc).  Upon
 ! EXIT, nfound is the number actually found within the ball. 
 !
 !  Note that if nfound .gt. nalloc then more neighbors were found
 !  than there was storage for, and the returned list is then
 !  incomplete (it is not guaranteed to hold the closest points).
 !
 ! Results are NOT sorted unless tree was created with sort option.
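 !
 ! Illustrative call (comment only; 'tree', 'query', 'ballbuf', 'nf' are
 ! example names, assuming a tree built as sketched near the top of this module):
 !
 !    type(kdtree2_result) :: ballbuf(100)
 !    integer(pInt)        :: nf
 !    call kdtree2_r_nearest(tp=tree, qv=query, r2=0.25_pReal, nfound=nf, &
 !                           nalloc=100_pInt, results=ballbuf)
 !    if (nf > 100_pInt) then
 !       ! storage overflowed; the returned list is incomplete
 !    endif
 !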
 implicit none
 type (kdtree2), pointer      :: tp
 real(pReal), target, intent (In)    :: qv(:)
 real(pReal), intent(in)             :: r2
 integer(pInt), intent(out)         :: nfound
 integer(pInt), intent (In)         :: nalloc
 type(kdtree2_result), target :: results(:)

 !
 sr%qv => qv
 sr%ballsize = r2
 sr%nn = 0_pInt      ! flag for fixed ball search
 sr%nfound = 0_pInt
 sr%centeridx = -1_pInt
 sr%correltime = 0_pInt

 sr%results => results

 call validate_query_storage(nalloc)
 sr%nalloc = nalloc
 sr%overflow = .false. 
 sr%ind => tp%ind
 sr%rearrange= tp%rearrange

 if (tp%rearrange) then
    sr%myData => tp%rearranged_data
 else
    sr%myData => tp%the_data
 endif
 sr%dimen = tp%dimen

 !
 !sr%dsl = Huge(sr%dsl)    ! set to huge positive values
 !sr%il = -1               ! set to invalid indexes
 !

 call search(tp%root)
 nfound = sr%nfound
 if (tp%sort) then
    call kdtree2_sort_results(nfound, results)
 endif

 if (sr%overflow) then
    write (*,*) 'KD_TREE_TRANS: warning! return from kdtree2_r_nearest found more neighbors'
    write (*,*) 'KD_TREE_TRANS: than storage was provided for.  Answer is NOT smallest ball'
    write (*,*) 'KD_TREE_TRANS: with that number of neighbors!  I.e. it is wrong.'
 endif

 return
end subroutine kdtree2_r_nearest

subroutine kdtree2_r_nearest_around_point(tp,idxin,correltime,r2,nfound,nalloc,results)
 !
 ! Like kdtree2_r_nearest, but around a point 'idxin' already existing
 ! in the data set. 
 ! 
 ! Results are NOT sorted unless tree was created with sort option.
 !
 implicit none
 type (kdtree2), pointer      :: tp
 integer(pInt), intent (In)         :: idxin, correltime, nalloc
 real(pReal), intent(in)             :: r2
 integer(pInt), intent(out)         :: nfound
 type(kdtree2_result), target :: results(:)
 ! ..
 ! .. Intrinsic Functions ..
 intrinsic HUGE
 ! ..
 allocate (sr%qv(tp%dimen))
 sr%qv = tp%the_data(:,idxin) ! copy the vector
 sr%ballsize = r2
 sr%nn = 0_pInt    ! flag for fixed r search
 sr%nfound = 0_pInt
 sr%centeridx = idxin
 sr%correltime = correltime

 sr%results => results

 sr%nalloc = nalloc
 sr%overflow = .false.

 call validate_query_storage(nalloc)

 !    sr%dsl = HUGE(sr%dsl)    ! set to huge positive values
 !    sr%il = -1               ! set to invalid indexes

 sr%ind => tp%ind
 sr%rearrange = tp%rearrange

 if (tp%rearrange) then
    sr%myData => tp%rearranged_data
 else
    sr%myData => tp%the_data
 endif
 sr%rearrange = tp%rearrange
 sr%dimen = tp%dimen

 !
 !sr%dsl = Huge(sr%dsl)    ! set to huge positive values
 !sr%il = -1               ! set to invalid indexes
 !

 call search(tp%root)
 nfound = sr%nfound
 if (tp%sort) then
    call kdtree2_sort_results(nfound,results)
 endif

 if (sr%overflow) then
    write (*,*) 'KD_TREE_TRANS: warning! return from kdtree2_r_nearest found more neighbors'
    write (*,*) 'KD_TREE_TRANS: than storage was provided for.  Answer is NOT smallest ball'
    write (*,*) 'KD_TREE_TRANS: with that number of neighbors!  I.e. it is wrong.'
 endif

 deallocate (sr%qv)
 return
end subroutine kdtree2_r_nearest_around_point

function kdtree2_r_count(tp,qv,r2) result(nfound)
 ! Count the number of neighbors within square distance 'r2'. 
 implicit none
 type (kdtree2), pointer   :: tp
 real(pReal), target, intent (In) :: qv(:)
 real(pReal), intent(in)          :: r2
 integer(pInt)                   :: nfound
 ! ..
 ! .. Intrinsic Functions ..
 intrinsic HUGE
 ! ..
 sr%qv => qv
 sr%ballsize = r2

 sr%nn = 0_pInt       ! flag for fixed r search
 sr%nfound = 0_pInt
 sr%centeridx = -1_pInt
 sr%correltime = 0_pInt
 
 nullify(sr%results) ! for some reason, FTN 95 chokes on '=> null()'

 sr%nalloc = 0_pInt            ! we do not allocate any storage but that's OK
                          ! for counting.
 sr%ind => tp%ind
 sr%rearrange = tp%rearrange
 if (tp%rearrange) then
    sr%myData => tp%rearranged_data
 else
    sr%myData => tp%the_data
 endif
 sr%dimen = tp%dimen

 !
 !sr%dsl = Huge(sr%dsl)    ! set to huge positive values
 !sr%il = -1               ! set to invalid indexes
 !
 sr%overflow = .false.

 call search(tp%root)

 nfound = sr%nfound

 return
end function kdtree2_r_count

function kdtree2_r_count_around_point(tp,idxin,correltime,r2) result(nfound)
 ! Count the number of neighbors within square distance 'r2' around
 ! point 'idxin' with decorrelation time 'correltime'.
 !
 implicit none
 type (kdtree2), pointer       :: tp
 
 integer(pInt), intent (In)    :: correltime, idxin
 real(pReal),   intent(in)     :: r2
 
 integer(pInt)                 :: nfound
 ! ..
 ! ..
 ! .. Intrinsic Functions ..
 intrinsic HUGE
 ! ..
 allocate (sr%qv(tp%dimen))
 sr%qv = tp%the_data(:,idxin)
 sr%ballsize = r2

 sr%nn = 0_pInt       ! flag for fixed r search
 sr%nfound = 0_pInt
 sr%centeridx = idxin
 sr%correltime = correltime
 nullify(sr%results)

 sr%nalloc = 0_pInt       ! we do not allocate any storage but that's OK
                          ! for counting.

 sr%ind => tp%ind
 sr%rearrange = tp%rearrange

 if (sr%rearrange) then
    sr%myData => tp%rearranged_data
 else
    sr%myData => tp%the_data
 endif
 sr%dimen = tp%dimen

 !
 !sr%dsl = Huge(sr%dsl)    ! set to huge positive values
 !sr%il = -1               ! set to invalid indexes
 !
 sr%overflow = .false.

 call search(tp%root)

 nfound = sr%nfound

 return
end function kdtree2_r_count_around_point


subroutine validate_query_storage(n)
 !
 ! make sure we have enough storage for n
 !
 use IO, only: IO_error

 implicit none
 integer(pInt), intent(in) :: n

 if (int(size(sr%results,1),pInt) < n) then
    call IO_error (460_pInt,el=n,ip=int(size(sr%results,1),pInt),ext_msg='KD_TREE_TRANS: not enough storage for results(1:n)')
 endif

 return
end subroutine validate_query_storage

function square_distance(d, iv,qv) result (res)
 ! squared distance between iv(1:d) and qv(1:d) 
 ! .. Function Return Value ..
 ! re-implemented to improve vectorization.
 implicit none
 real(pReal) :: res
 ! .. Scalar Arguments ..
 integer(pInt) :: d
 ! .. Array Arguments ..
 real(pReal) :: iv(:),qv(:)
 ! ..
 ! ..
 res = sum( (iv(1:d)-qv(1:d))**2 )
end function square_distance
  
recursive subroutine search(node)
 !
 ! This is the innermost core routine of the kd-tree search.  Along
 ! with "process_terminal_node", it is the performance bottleneck. 
 !
 ! This version uses a logically complete secondary "box in bounds" check:
 ! whether the search ball intersects the bounding box of the farther child
 ! before descending into it.
 !
 implicit none
 type (Tree_node), pointer      :: node
 ! ..
 type(tree_node),pointer        :: ncloser, nfarther
 !
 integer(pInt)                  :: cut_dim, i
 ! ..
 real(pReal)                    :: qval, dis
 real(pReal)                    :: ballsize
 real(pReal), pointer           :: qv(:)
 type(interval), pointer        :: box(:) 

 if ((associated(node%left) .and. associated(node%right)) .eqv. .false.) then
    ! we are on a terminal node
    if (sr%nn .eq. 0) then
       call process_terminal_node_fixedball(node)
    else
       call process_terminal_node(node)
    endif
 else
    ! we are not on a terminal node
    qv => sr%qv(1:)
    cut_dim = node%cut_dim
    qval = qv(cut_dim)

    if (qval < node%cut_val) then
       ncloser => node%left
       nfarther => node%right
       dis = (node%cut_val_right - qval)**2
!          extra = node%cut_val - qval
    else
       ncloser => node%right
       nfarther => node%left
       dis = (node%cut_val_left - qval)**2
!          extra = qval- node%cut_val_left
    endif

    if (associated(ncloser)) call search(ncloser)

    ! we may need to search the second node. 
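     ! 'dis' is the squared distance from the query to the cutting plane
     ! that bounds the farther child; only if this is within the current
     ! ball can the farther child possibly contain a closer point.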
    if (associated(nfarther)) then
       ballsize = sr%ballsize
!          dis=extra**2
       if (dis <= ballsize) then
          !
          ! we do this separately as going on the first cut dimen is often
          ! a good idea.
          ! note that if extra**2 < sr%ballsize, then the next
          ! check will also be false. 
          !
          box => node%box(1:)
          do i=1_pInt,sr%dimen
             if (i .ne. cut_dim) then
                dis = dis + dis2_from_bnd(qv(i),box(i)%lower,box(i)%upper)
                if (dis > ballsize) then
                   return
                endif
             endif
          end do
          
          !
          ! if we are still here then we need to search more.
          !
          call search(nfarther)
       endif
    endif
 end if
end subroutine search


real(pReal) function dis2_from_bnd(x,amin,amax) result (res)

 implicit none
 real(pReal), intent(in) :: x, amin,amax

 if (x > amax) then
     res = (x-amax)**2
    return
 else
    if (x < amin) then
        res = (amin-x)**2
       return
    else
       res = 0.0_pReal
       return
    endif
 endif
 return
end function dis2_from_bnd

logical function box_in_search_range(node, sr) result(res)
 !
 ! Return .true. if the squared distance from 'qv' to the CLOSEST point of
 ! node's bounding box is within the current search ball (sr%ballsize).
 ! Coordinates inside the box contribute nothing to the distance.
 !
 implicit none
 type (tree_node), pointer          :: node
 type (tree_search_record), pointer :: sr

 integer(pInt) :: dimen, i
 real(pReal)   :: dis, ballsize
 real(pReal)   :: l, u

 dimen = sr%dimen
 ballsize = sr%ballsize
 dis = 0.0_pReal
 res = .true.
 do i=1_pInt,dimen
    l = node%box(i)%lower
    u = node%box(i)%upper
    dis = dis + (dis2_from_bnd(sr%qv(i),l,u))
    if (dis > ballsize) then
       res = .false.
       return
    endif
 end do
 res = .true.
 return
end function box_in_search_range


subroutine process_terminal_node(node)
 !
 ! Look for actual near neighbors in 'node', and update
 ! the search results on the sr data structure.
 !
 implicit none
 type (tree_node), pointer  :: node
 !
 real(pReal), pointer       :: qv(:)
 integer(pInt), pointer     :: ind(:)
 real(pReal), pointer       :: myData(:,:)
 !
 integer(pInt)              :: dimen, i, indexofi, k, centeridx, correltime
 real(pReal)                :: ballsize, sd, newpri
 logical                    :: rearrange
 type(pq), pointer          :: pqp 
 !
 ! copy values from sr to local variables
 !
 !
 ! Notice, making local pointers with an EXPLICIT lower bound
 ! seems to generate faster code.
 ! why?  I don't know.
 qv => sr%qv(1:) 
 pqp => sr%pq
 dimen = sr%dimen
 ballsize = sr%ballsize 
 rearrange = sr%rearrange
 ind => sr%ind(1:)
 myData => sr%myData(1:,1:)     
 centeridx = sr%centeridx
 correltime = sr%correltime

 !    doing_correl = (centeridx >= 0)  ! Do we have a decorrelation window? 
 !    include_point = .true.    ! by default include all points
 ! search through terminal bucket.
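 ! The squared distance to each candidate is accumulated component by
 ! component, and the candidate is rejected as soon as the partial sum
 ! exceeds the current ball size, so the full sum is rarely completed
 ! for distant points.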

 mainloop: do i = node%l, node%u
    if (rearrange) then
       sd = 0.0_pReal
       do k = 1_pInt,dimen
          sd = sd + (myData(k,i) - qv(k))**2
          if (sd>ballsize) cycle mainloop
       end do
       indexofi = ind(i)  ! only read it if we have not broken out
    else
       indexofi = ind(i)
       sd = 0.0_pReal
       do k = 1_pInt,dimen
          sd = sd + (myData(k,indexofi) - qv(k))**2
          if (sd>ballsize) cycle mainloop
       end do
    endif

    if (centeridx > 0_pInt) then ! doing correlation interval?
       if (abs(indexofi-centeridx) < correltime) cycle mainloop
    endif


    ! 
    ! two choices for any point.  The list so far is either undersized,
    ! or it is not.
    !
    ! If it is undersized, then add the point and its distance
    ! unconditionally.  If the point added fills up the working
    ! list then set the sr%ballsize, maximum distance bound (largest distance on
    ! list) to be that distance, instead of the initialized +infinity. 
    !
    ! If the running list is full size, then compute the
    ! distance but break out immediately if it is larger
    ! than sr%ballsize, "best squared distance" (of the largest element),
    ! as it cannot be a good neighbor. 
    !
    ! Once computed, compare to best_square distance.
    ! if it is smaller, then delete the previous largest
    ! element and add the new one. 

    if (sr%nfound .lt. sr%nn) then
       !
       ! add this point unconditionally to fill list.
       !
       sr%nfound = sr%nfound +1_pInt 
       newpri = pq_insert(pqp,sd,indexofi)
       if (sr%nfound .eq. sr%nn) ballsize = newpri
       ! we have just filled the working list.
       ! put the best square distance to the maximum value
       ! on the list, which is extractable from the PQ. 

    else
       !
       ! now, if we get here,
       ! we know that the current node has a squared
       ! distance smaller than the largest one on the list, and
       ! belongs on the list. 
       ! Hence we replace that with the current one.
       !
       ballsize = pq_replace_max(pqp,sd,indexofi)
    endif
 end do mainloop
 !
 ! Reset sr variables which may have changed during loop
 !
 sr%ballsize = ballsize 

end subroutine process_terminal_node

subroutine process_terminal_node_fixedball(node)
 !
 ! Look for actual near neighbors in 'node', and update
 ! the search results on the sr data structure, i.e.
 ! save all within a fixed ball.
 !
 implicit none
 type (tree_node), pointer   :: node
 !
 real(pReal), pointer        :: qv(:)
 integer(pInt), pointer      :: ind(:)
 real(pReal), pointer        :: myData(:,:)
 !
 integer(pInt)               :: nfound
 integer(pInt)               :: dimen, i, indexofi, k
 integer(pInt)               :: centeridx, correltime, nn
 real(pReal)                 :: ballsize, sd
 logical                     :: rearrange

 !
 ! copy values from sr to local variables
 !
 qv => sr%qv(1:)
 dimen = sr%dimen
 ballsize = sr%ballsize 
 rearrange = sr%rearrange
 ind => sr%ind(1:)
 myData => sr%myData(1:,1:)
 centeridx = sr%centeridx
 correltime = sr%correltime
 nn = sr%nn ! number to search for
 nfound = sr%nfound

 ! search through terminal bucket.
 mainloop: do i = node%l, node%u

    ! 
    ! two choices for any point.  The list so far is either undersized,
    ! or it is not.
    !
    ! If it is undersized, then add the point and its distance
    ! unconditionally.  If the point added fills up the working
    ! list then set the sr%ballsize, maximum distance bound (largest distance on
    ! list) to be that distance, instead of the initialized +infinity. 
    !
    ! If the running list is full size, then compute the
    ! distance but break out immediately if it is larger
    ! than sr%ballsize, "best squared distance" (of the largest element),
    ! as it cannot be a good neighbor. 
    !
    ! Once computed, compare to best_square distance.
    ! if it is smaller, then delete the previous largest
    ! element and add the new one. 

    ! which index to the point do we use? 

    if (rearrange) then
       sd = 0.0_pReal
       do k = 1_pInt,dimen
          sd = sd + (myData(k,i) - qv(k))**2
          if (sd>ballsize) cycle mainloop
       end do
       indexofi = ind(i)  ! only read it if we have not broken out
    else
       indexofi = ind(i)
       sd = 0.0_pReal
       do k = 1_pInt,dimen
          sd = sd + (myData(k,indexofi) - qv(k))**2
          if (sd>ballsize) cycle mainloop
       end do
    endif

    if (centeridx > 0_pInt) then ! doing correlation interval?
       if (abs(indexofi-centeridx)<correltime) cycle mainloop
    endif

    nfound = nfound+1_pInt
    if (nfound .gt. sr%nalloc) then
       ! oh nuts, we have to add another one to the tree but
       ! there isn't enough room.
       sr%overflow = .true.
    else
       sr%results(nfound)%dis = sd
       sr%results(nfound)%idx = indexofi
    endif
 end do mainloop
 !
 ! Reset sr variables which may have changed during loop
 !
 sr%nfound = nfound
end subroutine process_terminal_node_fixedball

subroutine kdtree2_n_nearest_brute_force(tp,qv,nn,results) 
 ! find the 'n' nearest neighbors to 'qv' by exhaustive search.
 ! only use this subroutine for testing, as it is SLOW!  The
 ! whole point of a k-d tree is to avoid doing what this subroutine
 ! does.
 implicit none
 type (kdtree2), pointer   :: tp
 real(pReal), intent(in)   :: qv(:)
 integer(pInt), intent(in) :: nn
 type(kdtree2_result)      :: results(:) 

 integer(pInt)             :: i, j, k
 real(pReal), allocatable  :: all_distances(:)
 ! ..
 allocate (all_distances(tp%n))
 do i = 1_pInt, tp%n
    all_distances(i) = square_distance(tp%dimen,qv,tp%the_data(:,i))
 end do
 ! now find 'n' smallest distances
 do i = 1_pInt, nn
    results(i)%dis =  huge(1.0_pReal)
    results(i)%idx = -1_pInt
 end do
 do i = 1_pInt, tp%n
    if (all_distances(i)<results(nn)%dis) then
       ! insert it somewhere on the list
       do j = 1_pInt, nn
          if (all_distances(i)<results(j)%dis) exit
       end do
       ! now we know 'j'
       do k = nn - 1_pInt, j, -1_pInt
          results(k+1_pInt) = results(k)
       end do
       results(j)%dis = all_distances(i)
       results(j)%idx = i
    end if
 end do
 deallocate (all_distances)
end subroutine kdtree2_n_nearest_brute_force


subroutine kdtree2_r_nearest_brute_force(tp,qv,r2,nfound,results) 
 ! find the nearest neighbors to 'qv' with distance**2 <= r2 by exhaustive search.
 ! only use this subroutine for testing, as it is SLOW!  The
 ! whole point of a k-d tree is to avoid doing what this subroutine
 ! does.
 implicit none
 type (kdtree2), pointer    :: tp
 real(pReal), intent(in)    :: qv(:)
 real(pReal), intent(in)    :: r2
 integer(pInt), intent(out) :: nfound
 type(kdtree2_result)       :: results(:) 

 integer(pInt) :: i
 integer :: nalloc
 real(pReal), allocatable :: all_distances(:)
 ! ..
 allocate (all_distances(tp%n))
 do i = 1_pInt, tp%n
    all_distances(i) = square_distance(tp%dimen,qv,tp%the_data(:,i))
 end do
 
 nfound = 0_pInt
 nalloc = size(results,1)

 do i = 1_pInt, tp%n
    if (all_distances(i)< r2) then
       ! insert it somewhere on the list
       if (nfound .lt. nalloc) then
          nfound = nfound+1_pInt
          results(nfound)%dis = all_distances(i)
          results(nfound)%idx = i
       endif
    end if
 enddo
 deallocate (all_distances)

 call kdtree2_sort_results(nfound,results)


end subroutine kdtree2_r_nearest_brute_force

subroutine kdtree2_sort_results(nfound,results)
 !  Use after search to sort results(1:nfound) in order of increasing 
 !  distance.
 implicit none
 integer(pInt), intent(in)          :: nfound
 type(kdtree2_result), target :: results(:) 
 !
 !

 !THIS IS BUGGY WITH INTEL FORTRAN
 !    If (nfound .Gt. 1) Call heapsort(results(1:nfound)%dis,results(1:nfound)%ind,nfound)
 !
 if (nfound .gt. 1_pInt) call heapsort_struct(results,nfound)

 return
end subroutine kdtree2_sort_results

subroutine heapsort(a,ind,n)
 !
 ! Sort a(1:n) in ascending order, permuting ind(1:n) similarly.
 ! 
 ! If ind(k) = k upon input, then it will give a sort index upon output.
 !
 implicit none
 integer(pInt),intent(in)       :: n
 real(pReal), intent(inout)     :: a(:) 
 integer(pInt), intent(inout)   :: ind(:)

 !
 !
 real(pReal)        :: value   ! temporary for a value from a()
 integer(pInt)     :: ivalue  ! temporary for a value from ind()

 integer(pInt)     :: i,j
 integer(pInt)     :: ileft,iright

 ileft=n/2_pInt+1_pInt
 iright=n

 !    do i=1,n
 !       ind(i)=i
 ! Generate initial idum array
 !    end do

 if(n.eq.1_pInt) return                  

 do 
    if(ileft > 1_pInt)then
       ileft=ileft-1_pInt
       value=a(ileft); ivalue=ind(ileft)
    else
       value=a(iright); ivalue=ind(iright)
       a(iright)=a(1); ind(iright)=ind(1)
       iright=iright-1_pInt
       if (iright == 1_pInt) then
          a(1)=value;ind(1)=ivalue
          return
       endif
    endif
    i=ileft
    j=2_pInt*ileft
    do while (j <= iright) 
       if(j < iright) then
          if(a(j) < a(j+1_pInt)) j=j+1_pInt
       endif
       if(value < a(j)) then
          a(i)=a(j); ind(i)=ind(j)
          i=j
          j=j+j
       else
          j=iright+1_pInt
       endif
    end do
    a(i)=value; ind(i)=ivalue
 end do
end subroutine heapsort

subroutine heapsort_struct(a,n)
 !
 ! Sort a(1:n) in ascending order
 ! 
 !
 implicit none
 integer(pInt),intent(in)           :: n
 type(kdtree2_result),intent(inout) :: a(:)

 !
 !
 type(kdtree2_result) :: value ! temporary value

 integer(pInt)     :: i,j
 integer(pInt)     :: ileft,iright

 ileft=n/2_pInt+1_pInt
 iright=n

 !    do i=1,n
 !       ind(i)=i
 ! Generate initial idum array
 !    end do

 if(n.eq.1_pInt) return                  

 do 
    if(ileft > 1_pInt)then
       ileft=ileft-1_pInt
       value=a(ileft)
    else
       value=a(iright)
       a(iright)=a(1)
       iright=iright-1_pInt
       if (iright == 1_pInt) then
          a(1) = value
          return
       endif
    endif
    i=ileft
    j=2_pInt*ileft
    do while (j <= iright) 
       if(j < iright) then
          if(a(j)%dis < a(j+1_pInt)%dis) j=j+1_pInt
       endif
       if(value%dis < a(j)%dis) then
          a(i)=a(j); 
          i=j
          j=j+j
       else
          j=iright+1_pInt
       endif
    end do
    a(i)=value
 end do
end subroutine heapsort_struct

end module kdtree2_module