Skip to content

remove datafree tree #182

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 0 additions & 23 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,26 +149,3 @@ idxs = inrange(balltree, point, r, true)

neighborscount = inrangecount(balltree, point, r, true) # counts points without allocating index arrays
```

## Using On-Disk Data Sets

By default, trees store a copy of the `data` provided during construction. For data sets larger than available memory, `DataFreeTree` can be used to strip a tree of its data field and re-link it later.

Example with a large on-disk data set:

```julia
using Mmap
ndim = 2
ndata = 10_000_000_000
data = Mmap.mmap(datafilename, Matrix{Float32}, (ndim, ndata))
data[:] = rand(Float32, ndim, ndata) # create example data
dftree = DataFreeTree(KDTree, data)
```

`dftree` stores the indexing data structures. To perform look-ups, re-link the tree to the data:

```julia
tree = injectdata(dftree, data) # yields a KDTree
knn(tree, data[:,1], 3) # perform operations as usual
```

52 changes: 0 additions & 52 deletions benchmarks/benchmarkdatafreetree.jl

This file was deleted.

1 change: 0 additions & 1 deletion src/NearestNeighbors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ get_T(::T) where {T} = Float64

include("evaluation.jl")
include("tree_data.jl")
include("datafreetree.jl")
include("knn.jl")
include("inrange.jl")
include("hyperspheres.jl")
Expand Down
41 changes: 11 additions & 30 deletions src/ball_tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,10 @@ end
Creates a `BallTree` from the data using the given `metric` and `leafsize`.
"""
function BallTree(data::AbstractVector{V},
metric::M = Euclidean();
metric::Metric = Euclidean();
leafsize::Int = 10,
reorder::Bool = true,
storedata::Bool = true,
reorderbuffer::Vector{V} = Vector{V}()) where {V <: AbstractArray, M <: Metric}
reorder = !isempty(reorderbuffer) || (storedata ? reorder : false)

reorder::Bool = true) where {V <: AbstractArray}
tree_data = TreeData(data, leafsize)
n_d = length(V)
n_p = length(data)

array_buffs = ArrayBuffers(Val{length(V)}, get_T(eltype(V)))
Expand All @@ -46,17 +41,12 @@ function BallTree(data::AbstractVector{V},
# Bottom-up creation of hyper spheres, so spheres are needed even for leaves
hyper_spheres = Vector{HyperSphere{length(V),eltype(V)}}(undef, tree_data.n_internal_nodes + tree_data.n_leafs)

indices_reordered = Vector{Int}()
data_reordered = Vector{V}()

if reorder
indices_reordered = Vector{Int}(undef, n_p)
if isempty(reorderbuffer)
data_reordered = Vector{V}(undef, n_p)
else
data_reordered = reorderbuffer
end
else
# Dummy variables
indices_reordered = Vector{Int}()
data_reordered = Vector{V}()
resize!(indices_reordered, n_p)
resize!(data_reordered, n_p)
end

if metric isa Distances.UnionMetrics
Expand All @@ -78,25 +68,16 @@ function BallTree(data::AbstractVector{V},
indices = indices_reordered
end

BallTree(storedata ? data : similar(data, 0), hyper_spheres, indices, metric, tree_data, reorder)
BallTree(data, hyper_spheres, indices, metric, tree_data, reorder)
end

function BallTree(data::AbstractVecOrMat{T},
metric::M = Euclidean();
metric::Metric = Euclidean();
leafsize::Int = 10,
storedata::Bool = true,
reorder::Bool = true,
reorderbuffer::Matrix{T} = Matrix{T}(undef, 0, 0)) where {T <: AbstractFloat, M <: Metric}
reorder::Bool = true) where {T <: AbstractFloat}
dim = size(data, 1)
npoints = size(data, 2)
points = copy_svec(T, data, Val(dim))
if isempty(reorderbuffer)
reorderbuffer_points = Vector{SVector{dim,T}}()
else
reorderbuffer_points = copy_svec(T, reorderbuffer, Val(dim))
end
BallTree(points, metric, leafsize = leafsize, storedata = storedata, reorder = reorder,
reorderbuffer = reorderbuffer_points)
BallTree(points, metric; leafsize, reorder)
end

# Recursive function to build the tree.
Expand Down
8 changes: 4 additions & 4 deletions src/brute_tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ end
Creates a `BruteTree` from the data using the given `metric`.
"""
function BruteTree(data::AbstractVector{V}, metric::PreMetric = Euclidean();
reorder::Bool=false, leafsize::Int=0, storedata::Bool=true) where {V <: AbstractVector}
reorder::Bool=false, leafsize::Int=0) where {V <: AbstractVector}
if metric isa Distances.UnionMetrics
p = parameters(metric)
if p !== nothing && length(p) != length(V)
Expand All @@ -19,14 +19,14 @@ function BruteTree(data::AbstractVector{V}, metric::PreMetric = Euclidean();
end
end

BruteTree(storedata ? data : Vector{V}(), metric, reorder)
BruteTree(data, metric, reorder)
end

function BruteTree(data::AbstractVecOrMat{T}, metric::PreMetric = Euclidean();
reorder::Bool=false, leafsize::Int=0, storedata::Bool=true) where {T}
reorder::Bool=false, leafsize::Int=0) where {T}
dim = size(data, 1)
BruteTree(copy_svec(T, data, Val(dim)),
metric, reorder = reorder, leafsize = leafsize, storedata = storedata)
metric; reorder, leafsize)
end

function _knn(tree::BruteTree{V},
Expand Down
68 changes: 0 additions & 68 deletions src/datafreetree.jl

This file was deleted.

39 changes: 11 additions & 28 deletions src/kd_tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,22 @@ Creates a `KDTree` from the data using the given `metric` and `leafsize`.
The `metric` must be a `MinkowskiMetric`.
"""
function KDTree(data::AbstractVector{V},
metric::M = Euclidean();
metric::MinkowskiMetric = Euclidean();
leafsize::Int = 10,
storedata::Bool = true,
reorder::Bool = true,
reorderbuffer::Vector{V} = Vector{V}()) where {V <: AbstractArray, M <: MinkowskiMetric}
reorder = !isempty(reorderbuffer) || (storedata ? reorder : false)

reorder::Bool = true) where {V <: AbstractArray}
tree_data = TreeData(data, leafsize)
n_p = length(data)

indices = collect(1:n_p)
split_vals = Vector{eltype(V)}(undef, tree_data.n_internal_nodes)
split_dims = Vector{UInt16}(undef, tree_data.n_internal_nodes)

indices_reordered = Vector{Int}()
data_reordered = Vector{V}()

if reorder
indices_reordered = Vector{Int}(undef, n_p)
if isempty(reorderbuffer)
data_reordered = Vector{V}(undef, n_p)
else
data_reordered = reorderbuffer
end
else
# Dummy variables
indices_reordered = Vector{Int}()
data_reordered = Vector{V}()
resize!(indices_reordered, n_p)
resize!(data_reordered, n_p)
end

if metric isa Distances.UnionMetrics
Expand Down Expand Up @@ -71,24 +62,16 @@ function KDTree(data::AbstractVector{V},
end
end

KDTree(storedata ? data : similar(data, 0), hyper_rec, indices, metric, split_vals, split_dims, tree_data, reorder)
KDTree(data, hyper_rec, indices, metric, split_vals, split_dims, tree_data, reorder)
end

function KDTree(data::AbstractVecOrMat{T},
metric::M = Euclidean();
metric::MinkowskiMetric = Euclidean();
leafsize::Int = 10,
storedata::Bool = true,
reorder::Bool = true,
reorderbuffer::Matrix{T} = Matrix{T}(undef, 0, 0)) where {T <: AbstractFloat, M <: MinkowskiMetric}
reorder::Bool = true) where {T <: AbstractFloat}
dim = size(data, 1)
points = copy_svec(T, data, Val(dim))
if isempty(reorderbuffer)
reorderbuffer_points = Vector{SVector{dim,T}}()
else
reorderbuffer_points = copy_svec(T, reorderbuffer, Val(dim))
end
KDTree(points, metric, leafsize = leafsize, storedata = storedata, reorder = reorder,
reorderbuffer = reorderbuffer_points)
KDTree(points, metric; leafsize, reorder)
end

function build_KDTree(index::Int,
Expand Down
1 change: 0 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ const trees_with_brute = [BruteTree; trees]
include("test_knn.jl")
include("test_inrange.jl")
include("test_monkey.jl")
include("test_datafreetree.jl")

@testset "periodic euclidean" begin
pred = PeriodicEuclidean([Inf, 2.5])
Expand Down
29 changes: 0 additions & 29 deletions test/test_datafreetree.jl

This file was deleted.

Loading