-
Notifications
You must be signed in to change notification settings - Fork 31
WIP: Approximate convex hulls #114
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
7c3749e
c3006c3
31b2315
6dae269
1c6be1e
9aa4394
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,305 @@ | ||
#!/usr/bin/python | ||
# -*- coding: utf-8 -*- | ||
## | ||
# geometry.py: Computational geometry utilities. | ||
## | ||
# © 2016 Chris Ferrie ([email protected]) and | ||
# Christopher E. Granade ([email protected]) | ||
# | ||
# This file is a part of the Qinfer project. | ||
# Licensed under the AGPL version 3. | ||
## | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU Affero General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU Affero General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU Affero General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
## | ||
|
||
## FEATURES ################################################################### | ||
|
||
from __future__ import absolute_import | ||
from __future__ import division | ||
|
||
## EXPORTS ################################################################### | ||
|
||
__all__ = [ | ||
'convex_distance', | ||
'approximate_convex_hull' | ||
] | ||
|
||
## IMPORTS ################################################################### | ||
|
||
import numpy as np | ||
from scipy.spatial.distance import cdist | ||
|
||
from qinfer.utils import requires_optional_module, uniquify | ||
|
||
try: | ||
import cvxopt | ||
import cvxopt.solvers | ||
except: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should only catch on There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably, though I don't want any error in importing CVXOPT to affect functions which don't require it. Perhaps silently ignoring There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would catch on If you want more rigorous separation you'll either need to use separate files (one for functions that use |
||
cvxopt = None | ||
|
||
## FUNCTIONS ################################################################# | ||
|
||
# FIXME: add outgoing citations. | ||
|
||
@requires_optional_module('cvxopt') | ||
def convex_distance(test_point, convex_points): | ||
r""" | ||
Returns the distance :math:`d(\vec{x}, S) = | ||
\min_{\vec{z} \in \operatorname{conv}(S)} \| \vec{x} - \vec{z} \|^2` from | ||
a point :math:`\vec{x}` to the convex closure of a set of points | ||
:math:`S`. | ||
""" | ||
# TODO: document this much better, esp. params/returns. | ||
|
||
n_points, dimension = convex_points.shape | ||
|
||
# Start by centering the convex region of interest. | ||
# NB: don't use in-place (-=) here, since we want a copy. | ||
convex_points = convex_points - test_point | ||
|
||
# Construct the quadratic program parameters P and q required by CVXOPT. | ||
# Since we want min x^T P x, with x representing the weight of each point | ||
# in our set of a convex combination, we need to construct the outer | ||
# product | ||
# P_{ik} = sum_j y_ij y_jk | ||
# where y_ij is the jth component of the ith point in our set. | ||
P = cvxopt.matrix(np.dot(convex_points, convex_points.T)) | ||
# We have no linear objective, so we set q = 0. | ||
# Similarly, q is a vector of all ones representing the sum | ||
# over points in the set. | ||
q = cvxopt.matrix(np.zeros((n_points, 1))) | ||
# To enfoce x_i ≥ 0, we need G = -\mathbb{1} and h = 0, | ||
# such that CVXOPT's constraint Gx ≥ h is correct. | ||
G = cvxopt.matrix(-np.eye(n_points)) | ||
h = cvxopt.matrix(0.0, (n_points, 1)) | ||
|
||
# We do, however, have a linear constraint A x = 1. | ||
A = cvxopt.matrix(1.0, (1, n_points)) | ||
b = cvxopt.matrix(1.0) | ||
|
||
# Now we solve with cvxopt. | ||
solution = cvxopt.solvers.qp(P, q, G, h, A, b, | ||
options={'show_progress': False} | ||
) | ||
|
||
# Return the obtained primal objective, noting that CVXOPT finds | ||
# min_x 1/2 x^T P X, so that we must multiply by 2, then take the | ||
# square root. Taking the square root, we have to be a little | ||
# careful, since finite tolerances can cause the objective to | ||
# be negative, even though we constrained it to be at least zero. | ||
# Thus, we take the max of the objective and zero. | ||
return np.sqrt(2 * max(0, solution['primal objective'])) | ||
|
||
def rand_argmin(arr, tol=1e-7): | ||
minimum = np.min(arr) | ||
return np.random.choice(np.nonzero(arr <= minimum + tol)[0]) | ||
|
||
def expand_hull_directed_search(partial_hull, candidate_points, interior_thresh=1e-7): | ||
r""" | ||
Uses the Sartipizadeh and Vincent 2016 algorithm to expand | ||
an approximate convex hull by finding a point :math:`\vec{x}` | ||
solving | ||
|
||
.. math:: | ||
|
||
\vec{x} = \operatorname{arg\,min}_{\vec{x} \in S \setminus E} | ||
\max_{\vec{z} \in S \setminus E} | ||
d(\vec{z}, E \cup \{\vec{x}\}) | ||
|
||
where :math:`E` is the set of points comprising the approximate | ||
hull so far, and where :math:`S` is the set of points that we | ||
wish to approximate the convex hull of. That is, this function | ||
finds a point to add to an approximate hull that minimizes | ||
the worst-case distance to any point not currently in the hull. | ||
""" | ||
# TODO: document this much better, esp. params/returns. | ||
|
||
# returns: | ||
# index of point to add to the hull | ||
# current error of the approximate hull | ||
# masked matrix of known convex distances | ||
|
||
# Our strategy will be to build up a matrix D_{i, j} | ||
# defined as: | ||
# | ||
# Dᵢⱼ ≔ d(xᵢ, E ∪ {xⱼ}) | ||
# | ||
# where the set of candidate points S \ E = {xᵢ} is represented | ||
# as the array candidate_points. | ||
# | ||
# NB: our notation is different from VC16, since they reuse | ||
# the letter E. | ||
|
||
n_partial, dimension = partial_hull.shape | ||
n_candidates, dimension_check = candidate_points.shape | ||
assert dimension == dimension_check | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should probably raise something instead of just blind asserting There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Absolutely. Given that this is still a first stab, I didn't yet take the care to validate inputs and all that will go into the final PR. |
||
|
||
# Start by allocating arrays to represent how far into | ||
# each maximization we've considered for the minimization over | ||
# candidates. | ||
n_distances_considered = np.ones((n_candidates,), dtype=int) | ||
|
||
# We will also need to remember the list of points interior | ||
# to each candidate so that they can be removed later. | ||
interior_points_found = [[] for idx in range(n_candidates)] | ||
|
||
# We then define how to compute each element. | ||
closest_miss = [np.inf] | ||
def distance(idx_max, idx_min): | ||
if idx_max == idx_min: | ||
this_distance = 0 | ||
else: | ||
this_distance = convex_distance( | ||
candidate_points[idx_max], | ||
np.concatenate([ | ||
partial_hull, | ||
candidate_points[idx_min, np.newaxis] | ||
], axis=0) | ||
) | ||
if this_distance < closest_miss[0]: | ||
closest_miss[0] = this_distance | ||
|
||
if this_distance < interior_thresh: | ||
interior_points_found[idx_min].append(idx_max) | ||
return this_distance | ||
|
||
# Next, we allocate an array for the worst maxima we've seen for | ||
# each candidate. | ||
worst_distances = np.array([ | ||
distance(0, idx_min) | ||
for idx_min in range(n_candidates) | ||
]) | ||
|
||
# Using this, we can find our best candidate so far | ||
idx_best_candidate = rand_argmin(worst_distances) | ||
|
||
# ITERATION: Look at each "row" in turn until we get to the bottom. At each step, | ||
# evalaute the next element of the "column" with the best (smallest) | ||
# maximum. | ||
while n_distances_considered[idx_best_candidate] < n_candidates: | ||
|
||
next_distance = distance(n_distances_considered[idx_best_candidate], idx_best_candidate) | ||
n_distances_considered[idx_best_candidate] += 1 | ||
|
||
if next_distance > worst_distances[idx_best_candidate]: | ||
worst_distances[idx_best_candidate] = next_distance | ||
|
||
idx_best_candidate = rand_argmin(worst_distances) | ||
|
||
# COMPLETION: Return the best element we've found so far, along with the value | ||
# at that element, and the indices of interior points for the newly | ||
# expanded hull. | ||
approximation_error = worst_distances[idx_best_candidate] | ||
idxs_interior_points = interior_points_found[idx_best_candidate] | ||
|
||
print(closest_miss) | ||
|
||
return idx_best_candidate, approximation_error, idxs_interior_points | ||
|
||
|
||
def approximate_convex_hull(points, max_n_vertices, desired_approximation_error, | ||
interior_thresh=1e-7, | ||
seed_method='extreme' | ||
): | ||
|
||
n_points, dimension = points.shape | ||
max_n_vertices = min(n_points - 1, max_n_vertices) | ||
|
||
# FIXME: we should shuffle. | ||
|
||
# Start by picking a point a seed point to grow the hull from. | ||
if seed_method == 'extreme': | ||
# http://www.sciencedirect.com/science/article/pii/S2405896315009866 | ||
# idx_dimension = np.random.randint(dimension) | ||
# sign = np.random.choice([-1, 1]) | ||
idx_seed_points = uniqify([ | ||
(sign * points[:, idx_dimension]).argmin() | ||
for idx_dimension in range(dimension) | ||
for sign in [-1, 1] | ||
]) | ||
|
||
elif seed_method == 'centroid': | ||
centroid = np.mean(points, axis=0) | ||
centroid_distances = cdist(points, centroid[None, :])[:, 0] | ||
idx_seed_points = [centroid_distances.argmin(), centroid_distances.argmax()] | ||
|
||
|
||
# We maintain masks to indicate which points are in the approximate | ||
# hull, and which masks are candidates (haven't been eliminated as | ||
# interior). | ||
hull_mask = np.zeros((n_points,), dtype=bool) | ||
hull_mask[idx_seed_points] = True | ||
|
||
candidate_mask = ~hull_mask.copy() | ||
|
||
# Before we iterate, remove candidates that are interior to the seed hull. | ||
# TODO | ||
|
||
# Iterate until we hit max_n_vertices or epsilon_desired, expanding | ||
# the hull each time and eliminating the interior points. | ||
while True: | ||
partial_hull = points[hull_mask] | ||
candidate_points = points[candidate_mask] | ||
idxs_partial_hull = np.nonzero(hull_mask)[0] | ||
|
||
idx_best_candidate, approximation_error, idxs_interior_points = \ | ||
expand_hull_directed_search(partial_hull, candidate_points, interior_thresh=interior_thresh) | ||
|
||
# Add the new best candidate to the approximate hull, | ||
# and eliminate any candidate points interior to the new point. | ||
idxs_candidates = np.nonzero(candidate_mask)[0] | ||
hull_mask[idxs_candidates[idx_best_candidate]] = True | ||
candidate_mask[idxs_candidates[idxs_interior_points]] = False | ||
|
||
# Eliminate anything from the hull_mask | ||
# that's interior to the new point. | ||
n_hull_removed = 0 | ||
reduced_hull_mask = hull_mask.copy() | ||
for idx_previous_hull_point in idxs_partial_hull: | ||
reduced_hull_mask[idx_previous_hull_point] = False | ||
if convex_distance(points[idx_previous_hull_point], points[reduced_hull_mask]) <= interior_thresh: | ||
# Remove this as a hull point. | ||
n_hull_removed += 1 | ||
hull_mask[idx_previous_hull_point] = False | ||
else: | ||
# The point wasn't interior, so add it back in. | ||
reduced_hull_mask[idx_previous_hull_point] = True | ||
|
||
print("Eliminated {} interior points.".format(len(idxs_interior_points) - 1 + n_hull_removed)) | ||
|
||
# If we hit either of our stopping criteria, return now. | ||
if ( | ||
approximation_error <= desired_approximation_error or | ||
np.sum(hull_mask) >= max_n_vertices | ||
): | ||
return np.nonzero(hull_mask)[0], approximation_error | ||
|
||
# FIXME: remove this. | ||
if __name__ == "__main__": | ||
|
||
dims = np.arange(2, 11) | ||
times = np.empty_like(dims, dtype=float) | ||
n_points = 100 | ||
|
||
from qinfer.perf_testing import timing | ||
|
||
for idx_dim, dim in enumerate(dims): | ||
print("======= DIM {} =======".format(dim)) | ||
points = np.random.randn(n_points, dim) | ||
points /= np.linalg.norm(points, axis=0) | ||
points *= np.random.random((n_points,))[:, None] | ||
with timing() as t: | ||
hull_pts, eps = approximate_convex_hull(points, n_points - 1, 0, seed_method='extreme') | ||
times[idx_dim] = t.delta_t | ||
print(t) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#!/usr/bin/python | ||
# -*- coding: utf-8 -*- | ||
## | ||
# test_geometry.py: Checks correctness of computational geometry functionality. | ||
## | ||
# © 2014 Chris Ferrie ([email protected]) and | ||
# Christopher E. Granade ([email protected]) | ||
# | ||
# This file is a part of the Qinfer project. | ||
# Licensed under the AGPL version 3. | ||
## | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU Affero General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU Affero General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU Affero General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
## | ||
|
||
## FEATURES ################################################################### | ||
|
||
from __future__ import absolute_import | ||
from __future__ import division # Ensures that a/b is always a float. | ||
|
||
## IMPORTS #################################################################### | ||
|
||
import numpy as np | ||
from numpy.testing import assert_almost_equal | ||
|
||
from qinfer.tests.base_test import DerandomizedTestCase | ||
|
||
from qinfer.geometry import convex_distance | ||
from qinfer.utils import ( | ||
assert_sigfigs_equal, requires_optional_module, | ||
) | ||
from qinfer.perf_testing import numpy_err_policy | ||
|
||
## TESTS ##################################################################### | ||
|
||
class TestConvexGeometry(DerandomizedTestCase): | ||
|
||
@requires_optional_module("cvxopt", if_missing="skip") | ||
def test_convex_distance_known_cases(self, dimension=3): | ||
""" | ||
Convex geometry: Checks convex_distance for known cases. | ||
""" | ||
assert_almost_equal( | ||
convex_distance(np.ones((dimension,)), np.eye(dimension)), | ||
np.linalg.norm(((dimension - 1) / dimension) * np.ones(dimension,), ord=2) | ||
) | ||
|
||
# TODO: add cases. | ||
|
||
@requires_optional_module("cvxopt", if_missing="skip") | ||
def test_convex_distance_interior_point(self, n_points=500, dimension=5): | ||
""" | ||
Convex geometry: Checks that convex_distance is zero for interior pts. | ||
""" | ||
S = np.random.random((n_points, dimension)) | ||
alpha = np.random.random((n_points)) | ||
alpha /= alpha.sum() | ||
|
||
z = np.dot(alpha, S) | ||
|
||
with numpy_err_policy(invalid='raise'): | ||
assert_almost_equal( | ||
convex_distance(z, S), | ||
0 | ||
) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#!/usr/bin/env python
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch, this is what I get from copying a older header template, heh.