Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CLUEstering/CLUEstering.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import pandas as pd
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import davies_bouldin_score
from os.path import dirname, exists, join
path = dirname(__file__)
sys.path.insert(1, join(path, 'lib'))
Expand Down Expand Up @@ -1183,3 +1184,9 @@ def import_clusterer(self, input_folder: str, file_name: str) -> None:
points_per_cluster,
df_
)

# Ad-hoc manual check, executed only when this file is run as a script:
# builds a clusterer, reads the toy-detector CSV, runs the clustering and
# prints scikit-learn's Davies-Bouldin score for the resulting labels.
# NOTE(review): the CSV path is relative — presumably resolved against the
# current working directory rather than this file's location; confirm.
if __name__ == "__main__":
c = clusterer(4, 2.5, 4)
c.read_data('../data/toyDetector_1000.csv')
c.run_clue()
print(davies_bouldin_score(c.coords.T, c.cluster_ids))
78 changes: 77 additions & 1 deletion include/CLUEstering/core/DistanceMetrics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ namespace clue {
} // namespace concepts

template <std::size_t Ndim>
using Point = std::array<float, Ndim + 1>;
using Point = std::array<float, Ndim>;

template <std::size_t Ndim>
using WeightedPoint = std::array<float, Ndim + 1>;

/// @brief Euclidean distance metric
/// This class implements the Euclidean distance metric in Ndim dimensions.
Expand All @@ -50,6 +53,18 @@ namespace clue {
[&]<std::size_t Dim>() { return (lhs[Dim] - rhs[Dim]) * (lhs[Dim] - rhs[Dim]); });
return math::sqrt(distance2);
}
/// @brief Euclidean distance between two weighted points
///
/// @param lhs First weighted point
/// @param rhs Second weighted point
/// @return Square root of the accumulated squared coordinate differences
/// @note The weight carried by the points does not enter the computation
ALPAKA_FN_HOST_ACC constexpr inline auto operator()(const WeightedPoint<Ndim>& lhs,
                                                    const WeightedPoint<Ndim>& rhs) const {
  // Squared difference along a single dimension
  const auto squared_delta = [&]<std::size_t Dim>() {
    const auto delta = lhs[Dim] - rhs[Dim];
    return delta * delta;
  };
  return math::sqrt(meta::accumulate<Ndim>(squared_delta));
}
};

/// @brief Weighted Euclidean distance metric
Expand Down Expand Up @@ -95,6 +110,19 @@ namespace clue {
});
return math::sqrt(distance2);
}
/// @brief Weighted Euclidean distance between two weighted points
///
/// @param lhs First weighted point
/// @param rhs Second weighted point
/// @return Square root of the accumulated squared coordinate differences,
///         each one scaled by the metric weight of its dimension
/// @note Only the metric's own per-dimension weights are applied; the weight
///       carried by the points does not enter the computation
ALPAKA_FN_HOST_ACC constexpr inline auto operator()(const WeightedPoint<Ndim>& lhs,
                                                    const WeightedPoint<Ndim>& rhs) const {
  // Squared difference along a single dimension, scaled by that dimension's weight
  const auto weighted_squared_delta = [&]<std::size_t Dim>() {
    const auto delta = lhs[Dim] - rhs[Dim];
    return m_weights[Dim] * delta * delta;
  };
  return math::sqrt(meta::accumulate<Ndim>(weighted_squared_delta));
}
};

/// @brief Periodic Euclidean distance metric
Expand Down Expand Up @@ -140,6 +168,21 @@ namespace clue {
});
return math::sqrt(distance2);
}
/// @brief Periodic Euclidean distance between two weighted points
///
/// @param lhs First weighted point
/// @param rhs Second weighted point
/// @return Square root of the accumulated squared periodic differences
/// @note The weight carried by the points does not enter the computation
ALPAKA_FN_HOST_ACC constexpr inline auto operator()(const WeightedPoint<Ndim>& lhs,
                                                    const WeightedPoint<Ndim>& rhs) const {
  const auto squared_periodic_delta = [&]<std::size_t Dim>() {
    const auto direct = math::fabs(lhs[Dim] - rhs[Dim]);
    // Along each dimension take the smaller of the direct separation and
    // the period minus that separation
    const auto wrapped = math::min(direct, m_periods[Dim] - direct);
    return wrapped * wrapped;
  };
  return math::sqrt(meta::accumulate<Ndim>(squared_periodic_delta));
}
};

/// @brief Manhattan distance metric
Expand All @@ -162,6 +205,17 @@ namespace clue {
return meta::accumulate<Ndim>(
[&]<std::size_t Dim>() { return math::fabs(lhs[Dim] - rhs[Dim]); });
}
/// @brief Manhattan distance between two weighted points
///
/// @param lhs First weighted point
/// @param rhs Second weighted point
/// @return Accumulated absolute coordinate differences
/// @note The weight carried by the points does not enter the computation
ALPAKA_FN_HOST_ACC constexpr inline auto operator()(const WeightedPoint<Ndim>& lhs,
                                                    const WeightedPoint<Ndim>& rhs) const {
  // Absolute difference along a single dimension
  const auto abs_delta = [&]<std::size_t Dim>() { return math::fabs(lhs[Dim] - rhs[Dim]); };
  return meta::accumulate<Ndim>(abs_delta);
}
};

/// @brief Chebyshev distance metric
Expand All @@ -184,6 +238,17 @@ namespace clue {
return meta::maximum<Ndim>(
[&]<std::size_t Dim>() { return math::fabs(lhs[Dim] - rhs[Dim]); });
}
/// @brief Chebyshev distance between two weighted points
///
/// @param lhs First weighted point
/// @param rhs Second weighted point
/// @return Largest absolute coordinate difference
/// @note The weight carried by the points does not enter the computation
ALPAKA_FN_HOST_ACC constexpr inline auto operator()(const WeightedPoint<Ndim>& lhs,
                                                    const WeightedPoint<Ndim>& rhs) const {
  // Absolute difference along a single dimension
  const auto abs_delta = [&]<std::size_t Dim>() { return math::fabs(lhs[Dim] - rhs[Dim]); };
  return meta::maximum<Ndim>(abs_delta);
}
};

/// @brief Weighted Chebyshev distance metric
Expand Down Expand Up @@ -227,6 +292,17 @@ namespace clue {
return meta::maximum<Ndim>(
[&]<std::size_t Dim>() { return m_weights[Dim] * math::fabs(lhs[Dim] - rhs[Dim]); });
}
/// @brief Weighted Chebyshev distance between two weighted points
///
/// @param lhs First weighted point
/// @param rhs Second weighted point
/// @return Largest absolute coordinate difference after scaling each one by
///         the metric weight of its dimension
/// @note Only the metric's own per-dimension weights are applied; the weight
///       carried by the points does not enter the computation
ALPAKA_FN_HOST_ACC constexpr inline auto operator()(const WeightedPoint<Ndim>& lhs,
                                                    const WeightedPoint<Ndim>& rhs) const {
  // Absolute difference along a single dimension, scaled by that dimension's weight
  const auto weighted_abs_delta = [&]<std::size_t Dim>() {
    return m_weights[Dim] * math::fabs(lhs[Dim] - rhs[Dim]);
  };
  return meta::maximum<Ndim>(weighted_abs_delta);
}
};

namespace metrics {
Expand Down
3 changes: 3 additions & 0 deletions include/CLUEstering/data_structures/PointsHost.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ namespace clue {

float weight() const;
float cluster_index() const;

/// @brief Convert the point to a plain array holding its coordinates
///
/// @return Copy of the stored coordinates
/// @note Const-qualified so that the conversion is also available on const
///       points; it only reads the coordinates and returns them by value.
operator std::array<float, Ndim>() const { return m_coordinates; }
// operator std::array<float, Ndim + 1>() {}
};

/// @brief Constructs a container for the points allocated on the host
Expand Down
40 changes: 40 additions & 0 deletions include/CLUEstering/utils/detail/scores.hpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@

#pragma once

#include "CLUEstering/core/DistanceMetrics.hpp"
#include "CLUEstering/data_structures/PointsHost.hpp"
#include "CLUEstering/data_structures/AssociationMap.hpp"
#include "CLUEstering/utils/cluster_centroid.hpp"
#include <algorithm>
#include <cmath>
#include <cstddef>
Expand Down Expand Up @@ -98,4 +100,42 @@ namespace clue {
return std::reduce(scores.begin(), scores.end(), 0.f) / static_cast<float>(scores.size());
}

/// @brief Compute the Davies-Bouldin score of a clustering
///
/// The scatter of a cluster is the mean distance between its points and its
/// centroid. For every cluster the largest value of
/// (scatter_i + scatter_j) / distance(centroid_i, centroid_j) over all the other
/// clusters is taken, and the score is the average of these maxima.
///
/// @tparam Ndim Number of dimensions of the points
/// @tparam DistanceMetric Type of the distance metric
/// @param points Clustered points; points with a negative cluster index are skipped
/// @param metric Metric used for point-centroid and centroid-centroid distances
/// @return The Davies-Bouldin score, or 0 when there are fewer than two clusters
/// @note Clusters without points keep a scatter of 0 and pairs of clusters whose
///       centroids coincide are ignored, so no division by zero takes place.
template <std::size_t Ndim, concepts::distance_metric<Ndim> DistanceMetric>
auto davies_bouldin(const clue::PointsHost<Ndim>& points, const DistanceMetric& metric) {
  auto cluster_centroids = clue::cluster_centroids(points);
  const std::size_t n_clusters = cluster_centroids.size();
  // Every cluster is compared with the others, so at least two are needed;
  // without this guard an empty clustering would evaluate 0.f / 0.f.
  if (n_clusters < 2) {
    return 0.f;
  }
  auto clusters = clue::get_clusters(points);

  // Sum, per cluster, of the distances between its points and its centroid
  std::vector<float> clusters_scatter(n_clusters, 0.f);
  using index_type = decltype(points.size());
  for (index_type i = 0; i < points.size(); ++i) {
    // cluster_index() returns a float: convert it once instead of indexing with it
    const auto cluster_id = static_cast<int>(points[i].cluster_index());
    if (cluster_id < 0)
      continue;
    const auto cluster = static_cast<std::size_t>(cluster_id);
    clusters_scatter[cluster] += metric(points[i], cluster_centroids[cluster]);
  }
  // Turn the sums into means, leaving empty clusters at 0
  for (std::size_t i = 0; i < n_clusters; ++i) {
    const auto n_members = clusters.count(i);
    if (n_members > 0) {
      clusters_scatter[i] /= static_cast<float>(n_members);
    }
  }

  // Each separation is needed exactly once per ordered pair, so it is computed
  // on the fly instead of being stored in an n_clusters x n_clusters matrix.
  std::vector<float> R_values(n_clusters, 0.f);
  for (std::size_t i = 0; i < n_clusters; ++i) {
    for (std::size_t j = 0; j < n_clusters; ++j) {
      if (i == j)
        continue;
      const float separation = metric(cluster_centroids[i], cluster_centroids[j]);
      // Coincident centroids would give an infinite (or NaN) ratio
      if (!(separation > 0.f))
        continue;
      R_values[i] =
          std::max(R_values[i], (clusters_scatter[i] + clusters_scatter[j]) / separation);
    }
  }

  return std::reduce(R_values.begin(), R_values.end(), 0.f) / static_cast<float>(R_values.size());
}

} // namespace clue
6 changes: 6 additions & 0 deletions include/CLUEstering/utils/scores.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#pragma once

#include "CLUEstering/core/DistanceMetrics.hpp"
#include "CLUEstering/data_structures/PointsHost.hpp"

namespace clue {
Expand All @@ -27,6 +28,11 @@ namespace clue {
template <std::size_t Ndim>
auto silhouette(const clue::PointsHost<Ndim>& points);

/// @brief Compute the Davies-Bouldin score of the clustered points
///
/// @tparam Ndim Number of dimensions of the points
/// @tparam DistanceMetric Type of the distance metric, clue::EuclideanMetric<Ndim> by default
/// @param points Host container of the clustered points
/// @param metric Distance metric instance, a default-constructed
///               clue::EuclideanMetric<Ndim> when omitted
/// @return The computed score; the definition is in CLUEstering/utils/detail/scores.hpp,
///         which is included at the end of this header
template <std::size_t Ndim,
concepts::distance_metric<Ndim> DistanceMetric = clue::EuclideanMetric<Ndim>>
auto davies_bouldin(const clue::PointsHost<Ndim>& points,
const DistanceMetric& metric = clue::EuclideanMetric<Ndim>());

} // namespace clue

#include "CLUEstering/utils/detail/scores.hpp"
6 changes: 6 additions & 0 deletions tests/test_validation_scores.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,10 @@ TEST_CASE("Test validation scores on toy detector dataset") {
CHECK(silhouette >= -1.f);
CHECK(silhouette <= 1.f);
}

// Computes the score with the default metric (no metric argument is passed),
// prints it, and only checks that it is non-negative: no reference value is
// compared here.
SUBCASE("Test computation of davies-bouldin score") {
const auto davies_bouldin = clue::davies_bouldin(points);
std::cout << "Davies-Bouldin score: " << davies_bouldin << std::endl;
CHECK(davies_bouldin >= 0.f);
}
}
Loading