-
Couldn't load subscription status.
- Fork 576
feat(tdigest): add the support of TDIGEST.REVRANK command #3130
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: unstable
Are you sure you want to change the base?
Changes from all commits
03b69e1
97df4a1
70a39d3
dde8410
0d3e9cc
bb172a8
a64add4
3954b1f
05d1202
f688e14
8bcad0f
46ac984
495e072
2b6785d
3af3b54
f3d85d3
e68689d
eb8674f
c70f410
b991d0d
4c9a41d
543fda0
e0d39a7
a4ed14c
9d6c532
ff658f8
53e82f8
6df3309
201afed
0851c22
c7ed36f
3a898fe
3711578
4b4f684
bd268b4
8e6a7f9
6662240
e7f06a2
367981c
4b8cd6a
07836fd
f44bc56
2aded75
f4a9c53
5023de8
e3629d9
ae05623
0cf8c8a
f855895
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -67,18 +67,18 @@ class DummyCentroids { | |||||
| if (Valid()) { | ||||||
| std::advance(iter_, 1); | ||||||
| } | ||||||
| return iter_ != centroids_.cend(); | ||||||
| return Valid(); | ||||||
| } | ||||||
|
|
||||||
| // The Prev function can only be called for item is not cend, | ||||||
| // because we must guarantee the iterator to be inside the valid range before iteration. | ||||||
| bool Prev() { | ||||||
| if (Valid() && iter_ != centroids_.cbegin()) { | ||||||
| if (Valid()) { | ||||||
|
||||||
| if (Valid()) { | |
| if (Valid() && iter_ != centroids_.cbegin()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems that iter_ should always be greater than or equal to cbegin().
LindaSummer marked this conversation as resolved.
Show resolved
Hide resolved
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -22,6 +22,8 @@ | |||||||||
|
|
||||||||||
| #include <fmt/format.h> | ||||||||||
|
|
||||||||||
| #include <map> | ||||||||||
| #include <numeric> | ||||||||||
| #include <vector> | ||||||||||
|
|
||||||||||
| #include "common/status.h" | ||||||||||
|
|
@@ -150,3 +152,70 @@ inline StatusOr<double> TDigestQuantile(TD&& td, double q) { | |||||||||
| diff /= (lc.weight / 2 + rc.weight / 2); | ||||||||||
| return Lerp(lc.mean, rc.mean, diff); | ||||||||||
| } | ||||||||||
|
|
||||||||||
// Stores one shared rank into `result` at every position listed in `indices`.
// The stored value is `cumulative_weight` converted with static_cast<int>, so any
// fractional part is discarded.
// `result` is indexed directly with no bounds check: it must already be sized to cover
// every index in `indices` before this is called.
| inline void AssignRankForEqualInputs(const std::vector<size_t>& indices, double cumulative_weight, | ||||||||||
| std::vector<int>& result) { | ||||||||||
| for (auto index : indices) { | ||||||||||
| result[index] = static_cast<int>(cumulative_weight); | ||||||||||
| } | ||||||||||
| } | ||||||||||
|
|
||||||||||
| template <typename TD> | ||||||||||
| inline Status TDigestRevRank(TD&& td, const std::vector<double>& inputs, std::vector<int>& result) { | ||||||||||
| std::map<double, std::vector<size_t>> value_to_indices; | ||||||||||
| for (size_t i = 0; i < inputs.size(); ++i) { | ||||||||||
| value_to_indices[inputs[i]].push_back(i); | ||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It is hard to compare two double numbers reliably when they are used as map keys. We need a stable way to implement the comparison operator. |
||||||||||
| } | ||||||||||
|
|
||||||||||
| double cumulative_weight = 0; | ||||||||||
| result.resize(inputs.size()); | ||||||||||
| auto it = value_to_indices.rbegin(); | ||||||||||
|
|
||||||||||
| // handle inputs larger than maximum | ||||||||||
| while (it != value_to_indices.rend() && it->first > td.Max()) { | ||||||||||
| AssignRankForEqualInputs(it->second, -1, result); | ||||||||||
| ++it; | ||||||||||
| } | ||||||||||
|
|
||||||||||
| auto iter = td.End(); | ||||||||||
| while (iter->Valid() && it != value_to_indices.rend()) { | ||||||||||
| auto centroid = GET_OR_RET(iter->GetCentroid()); | ||||||||||
| auto input_value = it->first; | ||||||||||
| if (centroid.mean == input_value) { | ||||||||||
| auto current_mean = centroid.mean; | ||||||||||
| auto current_mean_cumulative_weight = cumulative_weight + centroid.weight / 2; | ||||||||||
| cumulative_weight += centroid.weight; | ||||||||||
|
|
||||||||||
| // handle all the prev centroids which has the same mean | ||||||||||
| while (iter->Prev()) { | ||||||||||
| auto next_centroid = GET_OR_RET(iter->GetCentroid()); | ||||||||||
| if (current_mean != next_centroid.mean) { | ||||||||||
| // move back to the last equal centroid, because we will process it in the next loop | ||||||||||
| iter->Next(); | ||||||||||
| break; | ||||||||||
| } | ||||||||||
| current_mean_cumulative_weight += centroid.weight / 2; | ||||||||||
| cumulative_weight += centroid.weight; | ||||||||||
|
Comment on lines
+197
to
+198
|
||||||||||
| current_mean_cumulative_weight += centroid.weight / 2; | |
| cumulative_weight += centroid.weight; | |
| current_mean_cumulative_weight += next_centroid.weight / 2; | |
| cumulative_weight += next_centroid.weight; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
refer to tdigest.revrank.