|
30 | 30 |
|
31 | 31 | namespace doris {
|
32 | 32 |
|
33 |
// Frequency table (value -> occurrence count) used to compute exact percentiles
// with linear interpolation between neighbouring ranks.
class OldCounts {
public:
    OldCounts() = default;

    // Adds every (value, count) pair of `other` into this table.
    // A null or empty `other` is a no-op.
    inline void merge(const OldCounts* other) {
        if (other == nullptr || other->_counts.empty()) {
            return;
        }

        for (const auto& cell : other->_counts) {
            increment(cell.first, cell.second);
        }
    }

    // Adds `i` occurrences of `key`, creating the entry when it is missing.
    void increment(int64_t key, uint32_t i) {
        auto item = _counts.find(key);
        if (item != _counts.end()) {
            item->second += i;
        } else {
            _counts.emplace(key, i);
        }
    }

    // Number of bytes serialize() writes: a uint32_t entry count followed by
    // one (int64_t key, uint32_t count) record per entry.
    uint32_t serialized_size() const {
        return sizeof(uint32_t) + sizeof(int64_t) * _counts.size() +
               sizeof(uint32_t) * _counts.size();
    }

    // Writes the table to `writer` in the layout described on serialized_size().
    // The caller must provide at least serialized_size() writable bytes.
    void serialize(uint8_t* writer) const {
        uint32_t size = _counts.size();
        memcpy(writer, &size, sizeof(uint32_t));
        writer += sizeof(uint32_t);
        for (const auto& cell : _counts) {
            memcpy(writer, &cell.first, sizeof(int64_t));
            writer += sizeof(int64_t);
            memcpy(writer, &cell.second, sizeof(uint32_t));
            writer += sizeof(uint32_t);
        }
    }

    // Reads entries produced by serialize() from `type_reader` into this table.
    // Uses emplace, so a key that is already present keeps its current count.
    void unserialize(const uint8_t* type_reader) {
        uint32_t size;
        memcpy(&size, type_reader, sizeof(uint32_t));
        type_reader += sizeof(uint32_t);
        for (uint32_t i = 0; i < size; ++i) {
            int64_t key;
            uint32_t count;
            memcpy(&key, type_reader, sizeof(int64_t));
            type_reader += sizeof(int64_t);
            memcpy(&count, type_reader, sizeof(uint32_t));
            type_reader += sizeof(uint32_t);
            _counts.emplace(key, count);
        }
    }

    // Returns the interpolated value at zero-based rank `position`.
    // `counts` must be sorted by key and hold CUMULATIVE counts in `.second`.
    // An empty vector yields 0.0; an out-of-range or NaN position is clamped
    // to the valid rank range instead of reading past the last element.
    double get_percentile(std::vector<std::pair<int64_t, uint32_t>>& counts,
                          double position) const {
        return _percentile_from_cumulative(counts, position);
    }

    // Returns the `quantile` percentile of the recorded values.
    // `quantile` is clamped to [0, 1]; NaN is treated as 0.
    double terminate(double quantile) const {
        if (_counts.empty()) {
            // No data: the result is 0.0. NOTE(review): the original comment says
            // the caller in aggregate_function_percentile_approx.h just reads this
            // value - confirm against that caller.
            return 0.0;
        }

        // Cumulative counts are kept in 64 bits: the sum over all keys can exceed
        // the range of the per-key uint32_t counters.
        std::vector<std::pair<int64_t, int64_t>> cumulative;
        cumulative.reserve(_counts.size());
        for (const auto& cell : _counts) {
            cumulative.emplace_back(cell.first, static_cast<int64_t>(cell.second));
        }
        std::sort(cumulative.begin(), cumulative.end(),
                  [](const std::pair<int64_t, int64_t>& l, const std::pair<int64_t, int64_t>& r) {
                      return l.first < r.first;
                  });

        int64_t total = 0;
        for (auto& cell : cumulative) {
            total += cell.second;
            cell.second = total;
        }

        if (total == 0) {
            // Every entry has a zero count, so there is no rank to look up.
            return 0.0;
        }

        // Written so that NaN also falls into the first branch.
        if (!(quantile > 0.0)) {
            quantile = 0.0;
        } else if (quantile > 1.0) {
            quantile = 1.0;
        }

        const int64_t max_position = total - 1;
        const double position = max_position * quantile;
        return _percentile_from_cumulative(cumulative, position);
    }

private:
    // Shared rank lookup over a key-sorted vector of (key, cumulative count)
    // pairs; works for both 32-bit and 64-bit cumulative counters.
    template <typename CumulativeVec>
    static double _percentile_from_cumulative(const CumulativeVec& cumulative, double position) {
        if (cumulative.empty()) {
            return 0.0;
        }

        // Clamp the rank to [0, total - 1]; the second test also catches NaN.
        const double max_position = static_cast<double>(cumulative.back().second) - 1.0;
        if (position > max_position) {
            position = max_position;
        }
        if (!(position > 0.0)) {
            position = 0.0;
        }

        const int64_t lower = static_cast<int64_t>(std::floor(position));
        const int64_t higher = static_cast<int64_t>(std::ceil(position));

        // First entry whose cumulative count covers rank `lower`; never moves
        // beyond the last element.
        const auto last = cumulative.end() - 1;
        auto iter = cumulative.begin();
        while (iter != last && static_cast<int64_t>(iter->second) < lower + 1) {
            ++iter;
        }

        const int64_t lower_key = iter->first;
        if (higher == lower) {
            return static_cast<double>(lower_key);
        }

        // Rank `higher` may belong to the next distinct key.
        if (iter != last && static_cast<int64_t>(iter->second) < higher + 1) {
            ++iter;
        }

        const int64_t higher_key = iter->first;
        if (lower_key == higher_key) {
            return static_cast<double>(lower_key);
        }

        return (higher - position) * lower_key + (position - lower) * higher_key;
    }

    std::unordered_map<int64_t, uint32_t> _counts;
};
141 | 33 | template <typename Ty>
|
142 | 34 | class Counts {
|
143 | 35 | public:
|
|
0 commit comments