Skip to content

Commit 0339deb

Browse files
committed
🐐 identify non-alignment between LTTB_py and LTTB_rs 🔍
1 parent 2bc1c31 commit 0339deb

File tree

1 file changed

+14
-2
lines changed

1 file changed

+14
-2
lines changed

tsdownsample/_python/downsamplers.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ def _downsample(
7878

7979
# Construct the output array
8080
sampled_x = np.empty(n_out, dtype="int64")
81+
# Add the first point
8182
sampled_x[0] = 0
82-
sampled_x[-1] = x.shape[0] - 1
8383

8484
# Convert x & y to int if it is boolean
8585
if x.dtype == np.bool_:
@@ -93,7 +93,17 @@ def _downsample(
9393
LTTB_py._argmax_area(
9494
prev_x=x[a],
9595
prev_y=y[a],
96-
avg_next_x=np.mean(x[offset[i + 1] : offset[i + 2]]),
96+
# NOTE: In a 100% correct implementation of LTTB the next x average
97+
# should be computed as follows:
98+
# avg_next_x=np.mean(x[offset[i + 1] : offset[i + 2]]),
99+
# To improve performance we use the following approximation
100+
# which is the average of the first and last point of the next bucket
101+
# NOTE: this is not as accurate when x is not sampled equidistantly
102+
# or when the buckets do not contain that many data points, but it:
103+
# (1) aligns with visual perception (visual middle)
104+
# (2) is much faster
105+
# (3) is how the LTTB rust implementation works
106+
avg_next_x=(x[offset[i + 1]] + x[offset[i + 2] - 1]) / 2.0,
97107
avg_next_y=y[offset[i + 1] : offset[i + 2]].mean(),
98108
x_bucket=x[offset[i] : offset[i + 1]],
99109
y_bucket=y[offset[i] : offset[i + 1]],
@@ -115,6 +125,8 @@ def _downsample(
115125
)
116126
+ offset[-2]
117127
)
128+
# Always include the last point
129+
sampled_x[-1] = x.shape[0] - 1
118130
return sampled_x
119131

120132

0 commit comments

Comments
 (0)