Skip to content

Commit d572363

Browse files
committed
improvements and bugfixes
- added PELT pruning for the CSSD - added a comparison to a baseline method based on the Python module ruptures - fixed a bug in the scaling of the cv-score - refactoring of the functions
1 parent e15ea0b commit d572363

14 files changed

+631
-90
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
2+
demos/__pycache__/ruptures_cssd.cpython-39.pyc
3+
.DS_Store
4+
.DS_Store

cssd.m

Lines changed: 177 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
function output = cssd(x,y,p,gamma,xx,delta)
1+
function output = cssd(x,y,p,gamma,xx,delta,varargin)
22
%CSSD Cubic smoothing spline with discontinuities
33
%
44
% cssd(x, y, p, gamma, xx, delta) computes a cubic smoothing spline with discontinuities for the
55
% given data (x,y). The data values may be scalars or vectors. Data points with the
66
% same site are replaced by their (weighted) average as in the builtin csaps
7-
% function.
7+
% function.
88
%
99
% Input
1010
% x: vector of data sites
@@ -49,18 +49,25 @@
4949
assert( (0 <= p) && (p <= 1), 'The p parameter must fulfill 0 <= p <= 1')
5050
assert( 0 <= gamma, 'The gamma parameter must fulfill 0 < gamma')
5151

52-
% Matlab uses the parameter w which is related to delta of De Boor's book by w = 1./delta.^2
53-
w = 1./delta.^2;
54-
55-
% checks arguments and creates column vectors (chckxywp is Matlab built in)
56-
[xi,yi,~,wi] = chckxywp(x,y,2,w,p);
57-
deltai = sqrt(1./wi);
52+
[xi, yi, wi, deltai] = chkxydelta(x, y, delta);
5853

5954
% Note: from now on we use the xi, yi, wi, deltai versions
6055
%%% END CHECK ARGUMENTS
6156

6257
[N,D] = size(yi);
6358

59+
% rolling cv score
60+
rcv_score = 0;
61+
62+
% counts the number of times an input data point is visited
63+
% (for determination of computational complexity)
64+
complexity_counter = 0;
65+
66+
parser = inputParser;
67+
addOptional(parser, 'pruning', 'PELT');
68+
parse(parser, varargin{:});
69+
pruning = parser.Results.pruning;
70+
6471
% if gamma == Inf (discontinuity has infinite penalty), we may directly
6572
% compute a classical smoothing spline
6673
% also, if p == 1, we may straight compute an interpolating spline, no
@@ -70,12 +77,14 @@
7077
discont = [];
7178
interval_cell = {1:N};
7279
pp_cell = {fnxtr(pp,2)};
80+
complexity_counter = N;
7381
else
7482
% F stores Bellmann values
7583
F = zeros(N, 1);
7684
% partition: stores the optimal partition
7785
partition = zeros(N, 1);
7886

87+
7988
%%% BEGIN PIECEWISE LINEAR CASE
8089
if p == 0 % the piecewise linear case
8190
B = [ones(N,1), xi]./deltai(:);
@@ -85,8 +94,11 @@
8594
G = planerot(A(1:2,1));
8695
A(1:2, :) = G*A(1:2, :);
8796
eps_1r = 0;
97+
8898
% loop starts from index three because eps_11 and eps_12 are zero
8999
for r=3:N
100+
101+
90102
G = planerot(A([1,r],1));
91103
A([1,r],:) = G * A([1,r],:);
92104
G = planerot(A([2,r],2));
@@ -96,6 +108,8 @@
96108
% solution without discontinuities
97109
F(r) = eps_1r;
98110
end
111+
complexity_counter = N;
112+
99113
%%% BEGIN MAIN LOOP
100114
for rb=2:N
101115

@@ -108,6 +122,7 @@
108122
eps_lr = 0;
109123
% the loop is performed in reverse order so that we may use pruning
110124
for lb = rb-1:-1:2
125+
complexity_counter = complexity_counter + 1;
111126
if lb == rb-1
112127
G = planerot(A([end,end-1],1));
113128
A([end,end-1], :) = G*A([end,end-1], :);
@@ -144,54 +159,170 @@
144159

145160
% precompute eps_1r for r=1,...,N
146161
[eps_1r, R, z] = startEpsLR(yi(1:2,:), d(1), alpha(1:2), beta);
162+
147163
% loop starts from index three because eps_11 and eps_12 are zero
148164
for r=3:N
149165
[eps_1r, R, z] = updateEpsLR(eps_1r, R, yi(r,:), d(r-1), z, alpha(r), beta);
150166
% store the eps_1r as the initial Bellman value corresponding to a
151167
% solution without discontinuities
152168
F(r) = eps_1r;
153169
end
170+
complexity_counter = N;
154171
%%% END PRECOMPUTATIONS
172+
173+
% if gamma is hihger than the energy of the zero jump solution, we
174+
% dont need to go into the main loop
175+
if gamma >= F(end)
176+
pruning = 'FULL_SKIP';
177+
end
155178

156179
%%% BEGIN MAIN LOOP
157-
for rb=2:N
158-
159-
% best left bound (blb) initialized with 1 corresponding to interval 1:rb
160-
% corresponding Bellman value has been set in the precomputation
161-
blb = 1;
162180

163-
% the loop is performed in reverse order so that we may use pruning
164-
for lb = rb-1:-1:2
165-
if lb == rb-1
166-
% get start configuration and store start state in R, z
167-
[eps_lr, R, z] = startEpsLR(yi([rb,rb-1],:), d(rb-1), alpha([rb,rb-1]), beta);
168-
else
169-
% perform fast energy update and store current state in R, z
170-
[eps_lr, R, z] = updateEpsLR(eps_lr, R, yi(lb,:), d(lb), z, alpha(lb), beta);
181+
switch pruning % two different pruning strategies are supported
182+
case 'FULL_SKIP'
183+
partition( end ) = 0;
184+
185+
%%% BEGIN PELT Pruning
186+
case 'PELT'
187+
active_arrlist = 0:N-1; % pointers to previous index
188+
%active_list = java.util.LinkedList();
189+
190+
state_cell = cell(N, 3);
191+
for rb=2:N-1
192+
% generates the initial states
193+
[eps_lr, R, z] = startEpsLR(yi(rb:rb+1,:), d(rb), alpha(rb:rb+1), beta);
194+
state_cell{rb, 1} = eps_lr;
195+
state_cell{rb, 2} = R;
196+
state_cell{rb, 3} = z;
197+
stored_R = R;
198+
stored_z = z;
171199
end
172-
173-
% pruning to skip unreachable configurations
174-
% (if this condition is met the following if-condition cannot never
175-
% be fulfilled because eps_lr is monote increasing and F >= 0.)
176-
if (eps_lr + gamma) >= F(rb)
177-
break
200+
%active_list.add(2)
201+
active_arrlist_endidx = 2;
202+
for rb=3:N
203+
% best left bound (blb) initialized with 1 corresponding to interval 1:rb
204+
% corresponding Bellman value has been set in the precomputation
205+
blb = 1;
206+
%listIterator = active_list.listIterator(active_list.size());
207+
%while (listIterator.hasPrevious())
208+
iter = active_arrlist_endidx;
209+
while active_arrlist(iter) > 1
210+
lb = active_arrlist(iter);
211+
eps_lr = state_cell{lb, 1};
212+
R = state_cell{lb, 2};
213+
z = state_cell{lb, 3};
214+
if rb - lb > 1
215+
[eps_lr, R, z] = updateEpsLR(eps_lr, R, yi(rb,:), d(rb-1), z, alpha(rb), beta);
216+
state_cell{lb, 1} = eps_lr;
217+
state_cell{lb, 2} = R;
218+
state_cell{lb, 3} = z;
219+
complexity_counter = complexity_counter + 1;
220+
end
221+
% check if setting a discontinuity between lb-1 and lb gives a
222+
% better energy
223+
candidate_value = F(lb-1) + gamma + eps_lr;
224+
if candidate_value < F(rb)
225+
F(rb ) = candidate_value;
226+
blb = lb;
227+
stored_R = R;
228+
stored_z = z;
229+
end
230+
iter = lb;
231+
end
232+
233+
% store the best left bound corresponding to the right bound rb
234+
partition( rb ) = blb-1;
235+
236+
%active_list.add(rb);
237+
active_arrlist_endidx = active_arrlist_endidx + 1;
238+
% PELT pruning
239+
%listIterator = active_list.listIterator();
240+
% while (listIterator.hasNext())
241+
% lb = listIterator.next();
242+
% if F(lb-1) + state_cell{lb, 1} > F(rb)
243+
% listIterator.remove();
244+
% end
245+
% end
246+
iter = active_arrlist_endidx;
247+
while active_arrlist(iter) > 1
248+
lb = active_arrlist(iter);
249+
if F(lb-1) + state_cell{lb, 1} > F(rb)
250+
active_arrlist(iter) = active_arrlist(lb);
251+
end
252+
iter = lb;
253+
end
254+
255+
if rb < N
256+
% compute estimated point and slope at end
257+
aux_ps = stored_R\stored_z;
258+
a_end = aux_ps(end-1, :);
259+
b_end = aux_ps(end, :);
260+
% compute rolling cv_score (for a future use)
261+
rcv_score = rcv_score + sum( (a_end + b_end * (xi(rb+1) - xi(rb)) - yi(rb+1,:)).^2 );
262+
end
178263
end
179-
180-
% check if setting a discontinuity between lb-1 and lb gives a
181-
% better energy
182-
candidate_value = F(lb-1) + gamma + eps_lr;
183-
if candidate_value < F(rb)
184-
F(rb ) = candidate_value;
185-
blb = lb;
264+
%%% END PELT PRUNING
265+
266+
%%% BEGIN FPVVI PRUNING
267+
otherwise
268+
for rb=3:N
269+
270+
% best left bound (blb) initialized with 1 corresponding to interval 1:rb
271+
% corresponding Bellman value has been set in the precomputation
272+
blb = 1;
273+
274+
% the loop is performed in reverse order so that we may use FPVVI-pruning
275+
for lb = rb-1:-1:2
276+
277+
if lb == rb-1
278+
complexity_counter = complexity_counter + 2;
279+
% get start configuration and store start state in R, z
280+
[eps_lr, R, z] = startEpsLR(yi([rb,rb-1],:), d(rb-1), alpha([rb,rb-1]), beta);
281+
stored_R = R;
282+
stored_z = z;
283+
else
284+
complexity_counter = complexity_counter + 1;
285+
% perform fast energy update and store current state in R, z
286+
[eps_lr, R, z] = updateEpsLR(eps_lr, R, yi(lb,:), d(lb), z, alpha(lb), beta);
287+
end
288+
289+
% pruning to skip unreachable configurations
290+
% (if this condition is met the following if-condition cannot never
291+
% be fulfilled because eps_lr is monote increasing and F >= 0.)
292+
if (eps_lr + gamma) >= F(rb)
293+
break
294+
end
295+
296+
% check if setting a discontinuity between lb-1 and lb gives a
297+
% better energy
298+
candidate_value = F(lb-1) + gamma + eps_lr;
299+
if candidate_value < F(rb)
300+
F(rb ) = candidate_value;
301+
blb = lb;
302+
stored_R = R;
303+
stored_z = z;
304+
end
305+
306+
end
307+
% store the best left bound corresponding to the right bound rb
308+
partition( rb ) = blb-1;
309+
310+
% print for debugging
311+
%fprintf(['rb:' num2str(rb) ', rb -lb:' num2str(rb - lb) '\n'])
312+
313+
if rb < N
314+
% compute estimated point and slope at end
315+
aux_ps = stored_R\stored_z;
316+
a_end = aux_ps(end-1, :);
317+
b_end = aux_ps(end, :);
318+
% compute rolling cv_score
319+
rcv_score = rcv_score + sum( (a_end + b_end * (xi(rb+1) - xi(rb)) - yi(rb+1,:)).^2 );
320+
end
186321
end
187322

188-
end
189-
% store the best left bound corresponding to the right bound rb
190-
partition( rb ) = blb-1;
323+
%%% END FPVVI PRUNING
191324
end
192325
%%% END MAIN LOOP
193-
194-
195326
end
196327
%%% END CSSD CASE
197328

@@ -217,7 +348,7 @@
217348
interval_cell{end+1} = interval; %#ok<AGROW> (runtime not critical in this part of the algorithm)
218349
if length(interval) == 1 % this case should happen rarely but may happen e.g. for data of uneven length and low gamma parameter
219350
ymtx = zeros(4,D);
220-
ymtx(:,D) = yi(interval, :);
351+
ymtx(end,:) = yi(interval, :);
221352
pp = ppmak([lower_discont, upper_discont], ymtx', D);
222353
else
223354
pp = csaps(xi(interval),yi(interval, :)', p, [], wi(interval));
@@ -253,17 +384,23 @@
253384
output.discont_idx(i) = output.interval_cell{i}(end);
254385
end
255386

387+
output.rcv_score = rcv_score/(N-2); % prediction is from point 3 to point N
388+
256389
if isempty(xx)
257390
output.yy = [];
258391
else
259392
output.yy = ppval(pp, xx);
260393
end
394+
output.xx = xx;
395+
output.x = xi;
396+
output.y = yi;
261397

262398
fun_cell = cell(numel(pp_cell),1);
263399
for i=1:numel(pp_cell)
264400
fun_cell{i} = @(xx) ppval(pp_cell{i}, xx);
265401
end
266402
output.pcw_fun = PcwFunReal([-Inf; discont(:); Inf], fun_cell);
403+
output.complexity_counter = complexity_counter;
267404
%%% END SET OUTPUT
268405

269406
end

0 commit comments

Comments
 (0)