-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathoptimize.py
556 lines (414 loc) · 15.9 KB
/
optimize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
'''
This file defines the primary step in the parameter estimation workflow, plus
several accessory functions.
'''
from __future__ import division
from itertools import izip
import time
import numpy as np
import utils.liveout as lo
import structure
import fitting
import equations
import utils.linalg as la
import utils.vectorops as vo
from initialization import build_initial_parameter_values
# TODO: some way to pass these as optional arguments
DISEQU_WEIGHTS = (
np.logspace(-5, +15, 41)
)
MAX_ITERATIONS = int(1e6) # maximum number of iteration steps per epoch
PERTURBATION_SCALE = np.logspace(
+2,
-6,
MAX_ITERATIONS
)
CONVERGENCE_RATE = ( # if the objective fails to improve at this rate, assume convergence and move on to the next step
1e-4 # default
# 0 # force all iterations - very slow
)
CONVERGENCE_TIME = int(1e4) # number of iterations between checks to compare - should probably scale with problem size
FORCE_BETTER_INIT = True # use bounds for parsimonious perturbations for initialization, regardless of perturbation approach
TARGET_PYRUVATE_PRODUCTION = 0.14e-3 # the target rate at which the system produces pyruvate, in M/s
LOG_TIME = 10.0 # max time, in seconds, between logging events
TABLE_FIELDS = (
lo.Field('Epoch', 'n'),
lo.Field('Con. pen.', '.2e', 10),
lo.Field('Iteration', 'n'),
lo.Field('Total error', '.3e', 12),
lo.Field('Misfit error', '.3e', 12),
lo.Field('UnSS error', '.3e', 10),
)
# These options are just for demonstration purposes (enhanced speed vs.
# default numpy operations). Using my custom functions significantly improves
# optimization times (roughly half of the overall time utilized by the
# numpy-based composed operations). I attribute these gains to 1) reduced
# overhead and 2) benefits of BLAS (or similar) accelerated dot products.
USE_CUSTOM_FUNCTIONS = True # uses several custom operations designed and optimized for short vectors
USE_NORMS = False # if USE_CUSTOM_FUNCTIONS is False, will try to use norms instead of composed operations (norms are slightly slower)
def fast_square_singleton(x):
'''
Probably faster than alternatives. Apparent gains are minimal.
'''
return x*x
if USE_CUSTOM_FUNCTIONS:
square_singleton = fast_square_singleton
median1d = vo.fast_shortarray_median1d
# median1d = vo.fast_shortarray_median1d_partition # alternative; scales better but worse for small vectors
sumabs1d = vo.fast_shortarray_sumabs1d
sumsq1d = vo.fast_shortarray_sumsq1d
else:
square_singleton = np.square
median1d = np.median
if USE_NORMS:
sumabs1d = lambda x: np.linalg.norm(x, 1)
sumsq1d = lambda x: np.square(np.linalg.norm(x, 2))
else:
sumabs1d = lambda x: np.sum(np.abs(x))
sumsq1d = lambda x: np.sum(np.square(x))
def compute_abs_fit(pars, fitting_tensors):
'''
Computes the degree of fit to the training data.
'''
(fitting_matrix, fitting_values) = fitting_tensors[:2] # TODO: check overhead of these unpacking operations
return sumabs1d(fitting_matrix.dot(pars) - fitting_values)
def compute_upper_fit(pars, upper_fitting_tensors):
'''
Computes the degree of fit to the training data (single-sided penalty).
'''
(fitting_matrix, fitting_values) = upper_fitting_tensors[:2]
if fitting_values.size:
# TODO: consider optimizing sum-fmax operation e.g. (x > 0).dot(x)
return np.sum(np.fmax(fitting_matrix.dot(pars) - fitting_values, 0.0))
else:
# In the standard problem we don't use these penalty functions, so this
# allows us to skip several expensive (but empty) operations
return 0.0
def compute_relative_fit(pars, relative_fitting_tensor_sets):
'''
Computes the degree of fit to the training data (relative).
'''
cost = 0.0
for fm, fv, fe in relative_fitting_tensor_sets:
predicted = fm.dot(pars) # TODO: combine dot products into one operation? might speed up
d = predicted - fv
m = median1d(d)
cost += sumabs1d(d - m)
return cost
def compute_overall_fit(pars, fitting_tensors, upper_fitting_tensors, relative_fitting_tensor_sets):
'''
Computes the total degree of fit (misfit cost).
'''
return (
compute_abs_fit(pars, fitting_tensors)
+ compute_upper_fit(pars, upper_fitting_tensors)
+ compute_relative_fit(pars, relative_fitting_tensor_sets)
)
class ObjectiveValues(object):
'''
Convenience class for storing objective values.
'''
def __init__(self, pars, fitting_tensors, upper_fitting_tensors, relative_fitting_tensor_sets = ()):
(v, dc_dt, dglc_dt) = equations.compute_all(pars, *equations.args)
self.mass_eq = sumsq1d(structure.dynamic_molar_masses * dc_dt)
self.energy_eq = sumsq1d(dglc_dt)
net_pyruvate_production = v[-2] - v[-1]
self.flux = square_singleton(net_pyruvate_production / TARGET_PYRUVATE_PRODUCTION - 1.0)
self.fit = compute_overall_fit(
pars,
fitting_tensors,
upper_fitting_tensors,
relative_fitting_tensor_sets
)
def misfit_error(self):
return self.fit
def disequilibrium_error(self): # TODO: rename as unsteadystate_error
return self.mass_eq + self.energy_eq + self.flux
def total(self, disequ_weight):
return self.misfit_error() + disequ_weight * self.disequilibrium_error()
def nonredundant_vectors(vectors, tolerance = 1e-15):
'''
Discards vectors that are equivalent to subsequent vectors under scaling.
'''
retained = []
normed = [v / np.sqrt(sumsq1d(v)) for v in vectors]
# TODO: refactor as a matrix-matrix product?
for i, vi in enumerate(normed):
for vj in normed[i+1:]:
if 1 - np.abs(vi.dot(vj)) < tolerance:
break
else:
retained.append(vectors[i])
return retained
def build_perturbation_vectors(naive = False):
'''
Builds the perturbation vectors using the procedure described in the text.
By default, the perturbations are 'parsimonious'.
'''
if naive:
# The standard parameter matrix corresponds to the 'usual'
# parameterization of a kinetic model, in terms of kcat's, metabolite
# and enzyme concentrations, saturation constants (KM's), and Gibbs
# standard energies of formation.
# These perturbations are 'simple' in the sense that a perturbation to
# a given 'standard' parameter does not influence the value of any
# other 'standard' parameter.
perturbation_vectors = list(
np.linalg.pinv(structure.standard_parameter_matrix).T.copy()
)
# TODO: assert perturbations are simple
else:
perturbation_vectors = list(
la.bilevel_elementwise_pseudoinverse(
np.concatenate([
structure.activity_matrix,
np.identity(structure.n_parameters),
]),
structure.activity_matrix
).T.copy()
)
# Many of the basic parameter perturbation vectors end up being
# identical to the 'activity matrix' perturbation vectors, so I detect
# and strip those from the set.
perturbation_vectors = nonredundant_vectors(perturbation_vectors)
return np.array(perturbation_vectors)
import bounds
def build_bounds(naive = False):
'''
Builds the problem bounds. By default, the bounds are 'parsimonious'.
'''
if naive:
# TODO: move this to bounds.py
import constants
lower_conc = bounds.LOWER_CONC
upper_conc = bounds.UPPER_CONC
lower_enz_conc = 1e-10 # avg 1/10 molecules per cell
upper_enz_conc = 1e-3 # most abundant protein is about 750 copies per cell, 1 molecule per E. coli cell ~ 1 nM
# average protein abundance ~ 1 uM
lower_kcat = 1e-2 # gives average kcat of about 100 w/ upper kcat of 1e6
upper_kcat = 1e6 # catalase is around 1e5 /s
lower_KM = lower_conc
upper_KM = upper_conc
lower_glc = constants.RT * np.log(lower_conc)
upper_glc = constants.RT * np.log(upper_conc)
lower_gelc = constants.RT * np.log(lower_enz_conc)
upper_gelc = constants.RT * np.log(upper_enz_conc)
lower_log_kcat = -constants.RT * np.log(upper_kcat / constants.K_STAR)
upper_log_kcat = -constants.RT * np.log(lower_kcat / constants.K_STAR)
lower_log_KM = -constants.RT * np.log(upper_KM)
upper_log_KM = -constants.RT * np.log(lower_KM)
bounds_matrix = np.concatenate([
structure.full_glc_association_matrix,
structure.gelc_association_matrix,
structure.kcat_f_matrix,
# structure.kcat_r_matrix,
structure.KM_f_matrix,
structure.KM_r_matrix,
# structure.Keq_matrix
])
lowerbounds = np.concatenate([
[lower_glc] * structure.full_glc_association_matrix.shape[0],
[lower_gelc] * structure.gelc_association_matrix.shape[0],
[lower_log_kcat] * structure.kcat_f_matrix.shape[0],
# [lower_log_kcat] * structure.kcat_r_matrix.shape[0],
[lower_log_KM] * structure.KM_f_matrix.shape[0],
[lower_log_KM] * structure.KM_r_matrix.shape[0],
# [lower_log_Keq] * structure.Keq_matrix.shape[0],
])
upperbounds = np.concatenate([
[upper_glc] * structure.full_glc_association_matrix.shape[0],
[upper_gelc] * structure.gelc_association_matrix.shape[0],
[upper_log_kcat] * structure.kcat_f_matrix.shape[0],
# [upper_log_kcat] * structure.kcat_r_matrix.shape[0],
[upper_log_KM] * structure.KM_f_matrix.shape[0],
[upper_log_KM] * structure.KM_r_matrix.shape[0],
# [upper_log_Keq] * structure.Keq_matrix.shape[0],
])
# I'm excluding reverse kcat's because it adds some non-trivial (that
# is, not 'naive') bounding logic, which in turn allows for non-naive
# perturbations on the system.
# Including reverse kcat bounds does improve the frequency of
# convergence to some extent, but I want a clean separation between
# the 'naive' system and my augmented approach.
# I can't include Keq bounds because those are interdependent (and
# therefore not 'naive'). Removing the relationship between reverse
# and forward kcat's would fundamentally change the model in such a
# fashion that the energy balance (non-decreasing entropy) is not
# enforced.
# Unlike all other 'standard' parameters, I do not bound Gibbs standard
# energies of formation. There are no obvious or useful bounds on
# those values.
else:
bounds_matrix = bounds.BOUNDS_MATRIX
lowerbounds = bounds.LOWERBOUNDS
upperbounds = bounds.UPPERBOUNDS
return bounds_matrix, lowerbounds, upperbounds
def seconds_to_hms(t):
'''
Convenience function for computing time in total seconds to time in hours,
minutes, and seconds.
'''
hours = t//3600
minutes = t//60 - 60*hours
seconds = t - 60*minutes - 3600*hours
return (hours, minutes, seconds)
def empty_callback(epoch, iteration, constraint_penalty_weight, obj_components):
'''
Example, empty callback provided to the estimate_parameters function.
'''
pass
def estimate_parameters(
fitting_rules_and_weights = tuple(),
random_state = None,
naive = False,
random_direction = False,
callback = empty_callback
):
'''
The optimization procedure, described extensively in the manuscript.
'''
# TODO: pass initial parameters, bounds, perturbation vectors
# TODO: pass metaparameters
# TODO: more results/options/callbacks
# TODO: logging as callbacks
print 'Initializing optimization.'
time_start = time.time() # TODO: timing convenience classes
if random_state is None:
print 'No random state provided.'
random_state = np.random.RandomState()
fitting_tensors = (
fitting_matrix,
fitting_values,
fitting_entries
) = fitting.build_fitting_tensors(*fitting_rules_and_weights)
upper_fitting_tensors = (
upper_fitting_matrix,
upper_fitting_values,
upper_fitting_entries
) = fitting.build_upper_fitting_tensors(*fitting_rules_and_weights)
relative_fitting_tensor_sets = fitting.build_relative_fitting_tensor_sets(
*fitting_rules_and_weights
)
(bounds_matrix, lowerbounds, upperbounds) = build_bounds(naive)
inverse_bounds_matrix = np.linalg.pinv(bounds_matrix)
# TODO: if the bounds become more sophisticated, use the calculation for
# the bilevel elementwise pseudoinverse to acquire the inverse bounds
# reprojection vectors. right now it isequivalent to the normal
# pseudoinverse
if FORCE_BETTER_INIT:
init_lowerbounds = bounds.LOWERBOUNDS
init_upperbounds = bounds.UPPERBOUNDS
init_bounds_matrix = bounds.BOUNDS_MATRIX
else:
init_lowerbounds = lowerbounds
init_upperbounds = upperbounds
init_bounds_matrix = bounds_matrix
(init_pars, init_fitness) = build_initial_parameter_values(
init_bounds_matrix, (init_lowerbounds + init_upperbounds)/2.0,
np.concatenate([-init_bounds_matrix, +init_bounds_matrix]), np.concatenate([-init_lowerbounds, +init_upperbounds]),
fitting_matrix, fitting_values,
upper_fitting_matrix, upper_fitting_values,
*[(fm, fv) for (fm, fv, fe) in relative_fitting_tensor_sets]
)
# init_pars = np.load('optimized_pars.npy')
print 'Initial (minimal) fitness: {:0.2f}'.format(init_fitness)
init_obj_components = ObjectiveValues(
init_pars,
fitting_tensors,
upper_fitting_tensors,
relative_fitting_tensor_sets
)
table = lo.Table(TABLE_FIELDS)
last_log_time = time.time()
best_pars = init_pars.copy()
best_obj_components = init_obj_components
perturbation_vectors = build_perturbation_vectors(naive)
n_perturb = len(perturbation_vectors)
ibm_v = inverse_bounds_matrix.T.copy() # Copy needed to enforce memory-contiguity
for (epoch, disequ_weight) in enumerate(DISEQU_WEIGHTS):
history_best_objective = []
# Need to re-evaluate the objective at the start of every epoch since the weight changes
best_obj = best_obj_components.total(disequ_weight)
for (iteration, deviation) in enumerate(PERTURBATION_SCALE):
if not random_direction:
dimension = random_state.randint(n_perturb)
direction = perturbation_vectors[dimension]
else:
# TODO: optimize this optional approach
coeffs = random_state.normal(size = n_perturb)
coeffs /= np.sqrt(sumsq1d(coeffs))
direction = perturbation_vectors.T.dot(coeffs) # dot product on transposed matrix is probably slow
scale = deviation * random_state.normal()
perturbation = scale * direction
new_pars = best_pars + perturbation
# There are various possible execution strategies for bounding.
# This setup gives the best performance, probably because
# 'unbounded' is typically False everywhere.
new_acts = bounds_matrix.dot(new_pars)
unbounded = (lowerbounds > new_acts) | (upperbounds < new_acts)
for i in np.where(unbounded)[0]:
bounded = random_state.uniform(lowerbounds[i], upperbounds[i])
new_pars += (bounded - new_acts[i]) * ibm_v[i]
new_obj_components = ObjectiveValues(
new_pars,
fitting_tensors,
upper_fitting_tensors,
relative_fitting_tensor_sets
)
new_obj = new_obj_components.total(disequ_weight)
did_accept = (new_obj < best_obj)
history_best_objective.append(best_obj)
if did_accept:
best_obj_components = new_obj_components
best_obj = new_obj
best_pars = new_pars
log = False
quit = False
if (iteration == 0) or (iteration == MAX_ITERATIONS-1):
log = True
if time.time() - last_log_time > LOG_TIME:
log = True
if (iteration >= CONVERGENCE_TIME) and (
(
1.0 - best_obj / history_best_objective[-CONVERGENCE_TIME]
) < CONVERGENCE_RATE
) and (
epoch < len(DISEQU_WEIGHTS)-1
):
# TODO: instead of ending the epoch, jump ahead to a time where
# perturbations are smaller
log = True
quit = True
if log:
table.write(
epoch,
disequ_weight,
iteration,
best_obj,
best_obj_components.misfit_error(),
best_obj_components.disequilibrium_error()
)
last_log_time = time.time()
if log or did_accept:
callback(
epoch, iteration, disequ_weight,
new_obj_components
)
if quit:
break
time_run = time.time() - time_start
print 'Total optimization time: {:02n}:{:02n}:{:05.2f} (H:M:S)'.format(*seconds_to_hms(time_run))
# TODO: consider polishing via gradient descent or just be minimizing
# misfit using an L1 norm subject to constraints on intermediate values,
# either at the end of the whole optimization or the end of each epoch.
return (best_pars, best_obj_components)
if __name__ == '__main__':
import problems
definition = problems.DEFINITIONS['all_scaled']
(pars, obj) = estimate_parameters(
definition,
random_state = np.random.RandomState(0),
# naive = True,
# random_direction = True
)
np.save('optimized_pars.npy', pars)