diff --git a/bounds.py b/bounds.py
index af23899..ca80ff8 100644
--- a/bounds.py
+++ b/bounds.py
@@ -1,4 +1,13 @@
+'''
+
+Defines the parameter value bounds. Only used in 'parsimonious' perturbation.
+See optimize.py for the 'naive' bounds.
+
+TODO: move 'naive' bounds to this file or another file
+
+'''
+
 from __future__ import division
 
 import numpy as np
 
diff --git a/constants.py b/constants.py
index 8ba7678..37e74d5 100644
--- a/constants.py
+++ b/constants.py
@@ -1,4 +1,10 @@
+'''
+
+Various constants used in the optimization or elsewhere.
+
+'''
+
 import numpy as np
 
 RT = 0.6 # kcal/mol - dependent on temperature
 
diff --git a/data.py b/data.py
index 2421bce..2618854 100644
--- a/data.py
+++ b/data.py
@@ -1,4 +1,11 @@
+'''
+
+Defines the datatypes that will be loaded, and loads them into a 'knowledge
+base' object (kb).
+
+'''
+
 from __future__ import division
 
 import os
diff --git a/determinacy.py b/determinacy.py
deleted file mode 100644
index 7328d9b..0000000
--- a/determinacy.py
+++ /dev/null
@@ -1,88 +0,0 @@
-
-from __future__ import division
-
-import numpy as np
-
-import problems
-
-import fitting
-
-from utils.linalg import nullspace_projector
-
-import structure
-
-# problem = (
-#     # 'all_scaled'
-#     'all_scaled_upper_sat_limits_1e0' # should be the same determinacy for any weighting
-#     )
-
-def get_determinacy_vectors(problem):
-    fitting_rules_and_weights = problems.DEFINITIONS[problem]
-
-    fitting_tensors = (
-        fitting_matrix,
-        fitting_values,
-        fitting_entries
-        ) = fitting.build_fitting_tensors(*fitting_rules_and_weights)
-
-    upper_fitting_tensors = (
-        upper_fitting_matrix,
-        upper_fitting_values,
-        upper_fitting_entries
-        ) = fitting.build_upper_fitting_tensors(*fitting_rules_and_weights)
-
-    relative_fitting_tensor_sets = fitting.build_relative_fitting_tensor_sets(
-        *fitting_rules_and_weights
-        )
-
-    n_original_variables = fitting_matrix.shape[1]
-    n_accessory_variables = len(relative_fitting_tensor_sets)
-
-    def ones_column_matrix(n, m, i):
-        out = np.zeros((n, m))
-        out[:, i] = 1
-        return out
-
-    full_fitting_matrix = np.vstack([
-        np.hstack([fitting_matrix, np.zeros((fitting_matrix.shape[0], n_accessory_variables))]),
-        np.hstack([upper_fitting_matrix, np.zeros((upper_fitting_matrix.shape[0], n_accessory_variables))]),
-        ] + [
-        np.hstack([rfts[0], -ones_column_matrix(rfts[0].shape[0], n_accessory_variables, i)])
-        for i, rfts in enumerate(relative_fitting_tensor_sets)
-        ])
-
-    undetermined = np.all(full_fitting_matrix == 0, 0)[:n_original_variables]
-
-    nullspace = np.round(nullspace_projector(full_fitting_matrix), 10) # 64bit floating point precision is about 15 decimal places; I go with 10 to be safe
-
-    fully_determined = np.all(nullspace == 0, 0)[:n_original_variables]
-
-    partially_determined = ~undetermined & ~fully_determined
-
-    return undetermined, partially_determined, fully_determined
-
-# print 'Undetermined parameters ({})'.format(undetermined.sum())
-# print '-'*79
-# print '\n'.join(np.array(structure.parameters)[undetermined])
-
-# print
-
-# print 'Partially determined parameters ({})'.format(partially_determined.sum())
-# print '-'*79
-# print '\n'.join(np.array(structure.parameters)[partially_determined])
-
-# print
-
-# print 'Fully determined parameters ({})'.format(fully_determined.sum())
-# print '-'*79
-# print '\n'.join(np.array(structure.parameters)[fully_determined])
-
-if __name__ == '__main__':
-    u1, p1, f1 = get_determinacy_vectors('all_scaled')
-
-    u2, p2, f2 = get_determinacy_vectors('all_scaled_upper_sat_limits_1e0')
-
-    print (u1 & u2).sum(), (p1 & u2).sum(), (f1 & u2).sum(), u2.sum()
-    print (u1 & p2).sum(), (p1 & p2).sum(), (f1 & p2).sum(), p2.sum()
-    print (u1 & f2).sum(), (p1 & f2).sum(), (f1 & f2).sum(), f2.sum()
-    print u1.sum(), p1.sum(), f1.sum(), structure.n_parameters
diff --git a/distribution_shift.py b/distribution_shift.py
deleted file mode 100644
index e80a31a..0000000
--- a/distribution_shift.py
+++ /dev/null
@@ -1,66 +0,0 @@
-
-from __future__ import division
-
-import os
-
-from itertools import izip
-
-import numpy as np
-
-import figure5
-from ks_test import ks_test
-
-(residuals_standard, indexing_standard) = figure5.get_residuals_and_indexing(
-    os.path.join('out', 'all_scaled')
-    )
-
-(residuals_penalized, indexing_penalized) = figure5.get_residuals_and_indexing(
-    os.path.join('out', 'all_scaled_upper_sat_limits_1e2')
-    )
-
-assert np.all(indexing_standard == indexing_penalized)
-
-indexing = indexing_standard.copy()
-
-unique_values, unique_indices = np.unique(indexing, True)
-
-# p_values = []
-
-# for i in unique_indices:
-#     rs = residuals_standard[i]
-#     rp = residuals_penalized[i]
-#     p_values.append(ks_test(rs, rp)[1])
-
-# sorting = np.argsort(p_values)
-
-median_standard = np.median(residuals_standard[unique_indices, :], 1)
-median_penalized = np.median(residuals_penalized[unique_indices, :], 1)
-
-abs_diff = np.abs(median_penalized - median_standard)
-
-sorting = np.argsort(abs_diff)
-
-import utils.residuals
-
-print 'Median differences exceeding 10x'
-print '-'*40
-
-for s in sorting:
-
-    if abs_diff[s] < utils.residuals.TENFOLD:
-        continue
-
-    (d, r, c) = unique_values[s]
-
-    out = []
-
-    if d != -1:
-        out.append(figure5.DATATYPES_ORDERED[d])
-
-    if r != -1:
-        out.append(figure5.REACTIONS_ORDERED[r])
-
-    if c != -1:
-        out.append(figure5.COMPOUNDS_ORDERED[c])
-
-    print ':'.join(out)
diff --git a/equations.py b/equations.py
index 459f65b..81feafd 100644
--- a/equations.py
+++ b/equations.py
@@ -1,186 +1,198 @@
-from __future__ import division
-
-def build_equations_uncompiled():
-    import numpy as np
-
-    def reaction_rates(
-        gibbs_energies,
-
-        mu,
-
-        k_star,
-        RT,
-
-        forward_reaction_potential,
-        reverse_reaction_potential,
-
-        forward_binding_potential,
-        reverse_binding_potential,
-
-        reaction_forward_binding_association,
-        reaction_reverse_binding_association,
-
-        stoich,
-
-        glc_association,
-        ):
-        frp = forward_reaction_potential.dot(gibbs_energies)
-        rrp = reverse_reaction_potential.dot(gibbs_energies)
-
-        fbp = forward_binding_potential.dot(gibbs_energies)
-        rbp = reverse_binding_potential.dot(gibbs_energies)
-
-        denom = (
-            1
-            + reaction_forward_binding_association.dot(np.exp(-fbp/RT))
-            + reaction_reverse_binding_association.dot(np.exp(-rbp/RT))
-            )
-
-        return k_star * (np.exp(-frp/RT) - np.exp(-rrp/RT)) / denom
-
-    def dc_dt(
-        gibbs_energies,
-
-        mu,
-
-        k_star,
-        RT,
-
-        forward_reaction_potential,
-        reverse_reaction_potential,
-
-        forward_binding_potential,
-        reverse_binding_potential,
-
-        reaction_forward_binding_association,
-        reaction_reverse_binding_association,
-
-        stoich,
-
-        glc_association,
-        ):
-        frp = forward_reaction_potential.dot(gibbs_energies)
-        rrp = reverse_reaction_potential.dot(gibbs_energies)
-
-        fbp = forward_binding_potential.dot(gibbs_energies)
-        rbp = reverse_binding_potential.dot(gibbs_energies)
-
-        denom = (
-            1
-            + reaction_forward_binding_association.dot(np.exp(-fbp/RT))
-            + reaction_reverse_binding_association.dot(np.exp(-rbp/RT))
-            )
-
-        v = k_star * (np.exp(-frp/RT) - np.exp(-rrp/RT)) / denom
-
-        glc = glc_association.dot(gibbs_energies)
-
-        c = np.exp(glc/RT)
-
-        return stoich.dot(v) - mu * c
-
-    def dglc_dt(
-        gibbs_energies,
-
-        mu,
-
-        k_star,
-        RT,
-
-        forward_reaction_potential,
-        reverse_reaction_potential,
-
-        forward_binding_potential,
-        reverse_binding_potential,
-
-        reaction_forward_binding_association,
-        reaction_reverse_binding_association,
-
-        stoich,
-
-        glc_association,
-        ):
-        frp = forward_reaction_potential.dot(gibbs_energies)
-        rrp = reverse_reaction_potential.dot(gibbs_energies)
-
-        fbp = forward_binding_potential.dot(gibbs_energies)
-        rbp = reverse_binding_potential.dot(gibbs_energies)
-
-        denom = (
-            1
-            + reaction_forward_binding_association.dot(np.exp(-fbp/RT))
-            + reaction_reverse_binding_association.dot(np.exp(-rbp/RT))
-            )
-
-        v = k_star * (np.exp(-frp/RT) - np.exp(-rrp/RT)) / denom
-
-        glc = glc_association.dot(gibbs_energies)
-
-        return RT * np.exp(-glc/RT) * stoich.dot(v) - RT * mu
-
-    def compute_all(
-        gibbs_energies,
-
-        mu,
-
-        k_star,
-        RT,
-
-        forward_reaction_potential,
-        reverse_reaction_potential,
-
-        forward_binding_potential,
-        reverse_binding_potential,
-
-        reaction_forward_binding_association,
-        reaction_reverse_binding_association,
-
-        stoich,
-
-        glc_association,
-        ):
-        frp = forward_reaction_potential.dot(gibbs_energies)
-        rrp = reverse_reaction_potential.dot(gibbs_energies)
-
-        fbp = forward_binding_potential.dot(gibbs_energies)
-        rbp = reverse_binding_potential.dot(gibbs_energies)
-
-        denom = (
-            1
-            + reaction_forward_binding_association.dot(np.exp(-fbp/RT))
-            + reaction_reverse_binding_association.dot(np.exp(-rbp/RT))
-            )
-
-        v = k_star * (np.exp(-frp/RT) - np.exp(-rrp/RT)) / denom
-
-        glc = glc_association.dot(gibbs_energies)
-
-        c = np.exp(glc/RT)
-
-        dc = stoich.dot(v) - mu * c
-
-        dglc = RT * np.exp(-glc/RT) * stoich.dot(v) - RT * mu
-
-        return (v, dc, dglc)
-
-    return (
-        reaction_rates,
-        dc_dt,
-        dglc_dt,
-        compute_all
-        )
-
-# jac_dglc_dt = build_jacobian()
-
-(
-    reaction_rates,
-    dc_dt,
-    dglc_dt,
-    compute_all
-    ) = build_equations_uncompiled()
-
-import numpy as np
+'''
+
+Defines the equations for the modeled dynamical system. Most of the arguments
+to these functions are matrices generated by structure.py.
+
+Call pattern:
+
+(v, dc_dt, dglc_dt) = equations.compute_all(parameter_values, *equations.args)
+
+'''
+
+from __future__ import division
+
+import numpy as np
+
+def reaction_rates(
+    gibbs_energies,
+
+    mu,
+
+    k_star,
+    RT,
+
+    forward_reaction_potential,
+    reverse_reaction_potential,
+
+    forward_binding_potential,
+    reverse_binding_potential,
+
+    reaction_forward_binding_association,
+    reaction_reverse_binding_association,
+
+    stoich,
+
+    glc_association,
+    ):
+    '''
+    Computes net reaction rates.
+    '''
+
+    frp = forward_reaction_potential.dot(gibbs_energies)
+    rrp = reverse_reaction_potential.dot(gibbs_energies)
+
+    fbp = forward_binding_potential.dot(gibbs_energies)
+    rbp = reverse_binding_potential.dot(gibbs_energies)
+
+    denom = (
+        1
+        + reaction_forward_binding_association.dot(np.exp(-fbp/RT))
+        + reaction_reverse_binding_association.dot(np.exp(-rbp/RT))
+        )
+
+    return k_star * (np.exp(-frp/RT) - np.exp(-rrp/RT)) / denom
+
+def dc_dt(
+    gibbs_energies,
+
+    mu,
+
+    k_star,
+    RT,
+
+    forward_reaction_potential,
+    reverse_reaction_potential,
+
+    forward_binding_potential,
+    reverse_binding_potential,
+
+    reaction_forward_binding_association,
+    reaction_reverse_binding_association,
+
+    stoich,
+
+    glc_association,
+    ):
+    '''
+    Computes dc/dt, where 'c' is the vector of metabolite concentrations.
+    '''
+
+    frp = forward_reaction_potential.dot(gibbs_energies)
+    rrp = reverse_reaction_potential.dot(gibbs_energies)
+
+    fbp = forward_binding_potential.dot(gibbs_energies)
+    rbp = reverse_binding_potential.dot(gibbs_energies)
+
+    denom = (
+        1
+        + reaction_forward_binding_association.dot(np.exp(-fbp/RT))
+        + reaction_reverse_binding_association.dot(np.exp(-rbp/RT))
+        )
+
+    v = k_star * (np.exp(-frp/RT) - np.exp(-rrp/RT)) / denom
+
+    glc = glc_association.dot(gibbs_energies)
+
+    c = np.exp(glc/RT)
+
+    return stoich.dot(v) - mu * c
+
+def dglc_dt(
+    gibbs_energies,
+
+    mu,
+
+    k_star,
+    RT,
+
+    forward_reaction_potential,
+    reverse_reaction_potential,
+
+    forward_binding_potential,
+    reverse_binding_potential,
+
+    reaction_forward_binding_association,
+    reaction_reverse_binding_association,
+
+    stoich,
+
+    glc_association,
+    ):
+    '''
+    Computes dglc/dt, where 'glc' is the Gibbs energy derived from the
+    logarithm of the metabolite concentrations.
+    '''
+
+    frp = forward_reaction_potential.dot(gibbs_energies)
+    rrp = reverse_reaction_potential.dot(gibbs_energies)
+
+    fbp = forward_binding_potential.dot(gibbs_energies)
+    rbp = reverse_binding_potential.dot(gibbs_energies)
+
+    denom = (
+        1
+        + reaction_forward_binding_association.dot(np.exp(-fbp/RT))
+        + reaction_reverse_binding_association.dot(np.exp(-rbp/RT))
+        )
+
+    v = k_star * (np.exp(-frp/RT) - np.exp(-rrp/RT)) / denom
+
+    glc = glc_association.dot(gibbs_energies)
+
+    return RT * np.exp(-glc/RT) * stoich.dot(v) - RT * mu
+
+def compute_all(
+    gibbs_energies,
+
+    mu,
+
+    k_star,
+    RT,
+
+    forward_reaction_potential,
+    reverse_reaction_potential,
+
+    forward_binding_potential,
+    reverse_binding_potential,
+
+    reaction_forward_binding_association,
+    reaction_reverse_binding_association,
+
+    stoich,
+
+    glc_association,
+    ):
+    '''
+    Computes all the terms needed for the optimization. This is done in one
+    function rather than by calling the individual functions because several
+    intermediate calculations can be reused.
+    '''
+
+    frp = forward_reaction_potential.dot(gibbs_energies)
+    rrp = reverse_reaction_potential.dot(gibbs_energies)
+
+    fbp = forward_binding_potential.dot(gibbs_energies)
+    rbp = reverse_binding_potential.dot(gibbs_energies)
+
+    denom = (
+        1
+        + reaction_forward_binding_association.dot(np.exp(-fbp/RT))
+        + reaction_reverse_binding_association.dot(np.exp(-rbp/RT))
+        )
+
+    v = k_star * (np.exp(-frp/RT) - np.exp(-rrp/RT)) / denom
+
+    glc = glc_association.dot(gibbs_energies)
+
+    c = np.exp(glc/RT)
+
+    dc = stoich.dot(v) - mu * c
+
+    dglc = RT * np.exp(-glc/RT) * stoich.dot(v) - RT * mu
+
+    return (v, dc, dglc)
 
 def forward_reaction_rates(
     gibbs_energies,
@@ -203,6 +215,10 @@ def forward_reaction_rates(
 
     glc_association,
     ):
+    '''
+    Computes the forward rates of reaction.
+    '''
+
    frp = forward_reaction_potential.dot(gibbs_energies)
    # rrp = reverse_reaction_potential.dot(gibbs_energies)
 
@@ -238,6 +254,11 @@ def reverse_reaction_rates(
 
     glc_association,
     ):
+
+    '''
+    Computes the reverse rates of reaction.
+    '''
+
    # frp = forward_reaction_potential.dot(gibbs_energies)
    rrp = reverse_reaction_potential.dot(gibbs_energies)
 
@@ -334,6 +355,11 @@ def build_jacobian():
 import constants
 import structure
 
+'''
+args is the set of arguments passed to the equations above that aren't
+parameter values.
+'''
+
 args = (
     constants.MU,
     constants.K_STAR,
diff --git a/figure_fitness_vs_stability.py b/figure_fitness_vs_stability.py
deleted file mode 100644
index 30290f0..0000000
--- a/figure_fitness_vs_stability.py
+++ /dev/null
@@ -1,136 +0,0 @@
-
-from __future__ import division
-
-import os
-
-from itertools import izip
-
-import numpy as np
-import matplotlib.pyplot as plt
-
-import constants
-import problems
-import fitting
-
-SOURCES = (
-    os.path.join('out', 'all_scaled'),
-    os.path.join('out', 'all_scaled_upper_sat_limits_1e-1'),
-    os.path.join('out', 'all_scaled_upper_sat_limits_1e0'),
-    os.path.join('out', 'all_scaled_upper_sat_limits_1e1'),
-    os.path.join('out', 'all_scaled_upper_sat_limits_1e2'),
-    )
-
-NAMES = (
-    'original',
-    'penalty 1e-1',
-    'penalty 1e0',
-    'penalty 1e1',
-    'penalty 1e2',
-    )
-
-COLORS = [
-    np.array((225, 6, 133), np.float64)/255.,
-    np.array((83, 49, 0), np.float64)/255.,
-    np.array((143, 85, 0), np.float64)/255.,
-    np.array((221, 145, 23), np.float64)/255.,
-    np.array((251, 177, 37), np.float64)/255.,
-    # np.array((0, 0, 255), np.float64)/255.,
-    ]
-
-REFERENCE_RULES = problems.DEFINITIONS['all_scaled']
-
-def get_absolute_fit_residuals(pars):
-    (fm, fv, fe) = fitting.build_fitting_tensors(*REFERENCE_RULES)
-
-    residuals = fm.dot(pars) - fv[:, None]
-
-    return residuals
-
-def get_relative_fit_residuals(pars):
-    all_residuals = []
-
-    for (fm, fv, fe) in fitting.build_relative_fitting_tensor_sets(*REFERENCE_RULES):
-        raw_residuals = fm.dot(pars) - fv[:, None]
-
-        medians = np.median(raw_residuals, 0)
-
-        residuals = raw_residuals - medians[None, :]
-
-        all_residuals.append(residuals)
-
-    return np.row_stack(all_residuals)
-
-def load_pars(directory):
-    valid = np.load(os.path.join(directory, 'valid.npy'))
-    pars = np.load(os.path.join(directory, 'pars.npy'))
-
-    return pars[:, valid]
-
-# def load_fit(directory):
-#     valid = np.load(os.path.join(directory, 'valid.npy'))
-#     obj = np.load(os.path.join(directory, 'obj.npy'))
-
-#     return obj[3, valid]
-
-def load_lre(directory):
-    valid = np.load(os.path.join(directory, 'valid.npy'))
-    lre = np.load(os.path.join(directory, 'lre.npy'))
-
-    return lre[valid]
-
-def compute_fitness(pars):
-    return (
-        np.abs(get_absolute_fit_residuals(pars)).sum(0)
-        + np.abs(get_relative_fit_residuals(pars)).sum(0)
-        )
-
-plt.figure(figsize = (6, 6))
-
-N_COLS = int(np.ceil(np.sqrt(len(SOURCES))))
-N_ROWS = int(np.ceil(len(SOURCES) / N_COLS))
-
-for (i, (name, source, color)) in enumerate(izip(NAMES, SOURCES, COLORS)):
-
-    if i == 0:
-        ax1 = plt.subplot(N_ROWS, N_COLS, i+1)
-
-    else:
-        plt.subplot(N_ROWS, N_COLS, i+1, sharex = ax1, sharey = ax1)
-
-    crt = -constants.MU/load_lre(source)
-    pars = load_pars(source)
-
-    fit = compute_fitness(pars)
-
-    plt.plot(
-        crt, fit,
-        'o',
-        ms = 3, markeredgewidth = 0.5,
-        markerfacecolor = color, markeredgecolor = 'w',
-        label = name
-        )
-
-    plt.plot(
-        np.median(crt), np.median(fit),
-        '*',
-        ms = 8, markeredgewidth = 1,
-        markerfacecolor = 'w', markeredgecolor = 'k',
-        zorder = 10
-        )
-
-    plt.title(name)
-
-    plt.xscale('log')
-    # plt.yscale('log')
-
-    # plt.legend(loc = 'best')
-
-    # plt.xlabel(r'Characteristic recovery time, relative to $\tau = 1 / \mu$')
-    # plt.ylabel(r'Misfit error $f$')
-
-    plt.xlabel('CRT')
-    plt.ylabel('f')
-
-plt.tight_layout()
-
-plt.savefig('crt_vs_fit.pdf')
diff --git a/fitting.py b/fitting.py
index 12cf42a..3d6a918 100644
--- a/fitting.py
+++ b/fitting.py
@@ -1,4 +1,9 @@
+'''
+Builds the vectors and matrices used in the misfit cost function of the
+optimization problem.
+'''
+
 from __future__ import division
 
 import numpy as np
@@ -8,6 +13,10 @@
 import structure
 
 def find_weight(rules_and_weights, entry):
+    '''
+    Finds the right weight for a data entry, given a set of rules (filters)
+    and corresponding weights.
+    '''
     for rule, weight in rules_and_weights:
         if rule(entry):
             return weight
@@ -16,6 +25,9 @@
     raise Exception('Entry {} was not assigned a weight.'.format(entry))
 
 def field_value_rule(**fields_and_values):
+    '''
+    Convenience metafunction for generating a rule based on field values.
+    '''
     def rule(entry):
         return all(
             hasattr(entry, field) and (getattr(entry, field) in values)
@@ -25,6 +37,10 @@ def rule(entry):
 
     return rule
 
 def build_fitting_tensors(*rules_and_weights):
+    '''
+    Builds standard fitting problem matrices, vectors, and metadata.
+    '''
+
     if len(rules_and_weights) == 0:
         rules_and_weights = ((lambda entry: True, 1.0),)
@@ -435,8 +451,10 @@
     return fitting_mat, fitting_values, fitting_entries
 
 def build_upper_fitting_tensors(*rules_and_weights):
-    # Similar to normal fitting, except we only penalize for values above
-    # some threshold.
+    '''
+    Same as build_fitting_tensors, except these rules are only used to
+    penalize quantities above some threshold.
+    '''
 
     if len(rules_and_weights) == 0:
         rules_and_weights = ((lambda entry: True, 1.0),)
@@ -568,6 +586,12 @@
     return fitting_mat, fitting_values, fitting_entries
 
 def build_relative_fitting_tensor_sets(*rules_and_weights):
+    '''
+    Same as build_fitting_tensors, except these are used to fit data with
+    some degree of freedom (e.g. a set of values on an arbitrary but shared
+    basis).
+    '''
+
     if len(rules_and_weights) == 0:
         rules_and_weights = ((lambda entry: True, 1.0),)
@@ -635,10 +659,3 @@
         tensor_sets.append((gelc_mat, gelc_values, gelc_entries))
 
     return tensor_sets
-
-def test():
-    (fv, fm, fe) = build_fitting_tensors()
-    tensor_sets = build_relative_fitting_tensor_sets()
-
-if __name__ == '__main__':
-    test()
diff --git a/gather_output.py b/gather_output.py
index 80b5e07..b02b8fa 100644
--- a/gather_output.py
+++ b/gather_output.py
@@ -1,4 +1,13 @@
+'''
+
+Gathers the output from a set of parameter estimation runs into single files.
+Assumes n = 300 seeds (0-299). Call pattern:
+
+python gather_output.py
+
+'''
+
 from __future__ import division
 
 import os.path as pa
diff --git a/initialization.py b/initialization.py
index ba2aed7..c42b994 100644
--- a/initialization.py
+++ b/initialization.py
@@ -1,4 +1,12 @@
+'''
+
+Defines the function used to initialize the parameter estimation problem with
+a set of well-fit parameter values.
+
+'''
+
+
 from __future__ import division
 
 import numpy as np
@@ -31,8 +39,14 @@ def build_initial_parameter_values( # TODO: meaningful defaults
         upper_penalty_matrix, upper_penalty_values,
         *relative_penalty_matrices_and_values # matrix-value pairs
         ):
+    '''
+
+    Finds the best-fit set of parameter values given some optimization
+    problem, and then regularizes the unfit values (e.g. undetermined and
+    partially determined values) to the middle of the provided bounds.
+
+    '''
 
-    # TODO: documentation
     # TODO: thorough error checking on dimensions of inputs and outputs
 
     # Stage 1: set up and solve the linear program
@@ -295,25 +309,3 @@
     # TODO: error checking on stage 1, 2 output
 
     return x, f
-
-def test():
-    import fitting
-    import problems
-
-    for problem, definition in problems.DEFINITIONS.viewitems():
-        fitting_tensors = fitting.build_fitting_tensors(*definition)
-        relative_fitting_tensor_sets = fitting.build_relative_fitting_tensor_sets(*definition)
-
-        try:
-            (init_pars, fitness, residuals) = build_initial_parameter_values(
-                fitting_tensors, relative_fitting_tensor_sets
-                )
-
-        except Exception as e:
-            print 'Failed to initialize problem "{}" with exception {}'.format(
-                problem,
-                e
-                )
-
-if __name__ == '__main__':
-    test()
diff --git a/loader.py b/loader.py
index 3fc2291..59c7b1a 100644
--- a/loader.py
+++ b/loader.py
@@ -5,11 +5,6 @@
 default parameter assignments. It collects the explicit data as well as the
 metadata into a 'knowledge base' object.
 
-# TODO: more documentation
-# TODO: consider a format that distinguishes between keys and values
-# TODO: investigate parser libraries
-# TODO: tests
-
 """
 
 from __future__ import division
diff --git a/main.py b/main.py
index 7629a1b..a75a46f 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,14 @@
+'''
+
+One of two general parameter estimation execution scripts (also see
+main_alt.py). For details, consult README.md as well as the command-line info
+given by:
+
+python main.py -h
+
+'''
+
 from __future__ import division
 
 import os
diff --git a/main_alt.py b/main_alt.py
index faaa44d..4c94ae3 100644
--- a/main_alt.py
+++ b/main_alt.py
@@ -1,4 +1,13 @@
+'''
+
+One of two general parameter estimation execution scripts (also see main.py).
+For details, consult README.md as well as the command-line info given by:
+
+python main_alt.py -h
+
+'''
+
 from __future__ import division
 
 import os
diff --git a/number_of_training_data_points.py b/number_of_training_data_points.py
index 9e58cc1..9f62765 100644
--- a/number_of_training_data_points.py
+++ b/number_of_training_data_points.py
@@ -1,4 +1,11 @@
+'''
+
+Simple script that outputs the number of training data points in the basic
+optimization problem.
+
+'''
+
 from __future__ import division
 
 import numpy as np
diff --git a/optimize.py b/optimize.py
index 93df3c3..8cef059 100644
--- a/optimize.py
+++ b/optimize.py
@@ -1,4 +1,11 @@
+'''
+
+This file defines the primary step in the parameter estimation workflow, plus
+several accessory functions.
+
+'''
+
 from __future__ import division
 
 from itertools import izip
@@ -7,7 +14,7 @@
 
 import numpy as np
 
-import liveout as lo
+import utils.liveout as lo
 
 import structure
 import fitting
@@ -87,11 +94,17 @@ def fast_square_singleton(x):
 
 sumsq1d = lambda x: np.sum(np.square(x))
 
 def compute_abs_fit(pars, fitting_tensors):
+    '''
+    Computes the degree of fit to the training data.
+    '''
     (fitting_matrix, fitting_values) = fitting_tensors[:2] # TODO: check overhead of these unpacking operations
 
     return sumabs1d(fitting_matrix.dot(pars) - fitting_values)
 
 def compute_upper_fit(pars, upper_fitting_tensors):
+    '''
+    Computes the degree of fit to the training data (single-sided penalty).
+    '''
     (fitting_matrix, fitting_values) = upper_fitting_tensors[:2]
 
     if fitting_values.size:
@@ -104,6 +117,9 @@
     return 0.0
 
 def compute_relative_fit(pars, relative_fitting_tensor_sets):
+    '''
+    Computes the degree of fit to the training data (relative).
+    '''
     cost = 0.0
 
     for fm, fv, fe in relative_fitting_tensor_sets:
@@ -117,6 +133,9 @@
     return cost
 
 def compute_overall_fit(pars, fitting_tensors, upper_fitting_tensors, relative_fitting_tensor_sets):
+    '''
+    Computes the total degree of fit (misfit cost).
+    '''
     return (
         compute_abs_fit(pars, fitting_tensors)
         + compute_upper_fit(pars, upper_fitting_tensors)
@@ -124,6 +143,9 @@
         )
 
 class ObjectiveValues(object):
+    '''
+    Convenience class for storing objective values.
+    '''
     def __init__(self, pars, fitting_tensors, upper_fitting_tensors, relative_fitting_tensor_sets = ()):
         (v, dc_dt, dglc_dt) = equations.compute_all(pars, *equations.args)
@@ -151,7 +173,9 @@ def total(self, disequ_weight):
         return self.misfit_error() + disequ_weight * self.disequilibrium_error()
 
 def nonredundant_vectors(vectors, tolerance = 1e-15):
-    # Discards vectors that are equivalent to subsequent vectors under scaling.
+    '''
+    Discards vectors that are equivalent to subsequent vectors under scaling.
+    '''
 
     retained = []
@@ -169,6 +193,13 @@
     return retained
 
 def build_perturbation_vectors(naive = False):
+    '''
+
+    Builds the perturbation vectors using the procedure described in the
+    text. By default, the perturbations are 'parsimonious'.
+
+    '''
+
     if naive:
         # The standard parameter matrix corresponds to the 'usual'
         # parameterization of a kinetic model, in terms of kcat's, metabolite
@@ -207,6 +238,12 @@
 
 import bounds
 
 def build_bounds(naive = False):
+    '''
+
+    Builds the problem bounds. By default, the bounds are 'parsimonious'.
+
+    '''
+
     if naive: # TODO: move this to bounds.py
         import constants
@@ -294,6 +331,10 @@
     return bounds_matrix, lowerbounds, upperbounds
 
 def seconds_to_hms(t):
+    '''
+    Convenience function for converting a time in total seconds to hours,
+    minutes, and seconds.
+    '''
     hours = t//3600
     minutes = t//60 - 60*hours
     seconds = t - 60*minutes - 3600*hours
@@ -301,6 +342,9 @@
     return (hours, minutes, seconds)
 
 def empty_callback(epoch, iteration, constraint_penalty_weight, obj_components):
+    '''
+    Example empty callback; the default for the estimate_parameters function.
+    '''
     pass
 
 def estimate_parameters(
@@ -310,6 +354,11 @@ def estimate_parameters(
         random_direction = False,
         callback = empty_callback
         ):
+    '''
+
+    The optimization procedure, described extensively in the manuscript.
+
+    '''
 
     # TODO: pass initial parameters, bounds, perturbation vectors
     # TODO: pass metaparameters
diff --git a/problems.py b/problems.py
index b400a9e..b299724 100644
--- a/problems.py
+++ b/problems.py
@@ -1,4 +1,12 @@
+'''
+
+The various parameter estimation problems are defined here, as well as
+several convenience functions that can be used to build up new problems. All
+problems are stored in the global constant DEFINITIONS.
+
+'''
+
 from collections import OrderedDict
 
 from data import kb
@@ -6,8 +14,11 @@
 import fitting
 
 _DEFAULT_EXCLUDE_DATATYPE = (
+    # Pseudo-fitting data; not desired by default
     'upper_reactant_saturation_limit',
     'upper_product_saturation_limit',
+
+    # SEOF data proved to be very inaccurate and was ultimately excluded
     'standard_energy_of_formation'
     )
@@ -19,21 +30,25 @@
     )
 
 def accept_all(value = 1e0):
+    '''
+    A rule-generation function that accepts everything.
+    '''
     return (
         lambda entry: True,
         value
         )
 
-def exclude_entry(excluded_entry):
-    return (
-        (lambda entry: entry.id == excluded_entry.id, 0),
-        accept_all()
-        )
-
 DEFINITIONS = OrderedDict()
 
+'''
+The 'data agnostic' problem weights all data evenly.
+'''
+
 DEFINITIONS['data_agnostic'] = _DEFAULT_RULES + (accept_all(),)
 
+'''
+The 'no data' problem excludes all training data.
+'''
 DEFINITIONS['no_data'] = _DEFAULT_RULES + (accept_all(0),)
 
 _KINETICS_TYPES = (
@@ -61,6 +76,16 @@ def gather_by_field(dataset, field):
 
 from itertools import izip
 
 def normalize_by_number_of_observations(dataset, *fields):
+    '''
+
+    Creates a rule-weight pair that normalizes the weighting on the data by
+    the number of observations. This was found to give better fits, since
+    data were often consistent, and errors were usually a consequence of a
+    systemic change rather than experimental error. E.g. chemical
+    equilibrium constants were found not to be totally consistent for the
+    cell interior, despite having many (sometimes 10+) observations each.
+
+    '''
     return tuple(
         (
             fitting.field_value_rule(**{
@@ -75,8 +100,9 @@ def normalize_by_number_of_observations(dataset, *fields):
             ).viewitems()
         )
 
-def scale_weights(rules_and_weights, scale):
-    return tuple( (rule, weight * scale) for rule, weight in rules_and_weights)
+'''
+The 'all scaled' ruleset is the default.
+'''
 
 DEFINITIONS['all_scaled'] = (
     _DEFAULT_RULES
@@ -95,6 +121,11 @@
         ),
     )
 
+'''
+Variants on the 'all scaled' data set that include the saturation penalty,
+with increasing weight.
+'''
+
 DEFINITIONS['all_scaled_upper_sat_limits_1e-1'] = (
     (
         fitting.field_value_rule(source = ('custom_saturation_limits',)),
@@ -123,15 +154,2 @@
         ),
     ) + DEFINITIONS['all_scaled']
-
-def test():
-    print '{} problem definitions:'.format(len(DEFINITIONS))
-    print '\n'.join(DEFINITIONS.viewkeys())
-
-    import fitting
-
-    for name, rules_and_weights in DEFINITIONS.viewitems():
-        fitting.build_fitting_tensors(*rules_and_weights)
-        fitting.build_relative_fitting_tensor_sets(*rules_and_weights)
-
-if __name__ == '__main__':
-    test()
diff --git a/table1_reaction_rates.py b/tables/table1_reaction_rates.py
similarity index 100%
rename from table1_reaction_rates.py
rename to tables/table1_reaction_rates.py
diff --git a/table2_determinacy_shift.py b/tables/table2_determinacy_shift.py
similarity index 100%
rename from table2_determinacy_shift.py
rename to tables/table2_determinacy_shift.py
diff --git a/table3_summary.py b/tables/table3_summary.py
similarity index 100%
rename from table3_summary.py
rename to tables/table3_summary.py
diff --git a/ks_test.py b/utils/ks_test.py
similarity index 100%
rename from ks_test.py
rename to utils/ks_test.py
diff --git a/liveout.py b/utils/liveout.py
similarity index 100%
rename from liveout.py
rename to utils/liveout.py
diff --git a/svm.py b/utils/svm.py
similarity index 100%
rename from svm.py
rename to utils/svm.py
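
For reference, the call pattern documented in the new equations.py module
docstring is driven as sketched below. This is a minimal, hypothetical
example, not part of the commit: it assumes structure.n_parameters (referenced
elsewhere in the codebase) gives the length of the parameter vector, and uses
a placeholder zero vector where the real workflow in optimize.py and
initialization.py would supply estimated values.

    from __future__ import division

    import numpy as np

    import equations
    import structure

    # Placeholder parameter vector (Gibbs energies); real values come from
    # the estimation workflow, not from zeros.
    parameter_values = np.zeros(structure.n_parameters)

    # The documented call pattern: compute all equation terms in one pass,
    # so shared intermediates (reaction potentials, denominators) are reused.
    (v, dc_dt, dglc_dt) = equations.compute_all(parameter_values, *equations.args)

    # v: net reaction rates; dc_dt: time derivatives of the metabolite
    # concentrations; dglc_dt: time derivatives of their Gibbs-energy
    # transforms.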