From 13eb27d04b5e9278f3c60efbcd0bedead2aadcc3 Mon Sep 17 00:00:00 2001 From: John Mason Date: Mon, 11 Jun 2018 16:07:09 -0700 Subject: [PATCH] more clean up --- TODO | 56 ----------- TODO 2018-2-26.txt | 123 ----------------------- figures/figure4_fit.py | 24 +---- figures/figure4_fit_alt.py | 6 +- figures/figure5_dynamics.py | 2 +- figures/figure6_predictions.py | 5 +- figures/figure6_predictions_alt.py | 4 +- figures/figure6_shifted_distributions.py | 17 ++-- 8 files changed, 20 insertions(+), 217 deletions(-) delete mode 100644 TODO delete mode 100644 TODO 2018-2-26.txt diff --git a/TODO b/TODO deleted file mode 100644 index c17442a..0000000 --- a/TODO +++ /dev/null @@ -1,56 +0,0 @@ -priority --------- - -collect/annotate all data sources - -verify equations on figure 1 - -recolor symbols on figure 1 - -plot data for figure 2 - -separate and organize figure 3 plots - -finalize data for figure 4 - -choose timescale, organization for plotting histograms + PRDs for figure 4 - -better main.py interface - -structure with options for optimize - -choose which files to include/exclude from repo - -unify pipeline - -unify ODE evaluation - -ongoing -------- - -repository clean-up (delete unused code) - -comments - -deferred --------- - -faster stopping conditions with staggered perturbation size decrease, instead of exp dropoff -- will need extensive testing - -review included kinetics, esp. for isozymes -- worth another pass but this is mostly done - -test for hitting bounds in initialization -- easy, just run twice - -interactive jupyter notebook for selecting weights -- later... - -random init - -profiling - -selective job launching (check for output prior to scheduling) - -tests diff --git a/TODO 2018-2-26.txt b/TODO 2018-2-26.txt deleted file mode 100644 index d9d573f..0000000 --- a/TODO 2018-2-26.txt +++ /dev/null @@ -1,123 +0,0 @@ -Several things that should probably be tested... ---- - -Sensitivity analysis/optimization on all metaparameters -- including bounds - -Test cross-initialization (naive bounds w/ parsimonious perturbations, vice-versa) -- want to establish that this isn't a case of better initialization - -Test alternative bounding logic -- truncating -- bouncing -- wrapping - -Show that parsimonious can rescue naive but not vice-versa - -SVD on resulting parameters? - -Assert that the problem is not solvable by convex optimizers - -Assert that the problem is not solvable by diff evo - -Test -- All combinbations of - - naive vs. parsimonious bounds - - naive vs. parsimonious perturbations - - ordinal vs. random-direction perturbations -- naive-naive-ordinal and parsi-parsi-ordinal in paper - - not certain but pretty confident that the ranking is - - naive-naive-ordinal - - naive-naive-random - - naive-parsi-*/parsi-naive-* - - parsi-parsi-ordinal - - unsure about parsi-parsi-random - -Simulated annealing -- I *really* don't want to do this; adds new metaparameters -- will probably improve naive perturbations but that's beside the point - -Other tasks ---- - -Primary and secondary estimation interface -- primary: parameter-heavy, logic-light -- secondary: parameter-light, logic-heavy, calls primary - -Move naive bounds constants to a file - -Various repo cleanup tasks -- moved/removed a lot of files -- still needs restructuring - -File documentation - -Function documentation - -Readme - -Tests - -Review modules under 'utils' and consider removing - -Done ----- - -Remove redundant objective calculations -- Shouldn't be slowing things down much (only when logging) but still not good - -Clean up constants -- Lots of constants that are set to unity - -Possible major issue: naive opt has really poor init. -- Appears to be a regularization problem, as the data is well-fit. -- Just checked - this was NOT true in the past. I don't know of a solution outside using the parsimonious initial values for the naive opt, or similar. -- Only change I can tell was made was to increase the number of iterations on the linprog solver. However, reverting this did not fix the problem. -- pyenv does not seem to matter. -- data_agnostic (vs all_scaled) also has this issue -- the parameters themselves seem fine but some of the activities are extreme -- biggest problem seems to be the enolase reverse reaction -- I can't seem to associate this with any particular diff. I *did* make some changes to metaparameters but *not* to bounds or initialization. -- Theories: - - Past results were never correct - False, checked history and validated that the diseq error was low - - Changes to solver arguments - Probably false, can't find anything in recent git history - - NumPy/SciPy version issues - Probably false, there aren't any differences b/t pyenv parest and parest2 - - Differences in batch vs. solo execution - Nothing suspicious here - - Changes missing from git - Can't prove. There *are* differences between my backup and working version; - both are still wrong but wrong in different ways. - - Optimization metaparameter changes - False, metaparameters don't come into play until after init - - Bounds changes - This is the most likely cause IMO. - - Previously used parsi bounds for init - False, gives same result as parsimonious. - - Random direction perturbation changes - Tested, no difference - - Training data changed - Probably false, init fit appears to be the same - - Sign changes on some bounded values - Metabolite, enzyme concentrations appear fine - Kcat and KM also appear to be fine - Probably false, would cause bigger init. issues -- Notes - - current naive init f = 7.57, g = 8.01e19 - - current parsi init f = 7.57, g = 111.4 - - old naive init (*iteration 6) f = 7.57, g = 2.55 - - old parsi init (*iteration 1206) f = 7.61, g = 96.0 - Unfortunately it looks like I didn't save the initial value in the history, only subsequent changes. Irritating. - - backup naive init f = 7.57, g = 1.14e12 - - backup parsi init f = 7.57, g = 124.0 - Backup is similar yet clearly distinct. - - pgi and gap are specifically misbehaving (very negative v's) - these are two of the three (including pps) reactions that lack equi data - - removing kcatf bounds makes g worse (2.76e23) - - adding kcatr bounds makes g reasonable (3.91) - - backup vs current: init_matrix row order has changed - - there are other difference b/t backup and current, all appear to be subsequent of ordering differences -- Unless I have some profound insight, I've decided to force the naive optimizations to use the parsimonious optimizations' initial values. This seems to be the most equitable choice. -- I *should* reevaluate the naive optimizations but I really don't want to diff --git a/figures/figure4_fit.py b/figures/figure4_fit.py index 57f82a8..47e9d3b 100644 --- a/figures/figure4_fit.py +++ b/figures/figure4_fit.py @@ -157,25 +157,6 @@ def get_residuals_and_indexing(directory): return residuals, indexing def main(input_directory, output_directory): - # valid = np.load(os.path.join(input_directory, 'valid.npy')) - - # pars = np.load(os.path.join(input_directory, 'pars.npy'))[ - # :, valid - # ] - - # abs_res, abs_ind = get_absolute_fit_residuals(pars) - # rel_res, rel_ind = get_relative_fit_residuals(pars) - - # residuals = np.row_stack([abs_res, rel_res]) - # indexing = np.concatenate([abs_ind, rel_ind]) - - # sorting = np.argsort(indexing) - - # residuals = residuals[sorting, :] - # indexing = indexing[sorting] - - # datatypes = indexing['datatype'] - (residuals, indexing) = get_residuals_and_indexing(input_directory) datatypes = indexing['datatype'] @@ -184,6 +165,9 @@ def main(input_directory, output_directory): # make_clean_directory(output_directory) + if not os.path.exists(output_directory): + os.makedirs(output_directory) + n_unique = 0 n_within_2x_median = 0 n_within_10x_median = 0 @@ -239,5 +223,5 @@ def main(input_directory, output_directory): if __name__ == '__main__': main( os.path.join('out', 'all_scaled'), - 'figure5' + 'figure4' ) diff --git a/figures/figure4_fit_alt.py b/figures/figure4_fit_alt.py index b21f508..713c832 100644 --- a/figures/figure4_fit_alt.py +++ b/figures/figure4_fit_alt.py @@ -1,16 +1,16 @@ import os -import figure5 +import figures.figure4_fit inputs_and_outputs = tuple( ( os.path.join('out', 'all_scaled_upper_sat_limits_{}'.format(penalty)), - os.path.join('figure5', 'saturation penalized', 'penalty_{}'.format(penalty)) + os.path.join('figure4', 'saturation penalized', 'penalty_{}'.format(penalty)) ) for penalty in ('1e-1', '1e0', '1e1', '1e2') ) for input_and_output in inputs_and_outputs: - figure5.main(*input_and_output) + figures.figure4_fit.main(*input_and_output) print '' diff --git a/figures/figure5_dynamics.py b/figures/figure5_dynamics.py index f6c9923..d396ef9 100644 --- a/figures/figure5_dynamics.py +++ b/figures/figure5_dynamics.py @@ -294,4 +294,4 @@ def init_dg_dt(pars): plt.subplot(len(sources), 2, i*2+2) plot_prd(pars, i) -plt.savefig('figure4.pdf') +plt.savefig('figure5.pdf') diff --git a/figures/figure6_predictions.py b/figures/figure6_predictions.py index d688517..1bd7f15 100644 --- a/figures/figure6_predictions.py +++ b/figures/figure6_predictions.py @@ -10,8 +10,6 @@ import structure import constants -from figure5 import make_clean_directory - def main(input_directory, output_directory): pars = np.load(os.path.join(input_directory, 'pars.npy'))[ :, np.load(os.path.join(input_directory, 'valid.npy')) @@ -83,7 +81,8 @@ def main(input_directory, output_directory): fig = utils.residuals.plot(residuals, indexing) - # make_clean_directory(output_directory) + if not os.path.exists(output_directory): + os.makedirs(output_directory) fig.savefig(os.path.join(output_directory, 'specific_activity.pdf'), dpi = DPI) diff --git a/figures/figure6_predictions_alt.py b/figures/figure6_predictions_alt.py index f059e13..1dd7ca3 100644 --- a/figures/figure6_predictions_alt.py +++ b/figures/figure6_predictions_alt.py @@ -1,7 +1,7 @@ import os -import figure6 +import figures.figure6_predictions inputs_and_outputs = tuple( ( @@ -12,5 +12,5 @@ ) for input_and_output in inputs_and_outputs: - figure6.main(*input_and_output) + figures.figure6_predictions.main(*input_and_output) diff --git a/figures/figure6_shifted_distributions.py b/figures/figure6_shifted_distributions.py index 11f95da..738bc20 100644 --- a/figures/figure6_shifted_distributions.py +++ b/figures/figure6_shifted_distributions.py @@ -8,8 +8,7 @@ import numpy as np import matplotlib.pyplot as plt -import figure5 -from ks_test import ks_test +import figures.figure4_fit import utils.residuals @@ -18,15 +17,15 @@ if not os.path.exists(OUTPUT_DIRECTORY): os.mkdir(OUTPUT_DIRECTORY) -(residuals_standard, indexing_standard) = figure5.get_residuals_and_indexing( +(residuals_standard, indexing_standard) = figures.figure4_fit.get_residuals_and_indexing( os.path.join('out', 'all_scaled') ) -(residuals_small, indexing_small) = figure5.get_residuals_and_indexing( +(residuals_small, indexing_small) = figures.figure4_fit.get_residuals_and_indexing( os.path.join('out', 'all_scaled_upper_sat_limits_1e-1') ) -(residuals_large, indexing_large) = figure5.get_residuals_and_indexing( +(residuals_large, indexing_large) = figures.figure4_fit.get_residuals_and_indexing( os.path.join('out', 'all_scaled_upper_sat_limits_1e2') ) @@ -61,14 +60,14 @@ fig = utils.residuals.plot(residuals[plotted], indexing[plotted]) - fig.savefig(os.path.join(OUTPUT_DIRECTORY, '{}.pdf'.format(name)), dpi = figure5.DPI) + fig.savefig(os.path.join(OUTPUT_DIRECTORY, '{}.pdf'.format(name)), dpi = figures.figure4_fit.DPI) plt.close(fig) with open(os.path.join(OUTPUT_DIRECTORY, 'key.txt'), 'w') as f: for uni in unique_values[shifted_large]: f.write(':'.join([ - figure5.DATATYPES_ORDERED[uni['datatype']] if uni['datatype'] >= 0 else '', - figure5.REACTIONS_ORDERED[uni['reaction']] if uni['reaction'] >= 0 else '', - figure5.COMPOUNDS_ORDERED[uni['compound']] if uni['compound'] >= 0 else '', + figures.figure4_fit.DATATYPES_ORDERED[uni['datatype']] if uni['datatype'] >= 0 else '', + figures.figure4_fit.REACTIONS_ORDERED[uni['reaction']] if uni['reaction'] >= 0 else '', + figures.figure4_fit.COMPOUNDS_ORDERED[uni['compound']] if uni['compound'] >= 0 else '', ])+'\n')