From 13eb27d04b5e9278f3c60efbcd0bedead2aadcc3 Mon Sep 17 00:00:00 2001
From: John Mason <jmason42@stanford.edu>
Date: Mon, 11 Jun 2018 16:07:09 -0700
Subject: [PATCH] more clean up

---
 TODO                                     |  56 -----------
 TODO 2018-2-26.txt                       | 123 -----------------------
 figures/figure4_fit.py                   |  24 +----
 figures/figure4_fit_alt.py               |   6 +-
 figures/figure5_dynamics.py              |   2 +-
 figures/figure6_predictions.py           |   5 +-
 figures/figure6_predictions_alt.py       |   4 +-
 figures/figure6_shifted_distributions.py |  17 ++--
 8 files changed, 20 insertions(+), 217 deletions(-)
 delete mode 100644 TODO
 delete mode 100644 TODO 2018-2-26.txt

diff --git a/TODO b/TODO
deleted file mode 100644
index c17442a..0000000
--- a/TODO
+++ /dev/null
@@ -1,56 +0,0 @@
-priority
---------
-
-collect/annotate all data sources
-
-verify equations on figure 1
-
-recolor symbols on figure 1
-
-plot data for figure 2
-
-separate and organize figure 3 plots
-
-finalize data for figure 4
-
-choose timescale, organization for plotting histograms + PRDs for figure 4
-
-better main.py interface
-
-structure with options for optimize
-
-choose which files to include/exclude from repo
-
-unify pipeline
-
-unify ODE evaluation
-
-ongoing
--------
-
-repository clean-up (delete unused code)
-
-comments
-
-deferred
---------
-
-faster stopping conditions with staggered perturbation size decrease, instead of exp dropoff
-- will need extensive testing
-
-review included kinetics, esp. for isozymes
-- worth another pass but this is mostly done
-
-test for hitting bounds in initialization
-- easy, just run twice
-
-interactive jupyter notebook for selecting weights
-- later...
-
-random init
-
-profiling
-
-selective job launching (check for output prior to scheduling)
-
-tests
diff --git a/TODO 2018-2-26.txt b/TODO 2018-2-26.txt
deleted file mode 100644
index d9d573f..0000000
--- a/TODO 2018-2-26.txt	
+++ /dev/null
@@ -1,123 +0,0 @@
-Several things that should probably be tested...
----
-
-Sensitivity analysis/optimization on all metaparameters
-- including bounds
-
-Test cross-initialization (naive bounds w/ parsimonious perturbations, vice-versa)
-- want to establish that this isn't a case of better initialization
-
-Test alternative bounding logic
-- truncating
-- bouncing
-- wrapping
-
-Show that parsimonious can rescue naive but not vice-versa
-
-SVD on resulting parameters?
-
-Assert that the problem is not solvable by convex optimizers
-
-Assert that the problem is not solvable by diff evo
-
-Test
-- All combinbations of
-	- naive vs. parsimonious bounds
-	- naive vs. parsimonious perturbations
-	- ordinal vs. random-direction perturbations
-- naive-naive-ordinal and parsi-parsi-ordinal in paper
-	- not certain but pretty confident that the ranking is
-	- naive-naive-ordinal
-	- naive-naive-random
-	- naive-parsi-*/parsi-naive-*
-	- parsi-parsi-ordinal
-	- unsure about parsi-parsi-random
-
-Simulated annealing
-- I *really* don't want to do this; adds new metaparameters
-- will probably improve naive perturbations but that's beside the point
-
-Other tasks
----
-
-Primary and secondary estimation interface
-- primary: parameter-heavy, logic-light
-- secondary: parameter-light, logic-heavy, calls primary
-
-Move naive bounds constants to a file
-
-Various repo cleanup tasks
-- moved/removed a lot of files
-- still needs restructuring
-
-File documentation
-
-Function documentation
-
-Readme
-
-Tests
-
-Review modules under 'utils' and consider removing
-
-Done
-----
-
-Remove redundant objective calculations
-- Shouldn't be slowing things down much (only when logging) but still not good
-
-Clean up constants
-- Lots of constants that are set to unity
-
-Possible major issue: naive opt has really poor init.
-- Appears to be a regularization problem, as the data is well-fit.
-- Just checked - this was NOT true in the past.  I don't know of a solution outside using the parsimonious initial values for the naive opt, or similar.
-- Only change I can tell was made was to increase the number of iterations on the linprog solver.  However, reverting this did not fix the problem.
-- pyenv does not seem to matter.
-- data_agnostic (vs all_scaled) also has this issue
-- the parameters themselves seem fine but some of the activities are extreme
-- biggest problem seems to be the enolase reverse reaction
-- I can't seem to associate this with any particular diff.  I *did* make some changes to metaparameters but *not* to bounds or initialization.
-- Theories:
-	- Past results were never correct
-		False, checked history and validated that the diseq error was low
-	- Changes to solver arguments
-		Probably false, can't find anything in recent git history
-	- NumPy/SciPy version issues
-		Probably false, there aren't any differences b/t pyenv parest and parest2
-	- Differences in batch vs. solo execution
-		Nothing suspicious here
-	- Changes missing from git
-		Can't prove.  There *are* differences between my backup and working version;
-		both are still wrong but wrong in different ways.
-	- Optimization metaparameter changes
-		False, metaparameters don't come into play until after init
-	- Bounds changes
-		This is the most likely cause IMO.
-	- Previously used parsi bounds for init
-		False, gives same result as parsimonious.
-	- Random direction perturbation changes
-		Tested, no difference
-	- Training data changed
-		Probably false, init fit appears to be the same
-	- Sign changes on some bounded values
-		Metabolite, enzyme concentrations appear fine
-		Kcat and KM also appear to be fine
-		Probably false, would cause bigger init. issues
-- Notes
-	- current naive init f = 7.57, g = 8.01e19
-	- current parsi init f = 7.57, g = 111.4
-	- old naive init (*iteration 6) f = 7.57, g = 2.55
-	- old parsi init (*iteration 1206) f = 7.61, g = 96.0
-		Unfortunately it looks like I didn't save the initial value in the history, only subsequent changes.  Irritating.
-	- backup naive init f = 7.57, g = 1.14e12
-	- backup parsi init f = 7.57, g = 124.0
-		Backup is similar yet clearly distinct.
-	- pgi and gap are specifically misbehaving (very negative v's)
-		these are two of the three (including pps) reactions that lack equi data
-	- removing kcatf bounds makes g worse (2.76e23)
-	- adding kcatr bounds makes g reasonable (3.91)
-	- backup vs current: init_matrix row order has changed
-	- there are other difference b/t backup and current, all appear to be subsequent of ordering differences
-- Unless I have some profound insight, I've decided to force the naive optimizations to use the parsimonious optimizations' initial values.  This seems to be the most equitable choice.
-- I *should* reevaluate the naive optimizations but I really don't want to
diff --git a/figures/figure4_fit.py b/figures/figure4_fit.py
index 57f82a8..47e9d3b 100644
--- a/figures/figure4_fit.py
+++ b/figures/figure4_fit.py
@@ -157,25 +157,6 @@ def get_residuals_and_indexing(directory):
 	return residuals, indexing
 
 def main(input_directory, output_directory):
-	# valid = np.load(os.path.join(input_directory, 'valid.npy'))
-
-	# pars = np.load(os.path.join(input_directory, 'pars.npy'))[
-	# 	:, valid
-	# 	]
-
-	# abs_res, abs_ind = get_absolute_fit_residuals(pars)
-	# rel_res, rel_ind = get_relative_fit_residuals(pars)
-
-	# residuals = np.row_stack([abs_res, rel_res])
-	# indexing = np.concatenate([abs_ind, rel_ind])
-
-	# sorting = np.argsort(indexing)
-
-	# residuals = residuals[sorting, :]
-	# indexing = indexing[sorting]
-
-	# datatypes = indexing['datatype']
-
 	(residuals, indexing) = get_residuals_and_indexing(input_directory)
 
 	datatypes = indexing['datatype']
@@ -184,6 +165,9 @@ def main(input_directory, output_directory):
 
 	# make_clean_directory(output_directory)
 
+	if not os.path.exists(output_directory):
+		os.makedirs(output_directory)
+
 	n_unique = 0
 	n_within_2x_median = 0
 	n_within_10x_median = 0
@@ -239,5 +223,5 @@ def main(input_directory, output_directory):
 if __name__ == '__main__':
 	main(
 		os.path.join('out', 'all_scaled'),
-		'figure5'
+		'figure4'
 		)
diff --git a/figures/figure4_fit_alt.py b/figures/figure4_fit_alt.py
index b21f508..713c832 100644
--- a/figures/figure4_fit_alt.py
+++ b/figures/figure4_fit_alt.py
@@ -1,16 +1,16 @@
 
 import os
 
-import figure5
+import figures.figure4_fit
 
 inputs_and_outputs = tuple(
 	(
 		os.path.join('out', 'all_scaled_upper_sat_limits_{}'.format(penalty)),
-		os.path.join('figure5', 'saturation penalized', 'penalty_{}'.format(penalty))
+		os.path.join('figure4', 'saturation penalized', 'penalty_{}'.format(penalty))
 		)
 	for penalty in ('1e-1', '1e0', '1e1', '1e2')
 	)
 
 for input_and_output in inputs_and_outputs:
-	figure5.main(*input_and_output)
+	figures.figure4_fit.main(*input_and_output)
 	print ''
diff --git a/figures/figure5_dynamics.py b/figures/figure5_dynamics.py
index f6c9923..d396ef9 100644
--- a/figures/figure5_dynamics.py
+++ b/figures/figure5_dynamics.py
@@ -294,4 +294,4 @@ def init_dg_dt(pars):
 	plt.subplot(len(sources), 2, i*2+2)
 	plot_prd(pars, i)
 
-plt.savefig('figure4.pdf')
+plt.savefig('figure5.pdf')
diff --git a/figures/figure6_predictions.py b/figures/figure6_predictions.py
index d688517..1bd7f15 100644
--- a/figures/figure6_predictions.py
+++ b/figures/figure6_predictions.py
@@ -10,8 +10,6 @@
 import structure
 import constants
 
-from figure5 import make_clean_directory
-
 def main(input_directory, output_directory):
 	pars = np.load(os.path.join(input_directory, 'pars.npy'))[
 		:, np.load(os.path.join(input_directory, 'valid.npy'))
@@ -83,7 +81,8 @@ def main(input_directory, output_directory):
 
 	fig = utils.residuals.plot(residuals, indexing)
 
-	# make_clean_directory(output_directory)
+	if not os.path.exists(output_directory):
+		os.makedirs(output_directory)
 
 	fig.savefig(os.path.join(output_directory, 'specific_activity.pdf'), dpi = DPI)
 
diff --git a/figures/figure6_predictions_alt.py b/figures/figure6_predictions_alt.py
index f059e13..1dd7ca3 100644
--- a/figures/figure6_predictions_alt.py
+++ b/figures/figure6_predictions_alt.py
@@ -1,7 +1,7 @@
 
 import os
 
-import figure6
+import figures.figure6_predictions
 
 inputs_and_outputs = tuple(
 	(
@@ -12,5 +12,5 @@
 	)
 
 for input_and_output in inputs_and_outputs:
-	figure6.main(*input_and_output)
+	figures.figure6_predictions.main(*input_and_output)
 
diff --git a/figures/figure6_shifted_distributions.py b/figures/figure6_shifted_distributions.py
index 11f95da..738bc20 100644
--- a/figures/figure6_shifted_distributions.py
+++ b/figures/figure6_shifted_distributions.py
@@ -8,8 +8,7 @@
 import numpy as np
 import matplotlib.pyplot as plt
 
-import figure5
-from ks_test import ks_test
+import figures.figure4_fit
 
 import utils.residuals
 
@@ -18,15 +17,15 @@
 if not os.path.exists(OUTPUT_DIRECTORY):
 	os.mkdir(OUTPUT_DIRECTORY)
 
-(residuals_standard, indexing_standard) = figure5.get_residuals_and_indexing(
+(residuals_standard, indexing_standard) = figures.figure4_fit.get_residuals_and_indexing(
 	os.path.join('out', 'all_scaled')
 	)
 
-(residuals_small, indexing_small) = figure5.get_residuals_and_indexing(
+(residuals_small, indexing_small) = figures.figure4_fit.get_residuals_and_indexing(
 	os.path.join('out', 'all_scaled_upper_sat_limits_1e-1')
 	)
 
-(residuals_large, indexing_large) = figure5.get_residuals_and_indexing(
+(residuals_large, indexing_large) = figures.figure4_fit.get_residuals_and_indexing(
 	os.path.join('out', 'all_scaled_upper_sat_limits_1e2')
 	)
 
@@ -61,14 +60,14 @@
 
 	fig = utils.residuals.plot(residuals[plotted], indexing[plotted])
 
-	fig.savefig(os.path.join(OUTPUT_DIRECTORY, '{}.pdf'.format(name)), dpi = figure5.DPI)
+	fig.savefig(os.path.join(OUTPUT_DIRECTORY, '{}.pdf'.format(name)), dpi = figures.figure4_fit.DPI)
 
 	plt.close(fig)
 
 with open(os.path.join(OUTPUT_DIRECTORY, 'key.txt'), 'w') as f:
 	for uni in unique_values[shifted_large]:
 		f.write(':'.join([
-			figure5.DATATYPES_ORDERED[uni['datatype']] if uni['datatype'] >= 0 else '',
-			figure5.REACTIONS_ORDERED[uni['reaction']] if uni['reaction'] >= 0 else '',
-			figure5.COMPOUNDS_ORDERED[uni['compound']] if uni['compound'] >= 0 else '',
+			figures.figure4_fit.DATATYPES_ORDERED[uni['datatype']] if uni['datatype'] >= 0 else '',
+			figures.figure4_fit.REACTIONS_ORDERED[uni['reaction']] if uni['reaction'] >= 0 else '',
+			figures.figure4_fit.COMPOUNDS_ORDERED[uni['compound']] if uni['compound'] >= 0 else '',
 			])+'\n')