diff --git a/colibri/analytic_fit.py b/colibri/analytic_fit.py index 637ea0bd..82cb9b2c 100644 --- a/colibri/analytic_fit.py +++ b/colibri/analytic_fit.py @@ -130,14 +130,14 @@ def analytic_fit( """ # Ensure that the PDF model is linear before running the fit. log.info("Checking that the PDF model is linear...") - check_pdf_model_is_linear(pdf_model, forward_map, FIT_XGRID, data) + check_pdf_model_is_linear(forward_map, FIT_XGRID, data) log.warning("The prior is assumed to be flat in the parameters.") log.warning( "Assuming that the prior is wide enough to fully cover the gaussian likelihood." ) - parameters = pdf_model.param_names + parameters = forward_map.param_names # Precompute predictions for the basis of the model bases = jnp.identity(len(parameters)) diff --git a/colibri/bayes_prior.py b/colibri/bayes_prior.py index aa098ed5..01a0125d 100644 --- a/colibri/bayes_prior.py +++ b/colibri/bayes_prior.py @@ -20,6 +20,10 @@ def bayesian_prior(prior_settings, forward_map): prior_settings: dict The settings for the prior transform. + forward_map: ForwardMap + The forward map of the problem, used to determine parameter names and ordering. + + Returns ------- prior_transform: @jax.jit CompiledFunction diff --git a/colibri/checks.py b/colibri/checks.py index fc4aab4e..8111d218 100644 --- a/colibri/checks.py +++ b/colibri/checks.py @@ -7,7 +7,7 @@ from reportengine.checks import make_argcheck import jax.numpy as jnp import jax -from colibri.theory_predictions import make_pred_data, fast_kernel_arrays +from colibri.theory_predictions import fast_kernel_arrays from colibri.utils import get_fit_path, get_pdf_model @@ -42,16 +42,15 @@ def check_pdf_models_equal(prior_settings, forward_map, theoryid): ) -def check_pdf_model_is_linear(pdf_model, forward_map, FIT_XGRID, data): +def check_pdf_model_is_linear(forward_map, FIT_XGRID, data): """ Decorator that can be added to functions to check that the PDF model is linear. 
""" - pred_data = make_pred_data(data, FIT_XGRID) fk = fast_kernel_arrays(data, FIT_XGRID) - parameters = pdf_model.param_names + parameters = forward_map.param_names intercept, _ = forward_map(fk, jnp.zeros(len(parameters))) # Run the check for 10 random points in the parameter space diff --git a/colibri/hessian_fit.py b/colibri/hessian_fit.py index 997acb40..86b3bcdb 100644 --- a/colibri/hessian_fit.py +++ b/colibri/hessian_fit.py @@ -15,7 +15,7 @@ def hessian_fit( - pdf_model, + forward_map, log_likelihood, optimizer_provider, max_epochs, @@ -27,8 +27,9 @@ def hessian_fit( Parameters ---------- - pdf_model: pdf_model.PDFModel - The PDF model to be fitted. + forward_map: forward_map.ForwardMap + The forward map whose .param_names (PDF params + extra params) are used + for initialization and result labeling. log_likelihood: callable The log-likelihood function to be maximized. optimizer_provider: optax.GradientTransformation @@ -72,7 +73,7 @@ def valid_chi2(params): log.info(f"Hessian fit initialization iteration {i+1}") # Generate random initial parameters initial_parameters = pdf_initial_parameters( - pdf_model, param_initialiser_settings, rng_seed + i + forward_map, param_initialiser_settings, rng_seed + i ) # Delegate to generic gradient descent @@ -191,7 +192,7 @@ def valid_chi2(params): hessian=hessian, cov_params=cov_params, resampled_posterior=hessian_param_set, - param_names=pdf_model.param_names, + param_names=forward_map.param_names, ) diff --git a/colibri/monte_carlo_fit.py b/colibri/monte_carlo_fit.py index 76809ffe..41a16954 100644 --- a/colibri/monte_carlo_fit.py +++ b/colibri/monte_carlo_fit.py @@ -108,7 +108,7 @@ def loss_validation(parameters): ) -def run_monte_carlo_fit(monte_carlo_fit, pdf_model, output_path, replica_index, Q0): +def run_monte_carlo_fit(monte_carlo_fit, forward_map, output_path, replica_index, Q0): """ Runs the Monte Carlo fit and writes the output to the output directory. 
@@ -117,8 +117,10 @@ def run_monte_carlo_fit(monte_carlo_fit, pdf_model, output_path, replica_index, monte_carlo_fit: MonteCarloFit The results of the Monte Carlo fit. - pdf_model: pdf_model.PDFModel - The PDF model used in the fit. + forward_map: forward_map.ForwardMap + The forward map used in the fit. Its .param_names (PDF + extra params) + label the optimized parameters; its .pdf_model is used to write the + export grid. output_path: pathlib.PosixPath Path to the output folder. @@ -131,7 +133,7 @@ def run_monte_carlo_fit(monte_carlo_fit, pdf_model, output_path, replica_index, """ mc_fit = monte_carlo_fit - df = pd.DataFrame(mc_fit.optimized_parameters, index=pdf_model.param_names).T + df = pd.DataFrame(mc_fit.optimized_parameters, index=forward_map.param_names).T # In a Monte Carlo fit, replicas are written to the fit_replicas # directory, and mc_postfit must then be applied to select valid ones @@ -144,7 +146,7 @@ def run_monte_carlo_fit(monte_carlo_fit, pdf_model, output_path, replica_index, log.info(f"Writing exportgrid for replica {replica_index}") write_exportgrid_mc( jnp.array(df.iloc[0, :].tolist()), - pdf_model, + forward_map.pdf_model, replica_index, output_path, Q0, diff --git a/colibri/param_initialisation.py b/colibri/param_initialisation.py index a6b3351b..3e172940 100644 --- a/colibri/param_initialisation.py +++ b/colibri/param_initialisation.py @@ -5,14 +5,15 @@ log = logging.getLogger(__name__) -def pdf_initial_parameters(pdf_model, param_initialiser_settings, replica_index=-1): +def pdf_initial_parameters(forward_map, param_initialiser_settings, replica_index=-1): """ - This function provides initial parameters for the PDF model. + This function provides initial parameters for the forward map. Parameters ---------- - pdf_model: pdf_mode.PDFModel - The PDF model to initialise the parameters for. 
+ forward_map: forward_map.ForwardMap + The forward map whose .param_names (PDF parameters + any extra parameters) + are used to determine the size and ordering of the initialised array. param_initialiser_settings: dict The settings for the initialiser. @@ -34,7 +35,7 @@ def pdf_initial_parameters(pdf_model, param_initialiser_settings, replica_index= param_initialiser_settings["type"] = "zeros" if param_initialiser_settings["type"] == "zeros": - return jnp.array([0.0] * len(pdf_model.param_names)) + return jnp.array([0.0] * len(forward_map.param_names)) if "random_seed" in param_initialiser_settings: random_seed = jax.random.PRNGKey( @@ -43,7 +44,7 @@ def pdf_initial_parameters(pdf_model, param_initialiser_settings, replica_index= else: random_seed = jax.random.PRNGKey(replica_index) - param_names = pdf_model.param_names + param_names = forward_map.param_names if param_initialiser_settings["type"] == "normal": means_setting = param_initialiser_settings.get("means", 0.0) @@ -127,7 +128,7 @@ def expand(setting, default, name): initial_values = jax.random.uniform( key=random_seed, - shape=(len(pdf_model.param_names),), + shape=(len(forward_map.param_names),), minval=min_val, maxval=max_val, ) diff --git a/colibri/tests/test_checks.py b/colibri/tests/test_checks.py index a0145512..0346b5aa 100644 --- a/colibri/tests/test_checks.py +++ b/colibri/tests/test_checks.py @@ -111,9 +111,8 @@ def test_check_pdf_models_equal_false_param_names( check_pdf_models_equal.__wrapped__(prior_settings, forward_map, theoryid) -@patch("colibri.checks.make_pred_data") @patch("colibri.checks.fast_kernel_arrays") -def test_check_pdf_model_is_linear(mock_fast_kernel_arrays, mock_make_pred_data): +def test_check_pdf_model_is_linear(mock_fast_kernel_arrays): # Create test data FIT_XGRID = jnp.array([1.0, 2.0, 3.0]) data = jnp.array([0.1, 0.2, 0.3]) @@ -140,7 +139,7 @@ def pdf_linear_model(params): ) # Test for linear model (should not raise an exception) - 
check_pdf_model_is_linear(mock_pdf_model, forward_map_lin, FIT_XGRID, data) + check_pdf_model_is_linear(forward_map_lin, FIT_XGRID, data) # Now mock a non-linear model to ensure the ValueError is raised non_linear_model = FKTableForwardMap( @@ -152,4 +151,4 @@ def pdf_linear_model(params): # Ensure ValueError is raised for non-linear model with pytest.raises(ValueError): - check_pdf_model_is_linear(mock_pdf_model, non_linear_model, FIT_XGRID, data) + check_pdf_model_is_linear(non_linear_model, FIT_XGRID, data) diff --git a/colibri/tests/test_hessian_fit.py b/colibri/tests/test_hessian_fit.py index 5113aa33..28501649 100644 --- a/colibri/tests/test_hessian_fit.py +++ b/colibri/tests/test_hessian_fit.py @@ -11,9 +11,9 @@ from numpy.testing import assert_allclose from colibri.hessian_fit import HessianFit, hessian_fit, run_hessian_fit -from colibri.tests.conftest import MOCK_PDF_MODEL +from colibri.tests.conftest import MOCK_PDF_MODEL, TEST_FORWARD_MAP_DIS -N_PARAMS = len(MOCK_PDF_MODEL.param_names) +N_PARAMS = len(TEST_FORWARD_MAP_DIS.param_names) class MockOptimizerProvider: @@ -53,7 +53,7 @@ def test_hessian_fit_runs_and_shapes(): param_initialiser_settings = {"type": "zeros"} result = hessian_fit( - pdf_model=MOCK_PDF_MODEL, + forward_map=TEST_FORWARD_MAP_DIS, log_likelihood=log_likelihood, optimizer_provider=MockOptimizerProvider(), max_epochs=5, @@ -62,7 +62,7 @@ def test_hessian_fit_runs_and_shapes(): ) assert isinstance(result, HessianFit) - assert result.param_names == MOCK_PDF_MODEL.param_names + assert result.param_names == TEST_FORWARD_MAP_DIS.param_names # Hessian of ||p||^2 with the internal 0.5 factor should be identity assert_allclose(result.hessian, jnp.eye(N_PARAMS), rtol=1e-12, atol=1e-12) @@ -92,7 +92,7 @@ def test_run_hessian_fit_exports(mock_write_exportgrid, tmp_path): param_initialiser_settings = {"type": "zeros"} fit_result = hessian_fit( - pdf_model=MOCK_PDF_MODEL, + forward_map=TEST_FORWARD_MAP_DIS, log_likelihood=log_likelihood, 
optimizer_provider=MockOptimizerProvider(), max_epochs=1, @@ -127,7 +127,7 @@ def test_hessian_fit_raises_when_require_local_min_fails(): with pytest.raises(ValueError): hessian_fit( - pdf_model=MOCK_PDF_MODEL, + forward_map=TEST_FORWARD_MAP_DIS, log_likelihood=log_likelihood, optimizer_provider=MockOptimizerProvider(), max_epochs=1, @@ -149,7 +149,7 @@ def test_hessian_fit_logs_warning_on_local_min_check_failed(caplog): with caplog.at_level("WARNING"): res = hessian_fit( - pdf_model=MOCK_PDF_MODEL, + forward_map=TEST_FORWARD_MAP_DIS, log_likelihood=log_likelihood, optimizer_provider=MockOptimizerProvider(), max_epochs=1, @@ -179,7 +179,7 @@ def test_hessian_fit_logs_critical_on_non_pd_hessian(caplog): with caplog.at_level("CRITICAL"): hessian_fit( - pdf_model=MOCK_PDF_MODEL, + forward_map=TEST_FORWARD_MAP_DIS, log_likelihood=bad_loglike, optimizer_provider=MockOptimizerProvider(), max_epochs=1, @@ -210,7 +210,7 @@ def test_hessian_fit_raises_on_nonfinite_min_chi2(): with pytest.raises(ValueError): hessian_fit( - pdf_model=MOCK_PDF_MODEL, + forward_map=TEST_FORWARD_MAP_DIS, log_likelihood=nan_loglike, optimizer_provider=MockOptimizerProvider(), max_epochs=1, diff --git a/colibri/tests/test_monte_carlo_fit.py b/colibri/tests/test_monte_carlo_fit.py index 1ce522d4..cc743a43 100644 --- a/colibri/tests/test_monte_carlo_fit.py +++ b/colibri/tests/test_monte_carlo_fit.py @@ -12,11 +12,10 @@ from numpy.testing import assert_allclose from colibri.monte_carlo_fit import MonteCarloFit, monte_carlo_fit, run_monte_carlo_fit -from colibri.tests.conftest import MOCK_PDF_MODEL +from colibri.tests.conftest import MOCK_PDF_MODEL, TEST_FORWARD_MAP_DIS from colibri.data_batch import data_batches -mock_pdf_model = MOCK_PDF_MODEL -N_PARAMS = len(MOCK_PDF_MODEL.param_names) +N_PARAMS = len(TEST_FORWARD_MAP_DIS.param_names) class MockOptimizerProvider: @@ -86,7 +85,11 @@ def create_directory_side_effect(*args, **kwargs): output_path = str(tmp_path) run_monte_carlo_fit( - 
mock_monte_carlo_fit, mock_pdf_model, output_path, replica_index=1, Q0=1.65 + mock_monte_carlo_fit, + TEST_FORWARD_MAP_DIS, + output_path, + replica_index=1, + Q0=1.65, ) # Check if the write_exportgrid function was called once as expected diff --git a/colibri/tests/test_param_initialisation.py b/colibri/tests/test_param_initialisation.py index 76ab803a..a6f31937 100644 --- a/colibri/tests/test_param_initialisation.py +++ b/colibri/tests/test_param_initialisation.py @@ -18,16 +18,16 @@ logging.basicConfig(level=logging.DEBUG) -# Mock PDF model setup -pdf_model = MagicMock() -pdf_model.param_names = ["param1", "param2", "param3"] +# Mock forward map (3 params: 2 PDF + 1 extra, but treated uniformly here) +forward_map = MagicMock() +forward_map.param_names = ["param1", "param2", "param3"] def test_zeros_initializer(): settings = {"type": "zeros"} replica_index = 0 - result = pdf_initial_parameters(pdf_model, settings, replica_index) - expected_result = jnp.array([0.0] * len(pdf_model.param_names)) + result = pdf_initial_parameters(forward_map, settings, replica_index) + expected_result = jnp.array([0.0] * len(forward_map.param_names)) np.testing.assert_array_equal(result, expected_result) @@ -41,11 +41,11 @@ def test_normal_initializer(mock_normal, mock_PRNGKey, caplog): mock_normal.return_value = jnp.array([0.1, -0.1, 0.2]) with caplog.at_level("WARNING"): - result = pdf_initial_parameters(pdf_model, settings, replica_index) + result = pdf_initial_parameters(forward_map, settings, replica_index) mock_PRNGKey.assert_called_once_with(42) mock_normal.assert_called_once_with( - key=jax.random.PRNGKey(42), shape=(len(pdf_model.param_names),) + key=jax.random.PRNGKey(42), shape=(len(forward_map.param_names),) ) assert "param_initialiser_settings: No 'means' or 'stds' provided." 
in caplog.text @@ -56,7 +56,7 @@ def test_normal_initializer(mock_normal, mock_PRNGKey, caplog): replica_index = 1 mock_normal.return_value = jnp.array([0.5, -0.5, 0.0]) - result = pdf_initial_parameters(pdf_model, settings, replica_index) + result = pdf_initial_parameters(forward_map, settings, replica_index) mock_PRNGKey.assert_called_with(1) @@ -83,7 +83,7 @@ def test_normal_initializer(mock_normal, mock_PRNGKey, caplog): with caplog.at_level("WARNING"): caplog.clear() - result = pdf_initial_parameters(pdf_model, settings_both, replica_index=0) + result = pdf_initial_parameters(forward_map, settings_both, replica_index=0) # No warning should be issued when both are provided assert len(caplog.records) == 0 @@ -107,7 +107,7 @@ def test_normal_initializer(mock_normal, mock_PRNGKey, caplog): with caplog.at_level("WARNING"): caplog.clear() - pdf_initial_parameters(pdf_model, settings_means_dict_only, replica_index=0) + pdf_initial_parameters(forward_map, settings_means_dict_only, replica_index=0) assert "'means' provided without 'stds'" in caplog.text assert "Using default std=1.0 for all parameters." in caplog.text @@ -121,7 +121,7 @@ def test_normal_initializer(mock_normal, mock_PRNGKey, caplog): with caplog.at_level("WARNING"): caplog.clear() - pdf_initial_parameters(pdf_model, settings_means_scalar_only, replica_index=0) + pdf_initial_parameters(forward_map, settings_means_scalar_only, replica_index=0) assert "'means' provided without 'stds'" in caplog.text assert "Using default std=1.0 for all parameters." in caplog.text @@ -135,7 +135,7 @@ def test_normal_initializer(mock_normal, mock_PRNGKey, caplog): with caplog.at_level("WARNING"): caplog.clear() - pdf_initial_parameters(pdf_model, settings_stds_dict_only, replica_index=0) + pdf_initial_parameters(forward_map, settings_stds_dict_only, replica_index=0) assert "'stds' provided without 'means'" in caplog.text assert "Using default mean=0.0 for all parameters." 
in caplog.text @@ -149,7 +149,7 @@ def test_normal_initializer(mock_normal, mock_PRNGKey, caplog): with caplog.at_level("WARNING"): caplog.clear() - pdf_initial_parameters(pdf_model, settings_stds_scalar_only, replica_index=0) + pdf_initial_parameters(forward_map, settings_stds_scalar_only, replica_index=0) assert "'stds' provided without 'means'" in caplog.text assert "Using default mean=0.0 for all parameters." in caplog.text @@ -170,7 +170,7 @@ def test_normal_initializer(mock_normal, mock_PRNGKey, caplog): with caplog.at_level("WARNING"): caplog.clear() - result = pdf_initial_parameters(pdf_model, settings_scalars, replica_index=0) + result = pdf_initial_parameters(forward_map, settings_scalars, replica_index=0) # No warning should be issued when both are provided assert len(caplog.records) == 0 @@ -198,7 +198,7 @@ def test_normal_initializer(mock_normal, mock_PRNGKey, caplog): with caplog.at_level("WARNING"): caplog.clear() - result = pdf_initial_parameters(pdf_model, settings_mixed, replica_index=0) + result = pdf_initial_parameters(forward_map, settings_mixed, replica_index=0) # No warning should be issued assert len(caplog.records) == 0 @@ -225,7 +225,7 @@ def test_normal_initializer(mock_normal, mock_PRNGKey, caplog): with pytest.raises( ValueError, match="'means' dict must have one entry per parameter" ): - pdf_initial_parameters(pdf_model, settings_few_means, replica_index=0) + pdf_initial_parameters(forward_map, settings_few_means, replica_index=0) # Too few stds in dict settings_few_stds = { @@ -238,7 +238,7 @@ def test_normal_initializer(mock_normal, mock_PRNGKey, caplog): with pytest.raises( ValueError, match="'stds' dict must have one entry per parameter" ): - pdf_initial_parameters(pdf_model, settings_few_stds, replica_index=0) + pdf_initial_parameters(forward_map, settings_few_stds, replica_index=0) # Invalid type for means settings_invalid_means = { @@ -248,7 +248,7 @@ def test_normal_initializer(mock_normal, mock_PRNGKey, caplog): } with 
pytest.raises(TypeError, match="'means' must be dict or scalar"): - pdf_initial_parameters(pdf_model, settings_invalid_means, replica_index=0) + pdf_initial_parameters(forward_map, settings_invalid_means, replica_index=0) # Invalid type for stds settings_invalid_stds = { @@ -258,7 +258,7 @@ def test_normal_initializer(mock_normal, mock_PRNGKey, caplog): } with pytest.raises(TypeError, match="'stds' must be dict or scalar"): - pdf_initial_parameters(pdf_model, settings_invalid_stds, replica_index=0) + pdf_initial_parameters(forward_map, settings_invalid_stds, replica_index=0) @patch("jax.random.PRNGKey") @@ -268,12 +268,12 @@ def test_uniform_initializer(mock_uniform, mock_PRNGKey): replica_index = 1 mock_uniform.return_value = jnp.array([0.5, -0.5, 0.0]) - result = pdf_initial_parameters(pdf_model, settings, replica_index) + result = pdf_initial_parameters(forward_map, settings, replica_index) mock_PRNGKey.assert_called_once_with(43) mock_uniform.assert_called_once_with( key=jax.random.PRNGKey(43), - shape=(len(pdf_model.param_names),), + shape=(len(forward_map.param_names),), minval=-1.0, maxval=1.0, ) @@ -297,7 +297,7 @@ def test_uniform_initializer(mock_uniform, mock_PRNGKey): # Mock return value to match param count mock_uniform.return_value = jnp.array([0.1, 1.5, 0.0]) - result_bounds = pdf_initial_parameters(pdf_model, settings_bounds, replica_index) + result_bounds = pdf_initial_parameters(forward_map, settings_bounds, replica_index) np.testing.assert_array_equal(result_bounds, jnp.array([0.1, 1.5, 0.0])) @@ -308,7 +308,7 @@ def test_uniform_initializer(mock_uniform, mock_PRNGKey): assert called_kwargs["key"] == jax.random.PRNGKey(43) # Check the 'shape' argument matches - assert called_kwargs["shape"] == (len(pdf_model.param_names),) + assert called_kwargs["shape"] == (len(forward_map.param_names),) # Use numpy/jax testing utilities for arrays np.testing.assert_array_equal(called_kwargs["minval"], jnp.array([-1.0, 0.0, -0.5])) @@ -328,7 +328,7 @@ def 
test_uniform_initializer(mock_uniform, mock_PRNGKey): } with pytest.raises(ValueError, match="Missing bounds for parameters"): - pdf_initial_parameters(pdf_model, settings_missing_bounds, 1) + pdf_initial_parameters(forward_map, settings_missing_bounds, 1) # ---- Test missing min_val/max_val and bounds ---- settings_invalid = { @@ -340,15 +340,57 @@ def test_uniform_initializer(mock_uniform, mock_PRNGKey): with pytest.raises( ValueError, match="param_initialiser_settings must define either" ): - pdf_initial_parameters(pdf_model, settings_invalid, 1) + pdf_initial_parameters(forward_map, settings_invalid, 1) def test_invalid_initializer_type(): settings = {"type": "invalid_type"} replica_index = 0 with unittest.TestCase().assertLogs(level="WARNING") as log: - result = pdf_initial_parameters(pdf_model, settings, replica_index) + result = pdf_initial_parameters(forward_map, settings, replica_index) # Asserting that at least one warning was logged assert log.output - expected_result = jnp.array([0.0] * len(pdf_model.param_names)) + expected_result = jnp.array([0.0] * len(forward_map.param_names)) np.testing.assert_array_equal(result, expected_result) + + +def test_extra_params_included_in_initialization(): + """When forward_map has extra_param_names, initialization covers all params.""" + fm_with_extras = MagicMock() + fm_with_extras.param_names = ["pdf_p1", "pdf_p2", "norm"] + + # zeros mode: all three params initialized to zero + result = pdf_initial_parameters(fm_with_extras, {"type": "zeros"}) + assert len(result) == 3 + np.testing.assert_array_equal(result, jnp.zeros(3)) + + # uniform mode with global bounds: length matches all three params + result = pdf_initial_parameters( + fm_with_extras, + {"type": "uniform", "random_seed": 0, "min_val": -1.0, "max_val": 1.0}, + replica_index=0, + ) + assert len(result) == 3 + + # normal mode with scalar means/stds: length matches all three params + result = pdf_initial_parameters( + fm_with_extras, + {"type": "normal", 
"means": 0.0, "stds": 1.0, "random_seed": 0}, +        replica_index=0, +    ) +    assert len(result) == 3 + +    # uniform mode with per-parameter bounds including the extra param +    bounds = {"pdf_p1": (-1.0, 1.0), "pdf_p2": (-1.0, 1.0), "norm": (0.5, 1.5)} +    result = pdf_initial_parameters( +        fm_with_extras, +        {"type": "uniform", "random_seed": 0, "bounds": bounds}, +        replica_index=0, +    ) +    assert len(result) == 3 + +    # backward compatibility: no extra params -> length matches pdf params only +    fm_no_extras = MagicMock() +    fm_no_extras.param_names = ["pdf_p1", "pdf_p2"] +    result = pdf_initial_parameters(fm_no_extras, {"type": "zeros"}) +    assert len(result) == 2 diff --git a/colibri/tests/test_time_likelihood.py b/colibri/tests/test_time_likelihood.py index 6408586c..4f5af972 100644 --- a/colibri/tests/test_time_likelihood.py +++ b/colibri/tests/test_time_likelihood.py @@ -13,7 +13,7 @@ import pytest from colibri.time_likelihood import time_log_likelihood -from colibri.tests.conftest import MOCK_PDF_MODEL +from colibri.tests.conftest import TEST_FORWARD_MAP_DIS logging.basicConfig(level=logging.DEBUG) @@ -55,7 +55,7 @@ def test_time_log_likelihood_default_sizes( sizes, times = time_log_likelihood( mock_log_likelihood, mock_param_initialiser_settings, -        MOCK_PDF_MODEL, +        TEST_FORWARD_MAP_DIS, tmp_output_path, batch_sample_sizes=[1, 10, 100],  # Use small sizes for testing ) @@ -101,7 +101,7 @@ def test_time_log_likelihood_custom_sizes( sizes, times = time_log_likelihood( mock_log_likelihood, mock_param_initialiser_settings, -        MOCK_PDF_MODEL, +        TEST_FORWARD_MAP_DIS, tmp_output_path, batch_sample_sizes=custom_sizes, ) @@ -125,7 +125,7 @@ def test_time_log_likelihood_generates_correct_number_of_samples( time_log_likelihood( mock_log_likelihood, mock_param_initialiser_settings, -        MOCK_PDF_MODEL, +        TEST_FORWARD_MAP_DIS, tmp_output_path, batch_sample_sizes=custom_sizes, ) @@ -135,7 +135,7 @@ # Check 
that it was called with correct replica indices expected_calls = [ - call(MOCK_PDF_MODEL, mock_param_initialiser_settings, i) + call(TEST_FORWARD_MAP_DIS, mock_param_initialiser_settings, i) for i in range(max_size) ] mock_init.assert_has_calls(expected_calls) @@ -160,7 +160,7 @@ def counting_likelihood(params): time_log_likelihood( counting_likelihood, mock_param_initialiser_settings, - MOCK_PDF_MODEL, + TEST_FORWARD_MAP_DIS, tmp_output_path, batch_sample_sizes=[2, 5], ) @@ -179,7 +179,7 @@ def test_time_log_likelihood_csv_format( _, __ = time_log_likelihood( mock_log_likelihood, mock_param_initialiser_settings, - MOCK_PDF_MODEL, + TEST_FORWARD_MAP_DIS, tmp_output_path, batch_sample_sizes=[3, 7], ) @@ -219,7 +219,7 @@ def test_time_log_likelihood_none_uses_defaults( sizes, _ = time_log_likelihood( mock_log_likelihood, mock_param_initialiser_settings, - MOCK_PDF_MODEL, + TEST_FORWARD_MAP_DIS, tmp_output_path, batch_sample_sizes=None, ) @@ -245,7 +245,7 @@ def failing_likelihood(_): time_log_likelihood( failing_likelihood, mock_param_initialiser_settings, - MOCK_PDF_MODEL, + TEST_FORWARD_MAP_DIS, tmp_output_path, batch_sample_sizes=[2, 5], ) @@ -278,7 +278,7 @@ def failing_block(x): sizes, times = time_log_likelihood( lambda params: jnp.sum(params, axis=-1), mock_param_initialiser_settings, - MOCK_PDF_MODEL, + TEST_FORWARD_MAP_DIS, tmp_output_path, batch_sample_sizes=[2, 5, 10], ) @@ -318,7 +318,7 @@ def failing_block(x): sizes, times = time_log_likelihood( lambda params: jnp.sum(params, axis=-1), mock_param_initialiser_settings, - MOCK_PDF_MODEL, + TEST_FORWARD_MAP_DIS, tmp_output_path, batch_sample_sizes=[2, 5, 10], ) @@ -342,7 +342,7 @@ def test_time_log_likelihood_all_sizes_successful( sizes, times = time_log_likelihood( mock_log_likelihood, mock_param_initialiser_settings, - MOCK_PDF_MODEL, + TEST_FORWARD_MAP_DIS, tmp_output_path, batch_sample_sizes=[2, 5, 10], ) @@ -368,7 +368,7 @@ def test_time_log_likelihood_relative_time_calculation( _, __ = 
time_log_likelihood( mock_log_likelihood, mock_param_initialiser_settings, - MOCK_PDF_MODEL, + TEST_FORWARD_MAP_DIS, tmp_output_path, batch_sample_sizes=[2, 5, 10], ) @@ -419,7 +419,7 @@ def simple_likelihood(params): time_log_likelihood( simple_likelihood, mock_param_initialiser_settings, - MOCK_PDF_MODEL, + TEST_FORWARD_MAP_DIS, tmp_output_path, batch_sample_sizes=[2, 5], ) @@ -438,7 +438,7 @@ def test_time_log_likelihood_with_single_batch_size( sizes, times = time_log_likelihood( mock_log_likelihood, mock_param_initialiser_settings, - MOCK_PDF_MODEL, + TEST_FORWARD_MAP_DIS, tmp_output_path, batch_sample_sizes=[10], ) diff --git a/colibri/tests/test_utils.py b/colibri/tests/test_utils.py index d7a7459c..c0b0ac64 100644 --- a/colibri/tests/test_utils.py +++ b/colibri/tests/test_utils.py @@ -458,7 +458,9 @@ def test_ns_fit_resampler_normal_case(mock_resample, mock_read_csv, mock_exists) @patch("colibri.utils.os.system") @patch("colibri.utils.os.path.exists") @patch("colibri.utils.write_exportgrid") +@patch("colibri.utils.pd.read_csv") def test_write_resampled_bayesian_fit( + mock_read_csv, mock_write_exportgrid, mock_exists, mock_os_system, @@ -481,6 +483,9 @@ def test_write_resampled_bayesian_fit( ] mock_dill_load.return_value = mock_pdf_model + # Mock pd.read_csv to return a header-only DataFrame for full_posterior_sample.csv + mock_read_csv.return_value = pd.DataFrame(columns=["param1", "param2"]) + # Ensure os.path.exists returns True for necessary paths mock_exists.side_effect = lambda path: ( True @@ -511,7 +516,7 @@ def test_write_resampled_bayesian_fit( mock_os_system.assert_any_call(f"rm -r {resampled_fit_path}/replicas/*") # Verify the correct data was written to CSV - df = pd.DataFrame(resampled_posterior, columns=mock_pdf_model.param_names) + df = pd.DataFrame(resampled_posterior, columns=["param1", "param2"]) expected_csv_path = str(resampled_fit_path) + "/ns_result.csv" with patch("pandas.DataFrame.to_csv") as mock_to_csv: df.to_csv(expected_csv_path, 
float_format="%.5e") @@ -542,7 +547,10 @@ def test_creates_replicas_dir_when_missing(tmp_path): "colibri.utils.os.mkdir" ) as mock_mkdir, mock.patch( "colibri.utils.write_exportgrid" - ) as mock_we: + ) as mock_we, mock.patch( + "colibri.utils.pd.read_csv", + return_value=pd.DataFrame(columns=[]), + ): # Call with an empty posterior so loop won’t actually try to mkdir again write_resampled_bayesian_fit( @@ -584,6 +592,9 @@ def test_creates_each_replica_dir_when_missing(tmp_path): "colibri.utils.os.mkdir" ) as m_mkdir, mock.patch( "colibri.utils.write_exportgrid" + ), mock.patch( + "colibri.utils.pd.read_csv", + return_value=pd.DataFrame(columns=["a", "b"]), ): write_resampled_bayesian_fit( diff --git a/colibri/time_likelihood.py b/colibri/time_likelihood.py index beaa42e0..cecf2b8b 100644 --- a/colibri/time_likelihood.py +++ b/colibri/time_likelihood.py @@ -19,7 +19,7 @@ def time_log_likelihood( log_likelihood, param_initialiser_settings, - pdf_model, + forward_map, output_path, batch_sample_sizes=None, ): @@ -32,8 +32,8 @@ def time_log_likelihood( The log likelihood function that takes parameter vector(s) param_initialiser_settings : dict Settings for parameter initialization - pdf_model : pdf_model.PDFModel - The PDF model to fit + forward_map : forward_map.ForwardMap + The forward map whose .param_names are used for parameter initialization. 
batch_sample_sizes : sequence of int, optional Batch sizes (number of parameter vectors per batch) to time output_path : pathlib.PosixPath @@ -68,7 +68,7 @@ def time_log_likelihood( all_samples = [] for replica_idx in range(max_size): params = pdf_initial_parameters( - pdf_model, param_initialiser_settings, replica_idx + forward_map, param_initialiser_settings, replica_idx ) all_samples.append(params) diff --git a/colibri/utils.py b/colibri/utils.py index aee86d0d..ac670233 100644 --- a/colibri/utils.py +++ b/colibri/utils.py @@ -314,7 +314,7 @@ def log_likelihood(params, central_values, inv_covmat, fast_kernel_arrays): return -0.5 * loss_function(central_values, predictions, inv_covmat) params = bayesian_prior.prior_transform( - jax.random.uniform(jax.random.PRNGKey(0), shape=(len(pdf_model.param_names),)) + jax.random.uniform(jax.random.PRNGKey(0), shape=(len(forward_map.param_names),)) ) dtype = log_likelihood(params, central_values, covmat, fast_kernel_arrays).dtype @@ -451,9 +451,13 @@ def write_resampled_bayesian_fit( os.system(f"rm -r {resampled_fit_path}/replicas/*") # overwrite old ns_result.csv with resampled posterior - parameters = pdf_model.param_names + all_param_names = list( + pd.read_csv( + fit_path / "full_posterior_sample.csv", nrows=0, index_col=0 + ).columns + ) n_pdf_params = len(pdf_model.param_names) - df = pd.DataFrame(resampled_posterior, columns=parameters) + df = pd.DataFrame(resampled_posterior, columns=all_param_names) df.to_csv(str(resampled_fit_path) + f"/{csv_results_name}.csv", float_format="%.5e") new_rep_path = resampled_fit_path / "replicas"