diff --git a/invisible_cities/core/core_functions.py b/invisible_cities/core/core_functions.py
index e471021578..f33097b554 100644
--- a/invisible_cities/core/core_functions.py
+++ b/invisible_cities/core/core_functions.py
@@ -3,13 +3,19 @@
 This module includes utility functions.
 """
 import time
+import warnings
 
 import numpy as np
 
 from typing import Sequence
 from typing import Tuple
+from typing import Optional
+
+from . exceptions import ValueOutOfRange
 
 from ..types.symbols import NormMode
+from ..types.symbols import Strictness
+
 
 def timefunc(f):
     """
@@ -77,6 +83,61 @@ def in_range(data, minval=-np.inf, maxval=np.inf, left_closed=True, right_closed
     return lower_bound & upper_bound
 
 
+def all_in_range(data         : np.ndarray,
+                 minval       : float,
+                 maxval       : float,
+                 display_name : Optional[str]        = '',
+                 strictness   : Optional[Strictness] = Strictness.raise_error,
+                 **kwargs) -> bool:
+    """
+    Checks whether the input values are all inside the interval (minval, maxval).
+
+    Parameters
+    ----------
+    data : np.ndarray
+        Input array to check.
+    minval: float
+        Lower limit of the interval.
+    maxval: float
+        Upper limit of the interval.
+    display_name: string
+        Label to be displayed in case of warning or exception.
+    strictness: Strictness
+        Describes the behaviour when the output is `False`. If `strictness` is:
+        - `Strictness.raise_error`: an exception is raised.
+        - `Strictness.warning`    : a warning is issued before returning.
+        - `Strictness.silent`     : the function returns quietly.
+
+    **kwargs:
+        Optional arguments passed to `in_range`.
+    Returns
+    -------
+    True if all values are in the interval. False otherwise.
+
+    Raises
+    ------
+    ValueOutOfRange: if the output is `False` and `strictness` is `Strictness.raise_error`.
+    """
+
+    values_in_interval = in_range(data, minval, maxval, **kwargs)
+    outliers           = data[~values_in_interval]
+    n_outliers         = len(outliers)
+
+    if values_in_interval.all():
+        return True
+    elif strictness is Strictness.silent:
+        return False
+
+    text  = f'Variable {display_name} has {n_outliers} values out of bounds ({minval}, {maxval})\n'
+    text += f'Outliers: {outliers}'
+
+    if strictness is Strictness.warning:
+        warnings.warn(text, UserWarning)
+        return False
+    else:
+        raise ValueOutOfRange(text)
+
+
 def weighted_mean_and_var(data     : Sequence,
                           weights  : Sequence,
                           unbiased : bool = False,
diff --git a/invisible_cities/core/core_functions_test.py b/invisible_cities/core/core_functions_test.py
index b60b5662e4..883dda3fe0 100644
--- a/invisible_cities/core/core_functions_test.py
+++ b/invisible_cities/core/core_functions_test.py
@@ -9,6 +9,8 @@
 from pytest import approx
 from pytest import mark
 from pytest import raises
+from pytest import warns
+
 from flaky import flaky
 
 from hypothesis import given
@@ -111,6 +113,38 @@ def test_in_range_right_close_interval(data):
     assert all(output)
 
 
+@given(random_length_float_arrays(min_length = 1,
+                                  min_value  = 0,
+                                  max_value  = 100))
+def test_all_in_range_when_all_fall_inside(data):
+    assert core.all_in_range(data, 0, 100, left_closed=True, right_closed=True,
+                             strictness=core.Strictness.raise_error)
+
+
+@given(random_length_float_arrays(min_length = 1,
+                                  min_value  = 0,
+                                  max_value  = 100))
+def test_all_in_range_when_some_fall_outside_silent(data):
+    minvalue = np.min(data)
+    assert not core.all_in_range(data, minvalue+1, 100, strictness=core.Strictness.silent)
+
+
+@given(random_length_float_arrays(min_length = 1,
+                                  min_value  = 0,
+                                  max_value  = 100))
+def test_all_in_range_when_some_fall_outside_warns(data):
+    minvalue = np.min(data)
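+    # minvalue + 1 lies above the smallest element of data, so at least one
+    # value is guaranteed to be out of range and a warning must be issued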
+    with warns(UserWarning, match="values out of bounds"):
+        core.all_in_range(data, minvalue+1, 100, strictness=core.Strictness.warning)
+
+
+@given(random_length_float_arrays(min_length = 1,
+                                  min_value  = 0,
+                                  max_value  = 100))
+def test_all_in_range_when_some_fall_outside_exception(data):
+    minvalue = np.min(data)
+    with raises(core.ValueOutOfRange, match="values out of bounds"):
+        core.all_in_range(data, minvalue+1, 100, strictness=core.Strictness.raise_error)
+
+
 @mark.parametrize(" first second norm_mode expected".split(),
                   (( 1   , 2    , core.NormMode.first , -1  ),
                    ( 4   , 2    , core.NormMode.second,  1  ),
diff --git a/invisible_cities/core/exceptions.py b/invisible_cities/core/exceptions.py
index 5f6bb32690..bc83895058 100644
--- a/invisible_cities/core/exceptions.py
+++ b/invisible_cities/core/exceptions.py
@@ -55,3 +55,6 @@ class MCEventNotFound(ICException):
     pass
 
 class SensorIDMismatch(ICException):
     pass
+
+class ValueOutOfRange(ICException):
+    pass
diff --git a/invisible_cities/reco/corrections.py b/invisible_cities/reco/corrections.py
index 57abbf02e5..167554d9af 100644
--- a/invisible_cities/reco/corrections.py
+++ b/invisible_cities/reco/corrections.py
@@ -243,6 +243,40 @@ def get_normalization_factor(map_e0 : ASectorMap,
     return norm_value
 
 
+def apply_geo_correction(map_e0     : ASectorMap,
+                         norm_strat : NormStrategy    = NormStrategy.max,
+                         norm_value : Optional[float] = None
+                         ) -> Callable:
+    """
+    Given a correction map and a normalization strategy, returns a
+    function that computes the geometry-only correction factor
+    at the (x, y) positions of a hit collection.
+
+    Parameters
+    ----------
+    map_e0 : ASectorMap
+        Correction map for geometric effects.
+    norm_strat : NormStrategy
+        Provides the desired normalization to be used.
+    norm_value : float (Optional)
+        If norm_strat is set to custom, the user must provide the
+        desired scale.
+    Returns
+    -------
+    A function that, given x and y arrays, returns the geometric correction factor.
+    """
+    normalization   = get_normalization_factor(map_e0, norm_strat, norm_value)
+    get_xy_corr_fun = maps_coefficient_getter(map_e0.mapinfo, map_e0.e0)
+
+    def geo_correction_factor(x : np.ndarray,
+                              y : np.ndarray) -> np.ndarray:
+        geo_factor = correct_geometry_(get_xy_corr_fun(x, y))
+        factor     = geo_factor * normalization
+        return factor
+
+    return geo_correction_factor
+
+
 def apply_all_correction_single_maps(map_e0 : ASectorMap,
                                      map_lt : ASectorMap,
diff --git a/invisible_cities/reco/corrections_test.py b/invisible_cities/reco/corrections_test.py
index 3311caef06..ed7677ba68 100644
--- a/invisible_cities/reco/corrections_test.py
+++ b/invisible_cities/reco/corrections_test.py
@@ -11,6 +11,7 @@
 from . corrections import get_normalization_factor
 from . corrections import apply_all_correction_single_maps
 from . corrections import apply_all_correction
+from . corrections import apply_geo_correction
 
 from pytest        import fixture
 from numpy.testing import assert_allclose
@@ -268,21 +269,30 @@ def test_apply_all_correction_single_maps_raises_exception_when_invalid_map(map_
 @few_examples
 @given(float_arrays(size      = 1,
-                    min_value = -198,
-                    max_value = +198),
-       float_arrays(size      = 1,
-                    min_value = -198,
-                    max_value = +198),
-       float_arrays(size      = 1,
-                    min_value = 0,
-                    max_value = 5e2),
+                    min_value = -198,
+                    max_value = +198),
        float_arrays(size      = 1,
-                    min_value = 0,
-                    max_value = 1e5))
+                    min_value = -198,
+                    max_value = +198))
+def test_apply_geo_correction_properly(map_filename, x, y):
+    """
+    The input map is homogeneous, therefore the geometric
+    correction factor must be 1.
+ """ + maps = read_maps(map_filename) + load_corr = apply_geo_correction(map_e0=maps, norm_strat=NormStrategy.max) + corr = load_corr(x, y) + assert_allclose (corr, np.ones_like(corr)) + +@few_examples +@given(float_arrays(size = 1, min_value = -198, max_value = 198), + float_arrays(size = 1, min_value = -198, max_value = 198), + float_arrays(size = 1, min_value = 0, max_value = 500), + float_arrays(size = 1, min_value = 0, max_value = 100*1000)) def test_apply_all_correction_single_maps_properly(map_filename, x, y, z, t): """ - Due the map taken as input, the geometric correction - factor must be 1, the temporal correction 1 and the + The input map is homogeneous, therefore the geometric + correction factor must be 1, the temporal correction 1 and the lifetime one: exp(Z/5000). """ maps = read_maps(map_filename) diff --git a/invisible_cities/reco/icaro_components.py b/invisible_cities/reco/icaro_components.py new file mode 100644 index 0000000000..7c35bf0c56 --- /dev/null +++ b/invisible_cities/reco/icaro_components.py @@ -0,0 +1,219 @@ +import numpy as np +import pandas as pd + +from typing import Tuple, Optional +from sklearn.linear_model import RANSACRegressor + +from . corrections import ASectorMap +from . corrections import apply_geo_correction + +from .. types.symbols import type_of_signal +from .. types.symbols import Strictness +from .. types.symbols import NormStrategy +from .. core.core_functions import all_in_range +from .. core.core_functions import in_range +from .. core.core_functions import shift_to_bin_centers +from .. core.fit_functions import fit +from .. core.fit_functions import gauss + +def select_nS_mask_and_check(dst : pd.DataFrame , + column : type_of_signal , + input_mask : Optional[np.ndarray] = None , + eff_interval : Optional[Tuple[float, float]] = [0,1] , + strictness : Optional[Strictness] = Strictness.raise_error + )->np.ndarray: + """ + Selects nS1(or nS2) == 1 for a given kr dst and + returns the mask. It also computes selection efficiency, + checking if the value is within a given interval. + Parameters + ---------- + dst: pd.Dataframe + Krypton dst dataframe. + column: type_of_signal + The function can be appplied over nS1 or nS2. + input_mask: np.array (Optional) + Selection mask of the previous cut. If this is the first selection + /no previous maks is input, input_mask is set to be an all True array. + Defaults to None. + interval: length-2 tuple (Optional) + If the selection efficiency is out of this interval + the map production will abort/just warn, depending on "strictness". + Defaults to [0, 1]. + strictness: Strictness (Optional) + If 'warning', function returns a False if the criteria + is not matched. If 'stop_proccess' it raises an exception. + Defaults to `raise_error`. + Returns + ---------- + A mask corresponding to the selected events. 
+ """ + input_mask = input_mask if input_mask is not None else [True] * len(dst) + mask = np.zeros_like(input_mask) + mask[input_mask] = dst.loc[input_mask, column.value] == 1 + + nevts_after = dst[mask] .event.nunique() + nevts_before = dst[input_mask].event.nunique() + eff = nevts_after / nevts_before + all_in_range(data = np.array(eff) , + minval = eff_interval[0], + maxval = eff_interval[1], + display_name = column.value , + strictness = strictness , + right_closed = True) + + return mask + + +def select_band_and_check(dst : pd.DataFrame , + boot_map : ASectorMap , + norm_strat : Optional[NormStrategy] = NormStrategy.max, + input_mask : Optional[np.ndarray] = None , + range_DT : Optional[Tuple[np.ndarray, np.ndarray]] = (10, 1300) , + range_E : Optional[Tuple[np.ndarray, np.ndarray]] = (10.0e+3,14e+3) , + nsigma_sel : Optional[float] = 3.5 , + eff_interval: Optional[Tuple[float, float]] = [0,1] , + strictness : Optional[Strictness] = Strictness.raise_error + )->np.array: + """ + This function returns a selection of the events that + are inside the Kr E vz Z band, and checks + if the selection efficiency is correct. + + Parameters + ---------- + dst : pd.DataFrame + Krypton dataframe. + boot_map: str + Name of bootstrap map file. + norm_strat: norm_strategy + Provides the desired normalization to be used. + Defaults to `max`. + mask_input: np.array (Optional) + Mask of the previous selection cut. + Defaults to None. + range_DT: Tuple[np.array, np.array] (Optional) + Range in DT-axis + Defaults to (10, 300). + range_E: Tuple[np.array, np.array] (Optional) + Range in Energy-axis. + Defaults to (10e3, 14e3). + nsigma_sel: float (Optional) + Number of sigmas to set the band width. + Defaults to 3.5. + eff_interval (Optional) + If the selection efficiency is out of this interval + the map production will abort/just warn, depending on "strictness". + Defaults to [0, 1]. + strictness: Strictness (Optional) + If 'warning', function returns a False if the criteria + is not matched. If 'stop_proccess' it raises an exception. + Defaults to `raise_error`. + Returns + ------- + A mask corresponding to the selection made. + """ + if input_mask is None: + input_mask = [True] * len(dst) + + dst_sel = dst[input_mask] + + emaps = apply_geo_correction(boot_map, norm_strat=norm_strat) + E0 = dst_sel.S2e.values * emaps(dst_sel.X.values, dst_sel.Y.values) + + sel_krband = np.zeros_like(input_mask) + sel_krband[input_mask] = selection_in_band(dst_sel.DT, E0 , + range_dt = range_DT, + range_e = range_E , + nsigma = nsigma_sel) + + effsel = dst[sel_krband].event.nunique()/dst[input_mask].event.nunique() + + all_in_range(data = np.array(effsel) , + minval = eff_interval[0] , + maxval = eff_interval[1] , + display_name = "DT-band selection", + strictness = strictness , + right_closed = True) + + return sel_krband + + +def select_band(dt : np.ndarray , + e : np.ndarray , + range_dt : Tuple[float, float], + range_e : Tuple[float, float], + nsigma : Optional[float] = 3.5) ->np.ndarray: + """ + This function returns a mask for the selection of the events + that are inside the Kr E vz Z + + Parameters + ---------- + dt: np.array + axial (dt/z) values + e: np.array + energy values + range_dt: Tuple[float, float] + Range in DT-axis + range_e: Tuple[float, float] + Range in Energy-axis + nsigma: float (Optional) + Number of sigmas to set the band width + Defaults to 3.5 + Returns + ------- + A mask corresponding to the selection made. 
+ """ + # Reshapes and flattens are needed for RANSAC function + + dt_sel = dt[in_range(dt, *range_dt)] + e_sel = e [in_range( e, *range_e )] + + res_fit = RANSACRegressor().fit(dt_sel.reshape(-1,1), + np.log(e_sel).reshape(-1, 1)) + sigma = estimate_sigma(dt_sel, np.log(e_sel), res_fit) + + prefict_fun = lambda dt: res_fit.predict(dt.reshape(-1, 1)).flatten() + upper_band = lambda dt: prefict_fun(dt) + nsigma * sigma + lower_band = lambda dt: prefict_fun(dt) - nsigma * sigma + sel_inband = in_range(np.log(e), lower_band(dt), upper_band(dt)) + + return sel_inband + +def estimate_sigma(dt : np.ndarray , + e : np.ndarray , + res_fit: RANSACRegressor + ) -> float: + """ + This function estimates the sigma from the residuals to a line fit + + Parameters + ---------- + dt: np.array + axial (dt/z) values + e: np.array + energy values + res_fit: RANSACRegressor + RANSAC object fitted to the data + + Returns + ------- + The sigma of the residuals as a float. + """ + # Reshapes and flattens are needed for RANSAC function + + in_mask = res_fit.inlier_mask_ + e_predict = res_fit.predict(dt[in_mask].reshape(-1, 1)).flatten() + residuals_ln = e[in_mask] - e_predict + resy, resx = np.histogram(residuals_ln, 100) + resx = shift_to_bin_centers(resx) + seed = [np.max(resy) * (2 * np.pi) **.5 * np.std(residuals_ln), + 0., np.std(residuals_ln)] + # Ideally, a np.mean for the mean should be better that a plain 0 but the + # fit does not cope well with that input value, so 0 (fair guess anyway) is + # used instead + fitres = fit(gauss, resx, resy, seed=seed) + fitsigma = fitres.values[2] + + return fitsigma diff --git a/invisible_cities/reco/icaro_components_test.py b/invisible_cities/reco/icaro_components_test.py new file mode 100644 index 0000000000..59b3424d00 --- /dev/null +++ b/invisible_cities/reco/icaro_components_test.py @@ -0,0 +1,86 @@ +import numpy as np +import numpy.testing as npt +import pandas as pd + +from hypothesis.strategies import integers +from hypothesis.strategies import floats +from hypothesis import given +from pytest import mark +from pytest import raises + +from sklearn.linear_model import RANSACRegressor + +from .. core.testing_utils import assert_dataframes_equal +from .. core import core_functions as core + +from . import icaro_components as icarcomp + +@mark.parametrize("nsignals", [0, 1, 100, 9999, 10*1000]) +@mark.parametrize("signal" , icarcomp.type_of_signal) +def test_select_nS_mask_and_check_right_output(nsignals, signal): + nevt = 10*1000 + data = np.concatenate([np.zeros(nevt- nsignals), np.ones(nsignals)]) + np.random.shuffle(data) + data = pd.DataFrame({'nS1': data, 'nS2': data, 'event': range(nevt)}) + mask = icarcomp.select_nS_mask_and_check(data, signal) + assert np.count_nonzero(mask) == nsignals + + +@mark.parametrize("nsignals", [1, 100, 9999, 10*1000]) +def test_select_nS_mask_and_check_consistency(nsignals): + nevt = 10*1000 + data = np.concatenate([np.zeros(nevt - nsignals), np.ones(nsignals)]) + np.random.shuffle(data) + data = pd.DataFrame({'nS1': data, 'event': range(nevt)}) + mask = icarcomp.select_nS_mask_and_check(data, icarcomp.type_of_signal.nS1) + mask_re = icarcomp.select_nS_mask_and_check(data, icarcomp.type_of_signal.nS1, input_mask=mask) + npt.assert_equal(mask_re, mask) + +# The zero option for ns1 not contemplated since the function expects at a non +# zero number of input events to filter. 
+@mark.parametrize("ns1", [ 1, 100, 9999, 10*1000]) +@mark.parametrize("ns2", [0, 1, 100, 9999, 10*1000]) +def test_select_nS_mask_and_check_concatenating(ns1, ns2): + nevt = 10*1000 + dataS1 = np.concatenate([np.zeros(nevt- ns1), + np.ones (ns1)]) + dataS2 = np.concatenate([np.zeros(nevt- ns2), + np.ones (ns2)]) + np.random.shuffle(dataS1) + np.random.shuffle(dataS2) + data = pd.DataFrame({'nS1': dataS1, 'nS2': dataS2, 'event': range(nevt)}) + maskS1 = icarcomp.select_nS_mask_and_check(data, icarcomp.type_of_signal.nS1) + maskS2 = icarcomp.select_nS_mask_and_check(data, icarcomp.type_of_signal.nS2, maskS1) + + assert np.count_nonzero(maskS1) >= np.count_nonzero(maskS2) + assert not maskS2[~maskS1].any() + + +def test_select_nS_mask_and_check_range_assertion(): + nevt = 10*1000 + ns1 = 1000 + min_eff = 0.5 + max_eff = 1 + dataS1 = np.concatenate([np.zeros(nevt- ns1), + np.ones (ns1)]) + np.random.shuffle(dataS1) + data = pd.DataFrame({'nS1': dataS1, 'event': range(nevt)}) + eff = ns1 / nevt + + with raises(core.ValueOutOfRange, match="values out of bounds"): + icarcomp.select_nS_mask_and_check(data, icarcomp.type_of_signal.nS1, + None,[min_eff, max_eff], + icarcomp.Strictness.raise_error) + +@mark.parametrize("sigma", (0.5, 1, 5, 10, 20)) +def test_estimate_sigma(sigma): + nevt = 10*1000 + xrange = [0, 1000] + slope = 100 + n0 = 100 + x = np.random.uniform(*xrange, nevt) + y = slope * x + n0 + y = np.random.normal(y, sigma) + res_fit = RANSACRegressor().fit(x.reshape(-1,1), y.reshape(-1, 1)) + sigma_est = icarcomp.estimate_sigma(x, y, res_fit) + npt.assert_allclose(sigma_est, sigma, rtol=0.1) diff --git a/invisible_cities/types/symbols.py b/invisible_cities/types/symbols.py index cd25b91c1d..c4230bc29d 100644 --- a/invisible_cities/types/symbols.py +++ b/invisible_cities/types/symbols.py @@ -74,6 +74,15 @@ class MCTableType(AutoNameEnumBase): string_map = auto() +class Strictness(AutoNameEnumBase): + silent = auto() + warning = auto() + raise_error = auto() + +class type_of_signal(AutoNameEnumBase): + nS1 = auto() + nS2 = auto() + class NormStrategy(AutoNameEnumBase): mean = auto() max = auto() diff --git a/manage.sh b/manage.sh index 16c63a65c1..dc83365b83 100644 --- a/manage.sh +++ b/manage.sh @@ -110,6 +110,7 @@ dependencies: - coverage = 5.5 - pip = 21.2.4 - setuptools = 58.0.4 +- scikit-learn = 1.3.0 - pip: - pytest-instafail==0.4.2 EOF