diff --git a/invisible_cities/cities/icaro.py b/invisible_cities/cities/icaro.py new file mode 100644 index 000000000..cfa5655d0 --- /dev/null +++ b/invisible_cities/cities/icaro.py @@ -0,0 +1,531 @@ +""" +----------------------------------------------------------------------- + Icaro +----------------------------------------------------------------------- + + +""" + +import numpy as np +import pandas as pd +import tables as tb +import warnings + +from .. reco import tbl_functions as tbl +from .. reco.corrections import ASectorMap +from .. reco.corrections import maps_coefficient_getter +from .. reco.corrections import correct_geometry_ +from .. reco.corrections import norm_strategy +from .. reco.corrections import get_normalization_factor +from .. reco.corrections import apply_all_correction +from .. io.krmaps_io import write_krmap +from .. io.krmaps_io import write_krevol +from .. io.krmaps_io import write_mapinfo +from .. core import fit_functions as fitf +from .. core.core_functions import in_range +from .. core.core_functions import shift_to_bin_centers + +from .. dataflow import dataflow as fl + +from .. types.ic_types import Measurement +from .. types.ic_types import MasksContainer + +from . components import city +from . components import print_numberofevents +from . components import dst_from_files +from . components import quality_check +from . components import kr_selection + +from typing import Tuple +from typing import List +from pandas import DataFrame + +from .. reco.krmap_components import KrMap +from .. reco.krmap_components import get_number_of_bins, get_XY_bins, get_binned_data +from .. reco.krmap_components import lifetime_fit +from .. reco.krmap_components import calculate_residuals_in_bin, calculate_pval, valid_bin_counter + + + +@city +def icaro(files_in, file_out, compression, event_range, + detector_db, run_number, bootstrap, quality_ranges, + band_sel_params, map_params, + r_fid, nStimeprofile): + + + quality_check_before = fl.map(quality_check(quality_ranges), + args = "input_data", + out = "checks") + quality_check_after = fl.map(quality_check(quality_ranges), + args = "kr_data", + out = "checks") + + kr_selection_map = fl.map(kr_selection(bootstrap, band_sel_params), + args = "input_data", + out = ("kr_data", "kr_mask")) + + map_builder_map = fl.map(map_builder(detector_db, run_number, map_params), + args = "kr_data", + out = ("map_info", "map")) + + add_krevol_map = fl.map(add_krevol(r_fid, nStimeprofile), + args = ("map", "kr_data", "kr_mask"), + out = "evolution") + + with tb.open_file(file_out, "w", filters = tbl.filters(compression)) as h5out: + + # Define writers... + write_krmap_sink = fl.sink( write_krmap(h5out), args=("run_number", "event_number", "timestamp")) + write_krevol_sink = fl.sink( write_krevol(h5out), args="pointlike_event") + write_mapinfo_sink = fl.sink(write_mapinfo(h5out), args=("event_number", "pmap_passed")) + + return fl.push(source = dst_from_files(files_in, "DST", "Events") , + pipe = fl.pipe( print_numberofevents , + quality_check_before , + kr_selection_map , + quality_check_after , + print_numberofevents , + map_builder_map , + add_krevol_map , + fl.fork(write_krmap_sink , + write_krevol_sink , + write_mapinfo_sink )), + result = None) + + +def map_builder(detector_db, run_number, map_params): + + def compute_map(dst : pd.DataFrame, + n_bins : int = None, + fittype : str = 'linear', + n_min : int = 50): + + ''' + Given a dataframe with data, some bining and a fittype, + it initializes the Kr_Map object and computes the correction + factors. + + Parameters + ---------- + dst : pd.DataFrame + data to compute the map with + n_bins : int, default None + Number of bins wanted for map computation. If no value is chosen, it + automatically converts into 50 or 100 depending on number of events + fittype : str + Kind of fit to perform. + n_min : float + Minimum number of events in bin to perform the fit. Default = 50 + + + Returns + ------- + correction_map : Kr_Map object + + A Kr_Map object containing the info of the map: binning, E0, + LT, etc. + ''' + + nbins = get_number_of_bins(dst, n_bins = n_bins) # Get number of bins used + bins = get_XY_bins(nbins) # Get the actual bin arrays + counts = get_binned_data(dst, bins) # Divide the data into bins in get the event count per bin + + correction_map = KrMap(x_bins = bins[0], y_bins = bins[1], counts = counts) # Initialize map class + + correction_map.valid = counts >= n_min + + r, c = counts.shape + + for i in range(r): + for j in range(c): + + dst_bin = dst.query(f'xbin_index == {i} & ybin_index == {j}') + + if (not correction_map.valid[i, j]): + + warnings.warn(f'Cannot fit: events in bin[{i}][{j}] ={correction_map.counts[i,j]} < {n_min}', UserWarning) + + correction_map.e0 [i,j] = np.nan + correction_map.ue0 [i,j] = np.nan + correction_map.lt [i,j] = np.nan + correction_map.ult [i,j] = np.nan + correction_map.cov [i,j] = np.nan + correction_map.res_std[i,j] = np.nan + correction_map.pval [i,j] = np.nan + + else: + + par, err, cov, success = lifetime_fit(dst_bin.DT, dst_bin.S2e, fittype) + + correction_map.e0 [i,j] = par[0] + correction_map.ue0 [i,j] = err[0] + correction_map.lt [i,j] = par[1] + correction_map.ult [i,j] = err[1] + correction_map.cov [i,j] = cov + correction_map.res_std[i,j] = calculate_residuals_in_bin(dst_bin, par, fittype) + correction_map.pval [i,j] = calculate_pval(dst_bin.residuals) + correction_map.valid [i,j] &= success + + success_count, inner_count = valid_bin_counter(correction_map, r_max = None) + + if success_count/inner_count <= 0.9: + + warnings.warn(f"{inner_count-success_count} inner bins are not valid. Only {success_count/inner_count*100:.1f}% are successful.", UserWarning) + + return correction_map + + return compute_map + + +def add_krevol(r_fid : float, + nStimeprofile: int): + """ + Adds time evolution dataframe to the map + Parameters + --------- + r_fid: float + Maximum radius for fiducial sample + nStimeprofile: int + Number of seconds for each time bin + Returns + --------- + Function which takes as input map, kr_data, and kr_mask + and returns the time evolution + """ + + def add_krevol(map, kr_data, kr_mask): + fmask = (kr_data.R < r_fid) & kr_mask.s1 & kr_mask.s2 & kr_mask.band + dstf = kr_data[fmask] + min_time = dstf.time.min() + max_time = dstf.time.max() + ntimebins = get_number_of_time_bins(nStimeprofile = nStimeprofile, + tstart = min_time, + tfinal = max_time) + + ts, masks_time = get_time_series_df(time_bins = ntimebins, + time_range = (min_time, max_time), + dst = kr_data) + + masks_timef = [mask[fmask] for mask in masks_time] + pars = kr_time_evolution(ts = ts, + masks_time = masks_timef, + dst = dstf, + emaps = map) + + pars_ec = cut_time_evolution(masks_time = masks_time, + dst = kr_data, + masks_cuts = kr_mask, + pars_table = pars) + + e0par = np.array([pars['e0'].mean(), pars['e0'].var()**0.5]) + ltpar = np.array([pars['lt'].mean(), pars['lt'].var()**0.5]) + print(" Mean core E0: {0:.1f}+-{1:.1f} pes".format(*e0par)) + print(" Mean core Lt: {0:.1f}+-{1:.1f} mus".format(*ltpar)) + + return pars_ec + + return add_krevol + +def get_number_of_time_bins(nStimeprofile: int, + tstart : int, + tfinal : int )->int: + """ + Computes the number of time bins to compute for a given time step + in seconds. + + Parameters + ---------- + nStimeprofile: int + Time step in seconds + tstart: int + Initial timestamp for the data set + tfinal: int + Final timestamp for the data set + + Returns + ------- + ntimebins: int + Number of bins + """ + ntimebins = int( np.floor( ( tfinal - tstart) / nStimeprofile) ) + ntimebins = np.max([ntimebins, 1]) + + return ntimebins + + +def get_time_series_df(time_bins : int, + time_range : Tuple[float, float], + dst : DataFrame, + time_column : str = 'time')->Tuple[np.array, List[np.array]]: + """ + Given a dst (DataFrame) with a time column specified by the name time, + this function returns a time series (ts) and a list of masks which are used to divide + the event in time tranches. + More generically, one can produce a "time series" using any column of the dst + simply specifying time_column = ColumName + Parameters + ---------- + time_bins + Number of time bines. + time_range + Time range. + dst + A Data Frame + time_column + A string specifyng the dst column to be divided in time slices. + Returns + ------- + A Tuple with: + np.array : This is the ts vector + List[np.array] : This are the list of masks defining the events in the time series. + """ + #Add small number to right edge to be included with in_range function + modified_right_limit = np.nextafter(time_range[-1], np.inf) + ip = np.linspace(time_range[0], modified_right_limit, time_bins+1) + masks = np.array([in_range(dst[time_column].values, ip[i], ip[i + 1]) for i in range(len(ip) -1)]) + return shift_to_bin_centers(ip), masks + + +def kr_time_evolution(ts : np.array, + masks_time : List[np.array], + dst : DataFrame, + emaps : ASectorMap, + zslices_lt : int = 50, + zrange_lt : Tuple[float,float] = (0, 550), + nbins_dv : int = 35, + zrange_dv : Tuple[float, float] = (500, 625), + detector : str = 'new')->DataFrame: + """ + Computes some average parameters (e0, lt, drift v, + S1w, S1h, S1e, S2w, S2h, S2e, S2q, Nsipm, 'Xrms, Yrms) + for a given krypton distribution and for different time slices. + Returns a DataFrame. + Parameters + ---------- + ts: np.array of floats + Sequence of central times for the different time slices. + masks_time: list of boolean lists + Allows dividing the distribution into time slices. + data: DataFrame + Kdst distribution to analyze. + emaps: correction map + Allows geometrical correction of the energy. + z_slices: int (optional) + Number of Z-coordinate bins for doing the exponential fit to compute + the lifetime. + zrange_lt: length-2 tuple (optional) + Number of Z-coordinate range for doing the exponential fit to compute + the lifetime. + nbins_dv: int (optional) + Number of bins in Z-coordinate for doing the histogram to compute + the drift velocity. + zrange_dv: int (optional) + Range in Z-coordinate for doing the histogram to compute the drift + velocity. + detector: string (optional) + Used to get the cathode position from DB for the drift velocity + computation. + Returns + ------- + pars: DataFrame + Each column corresponds to the average value for a given parameter. + Each row corresponds to the parameters for a given time slice. + """ + + frames = [] + for index in range(len(masks_time)): + sel_dst = dst[masks_time[index]] + pars = computing_kr_parameters(sel_dst, ts[index], + emaps, + zslices_lt, zrange_lt, + nbins_dv, zrange_dv, + detector) + frames.append(pars) + + total_pars = pd.concat(frames, ignore_index=True) + + return total_pars + + +def e0_xy_correction(map : ASectorMap , + norm_strat : norm_strategy = norm_strategy.max): + """ + Temporal function to perfrom IC geometric corrections only + """ + normalization = get_normalization_factor(map , norm_strat) + get_xy_corr_fun = maps_coefficient_getter (map.mapinfo, map.e0) + def geo_correction_factor(x : np.array, + y : np.array) -> np.array: + return correct_geometry_(get_xy_corr_fun(x,y))* normalization + return geo_correction_factor + + +def computing_kr_parameters(data : DataFrame, + ts : float, + emaps : ASectorMap, + zslices_lt : int, + zrange_lt : Tuple[float,float] = (0, 550), + nbins_dv : int = 35, + zrange_dv : Tuple[float, float] = (500, 625), + detector : str = 'new')->DataFrame: + + """ + Computes some average parameters (e0, lt, drift v, energy + resolution, S1w, S1h, S1e, S2w, S2h, S2e, S2q, Nsipm, 'Xrms, Yrms) + for a given krypton distribution. Returns a DataFrame. + Parameters + ---------- + data: DataFrame + Kdst distribution to analyze. + ts: float + Central time of the distribution. + emaps: correction map + Allows geometrical correction of the energy. + xr_map, yr_map: length-2 tuple + Set the X/Y-coordinate range of the correction map. + nx_map, ny_map: int + Set the number of X/Y-coordinate bins for the correction map. + zslices_lt: int + Number of Z-coordinate bins for doing the exponential fit to compute + the lifetime. + zrange_lt: length-2 tuple (optional) + Number of Z-coordinate range for doing the exponential fit to compute + the lifetime. + nbins_dv: int (optional) + Number of bins in Z-coordinate for doing the histogram to compute + the drift velocity. + zrange_dv: int (optional) + Range in Z-coordinate for doing the histogram to compute the drift + velocity. + detector: string (optional) + Used to get the cathode position from DB for the drift velocity + computation. + Returns + ------- + pars: DataFrame + Each column corresponds to the average value of a different parameter. + """ + + ## lt and e0 + geo_correction_factor = e0_xy_correction(map = emaps , + norm_strat = norm_strategy.max) + + _, _, fr = fitf.fit_lifetime_profile(data.Z, + data.S2e.values*geo_correction_factor( + data.X.values, + data.Y.values), + zslices_lt, zrange_lt) + e0, lt = fr.par + e0u, ltu = fr.err + + ## compute drift_v + dv, dvu = fitf.compute_drift_v(data.Z, nbins=nbins_dv, + zrange=zrange_dv, detector=detector) + + ## energy resolution and error + tot_corr_factor = apply_all_correction(maps = emaps, + apply_temp=False) + nbins = int((len(data.S2e))**0.5) + f = fitf.quick_gauss_fit(data.S2e.values*tot_corr_factor( + data.X.values, + data.Y.values, + data.Z.values, + data.time.values), + bins=nbins) + R = resolution(f.values, f.errors, 41.5) + resol, err_resol = R[0][0], R[0][1] + ## average values + parameters = ['S1w', 'S1h', 'S1e', + 'S2w', 'S2h', 'S2e', 'S2q', + 'Nsipm', 'Xrms', 'Yrms'] + mean_d, var_d = {}, {} + for parameter in parameters: + data_value = getattr(data, parameter) + mean_d[parameter] = np.mean(data_value) + var_d [parameter] = (np.var(data_value)/len(data_value))**0.5 + + ## saving as DataFrame + pars = DataFrame({'ts' : [ts] , + 'e0' : [e0] , 'e0u' : [e0u] , + 'lt' : [lt] , 'ltu' : [ltu] , + 'dv' : [dv] , 'dvu' : [dvu] , + 'resol': [resol] , 'resolu': [err_resol] , + 's1w' : [mean_d['S1w']] , 's1wu' : [var_d['S1w']] , + 's1h' : [mean_d['S1h']] , 's1hu' : [var_d['S1h']] , + 's1e' : [mean_d['S1e']] , 's1eu' : [var_d['S1e']] , + 's2w' : [mean_d['S2w']] , 's2wu' : [var_d['S2w']] , + 's2h' : [mean_d['S2h']] , 's2hu' : [var_d['S2h']] , + 's2e' : [mean_d['S2e']] , 's2eu' : [var_d['S2e']] , + 's2q' : [mean_d['S2q']] , 's2qu' : [var_d['S2q']] , + 'Nsipm': [mean_d['Nsipm']], 'Nsipmu': [var_d['Nsipm']], + 'Xrms' : [mean_d['Xrms']] , 'Xrmsu' : [var_d['Xrms']] , + 'Yrms' : [mean_d['Yrms']] , 'Yrmsu' : [var_d['Yrms']]}) + + return pars + +def cut_time_evolution(masks_time : List[np.array], + dst : DataFrame, + masks_cuts : MasksContainer, + pars_table : DataFrame): + + """ + Computes the efficiency evolution in time for a given krypton distribution + for different time slices. + Returns the input DataFrame updated with new 3 columns. + Parameters + ---------- + masks: list of boolean lists + Allows dividing the distribution into time slices. + data: DataFrame + Kdst distribution to analyze. + masks_cuts: MasksContainer + Container for the S1, S2 and Band cuts masks. + The masks don't have to be inclusive. + pars: DataFrame + Each column corresponds to the average value for a given parameter. + Each row corresponds to the parameters for a given time slice. + Returns + ------- + parspars_table_out: DataFrame + pars Table imput updated with 3 new columns, one for each cut. + """ + + len_ts = len(masks_time) + n0 = np.zeros(len_ts) + nS1 = np.zeros(len_ts) + nS2 = np.zeros(len_ts) + nBand = np.zeros(len_ts) + for index in range(len_ts): + t_mask = masks_time[index] + n0 [index] = dst[t_mask].event.nunique() + nS1mask = t_mask & masks_cuts.s1 + nS1 [index] = dst[nS1mask].event.nunique() + nS2mask = nS1mask & masks_cuts.s2 + nS2 [index] = dst[nS2mask].event.nunique() + nBandmask = nS2mask & masks_cuts.band + nBand[index] = dst[nBandmask].event.nunique() + + pars_table_out = pars_table.assign(S1eff = nS1 / n0, + S2eff = nS2 / nS1, + Bandeff = nBand / nS2) + return pars_table_out + + +def resolution(values, errors = None, E_from=41.5, E_to=2458): + """ + Compute resolution at E_from and resolution at E_to + with uncertainty propagation. + """ + if errors is None: + errors = np.zeros_like(values) + + amp , mu, sigma, *_ = values + u_amp, u_mu, u_sigma, *_ = errors + + r = 235. * sigma/mu + u_r = r * (u_mu**2/mu**2 + u_sigma**2/sigma**2)**0.5 + + scale = (E_from/E_to)**0.5 + return Measurement(r , u_r ), \ + Measurement(r * scale, u_r * scale) \ No newline at end of file diff --git a/invisible_cities/cities/icaro_test.py b/invisible_cities/cities/icaro_test.py new file mode 100644 index 000000000..e69de29bb diff --git a/invisible_cities/core/fit_functions.py b/invisible_cities/core/fit_functions.py index 9032adfaa..fba580534 100644 --- a/invisible_cities/core/fit_functions.py +++ b/invisible_cities/core/fit_functions.py @@ -4,6 +4,10 @@ GML November 2016 """ +import logging +import warnings +log = logging.getLogger(__name__) + import numpy as np import pandas as pd import inspect as insp @@ -15,6 +19,13 @@ from . import core_functions as coref from . stat_functions import poisson_sigma from .. evm.ic_containers import FitFunction +from .. database import load_db as DB +from .. types.ic_types import FitPar +from .. types.ic_types import FitResult +from .. types.ic_types import NN +from .. core.core_functions import shift_to_bin_centers + +from typing import Tuple def fixed_parameters(fn, **kwargs): @@ -356,3 +367,190 @@ def profileXY(xdata, ydata, zdata, nbinsx, nbinsy, deviation[indices_x - 1, indices_y - 1] = np.where(notnan, deviation_.values, 0) return bin_centers_x, bin_centers_y, mean, deviation + + +def profile1d(z : np.array, + e : np.array, + nbins_z : int, + range_z : np.array)->Tuple[float, float, float]: + """Adds an extra layer to profileX, returning only valid points""" + x, y, yu = profileX(z, e, nbins_z, range_z) + valid_points = ~np.isnan(yu) + x = x [valid_points] + y = y [valid_points] + yu = yu[valid_points] + return x, y, yu + + +def sigmoid(x : np.array, + scale : float, + inflection : float, + slope : float, + offset : float)->np.array: + + return scale / ( 1 + np.exp( - slope * ( x - inflection ) ) ) + offset + + +def compute_drift_v(zdata : np.array, + nbins : int = 35, + zrange : Tuple[float, float] = (500, 640), + seed : Tuple[float, float, float, float] = None, + detector : str = 'new')->Tuple[float, float]: + """ + Computes the drift velocity for a given distribution + using the sigmoid function to get the cathode edge. + Parameters + ---------- + zdata: array_like + Values of Z coordinate. + nbins: int (optional) + The number of bins in the z coordinate for the binned fit. + zrange: length-2 tuple (optional) + Fix the range in z. + seed: length-4 tuple (optional) + Seed for the fit. + detector: string (optional) + Used to get the cathode position from DB. + plot_fit: boolean (optional) + Flag for plotting the results. + Returns + ------- + dv: float + Drift velocity. + dvu: float + Drift velocity uncertainty. + """ + + y, x = np.histogram(zdata, nbins, zrange) + x = shift_to_bin_centers(x) + + if seed is None: seed = np.max(y), np.mean(zrange), 0.5, np.min(y) + + z_cathode = DB.DetectorGeo(detector).ZMAX[0] + try: + f = fit(sigmoid, x, y, seed, sigma=poisson_sigma(y), fit_range=zrange) + dv = z_cathode/f.values[1] + dvu = dv / f.values[1] * f.errors[1] + except RuntimeError: + print("WARNING: Sigmoid fit for dv computation fails. NaN value will be set in its place.") + dv, dvu = np.nan, np.nan + + return dv, dvu + + +def expo_seed(x, y, eps=1e-12): + """ + Estimate the seed for a exponential fit to the input data. + """ + x, y = zip(*sorted(zip(x, y))) + const = y[0] + slope = (x[-1] - x[0]) / np.log(y[-1] / (y[0] + eps)) + seed = const, slope + return seed + + +def fit_lifetime_profile(z : np.array, + e : np.array, + nbins_z : int, + range_z : Tuple[float,float])->Tuple[FitPar, FitPar, FitResult]: + """ + Make a profile of the input data and fit it to an exponential + function. + Parameters + ---------- + z + Array of z values. + e + Array of energy values. + nbins_z + Number of bins in Z for the profile fit. + range_z + Range in Z for fit. + Returns + ------- + A Tuple with: + FitPar : Fit parameters (arrays of fitted values and errors, fit function) + FitPar : Fit parameters (duplicated to make it compatible with fit_liftime_unbined) + FirResults: Fit results (lt, e0, errors, chi2) + @dataclass + class ProfilePar: + x : np.array + y : np.array + xu : np.array + yu : np.array + @dataclass + class FitPar(ProfilePar): + f : FitFunction + @dataclass + class FitResult: + par : np.array + err : np.array + chi2 : float + valid : bool + """ + + logging.debug(' fit_liftime_profile') + logging.debug(f' len (z) ={len(z)}, len (e) ={len(e)} ') + logging.debug(f' nbins_z ={nbins_z}, range_z ={range_z} ') + fp = None + valid = True + c2 = NN + par = NN * np.ones(2) + err = NN * np.ones(2) + + x, y, yu = profile1d(z, e, nbins_z, range_z) + xu = np.diff(x) * 0.5 + seed = expo_seed(x, y) + + logging.debug(f' after profile: len (x) ={len(x)}, len (y) ={len(y)} ') + try: + f = fit(expo, x, y, seed, sigma=yu) + c2 = f.chi2 + par = np.array(f.values) + par[1] = - par[1] + err = np.array(f.errors) + + logging.debug(f' e0z ={par[0]} +- {err[0]} ') + logging.debug(f' lt ={par[1]} +- {err[1]} ') + logging.debug(f' c2 ={c2} ') + fp = FitPar(x = x, + y = y, + xu = xu, + yu = yu, + f = f.fn) + except: + warnings.warn(f' fit failed for seed = {seed} in fit_lifetime_profile', UserWarning) + valid = False + raise + + fr = FitResult(par = par, + err = err, + chi2 = c2, + valid = valid) + + return fp, fp, fr + + +def gauss_seed(x, y, sigma_rel=0.05): + """ + Estimate the seed for a gaussian fit to the input data. + """ + y_max = np.argmax(y) # highest bin + x_max = x[y_max] + sigma = sigma_rel * x_max + amp = y_max * (2 * np.pi)**0.5 * sigma * np.diff(x)[0] + seed = amp, x_max, sigma + return seed + + +def quick_gauss_fit(data, bins): + """ + Histogram input data and fit it to a gaussian with the parameters + automatically estimated. + """ + y, x = np.histogram(data, bins) + x = shift_to_bin_centers(x) + seed = gauss_seed(x, y) + f = fit(gauss, x, y, seed) + assert np.all(f.values != seed) + return f \ No newline at end of file diff --git a/invisible_cities/io/krmaps_io.py b/invisible_cities/io/krmaps_io.py new file mode 100644 index 000000000..3f9cb5329 --- /dev/null +++ b/invisible_cities/io/krmaps_io.py @@ -0,0 +1,8 @@ +def write_krmap(*args, **kwargs): + raise NotImplementedError("Not yet implemented") + +def write_krevol(*args, **kwargs): + raise NotImplementedError("Not yet implemented") + +def write_mapinfo(*args, **kwargs): + raise NotImplementedError("Not yet implemented") \ No newline at end of file diff --git a/invisible_cities/reco/corrections.py b/invisible_cities/reco/corrections.py index 57abbf02e..481112c41 100644 --- a/invisible_cities/reco/corrections.py +++ b/invisible_cities/reco/corrections.py @@ -6,6 +6,8 @@ from dataclasses import dataclass from typing import Callable from typing import Optional +from typing import Tuple + from .. core.core_functions import in_range from .. core import system_of_units as units @@ -350,3 +352,41 @@ def apply_all_correction(maps : ASectorMap , return apply_all_correction_single_maps(maps, maps, maps, apply_temp, norm_strat, norm_value) + +def add_mapinfo(asm : ASectorMap, + xr : Tuple[float, float], + yr : Tuple[float, float], + nx : int, + ny : int, + run_number : int) -> ASectorMap: + """ + Add metadata to a ASectorMap + + Parameters + ---------- + asm + ASectorMap object. + xr, yr + Ranges in (x, y) defining the map. + nx, ny + Number of bins in (x, y) defining the map. + run_number + run number defining the map. + + Returns + ------- + A new ASectorMap containing metadata (in the variable mapinfo) + + """ + mapinfo = pd.Series([*xr, *yr, nx, ny, run_number], + index=['xmin','xmax', + 'ymin','ymax', + 'nx' , 'ny' , + 'run_number']) + + return ASectorMap(chi2 = asm.chi2, + e0 = asm.e0 , + lt = asm.lt , + e0u = asm.e0u , + ltu = asm.ltu , + mapinfo = mapinfo ) diff --git a/invisible_cities/reco/krmap_components.py b/invisible_cities/reco/krmap_components.py new file mode 100644 index 000000000..ca572d82e --- /dev/null +++ b/invisible_cities/reco/krmap_components.py @@ -0,0 +1,322 @@ +import numpy as np +import pandas as pd +import scipy.stats as stats +import scipy.optimize as so + +from typing import Tuple +from invisible_cities.core.core_functions import in_range, shift_to_bin_centers + + +class KrMap(): + + ''' Class for the Kr Map container. + + x_bins (1D-array of floats) : bins in coordinate X + y_bins (1D-array of floats) : bins in coordinate Y + counts (2D-array of ints) : number of events in every X,Y bin of the map + e0 (2D-array of floats) : energy correction for every X,Y bin of the map + ue0 (2D-array of floats) : uncertainty for EO + lt (2D-array of floats) : lifetime correction for every X,Y bin of the map + ult (2D-array of floats) : uncertainty for lt + cov (2D-array of floats) : non-diag covariance matrix element + pval (2D-array of floats) : pvalue + res_std (2D-array of floats) : std for residuals + valid (2D-array of boolean) : success in the bin''' + + + def __init__(self, + x_bins : np.array, + y_bins : np.array, + counts : np.array): + + shape = (len(x_bins)-1, len(y_bins)-1) + + self.x_bins = x_bins + self.y_bins = y_bins + self.counts = counts + self.e0 = np.zeros(shape=shape, dtype=float) + self.ue0 = np.zeros(shape=shape, dtype=float) + self.lt = np.zeros(shape=shape, dtype=float) + self.ult = np.zeros(shape=shape, dtype=float) + self.cov = np.zeros(shape=shape, dtype=float) + self.pval = np.zeros(shape=shape, dtype=float) + self.res_std = np.zeros(shape=shape, dtype=float) + self.valid = np.zeros(shape=shape, dtype= bool) + + + + def __str__(self): + return '''{}(\nx_bins={.x_bins},\ny_bins={.y_bins},\ncounts={.counts},\ne0={.e0},\nue0={.ue0},\nlt={.lt},\nult={.ult},\ncov={.cov},\npval = {.pval},\nres = {.res_std})'''.format(self.__class__.__name__, self, self, self, self, self, self, self, self, self, self) + + __repr__ = __str__ + + + +def get_number_of_bins(dst : pd.DataFrame, + thr : int = 1e6, + n_bins : int = None)->int: #Similar to ICAROS + """ + Computes the number of XY bins to be used in the creation + of correction map regarding the number of selected events. + Parameters + --------- + dst: pd.DataFrame + File containing the events for the map computation. + thr: int (optional) + Threshold to use 50x50 or 100x100 maps (standard values). + n_bins: int (optional) + The number of events to use can be chosen a priori. + Returns + --------- + n_bins: int + Number of bins in each direction (X,Y) (square map). + """ + + if n_bins != None: pass; + elif len(dst.index._data) < thr: n_bins = 50 + else: n_bins = 100; + return n_bins + + +def get_XY_bins(n_bins : int, + XYrange : Tuple[float, float] = (-490, 490)): + + # ATTENTION! The values for XYrange are temporary, ideally I'd suggest + # to obtain them through the DataBase depending on the chosen detector + # (the detector parameter 'next100', 'demopp', etc is present in every + # argument list for the cities on IC). That way, the right geometry is + # selected when running the city. In case one prefers to have a custom + # range, it can be specified anyways. + # + # I was thinking of something like this: + # from invisible_cities.database.load_db import DetectorGeo + # XYrange = DetectorGeo('next100').values[0][0:2] + # + # I just left this as a first approach + + """ + Returns the bins that will be used to make the map. + Parameters + --------- + dst: int + Number of bins to use per axis + XYrange: Tuple[float, float] + Limits in X and Y for the map computation + Returns + --------- + n_bins: int + Number of bins in each direction (X,Y) (square map). + """ + + xbins = np.linspace(*XYrange, n_bins+1) + ybins = xbins # I'm assuming it's always going to be the same for both + # directions, but in case we don't want to build it like + # this we can always define two different Xrange, Yrange + # and make one np.linspace(...) for each of them. + + return xbins, ybins + + +def get_binned_data(dst : pd.DataFrame, + bins : Tuple[np.array, np.array]): + + '''This function distributes all the events in the DST into the selected + bins, and updates the DST in order to include the X, Y bin index of its + corresponding bin. + + Parameters + -------------- + dst : pd.DataFrame + Krypton dataframe. + bins : Tuple[np.array, np.array] + Bins used to compute the map. + + Returns + ------------- + counts : np.array + Total number of events falling into each bin + bin_centers : list + List contaning np.arrays with the center of the bins + + + ''' + + counts, bin_edges, bin_index = stats.binned_statistic_dd((dst.X, dst.Y), dst.S2e, bins = bins, + statistic = 'count', expand_binnumbers = True) + + + bin_centers = [shift_to_bin_centers(axis) for axis in bin_edges] # Not necessary... maybe it's dropped in next version + bin_index -= 1 + # dst.assign(xbin_index=bin_index[0], ybin_index=bin_index[1]) pd.assign is not working for me... i also tried with + # dst = dst.assign(**{xbinindex:bin_index[0], .....}) + # but i can't get the dst to update with the new cols + dst['xbin_index'] = bin_index[0] + dst['ybin_index'] = bin_index[1] + + return counts + + +def linear_function(DT, E0, LT): + + '''Given the DriftTime, and the geometrical (E0) and lifetime (LT) + corrections, this function computes the energy. + + Parameters: + DT : np.array + Drift Time of selected events. + E0 : np.array + E0 correction factor. + LT : np.array + Lifetime correction factor. + + Returns: + E : np.array + Energies + ''' + + dt0 = 680 # Middle point of chamber. FUTURE: taken from the database + + E = E0 - LT * (DT - dt0) + return E + + +def function_(fittype): + + if fittype == 'linear': + return linear_function + + # Additional types to be included + + +def lifetime_fit_linear(DT : np.array, + E : np.array): + + '''This function performs the linear lifetime fit. It returns the parameters, errors, + non-diagonal element in cov matrix and a success variable of the fit.''' + + par = np.nan * np.ones(2) + err = np.nan * np.ones(2) + + par, var, _, _, ier = so.curve_fit(linear_function, DT, E, full_output=True) + + err[0] = np.sqrt(var[0, 0]) + err[1] = np.sqrt(var[1, 1]) + cov = var + + success = True if ier in [1, 2, 3, 4] else False # See scipy.optimize.curve_fit documentation + + return par, err, cov[0,1], success + + +def lifetime_fit(DT : np.array, + E : np.array, + fittype : str): + + ''' Performs the kind of unbined lifetime fit that fittype specifies. Rudimentary. + + Parameters: + DT : np.array + Drift Time of selected events. + E : np.array + Energies of selected events. + fittype : str. + Desired fit: can be either 'icaros' (linear fit w/ np.polyfit), 'linear' (linear fit w/ scipy.optimize) or 'exp' (expo fit w/ scipy.optimize).''' + + # if fittype == 'icaros': + + # return lifetime_fit_icaros(DT, E) + + if fittype == 'linear': + + return lifetime_fit_linear(DT, E) + + +def calculate_residuals_in_bin(dst, par, fittype): + + '''This function computes the corrected energy based on the correction + parameters coming from a previous fit, and then calculates the residuals, + comparing with the measured value. The dst is updated so it contains these + residuals. Ultimately, the function returns a single std value of the resi- + duals in order to have an idea of their distribution.''' + + fit_function = function_(fittype=fittype) + res = dst.S2e - fit_function(dst.DT, *par) + series = pd.Series(res, index=dst.index) + dst['residuals'] = series.values + std = res.std() + return std + + +def calculate_pval(residuals): + + pval = stats.shapiro(residuals)[1] if (len(residuals) > 10) else 0. + + return pval + + +def get_inner_core(bins_x, bins_y, r_max = None): + + r_fid = r_max if r_max != None else max(max(abs(bins_x)), max(abs(bins_y))) # Either select it or take it from the binning given + + binsizes_x = bins_x[1:] - bins_x[:-1] # Compute bin sizes + binsizes_y = bins_y[1:] - bins_y[:-1] + bin_size = min(max(binsizes_x), max(binsizes_y)) + + centers = [shift_to_bin_centers(axis) for axis in (bins_x, bins_y)] # Compute bin centers + + xx, yy = np.meshgrid(*centers) + r = np.sqrt(xx**2 + yy**2) # Define r based on the bin grid + + mask = in_range(r, 0, r_fid + bin_size/2) # Select the allowed r values: + # Those falling within the r_max. Since the binning doesn't have to be + # the same in X and Y, the additional bin_size/2 makes sure that you're + # choosing all the pixels that are partially into the chosen r_max (You + # can have a corner of a bin that falls into the r_max region, but maybe + # its center doesn't). I believe this way it would be included. + + return mask + + +def valid_bin_counter(correction_map, r_max = None): + + core_mask = get_inner_core(correction_map.x_bins, correction_map.y_bins, r_max) # Select inner part (avoid corners) + inner_count = core_mask.sum() # Total number of inner bins + successful = np.where(core_mask, correction_map.valid, 0) # Succesful bins among the inner bins + success_count = np.count_nonzero(successful) # Number of successful bins + + return success_count, inner_count + + + + +# ================================================================================================================================================== +# ========================================================= NOT USED FOR THE MAP CORE ============================================================== +# ================================================================================================================================================== + + +def apply_correction_factors(dst, kr_map, function): + + """ + Computes the corrected energy given a certain kr map and + the corresponding function. + Parameters + ---------- + dst : pd.Dataframe + Dataframe containing the data + kr_map : Kr_Map object + Map where the corrections are stored + function : Callable + Function to calculate the energy based on DT and correction factors + + Returns + ------- + Ecorr : pd.Series + Array containing the corrected energy + """ + + corr_factors_e0 = maps_coefficient_getter(kr_map, 'e0')(dst.X, dst.Y) + corr_factors_lt = maps_coefficient_getter(kr_map, 'lt')(dst.X, dst.Y) + + Ecorr = function(dst.DT, corr_factors_e0, corr_factors_lt) + + return Ecorr diff --git a/invisible_cities/types/ic_types.py b/invisible_cities/types/ic_types.py index ddb40ba9b..9436d7ac9 100644 --- a/invisible_cities/types/ic_types.py +++ b/invisible_cities/types/ic_types.py @@ -1,10 +1,42 @@ from enum import Enum from collections import OrderedDict - +from collections import namedtuple +from dataclasses import dataclass +from ..evm.ic_containers import FitFunction import numpy as np NN= -999999 # No Number, a trick to aovid nans in data structs +Measurement = namedtuple('Measurement', 'value uncertainty') + + +@dataclass +class MasksContainer: + s1 : np.array + s2 : np.array + band : np.array + + +@dataclass +class ProfilePar: + x : np.array + y : np.array + xu : np.array + yu : np.array + + +@dataclass +class FitPar(ProfilePar): + f : FitFunction + + +@dataclass +class FitResult: + par : np.array + err : np.array + chi2 : float + valid : bool + class NNN: diff --git a/test.txt b/test.txt new file mode 100644 index 000000000..e69de29bb