Skip to content
51 changes: 51 additions & 0 deletions invisible_cities/icaros/correction_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import pandas as pd
import numpy as np
from scipy.interpolate import griddata



def normalization(krmap, method, x_low, x_high, y_low, y_high):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The last 4 parameters need to be optional, since they are only relevant for method == "region". I also think it would be interesting to gather them in a dictionary to simplify the logic.


mu_values = krmap.mu
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since you are doing .dropna() everywhere, you can do it here already


if method == 'max':
E_reference_max = mu_values.dropna().max()
return E_reference_max

if method == 'mean chamber':
E_reference_chamber = mu_values.dropna().mean()
return E_reference_chamber

if method == 'mean anode':
mu_values_anode = map[map.k == 0].mu
E_reference_anode = mu_values_anode.dropna().mean()
return E_reference_anode

mask_region = (map['x'] <= x_high) & (map['x'] >= x_low) & (map['y'] <= y_high) & (map['y'] >= y_low)

if method == 'mean region anode':
region = map[map.k == 0][mask_region]
E_reference_slice_anode = region.mu.dropna().mean()
return E_reference_slice_anode

if method == 'mean region chamber':
region = map[mask_region]
E_reference_region = region.mu.dropna().mean()
return E_reference_region


def apply_3Dmap(krmap, method, dt, x, y, E, keV = False):

map_points = krmap['dt x y'.split()].values
norm = normalization(krmap, method)

data_points = np.stack([dt, x, y], axis = 1)
E_interpolated_data = griddata(map_points, krmap.mu.values, data_points, method = 'nearest')

correction_factor = norm/E_interpolated_data
Ec = E * correction_factor

if keV:
Ec = Ec * (41.55 / norm)

return Ec
43 changes: 43 additions & 0 deletions invisible_cities/icaros/correction_functions_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from correction_funcitons import normalization


def test_method_norm():

k_vals = np.arange(10)
i_vals = np.arange(12)
j_vals = np.arange(12)

k, i, j = np.meshgrid(k_vals, i_vals, j_vals, indexing='ij')
k = k.ravel()
i = i.ravel()
j = j.ravel()
Comment on lines +13 to +16
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
k, i, j = np.meshgrid(k_vals, i_vals, j_vals, indexing='ij')
k = k.ravel()
i = i.ravel()
j = j.ravel()
k, i, j = map(np.ravel,
np.meshgrid(k_vals, i_vals, j_vals, indexing='ij'))


x = i * 25
y = j * 25
dt = k * 45

mu = (k*i + 800)*j

map_test = pd.DataFrame({
'k': k,
'i': i,
'j': j,
'x': x,
'y': y,
'dt': dt,
'mu': mu
})

region_mask = (
(map_test.x <= 100) & (map_test.x >= -100) &
(map_test.y <= 100) & (map_test.y >= -100)
)

assert normalization(map_test, 'max', None) == 9889
assert normalization(map_test, 'mean chamber', None) == 4536.125
assert normalization(map_test, 'mean anode', None) == 4400
assert normalization(map_test, 'mean region anode', None) == map_test.loc[(map_test.k == 0) & region_mask,'mu'].mean()
assert normalization(map_test, 'mean region chamber', None) == map_test.loc[region_mask,'mu'].mean()
149 changes: 149 additions & 0 deletions invisible_cities/icaros/krmap_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import pandas as pd
import numpy as np

from invisible_cities.core.fit_functions import expo, gauss, fit
from invisible_cities.io.dst_io import load_dst, load_dsts
from invisible_cities.core.core_functions import in_range, shift_to_bin_centers

import tables as tb
from scipy import stats
from scipy import optimize
from scipy.optimize import curve_fit

import itertools

"""

This script contains functions to compute NaN_maps, 3D_maps from kdsts and to merge them to create an 'uniform' map.

"""

def create_NaN_map(xy_range, dt_range, xy_nbins, dt_nbins):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def create_NaN_map(xy_range, dt_range, xy_nbins, dt_nbins):
def create_empty_map(xy_range, dt_range, xy_nbins, dt_nbins):

"""
- Uses every possible indices combination
- Creates a DataFrame filled with NaNs for each (i, j, k) combination
"""

xy_bins = np.linspace(xy_range[0], xy_range[1], xy_nbins + 1)
dt_bins = np.linspace(dt_range[0], dt_range[1], dt_nbins + 1)

#shift to bin centers invisible_cities.core.core_functions

i_range = np.arange(0, len(xy_bins)-1)
j_range = np.arange(0, len(xy_bins)-1)
k_range = np.arange(0, len(dt_bins)-1)
Comment on lines +27 to +34
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

xy_bins and dt_bins are only used to compute their length, which are given by the variables xy_nbins and dt_nbins


combinations = pd.DataFrame(itertools.product(k_range, i_range, j_range),
columns=['k', 'i', 'j'])
Nan_map = pd.DataFrame(
np.nan,
index=range(len(combinations)),
columns=['nevents', 'mu', 'sigma', 'mu_error', 'sigma_error']
)
Nan_map = pd.concat([combinations, Nan_map], axis=1)

return Nan_map


def med_fun(df):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rename to median, get_median or similar

sigma = (0.04/2.35)*df.S2e.median()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment explaining this value


return pd.DataFrame({
'nevents': len(df),
'median': df.S2e.median(),
'sigma': sigma,
'median_error': sigma/np.sqrt(len(df)),
'sigma_error': np.nan}, index = [0]) #error de std?



def fit_function(df, bins, size =10):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rename to gaussian_fit or similar
rename bins to ebins, energy_bins, or similar, to clarify that they refer to energy
rename size to something that gives a better hint of its meaning (e.g. min_events)

if len(df)<size:
results = med_fun(df)

bin_centers = shift_to_bin_centers(bin_edges)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if bin_edges is only used to compute bin_centers, then the argument to this function should be bin_centers


try:
f = fit(gauss, bin_centers, counts, seed = (len(df), 8000, 400), maxfev = 2500)
except:

results = pd.DataFrame({'nevents': len(df), 'mu': np.nan, 'sigma': np.nan, 'mu_error': np.nan, 'sigma_error': np.nan}, index = [0])

return results
results = pd.DataFrame({'nevents': len(df), 'mu': f.values[1], 'sigma': f.values[2], 'mu_error': f.errors[1], 'sigma_error': f.errors[2]}, index = [0])

return results


def map_3D_fits(df, xy_range, dt_range, xy_nbins, dt_nbins):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

try to come up with a name like verb_noun, like create_something


df = df.loc[:,['X', 'Y', 'DT', 'S2e']]

mask = in_range(df.X, *xy_range) & in_range(df.Y, *xy_range) & in_range(df.DT, *dt_range)

df = df[mask]

x = df.X
y = df.Y
dt = df.DT
Comment on lines +87 to +89
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
x = df.X
y = df.Y
dt = df.DT
x, y, dt, _ = df.values.T

and then you don't need to do .values on each column


xy_bins = np.linspace(*xy_range, xy_nbins + 1)
dt_bins = np.linspace(*dt_range, dt_nbins + 1)

map_df = df.assign( i = np.digitize(x.values, bins = xy_bins) - 1,
j = np.digitize(y.values, bins = xy_bins) - 1,
k = np.digitize(dt.values, bins = dt_bins) - 1)
Comment on lines +94 to +96
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment explaining the -1 at the end



result = map_df.groupby(['k', 'i', 'j']).apply(fit_function, bins = np.linspace(3000, 9000, 101))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the argument bins should probably be an argument to this function as well


result = result.reset_index()

if 'level_3' in result.columns:
result = result.drop(columns = 'level_3')
Comment on lines +101 to +104
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be deterministic, either the condition is true or false, but it will not change depending on the data, I don't think


return result


def merge_maps(NaN_map, map_3D):

full_map = pd.concat([map_3D, NaN_map], ignore_index = True)
full_map = full_map.groupby(['k', 'i','j']).first().reset_index()
Comment on lines +110 to +112
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Explain this bit in a comment


return full_map


def include_coordinates(map, xy_range, dt_range, xy_nbins, dt_nbins):

xy_bins = np.linspace(*xy_range, xy_nbins + 1)
dt_bins = np.linspace(*dt_range, dt_nbins + 1)

x = shift_to_bin_centers(xy_bins)
y = shift_to_bin_centers(xy_bins)
dt = shift_to_bin_centers(dt_bins)

combinaciones_bins = pd.DataFrame(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

english :)

itertools.product(dt, x, y),
columns=['dt', 'x', 'y'])


map = pd.concat([combinaciones_bins.reset_index(drop=True), map.reset_index(drop=True)], axis = 1)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do you need to reset indices here?


return map


def compute_3D_map(df, xy_range, dt_range, xy_nbins, dt_nbins):
NaN_map = create_NaN_map(xy_range, dt_range, xy_nbins, dt_nbins)
map_3D_fit = map_3D_fits(df, xy_range = xy_range, dt_range = dt_range, xy_nbins = xy_nbins, dt_nbins = dt_nbins)

map = merge_maps(NaN_map, map_3D_fit)
full_map = include_coordinates(map, xy_range, dt_range, xy_nbins, dt_nbins)

return full_map

#def compute_metadata():
# ['rmax', 'zmax', 'bin_size_z', 'bin_size_x', 'bin_size_y', 'method',
# 'zbins', 'xbins', 'ybins', 'nbins_z', 'nbins_x', 'nbins_y', 'map_shape',
# 'map_extent']

#def save_map(map, metadata)
85 changes: 85 additions & 0 deletions invisible_cities/icaros/krmap_functions_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import numpy as np
import pandas as pd
from invisible_cities.core.core_functions import in_range, shift_to_bin_centers
import itertools
from new_icaromaps import create_NaN_map, med_fun, fit_function, map_3D_fits, merge_maps, include_coordinates, is_even
from pytest import raises


def test_empty_medfun():
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Try to be a bit more descriptive in the test name. Also keep the structure test_TESTEDFUN_DESCRIPTION:

Suggested change
def test_empty_medfun():
def test_medfun_empty_input():

empty_dst = pd.DataFrame(columns = ['DT', 'x', 'y', 'S2e'])
result = med_fun(empty_dst)

assert result.shape == (1, 5)
assert result['nevents'].sum() == 0
for col in ['median', 'sigma', 'median_error', 'sigma_error']:
assert pd.isna(result[col].iloc[0])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
assert pd.isna(result[col].iloc[0])
assert pd.isna(result[col].iloc[0]), f"{col} is not a nan"

this adds a bit of information in case of failure




def test_empty_medfun_2():
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def test_empty_medfun_2():
def test_medfun_empty_input_does_not_raise():

empty_dst = pd.DataFrame(columns = ['DT', 'x', 'y', 'S2e'])

#with raises(ValueError):
med_fun(empty_dst)


def test_fit_fun():
empty_dst = pd.DataFrame(columns = ['DT', 'x', 'y', 'S2e'])
result = fit_function(empty_dst)

assert result == med_fun(empty_dst)


def test_Nevents():
d = {'DT': np.empty(6, dtype=int), 'x' : np.empty(6), 'y' : np.empty(6),'S2e' : np.ones(6)}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why dtype=int here?

df_test = pd.DataFrame(data = d, index = range(0, 6))
N = len(df_test)
result_med = med_fun(df_test)
assert result_med['nevents'].iloc[0] == N
assert fit_function(df_test, bins = 5)['nevents'].iloc[0] == N


def test_merge_maps():
Nan_map_test = create_NaN_map(xy_range = (-500, 500), dt_range = (0, 1400), xy_nbins = 100, dt_nbins = 10)
d = {'nevents': np.empty(6, dtype = int), 'mu' : np.empty(6), 'sigma' : np.empty(6), 'mu_error' : np.empty(6), 'sigma_error' : np.empty(6)}
map_3D_test = pd.DataFrame(data = d, index = range(0, 6))
assert merge_maps(Nan_map_test, map_3D_test).shape == Nan_map_test.shape

test_merge_maps()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove this line



def test_medfun_even():
d = {'DT': np.empty(6, dtype=int), 'x' : np.empty(6), 'y' : np.empty(6),'S2e' : [8000, 7500, 8300, 7900, 9000, 8100]}
df_test = pd.DataFrame(data = d, index = range(0,6))
S2e = df_test['S2e']
result_med_fun = med_fun(df_test)
result_med_fun = result_med_fun['median']
result_med_data = 8050

assert (result_med_fun == result_med_data).all()

def test_medfun_odd():
d = {'DT': np.empty(7, dtype=int), 'x' : np.empty(7), 'y' : np.empty(7),'S2e' : [8000, 7500, 8300, 7900, 9000, 8100, 9100]}
df_test = pd.DataFrame(data = d, index = range(0,7))
S2e = df_test['S2e']
result_med_fun = med_fun(df_test)
result_med_fun = result_med_fun['median']
result_med_data = 8100
assert (result_med_fun == result_med_data).all()



def test_fit_fun2():

with fix_random_seed(42):
S2e_df = pd.DataFrame({
'S2e': np.random.normal(loc = 8000, scale = 10.0, size = 10000)
})

results = fit_function(S2e_df, bins = 50)

assert np.isclose(results['mu'[0], S2e_df.S2e.mean()], atol = 1)
assert np.isclose(results['sigma'][0], S2e_df.S2e.std(), atol = 0.5)
assert np.isclose(results['mu_error'][0], S2e_df.S2e.std()/np.sqrt(len(S2e_df)), atol = 0.1)
#error de sigma?
Loading
Loading