From 63f39aab96fff2e5e405f05e2eaf56db9de54465 Mon Sep 17 00:00:00 2001 From: MeloDi-23 Date: Mon, 18 Nov 2024 11:17:33 -0700 Subject: [PATCH] some modifications to the utils convert functions --- Corrfunc/utils.py | 55 ++++++++++++++++++++++++++++++++++------------- README_fork.md | 51 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 15 deletions(-) create mode 100644 README_fork.md diff --git a/Corrfunc/utils.py b/Corrfunc/utils.py index b4da33f2..7f369bdd 100644 --- a/Corrfunc/utils.py +++ b/Corrfunc/utils.py @@ -29,21 +29,26 @@ def convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, estimator='LS'): """ Converts raw pair counts to a correlation function. + The code has been rewritten so that it takes weighting into account. Parameters ---------- - ND1 : integer - Number of points in the first dataset + ND1 : integer | float + Number of points in the first dataset + Or sum of weights in the first dataset - ND2 : integer + ND2 : integer | float Number of points in the second dataset + Or sum of weights in the second dataset - NR1 : integer + NR1 : integer | float Number of points in the randoms for first dataset + Or sum of weights in the randoms for first dataset - NR2 : integer + NR2 : integer | float Number of points in the randoms for second dataset + Or sum of weights in the randoms for second dataset D1D2 : array-like, integer Pair-counts for the cross-correlation between D1 and D2 @@ -124,6 +129,7 @@ def convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, import numpy as np pair_counts = dict() + weights = dict() fields = ['D1D2', 'D1R2', 'D2R1', 'R1R2'] arrays = [D1D2, D1R2, D2R1, R1R2] for (field, array) in zip(fields, arrays): @@ -133,6 +139,21 @@ def convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, except IndexError: pair_counts[field] = array + try: + weight = array['weightavg'] + if np.isclose(weight, 0.0).all(): + # This happens if a DD-style pair-counting function was called without providing weights. 
+ weights[field] = np.ones_like(pair_counts[field]) + else: + weights[field] = weight + except IndexError: + weights[field] = np.ones_like(pair_counts[field]) + + pair_weight_product = { + k: pair_counts[k]*weights[k] + for k in fields + } + nbins = len(pair_counts['D1D2']) if (nbins != len(pair_counts['D1R2'])) or \ (nbins != len(pair_counts['D2R1'])) or \ @@ -146,10 +167,10 @@ def convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, fN2 = np.float64(NR2) / np.float64(ND2) cf = np.zeros(nbins) cf[:] = np.nan - cf[nonzero] = (fN1 * fN2 * pair_counts['D1D2'][nonzero] - - fN1 * pair_counts['D1R2'][nonzero] - - fN2 * pair_counts['D2R1'][nonzero] + - pair_counts['R1R2'][nonzero]) / pair_counts['R1R2'][nonzero] + cf[nonzero] = (fN1 * fN2 * pair_weight_product['D1D2'][nonzero] - + fN1 * pair_weight_product['D1R2'][nonzero] - + fN2 * pair_weight_product['D2R1'][nonzero] + + pair_weight_product['R1R2'][nonzero]) / pair_weight_product['R1R2'][nonzero] if len(cf) != nbins: msg = 'Bug in code. Calculated correlation function does not '\ 'have the same number of bins as input arrays. Input bins '\ @@ -174,17 +195,21 @@ def convert_rp_pi_counts_to_wp(ND1, ND2, NR1, NR2, Parameters ---------- - ND1 : integer - Number of points in the first dataset + ND1 : integer | float + Number of points in the first dataset + Or sum of weights in the first dataset - ND2 : integer + ND2 : integer | float Number of points in the second dataset + Or sum of weights in the second dataset - NR1 : integer + NR1 : integer | float Number of points in the randoms for first dataset + Or sum of weights in the randoms for first dataset - NR2 : integer + NR2 : integer | float Number of points in the randoms for second dataset + Or sum of weights in the randoms for second dataset D1D2 : array-like, integer Pair-counts for the cross-correlation between D1 and D2 @@ -1046,7 +1071,7 @@ def sys_pipes(): see also https://github.com/manodeep/Corrfunc/issues/157, https://github.com/manodeep/Corrfunc/issues/269. 
''' - + kwargs = {} if sys.stdout.isatty() or (sys.stdout is sys.__stdout__): kwargs['stdout'] = None diff --git a/README_fork.md b/README_fork.md new file mode 100644 index 00000000..5e0ed5de --- /dev/null +++ b/README_fork.md @@ -0,0 +1,51 @@ +# Corrfunc forked: +I rewrote utils.py so that `convert_rp_pi_counts_to_wp` and `convert_3d_counts_to_cf` work properly with weighted pair counts. The former calling convention is still valid; in addition, if you provide weights in the pair counting, they are now taken into account. + +```python +# load some catalogue... + +# Code 1 +wei_norm = galaxy['w'] / (galaxy['w'].mean()) +wei_norm_r = random['w'] / (random['w'].mean()) + +dd = DDrppi_mocks(autocorr=True, cosmology=1, nthreads=50, pimax=pimax, binfile=rp_bin, + RA1=galaxy['ra'], DEC1=galaxy['dec'], CZ1=galaxy['distance'], weights1=wei_norm, is_comoving_dist=True, weight_type='pair_product') +dr = DDrppi_mocks( + autocorr=False, cosmology=1, nthreads=50, pimax=pimax, binfile=rp_bin, + RA1=galaxy['ra'], DEC1=galaxy['dec'], CZ1=galaxy['distance'], weights1=wei_norm, + RA2=random['ra'], DEC2=random['dec'], CZ2=random['distance'], weights2=wei_norm_r, + is_comoving_dist=True, weight_type='pair_product') +rr = DDrppi_mocks(autocorr=True, cosmology=1, nthreads=50, pimax=pimax, binfile=rp_bin, + RA1=random['ra'], DEC1=random['dec'], CZ1=random['distance'], weights1=wei_norm_r, is_comoving_dist=True, weight_type='pair_product') + +Nd = len(galaxy) +Nr = len(random) + +wp_1 = convert_rp_pi_counts_to_wp(Nd, Nd, Nr, Nr, dd, dr, dr, rr, pimax=pimax, nrpbins=Nbins) + + +# Code 2 +dd = DDrppi_mocks(autocorr=True, cosmology=1, nthreads=50, pimax=pimax, binfile=rp_bin, + RA1=galaxy['ra'], DEC1=galaxy['dec'], CZ1=galaxy['distance'], weights1=galaxy['w'], is_comoving_dist=True, weight_type='pair_product') +dr = DDrppi_mocks( + autocorr=False, cosmology=1, nthreads=50, pimax=pimax, binfile=rp_bin, + RA1=galaxy['ra'], DEC1=galaxy['dec'], CZ1=galaxy['distance'], weights1=galaxy['w'], + RA2=random['ra'], 
DEC2=random['dec'], CZ2=random['distance'], weights2=random['w'], + is_comoving_dist=True, weight_type='pair_product') +rr = DDrppi_mocks(autocorr=True, cosmology=1, nthreads=50, pimax=pimax, binfile=rp_bin, + RA1=random['ra'], DEC1=random['dec'], CZ1=random['distance'], weights1=random['w'], is_comoving_dist=True, weight_type='pair_product') + +Nd = galaxy['w'].sum() +Nr = random['w'].sum() + +wp_2 = convert_rp_pi_counts_to_wp(Nd, Nd, Nr, Nr, dd, dr, dr, rr, pimax=pimax, nrpbins=Nbins) +assert np.isclose(wp_1, wp_2).all() +``` +This code will work. + +Note that, for simplicity, I didn't add new parameters to the function. +Instead you can +- normalize the weights first, e.g. `weight_normal = weight / weight.mean()`, and pass the parameters in the old way. +- pass the sum of weights of dataset 1 to `ND1`, the sum of weights of dataset 2 to `ND2`, etc. This is reasonable because, if you assume no weighting (that is, every point has weight 1), the sum of weights equals the number of points. + +Both ways will work. \ No newline at end of file