From 63f39aab96fff2e5e405f05e2eaf56db9de54465 Mon Sep 17 00:00:00 2001 From: MeloDi-23 Date: Mon, 18 Nov 2024 11:17:33 -0700 Subject: [PATCH] some modifications to the utils convert functions --- Corrfunc/utils.py | 55 ++++++++++++++++++++++++++++++++++------------- README_fork.md | 51 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 15 deletions(-) create mode 100644 README_fork.md diff --git a/Corrfunc/utils.py b/Corrfunc/utils.py index b4da33f2..7f369bdd 100644 --- a/Corrfunc/utils.py +++ b/Corrfunc/utils.py @@ -29,21 +29,26 @@ def convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, estimator='LS'): """ Converts raw pair counts to a correlation function. + The code has been rewritten so that it takes weighting into account. Parameters ---------- - ND1 : integer - Number of points in the first dataset + ND1 : integer | float + Number of points in the first dataset + Or sum of weights in the first dataset - ND2 : integer + ND2 : integer | float Number of points in the second dataset + Or sum of weights in the second dataset - NR1 : integer + NR1 : integer | float Number of points in the randoms for first dataset + Or sum of weights in the randoms for first dataset - NR2 : integer + NR2 : integer | float Number of points in the randoms for second dataset + Or sum of weights in the randoms for second dataset D1D2 : array-like, integer Pair-counts for the cross-correlation between D1 and D2 @@ -124,6 +129,7 @@ def convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, import numpy as np pair_counts = dict() + weights = dict() fields = ['D1D2', 'D1R2', 'D2R1', 'R1R2'] arrays = [D1D2, D1R2, D2R1, R1R2] for (field, array) in zip(fields, arrays): @@ -133,6 +139,21 @@ def convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, except IndexError: pair_counts[field] = array + try: + weight = array['weightavg'] + if np.isclose(weight, 0.0).all(): + # This happens if a DD-style pair-counting function was called without providing weights. 
+ weights[field] = np.ones_like(pair_counts[field]) + else: + weights[field] = weight + except IndexError: + weights[field] = np.ones_like(pair_counts[field]) + + pair_weight_product = { + k: pair_counts[k]*weights[k] + for k in fields + } + nbins = len(pair_counts['D1D2']) if (nbins != len(pair_counts['D1R2'])) or \ (nbins != len(pair_counts['D2R1'])) or \ @@ -146,10 +167,10 @@ def convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, fN2 = np.float64(NR2) / np.float64(ND2) cf = np.zeros(nbins) cf[:] = np.nan - cf[nonzero] = (fN1 * fN2 * pair_counts['D1D2'][nonzero] - - fN1 * pair_counts['D1R2'][nonzero] - - fN2 * pair_counts['D2R1'][nonzero] + - pair_counts['R1R2'][nonzero]) / pair_counts['R1R2'][nonzero] + cf[nonzero] = (fN1 * fN2 * pair_weight_product['D1D2'][nonzero] - + fN1 * pair_weight_product['D1R2'][nonzero] - + fN2 * pair_weight_product['D2R1'][nonzero] + + pair_weight_product['R1R2'][nonzero]) / pair_weight_product['R1R2'][nonzero] if len(cf) != nbins: msg = 'Bug in code. Calculated correlation function does not '\ 'have the same number of bins as input arrays. Input bins '\ @@ -174,17 +195,21 @@ def convert_rp_pi_counts_to_wp(ND1, ND2, NR1, NR2, Parameters ---------- - ND1 : integer - Number of points in the first dataset + ND1 : integer | float + Number of points in the first dataset + Or sum of weights in the first dataset - ND2 : integer + ND2 : integer | float Number of points in the second dataset + Or sum of weights in the second dataset - NR1 : integer + NR1 : integer | float Number of points in the randoms for first dataset + Or sum of weights in the randoms for first dataset - NR2 : integer + NR2 : integer | float Number of points in the randoms for second dataset + Or sum of weights in the randoms for second dataset D1D2 : array-like, integer Pair-counts for the cross-correlation between D1 and D2 @@ -1046,7 +1071,7 @@ def sys_pipes(): see also https://github.com/manodeep/Corrfunc/issues/157, https://github.com/manodeep/Corrfunc/issues/269. 
''' - + kwargs = {} if sys.stdout.isatty() or (sys.stdout is sys.__stdout__): kwargs['stdout'] = None diff --git a/README_fork.md b/README_fork.md new file mode 100644 index 00000000..5e0ed5de --- /dev/null +++ b/README_fork.md @@ -0,0 +1,51 @@ +# Corrfunc forked: +I rewrote utils.py so that `convert_rp_pi_counts_to_wp` and `convert_3d_counts_to_cf` work properly with weighted pair counts. The former calling convention is still valid; in addition, if you provide weights in the pair counting, they are now taken into account. + +```python +# load some catalogue... + +# Code 1 +wei_norm = galaxy['w'] / (galaxy['w'].mean()) +wei_norm_r = random['w'] / (random['w'].mean()) + +dd = DDrppi_mocks(autocorr=True, cosmology=1, nthreads=50, pimax=pimax, binfile=rp_bin, + RA1=galaxy['ra'], DEC1=galaxy['dec'], CZ1=galaxy['distance'], weights1=wei_norm, is_comoving_dist=True, weight_type='pair_product') +dr = DDrppi_mocks( + autocorr=False, cosmology=1, nthreads=50, pimax=pimax, binfile=rp_bin, + RA1=galaxy['ra'], DEC1=galaxy['dec'], CZ1=galaxy['distance'], weights1=wei_norm, + RA2=random['ra'], DEC2=random['dec'], CZ2=random['distance'], weights2=wei_norm_r, + is_comoving_dist=True, weight_type='pair_product') +rr = DDrppi_mocks(autocorr=True, cosmology=1, nthreads=50, pimax=pimax, binfile=rp_bin, + RA1=random['ra'], DEC1=random['dec'], CZ1=random['distance'], weights1=wei_norm_r, is_comoving_dist=True, weight_type='pair_product') + +Nd = len(galaxy) +Nr = len(random) + +wp_1 = convert_rp_pi_counts_to_wp(Nd, Nd, Nr, Nr, dd, dr, dr, rr, pimax=pimax, nrpbins=Nbins) + + +# Code 2 +dd = DDrppi_mocks(autocorr=True, cosmology=1, nthreads=50, pimax=pimax, binfile=rp_bin, + RA1=galaxy['ra'], DEC1=galaxy['dec'], CZ1=galaxy['distance'], weights1=galaxy['w'], is_comoving_dist=True, weight_type='pair_product') +dr = DDrppi_mocks( + autocorr=False, cosmology=1, nthreads=50, pimax=pimax, binfile=rp_bin, + RA1=galaxy['ra'], DEC1=galaxy['dec'], CZ1=galaxy['distance'], weights1=galaxy['w'], + RA2=random['ra'], 
DEC2=random['dec'], CZ2=random['distance'], weights2=random['w'], + is_comoving_dist=True, weight_type='pair_product') +rr = DDrppi_mocks(autocorr=True, cosmology=1, nthreads=50, pimax=pimax, binfile=rp_bin, + RA1=random['ra'], DEC1=random['dec'], CZ1=random['distance'], weights1=random['w'], is_comoving_dist=True, weight_type='pair_product') + +Nd = galaxy['w'].sum() +Nr = random['w'].sum() + +wp_2 = convert_rp_pi_counts_to_wp(Nd, Nd, Nr, Nr, dd, dr, dr, rr, pimax=pimax, nrpbins=Nbins) +assert np.isclose(wp_1, wp_2).all() +``` +This code will work. + +Note that, for simplicity, I didn't add new parameters to the function. +Instead you can +- normalize the weights first, e.g. `weight_normal = weight / weight.mean()`, and pass the parameters in the old way. +- pass the sum of weights of dataset 1 to `ND1`, the sum of weights of dataset 2 to `ND2`, etc. This is reasonable because, if you assume no weighting (that is, every point has weight 1), the sum of weights equals the number of points. + +Both ways will work. \ No newline at end of file