ec-jrc · NikosMastrantonas · Jan 13, 2025 · Jan 13, 2025 · Jan 13, 2025 · Jan 13, 2025
diff --git a/README.md b/README.md
@@ -980,6 +980,42 @@ mct_mask_ds = mct_mask(channels_slope_file='changrad.nc', ldd_file='ldd.nc', upa
 ```
 
 
+## profiler
+
+This tool extracts for all downstream cells from a user-defined point, all values for a selected spatial variable, and saves a csv file with the extracted points, their coordinates, the values of the variable, and the order of the points in the river flow.
+
+### Usage
+
+The tool requires the following mandatory input arguments:
+
+- `-i`, `--inputfile`: Input nc file from where the values of the downstream areas will be extracted (e.g., changrad.nc)
+- `-l`, `--LDDfile`: LISFLOOD local drain direction file (ldd.nc)
+- `-X`, `--Location_x`: Longitude of the initial point (use the same projection system as the input nc files!)
+- `-Y`, `--Location_y`: Longitude of the initial point (use the same projection system as the input nc files!)
+
+The tool can take the following additional input arguments:
+
+- `-E`, `--coordsnames`: Coordinates names for lat, lon (in this order with space!) used in the netcdf files. The function checks for 3 commonly used names (x, lon, rlon for longitudes, and y, lat, rlat for latitudes). Therefere, it is recommended to keep the default value.
+
+The tool generates the following outputs (when called from command line as main script):
+
+- `-O`, `--outputfilename`: Output file with the points, their location and order in the river flow, and the values of the variable of interest  (default: profiler.csv)
+
+It can be used either from command line, or also as a python function. Below follow some examples:
+
+Example of command that will generate a profiler csv file for a point with coordinates 10.01, 24.01 lon lat, with coordinates names in the nc being 'x' and 'y' respectively:
+
+```bash
+profiler.py -i changrad.nc -l ec_ldd.nc -X 10.01 -Y 24.01 -O profiler.csv [-E y x]
+```
+
+```python
+# no data are saved when called inside python
+from lisfloodutilities.profiler.profiler import profiler
+data_df = profiler(input_file='changrad.nc', ldd_file='ldd.nc', x_coord=10.01, y_coord=24.01, coords_names=['y' , 'x'])
+```
+
+
 ## Using `lisfloodutilities` programmatically 
 
 You can use lisflood utilities in your python programs. As an example, the script below creates the mask map for a set of stations (stations.txt). The mask map is a boolean map with 1 and 0. 1 is used for all (and only) the pixels hydrologically connected to one of the stations. The resulting mask map is in pcraster format.

diff --git a/src/lisfloodutilities/profiler/__init__.py b/src/lisfloodutilities/profiler/__init__.py
@@ -0,0 +1,17 @@
+"""
+
+Copyright 2019-2020 European Union
+
+Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission  subsequent versions of the EUPL (the "Licence");
+
+You may not use this work except in compliance with the Licence.
+You may obtain a copy of the Licence at:
+
+https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt
+
+Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the Licence for the specific language governing permissions and limitations under the Licence.
+
+"""
+
diff --git a/src/lisfloodutilities/profiler/profiler.py b/src/lisfloodutilities/profiler/profiler.py
@@ -0,0 +1,189 @@
+#   This is a script to retrieve from a selected point and variable the values for all downstream points, ordered from upstream to downstream
+#   It takes the netcdf of the variable of interest, the LDD (ldd.nc) and the coordinates of the point of interest
+#   It requires PCRaster.
+#   
+#   Usage:
+#   profiler.py -i changrad.nc -l ldd.nc -X 10.01 -Y 24.01 -E y x -O profiler.csv
+
+
+import pcraster as pcr
+import xarray as xr
+import pandas as pd
+import numpy as np
+
+def getarg():
+    """ Get program arguments.
+
+    :return: args:  namespace of program arguments
+    """
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-i', '--Inputfile', type=str, required=True,
+                        help='Input nc file from where the values of the downstream areas will be extracted')
+    parser.add_argument('-l', '--LDDfile', type=str, required=True,
+                        help='LISFLOOD LDDD file (ldd.nc)')
+    parser.add_argument('-X', '--Location_x', type=str, required=True,
+                        help='Longitude of the initial point [use the same projection system as the input nc files!]')
+    parser.add_argument('-Y', '--Location_y', type=str, required=True,
+                        help='Latitude of the initial point [use the same projection system as the input nc files!]')
+    parser.add_argument('-E', '--coordsnames', type=str, nargs='+', required=False, default="None",
+                        help='Coords names for lat, lon (in this order with space!) from the netcdf files used')
+    parser.add_argument('-O', '--outputfilename', type=str, required=False, default="profiler.csv",
+                        help='Output file (profiler.csv)')
+    args = parser.parse_args()  # assign namespace to args
+    return args
+
+
+def profiler(input_file, ldd_file, x_coord, y_coord, coords_names='None'):
+    """
+
+    Creates a dataframe with the values for all grid cells downstream of a user-defined point from a netcdf file specified by the user
+
+    Usage:
+    The tool requires the following input arguments:
+
+    Inputfile: Input nc file from where the values of the downstream areas will be extracted (e.g., chagrad.nc)
+    LDDfile: LISFLOOD local drain direction file (ldd.nc)
+    Location_x: LISFLOOD Uustream area file (upArea.nc)
+    Location_y: a mask nc file; if not given (default) all cells are considered valid.
+    coords_names: Coordinates names for lat, lon (in this order as list) used in the the netcdf files (default: 'None'; checks for commonly used names ['x', 'lon', 'rlon'], similar for lat names)
+    outputfile: Output file containing the extracted values of the downsteam points, their coordinates, and their order in the river network (default: profiler.csv)
+
+    Example:
+    profiler(input_file='changrad.nc', ldd_file='ldd.nc', Location_x=15.21, Location_y=25.14, coords_names=['y' , 'x'])
+    """
+    # ---------------- Read LDD (Note that for EFAS5 there is small shift of values for CH)
+    LDD = xr.open_dataset(ldd_file)
+    input_file = xr.open_dataset(input_file)
+
+    # ---------------- Auxiliary variables
+    x_checks = ['lon', 'x', 'rlon']
+    y_checks = ['lat', 'y', 'rlat']
+    if coords_names == "None":
+        x_proj = set(list(LDD.coords)) & set(x_checks)
+        y_proj = set(list(LDD.coords)) & set(y_checks)
+
+        if len(x_proj)!=1 or len(y_proj)!=1:
+            print('Input dataset coords names for lat/lon are not as expected.')
+            print(f'The available coords are: {list(LDD.coords)}')
+            print(f'The checked names are {y_checks} and {x_checks} for lat and lon respectively.')
+            print('Please use -E argument and give the coords names !with space in between! in order: lan lon')
+            exit(1)
+
+        x_proj = list(x_proj)[0]
+        y_proj = list(y_proj)[0]
+    else:
+        y_proj, x_proj = coords_names
+
+    # assign values of coordinates in both dataset based on LDD, in case of small precision issues
+    input_file = input_file.assign_coords({y_proj: LDD[y_proj].values, x_proj: LDD[x_proj].values})
+
+    # ---------------- Process LDD
+    old_name = [i for i in list(LDD.data_vars) if sorted(LDD[i].dims)==sorted([x_proj, y_proj])]
+    LDD = LDD.rename({old_name[0]: "ldd"})['ldd']  # only 1 variable complies with above if
+
+    # sometimes the masked value is flagged not with NaN (e.g., with cutmaps it was flagged with 0)
+    # pcr.Ldd takes only integer values 1-9, so any other value needs to be masked
+    LDD = LDD.fillna(-1)  # fill NaN so it can be converted to integer with no issues
+    LDD = LDD.astype('int')
+    LDD = LDD.where((LDD>0) & (LDD<10)).fillna(-1)
+
+    # convert the xarray to pcraster
+    LDD = LDD.transpose(y_proj, x_proj)  # make sure dims order is as pcraster needs
+
+    # ---------------- Set clone map for pcraster
+    rows, cols = len(LDD[y_proj]), len(LDD[x_proj])
+    pcr.setclone(rows, cols, 1, 0, 0)
+
+    ldd_pcr = pcr.numpy2pcr(pcr.Ldd, LDD.values, -1)  # missing values in the np are flagged as -1
+
+    # repair the ldd; needed in case ldd is created from cutmaps, so outlet is not flagged with 5 (pit) 
+    ldd_pcr = pcr.lddrepair(ldd_pcr)
+
+    # ---------------- Get coordinates index for the point of interest    
+    try:
+        final_point = LDD.sel({y_proj: y_coord, x_proj: x_coord} , method='nearest', tolerance=0.1)
+    except:
+        print('The provided coordinates are not valid, please check again!')
+        exit()
+
+    Y_index = np.argmax(LDD[y_proj].values==final_point[y_proj].values)
+    X_index = np.argmax(LDD[x_proj].values==final_point[x_proj].values)
+
+    # ---------------- Create a mask with the downstream points 
+    profile_mask_pcr = LDD.fillna(0)*0
+    profile_mask_pcr[Y_index, X_index] = 1
+    profile_mask_pcr = profile_mask_pcr.astype('int')
+
+    profile_mask_pcr = pcr.numpy2pcr(pcr.Boolean, profile_mask_pcr.values, -1)  # convert to Boolean with no NaN (-1 is not possible)
+    profile_mask_pcr = pcr.path(ldd_pcr, profile_mask_pcr)  # get the actual paths
+
+    profile_mask_np = pcr.pcr2numpy(profile_mask_pcr, 0)
+    ProfPath = LDD.fillna(0)*0+profile_mask_np
+    ProfPath = ProfPath.where(ProfPath==1)
+    ProfPath.name = 'Profile'
+    total_points = int(ProfPath.sum().values)
+    print(f'There are {total_points} points from the point of interest until the end of the river network')
+
+    # ---------------- Subset data for keeping only the domain of interest, and speeding up the analysis 
+    lats_used = ProfPath.sum(x_proj).where(ProfPath.sum(x_proj)!=0).dropna(y_proj)
+    lons_used = ProfPath.sum(y_proj).where(ProfPath.sum(y_proj)!=0).dropna(x_proj)
+
+    ProfPath = ProfPath.sel({y_proj:lats_used[y_proj].values, x_proj:lons_used[x_proj].values})
+
+    input_file = input_file.sel({y_proj: ProfPath[y_proj].values, x_proj: ProfPath[x_proj].values})
+    LDD = LDD.sel({y_proj: ProfPath[y_proj].values, x_proj: ProfPath[x_proj].values})
+
+    # define clone for pcraster for cropped data
+    rows, cols = len(ProfPath[y_proj]), len(ProfPath[x_proj])
+    pcr.setclone(rows, cols, 1, 0, 0)
+
+    rivers_mask_pcr = pcr.numpy2pcr(pcr.Scalar, ProfPath.values, 0)
+    ldd_pcr = pcr.numpy2pcr(pcr.Ldd, LDD.values, -1)
+    ldd_pcr = pcr.lddrepair(ldd_pcr)  # need to repair again, because the outlet is (possible) lost due to subsetting
+
+    # ---------------- Find the order of the points in the river network 
+    # generate initial data with the river mask
+    downstream_cells_pcr = rivers_mask_pcr
+    sum_rivers_pcr = rivers_mask_pcr
+
+    # modify data, so that the most downstream point is masked out (otherwise the below loop gives for all points the same value)
+    downstream_actual_mask_pcr = pcr.downstreamdist(ldd_pcr)
+    downstream_actual_mask_pcr = pcr.ifthenelse(downstream_actual_mask_pcr == 0, pcr.boolean(0), pcr.boolean(1))
+    downstream_actual_mask_pcr = pcr.scalar(downstream_actual_mask_pcr)
+
+    # Loop {number of cells -1} times and use downstream function to find out the order of the cells on the river flow
+    print('Calculating the order of the identified points')
+    for loops in range(total_points-1):
+        downstream_cells_pcr = pcr.downstream(ldd_pcr, downstream_cells_pcr)
+        downstream_cells_pcr = downstream_cells_pcr*downstream_actual_mask_pcr
+        sum_rivers_pcr = sum_rivers_pcr + downstream_cells_pcr
+
+    sum_rivers = (ProfPath.fillna(0)*0+pcr.pcr2numpy(sum_rivers_pcr, 0))
+    sum_rivers.name = 'order'
+
+    all_data = xr.merge([input_file, LDD, ProfPath, sum_rivers])
+    all_data_df = all_data.to_dataframe().reset_index()
+    all_data_df = all_data_df[~all_data_df.Profile.isna()]
+    all_data_df = all_data_df.sort_values('order')
+
+    return all_data_df
+
+
+def main():
+    'function for running from command line'
+    # ---------------- Read settings
+    args = getarg()
+    input_file_arg = args.Inputfile
+    ldd_file_arg = args.LDDfile
+    x_coord_arg = args.Location_x
+    y_coord_arg = args.Location_y
+    coords_names_arg = args.coordsnames
+    outputfile_arg = args.outputfilename
+
+    data_df = profiler(input_file=input_file_arg, ldd_file=ldd_file_arg, x_coord=x_coord_arg, y_coord=y_coord_arg, coords_names=coords_names_arg)
+    data_df.to_csv(outputfile_arg)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/data/profiler/LF_ETRS89_UseCase/changrad.nc b/tests/data/profiler/LF_ETRS89_UseCase/changrad.nc
diff --git a/tests/data/profiler/LF_ETRS89_UseCase/ldd.nc b/tests/data/profiler/LF_ETRS89_UseCase/ldd.nc
diff --git a/tests/data/profiler/LF_ETRS89_UseCase/reference/profiler.csv b/tests/data/profiler/LF_ETRS89_UseCase/reference/profiler.csv
@@ -0,0 +1,33 @@
+,y,x,changrad,laea,ldd,Profile,order
+83,2437500.0,4227500.0,0.00051658927,-2147483647,6.0,1.0,1.0
+103,2432500.0,4222500.0,0.00078742416,-2147483647,9.0,1.0,2.0
+82,2437500.0,4222500.0,0.0007874241,-2147483647,2.0,1.0,3.0
+102,2432500.0,4217500.0,0.0006504565,-2147483647,9.0,1.0,4.0
+81,2437500.0,4217500.0,0.0007635837,-2147483647,2.0,1.0,5.0
+80,2437500.0,4212500.0,0.0008235728,-2147483647,6.0,1.0,6.0
+59,2442500.0,4212500.0,0.0007947716,-2147483647,2.0,1.0,7.0
+37,2447500.0,4207500.0,0.0006950562,-2147483647,3.0,1.0,8.0
+15,2452500.0,4202500.0,0.0006950562,-2147483647,3.0,1.0,9.0
+36,2447500.0,4202500.0,0.00077848596,-2147483647,8.0,1.0,10.0
+35,2447500.0,4197500.0,0.0009760228,-2147483647,6.0,1.0,11.0
+34,2447500.0,4192500.0,0.0009977153,-2147483647,6.0,1.0,12.0
+13,2452500.0,4192500.0,0.0010435288,-2147483647,2.0,1.0,13.0
+12,2452500.0,4187500.0,0.0010387731,-2147483647,6.0,1.0,14.0
+11,2452500.0,4182500.0,0.0012380448,-2147483647,6.0,1.0,15.0
+10,2452500.0,4177500.0,0.0012613096,-2147483647,6.0,1.0,16.0
+9,2452500.0,4172500.0,0.0011043664,-2147483647,6.0,1.0,17.0
+8,2452500.0,4167500.0,0.0012292609,-2147483647,6.0,1.0,18.0
+7,2452500.0,4162500.0,0.0013680093,-2147483647,6.0,1.0,19.0
+6,2452500.0,4157500.0,0.0012355647,-2147483647,6.0,1.0,20.0
+5,2452500.0,4152500.0,0.0015237641,-2147483647,6.0,1.0,21.0
+25,2447500.0,4147500.0,0.0012041999,-2147483647,9.0,1.0,22.0
+24,2447500.0,4142500.0,0.0017168315,-2147483647,6.0,1.0,23.0
+45,2442500.0,4142500.0,0.0023591225,-2147483647,8.0,1.0,24.0
+44,2442500.0,4137500.0,0.0018403421,-2147483647,6.0,1.0,25.0
+65,2437500.0,4137500.0,0.0017679577,-2147483647,8.0,1.0,26.0
+86,2432500.0,4137500.0,0.0009146803,-2147483647,8.0,1.0,27.0
+107,2427500.0,4137500.0,0.00016101284,-2147483647,8.0,1.0,28.0
+128,2422500.0,4137500.0,0.0004378503,-2147483647,8.0,1.0,29.0
+149,2417500.0,4137500.0,0.0007494286,-2147483647,8.0,1.0,30.0
+148,2417500.0,4132500.0,0.00082243764,-2147483647,6.0,1.0,31.0
+168,2412500.0,4127500.0,0.00096970645,-2147483647,9.0,1.0,32.0
diff --git a/tests/data/profiler/LF_lat_lon_UseCase/changrad.nc b/tests/data/profiler/LF_lat_lon_UseCase/changrad.nc
diff --git a/tests/data/profiler/LF_lat_lon_UseCase/ldd.nc b/tests/data/profiler/LF_lat_lon_UseCase/ldd.nc
diff --git a/tests/data/profiler/LF_lat_lon_UseCase/reference/profiler.csv b/tests/data/profiler/LF_lat_lon_UseCase/reference/profiler.csv
@@ -0,0 +1,26 @@
+,lat,lon,changrad,ldd,Profile,order
+153,52.65000000000212,-122.45000000000326,0.0008084648,2.0,1.0,1.0
+142,52.75000000000212,-122.45000000000326,0.00097223686,2.0,1.0,2.0
+131,52.85000000000211,-122.45000000000326,0.0015036418,2.0,1.0,3.0
+119,52.950000000002106,-122.55000000000325,0.0019222051,3.0,1.0,4.0
+108,53.0500000000021,-122.55000000000325,0.0018676079,2.0,1.0,5.0
+96,53.150000000002095,-122.65000000000325,0.0013966679,3.0,1.0,6.0
+95,53.150000000002095,-122.75000000000324,0.0008484858,6.0,1.0,7.0
+83,53.25000000000209,-122.85000000000323,0.002747498,3.0,1.0,8.0
+72,53.35000000000208,-122.85000000000323,0.0030551292,2.0,1.0,9.0
+73,53.35000000000208,-122.75000000000324,0.00051982753,4.0,1.0,10.0
+74,53.35000000000208,-122.65000000000325,0.0004815376,4.0,1.0,11.0
+63,53.45000000000208,-122.65000000000325,0.00052186823,2.0,1.0,12.0
+51,53.55000000000207,-122.75000000000324,0.0012226264,3.0,1.0,13.0
+41,53.650000000002066,-122.65000000000325,0.0009240197,1.0,1.0,14.0
+29,53.75000000000206,-122.75000000000324,0.00074306637,3.0,1.0,15.0
+18,53.850000000002055,-122.75000000000324,0.0006388169,2.0,1.0,16.0
+7,53.95000000000205,-122.75000000000324,0.00071776606,2.0,1.0,17.0
+6,53.95000000000205,-122.85000000000323,0.001410268,6.0,1.0,18.0
+5,53.95000000000205,-122.95000000000323,0.0012500631,6.0,1.0,19.0
+15,53.850000000002055,-123.05000000000322,0.0012879174,9.0,1.0,20.0
+14,53.850000000002055,-123.15000000000322,0.000889672,6.0,1.0,21.0
+3,53.95000000000205,-123.15000000000322,0.000889672,2.0,1.0,22.0
+2,53.95000000000205,-123.25000000000321,0.00088967197,6.0,1.0,23.0
+1,53.95000000000205,-123.3500000000032,0.0007190692,6.0,1.0,24.0
+0,53.95000000000205,-123.4500000000032,0.0005076786,6.0,1.0,25.0
diff --git a/tests/test_profiler.py b/tests/test_profiler.py
@@ -0,0 +1,60 @@
+import os
+import shutil
+import pandas as pd
+import xarray as xr
+
+from lisfloodutilities.profiler.profiler import profiler
+
+
+def mk_path_out(p):
+    'make folder for storing output data'
+    path_out = os.path.join(os.path.dirname(__file__), p)
+    if os.path.exists(path_out):
+        shutil.rmtree(path_out, ignore_errors=True)
+    os.mkdir(path_out)
+
+
+class TestProfiler():
+
+    case_dir = os.path.join(os.path.dirname(__file__), 'data', 'profiler')
+
+    def run(self, x_coord, y_coord, coords_names, case_name):
+        'generate the profiler data'
+        # setting
+        self.out_path_ref = os.path.join(self.case_dir, case_name, 'reference')
+        self.out_path_run = os.path.join(self.case_dir, case_name, 'out')
+        mk_path_out(self.out_path_run)
+
+        input_file = os.path.join(self.case_dir, case_name, 'changrad.nc')
+        ldd_file = os.path.join(self.case_dir, case_name, 'ldd.nc')
+        outputfile = os.path.join(self.out_path_run, 'profiler.csv')
+
+        profiler(input_file, ldd_file, x_coord, y_coord, coords_names='None')
+
+        # generate the profiler
+        profiler_data = profiler(input_file, ldd_file, x_coord, y_coord, coords_names)
+        profiler_data.to_csv(outputfile)
+
+        # compare the generated mask with the reference one
+        ref_file = self.out_path_ref+'/profiler.csv'
+        reference = pd.read_csv(ref_file)
+        out_file = self.out_path_run+'/profiler.csv'
+        generated = pd.read_csv(out_file)
+        all_equal = reference.compare(generated)
+
+        if len(all_equal)!=0:
+            fail_message = f'Test profiler generation for {case_name} failed. Please check differences between' 
+            fail_message += f' the generated mask "{out_file}" and the expected mask "{ref_file}".'
+            assert all_equal, fail_message
+        else:
+            # if equal just delete the out folder
+            shutil.rmtree(self.out_path_run, ignore_errors=True)
+
+
+class TestProfilers(TestProfiler):
+
+    def test_mctrivers_etrs89(self):
+        self.run(4127500, 2412500, 'None', 'LF_ETRS89_UseCase')
+
+    def test_mctrivers_latlon(self):
+        self.run(-123.45, 53.95, ['lat', 'lon'], 'LF_lat_lon_UseCase')