diff --git a/README.md b/README.md index a9192a7..26cbfcf 100644 --- a/README.md +++ b/README.md @@ -980,6 +980,42 @@ mct_mask_ds = mct_mask(channels_slope_file='changrad.nc', ldd_file='ldd.nc', upa ``` +## profiler + +This tool extracts for all downstream cells from a user-defined point, all values for a selected spatial variable, and saves a csv file with the extracted points, their coordinates, the values of the variable, and the order of the points in the river flow. + +### Usage + +The tool requires the following mandatory input arguments: + +- `-i`, `--inputfile`: Input nc file from where the values of the downstream areas will be extracted (e.g., changrad.nc) +- `-l`, `--LDDfile`: LISFLOOD local drain direction file (ldd.nc) +- `-X`, `--Location_x`: Longitude of the initial point (use the same projection system as the input nc files!) +- `-Y`, `--Location_y`: Longitude of the initial point (use the same projection system as the input nc files!) + +The tool can take the following additional input arguments: + +- `-E`, `--coordsnames`: Coordinates names for lat, lon (in this order with space!) used in the netcdf files. The function checks for 3 commonly used names (x, lon, rlon for longitudes, and y, lat, rlat for latitudes). Therefere, it is recommended to keep the default value. + +The tool generates the following outputs (when called from command line as main script): + +- `-O`, `--outputfilename`: Output file with the points, their location and order in the river flow, and the values of the variable of interest (default: profiler.csv) + +It can be used either from command line, or also as a python function. 
Below follow some examples: + +Example of command that will generate a profiler csv file for a point with coordinates 10.01, 24.01 lon lat, with coordinates names in the nc being 'x' and 'y' respectively: + +```bash +profiler.py -i changrad.nc -l ec_ldd.nc -X 10.01 -Y 24.01 -O profiler.csv [-E y x] +``` + +```python +# no data are saved when called inside python +from lisfloodutilities.profiler.profiler import profiler +data_df = profiler(input_file='changrad.nc', ldd_file='ldd.nc', x_coord=10.01, y_coord=24.01, coords_names=['y' , 'x']) +``` + + ## Using `lisfloodutilities` programmatically You can use lisflood utilities in your python programs. As an example, the script below creates the mask map for a set of stations (stations.txt). The mask map is a boolean map with 1 and 0. 1 is used for all (and only) the pixels hydrologically connected to one of the stations. The resulting mask map is in pcraster format. diff --git a/src/lisfloodutilities/profiler/__init__.py b/src/lisfloodutilities/profiler/__init__.py new file mode 100644 index 0000000..90c039e --- /dev/null +++ b/src/lisfloodutilities/profiler/__init__.py @@ -0,0 +1,17 @@ +""" + +Copyright 2019-2020 European Union + +Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission subsequent versions of the EUPL (the "Licence"); + +You may not use this work except in compliance with the Licence. +You may obtain a copy of the Licence at: + +https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt + +Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the Licence for the specific language governing permissions and limitations under the Licence. 
+ +""" + diff --git a/src/lisfloodutilities/profiler/profiler.py b/src/lisfloodutilities/profiler/profiler.py new file mode 100644 index 0000000..51d8ee3 --- /dev/null +++ b/src/lisfloodutilities/profiler/profiler.py @@ -0,0 +1,189 @@ +# This is a script to retrieve from a selected point and variable the values for all downstream points, ordered from upstream to downstream +# It takes the netcdf of the variable of interest, the LDD (ldd.nc) and the coordinates of the point of interest +# It requires PCRaster. +# +# Usage: +# profiler.py -i changrad.nc -l ldd.nc -X 10.01 -Y 24.01 -E y x -O profiler.csv + + +import pcraster as pcr +import xarray as xr +import pandas as pd +import numpy as np + +def getarg(): + """ Get program arguments. + + :return: args: namespace of program arguments + """ + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('-i', '--Inputfile', type=str, required=True, + help='Input nc file from where the values of the downstream areas will be extracted') + parser.add_argument('-l', '--LDDfile', type=str, required=True, + help='LISFLOOD LDDD file (ldd.nc)') + parser.add_argument('-X', '--Location_x', type=str, required=True, + help='Longitude of the initial point [use the same projection system as the input nc files!]') + parser.add_argument('-Y', '--Location_y', type=str, required=True, + help='Latitude of the initial point [use the same projection system as the input nc files!]') + parser.add_argument('-E', '--coordsnames', type=str, nargs='+', required=False, default="None", + help='Coords names for lat, lon (in this order with space!) 
from the netcdf files used') + parser.add_argument('-O', '--outputfilename', type=str, required=False, default="profiler.csv", + help='Output file (profiler.csv)') + args = parser.parse_args() # assign namespace to args + return args + + +def profiler(input_file, ldd_file, x_coord, y_coord, coords_names='None'): + """ + + Creates a dataframe with the values for all grid cells downstream of a user-defined point from a netcdf file specified by the user + + Usage: + The tool requires the following input arguments: + + Inputfile: Input nc file from where the values of the downstream areas will be extracted (e.g., chagrad.nc) + LDDfile: LISFLOOD local drain direction file (ldd.nc) + Location_x: LISFLOOD Uustream area file (upArea.nc) + Location_y: a mask nc file; if not given (default) all cells are considered valid. + coords_names: Coordinates names for lat, lon (in this order as list) used in the the netcdf files (default: 'None'; checks for commonly used names ['x', 'lon', 'rlon'], similar for lat names) + outputfile: Output file containing the extracted values of the downsteam points, their coordinates, and their order in the river network (default: profiler.csv) + + Example: + profiler(input_file='changrad.nc', ldd_file='ldd.nc', Location_x=15.21, Location_y=25.14, coords_names=['y' , 'x']) + """ + # ---------------- Read LDD (Note that for EFAS5 there is small shift of values for CH) + LDD = xr.open_dataset(ldd_file) + input_file = xr.open_dataset(input_file) + + # ---------------- Auxiliary variables + x_checks = ['lon', 'x', 'rlon'] + y_checks = ['lat', 'y', 'rlat'] + if coords_names == "None": + x_proj = set(list(LDD.coords)) & set(x_checks) + y_proj = set(list(LDD.coords)) & set(y_checks) + + if len(x_proj)!=1 or len(y_proj)!=1: + print('Input dataset coords names for lat/lon are not as expected.') + print(f'The available coords are: {list(LDD.coords)}') + print(f'The checked names are {y_checks} and {x_checks} for lat and lon respectively.') + print('Please 
use -E argument and give the coords names !with space in between! in order: lat lon')
profile_mask_pcr = profile_mask_pcr.astype('int') + + profile_mask_pcr = pcr.numpy2pcr(pcr.Boolean, profile_mask_pcr.values, -1) # convert to Boolean with no NaN (-1 is not possible) + profile_mask_pcr = pcr.path(ldd_pcr, profile_mask_pcr) # get the actual paths + + profile_mask_np = pcr.pcr2numpy(profile_mask_pcr, 0) + ProfPath = LDD.fillna(0)*0+profile_mask_np + ProfPath = ProfPath.where(ProfPath==1) + ProfPath.name = 'Profile' + total_points = int(ProfPath.sum().values) + print(f'There are {total_points} points from the point of interest until the end of the river network') + + # ---------------- Subset data for keeping only the domain of interest, and speeding up the analysis + lats_used = ProfPath.sum(x_proj).where(ProfPath.sum(x_proj)!=0).dropna(y_proj) + lons_used = ProfPath.sum(y_proj).where(ProfPath.sum(y_proj)!=0).dropna(x_proj) + + ProfPath = ProfPath.sel({y_proj:lats_used[y_proj].values, x_proj:lons_used[x_proj].values}) + + input_file = input_file.sel({y_proj: ProfPath[y_proj].values, x_proj: ProfPath[x_proj].values}) + LDD = LDD.sel({y_proj: ProfPath[y_proj].values, x_proj: ProfPath[x_proj].values}) + + # define clone for pcraster for cropped data + rows, cols = len(ProfPath[y_proj]), len(ProfPath[x_proj]) + pcr.setclone(rows, cols, 1, 0, 0) + + rivers_mask_pcr = pcr.numpy2pcr(pcr.Scalar, ProfPath.values, 0) + ldd_pcr = pcr.numpy2pcr(pcr.Ldd, LDD.values, -1) + ldd_pcr = pcr.lddrepair(ldd_pcr) # need to repair again, because the outlet is (possible) lost due to subsetting + + # ---------------- Find the order of the points in the river network + # generate initial data with the river mask + downstream_cells_pcr = rivers_mask_pcr + sum_rivers_pcr = rivers_mask_pcr + + # modify data, so that the most downstream point is masked out (otherwise the below loop gives for all points the same value) + downstream_actual_mask_pcr = pcr.downstreamdist(ldd_pcr) + downstream_actual_mask_pcr = pcr.ifthenelse(downstream_actual_mask_pcr == 0, pcr.boolean(0), 
pcr.boolean(1)) + downstream_actual_mask_pcr = pcr.scalar(downstream_actual_mask_pcr) + + # Loop {number of cells -1} times and use downstream function to find out the order of the cells on the river flow + print('Calculating the order of the identified points') + for loops in range(total_points-1): + downstream_cells_pcr = pcr.downstream(ldd_pcr, downstream_cells_pcr) + downstream_cells_pcr = downstream_cells_pcr*downstream_actual_mask_pcr + sum_rivers_pcr = sum_rivers_pcr + downstream_cells_pcr + + sum_rivers = (ProfPath.fillna(0)*0+pcr.pcr2numpy(sum_rivers_pcr, 0)) + sum_rivers.name = 'order' + + all_data = xr.merge([input_file, LDD, ProfPath, sum_rivers]) + all_data_df = all_data.to_dataframe().reset_index() + all_data_df = all_data_df[~all_data_df.Profile.isna()] + all_data_df = all_data_df.sort_values('order') + + return all_data_df + + +def main(): + 'function for running from command line' + # ---------------- Read settings + args = getarg() + input_file_arg = args.Inputfile + ldd_file_arg = args.LDDfile + x_coord_arg = args.Location_x + y_coord_arg = args.Location_y + coords_names_arg = args.coordsnames + outputfile_arg = args.outputfilename + + data_df = profiler(input_file=input_file_arg, ldd_file=ldd_file_arg, x_coord=x_coord_arg, y_coord=y_coord_arg, coords_names=coords_names_arg) + data_df.to_csv(outputfile_arg) + + +if __name__ == "__main__": + main() diff --git a/tests/data/profiler/LF_ETRS89_UseCase/changrad.nc b/tests/data/profiler/LF_ETRS89_UseCase/changrad.nc new file mode 100644 index 0000000..d5ee310 Binary files /dev/null and b/tests/data/profiler/LF_ETRS89_UseCase/changrad.nc differ diff --git a/tests/data/profiler/LF_ETRS89_UseCase/ldd.nc b/tests/data/profiler/LF_ETRS89_UseCase/ldd.nc new file mode 100644 index 0000000..5d2cbc3 Binary files /dev/null and b/tests/data/profiler/LF_ETRS89_UseCase/ldd.nc differ diff --git a/tests/data/profiler/LF_ETRS89_UseCase/reference/profiler.csv 
b/tests/data/profiler/LF_ETRS89_UseCase/reference/profiler.csv new file mode 100644 index 0000000..dd267ea --- /dev/null +++ b/tests/data/profiler/LF_ETRS89_UseCase/reference/profiler.csv @@ -0,0 +1,33 @@ +,y,x,changrad,laea,ldd,Profile,order +83,2437500.0,4227500.0,0.00051658927,-2147483647,6.0,1.0,1.0 +103,2432500.0,4222500.0,0.00078742416,-2147483647,9.0,1.0,2.0 +82,2437500.0,4222500.0,0.0007874241,-2147483647,2.0,1.0,3.0 +102,2432500.0,4217500.0,0.0006504565,-2147483647,9.0,1.0,4.0 +81,2437500.0,4217500.0,0.0007635837,-2147483647,2.0,1.0,5.0 +80,2437500.0,4212500.0,0.0008235728,-2147483647,6.0,1.0,6.0 +59,2442500.0,4212500.0,0.0007947716,-2147483647,2.0,1.0,7.0 +37,2447500.0,4207500.0,0.0006950562,-2147483647,3.0,1.0,8.0 +15,2452500.0,4202500.0,0.0006950562,-2147483647,3.0,1.0,9.0 +36,2447500.0,4202500.0,0.00077848596,-2147483647,8.0,1.0,10.0 +35,2447500.0,4197500.0,0.0009760228,-2147483647,6.0,1.0,11.0 +34,2447500.0,4192500.0,0.0009977153,-2147483647,6.0,1.0,12.0 +13,2452500.0,4192500.0,0.0010435288,-2147483647,2.0,1.0,13.0 +12,2452500.0,4187500.0,0.0010387731,-2147483647,6.0,1.0,14.0 +11,2452500.0,4182500.0,0.0012380448,-2147483647,6.0,1.0,15.0 +10,2452500.0,4177500.0,0.0012613096,-2147483647,6.0,1.0,16.0 +9,2452500.0,4172500.0,0.0011043664,-2147483647,6.0,1.0,17.0 +8,2452500.0,4167500.0,0.0012292609,-2147483647,6.0,1.0,18.0 +7,2452500.0,4162500.0,0.0013680093,-2147483647,6.0,1.0,19.0 +6,2452500.0,4157500.0,0.0012355647,-2147483647,6.0,1.0,20.0 +5,2452500.0,4152500.0,0.0015237641,-2147483647,6.0,1.0,21.0 +25,2447500.0,4147500.0,0.0012041999,-2147483647,9.0,1.0,22.0 +24,2447500.0,4142500.0,0.0017168315,-2147483647,6.0,1.0,23.0 +45,2442500.0,4142500.0,0.0023591225,-2147483647,8.0,1.0,24.0 +44,2442500.0,4137500.0,0.0018403421,-2147483647,6.0,1.0,25.0 +65,2437500.0,4137500.0,0.0017679577,-2147483647,8.0,1.0,26.0 +86,2432500.0,4137500.0,0.0009146803,-2147483647,8.0,1.0,27.0 +107,2427500.0,4137500.0,0.00016101284,-2147483647,8.0,1.0,28.0 
+128,2422500.0,4137500.0,0.0004378503,-2147483647,8.0,1.0,29.0 +149,2417500.0,4137500.0,0.0007494286,-2147483647,8.0,1.0,30.0 +148,2417500.0,4132500.0,0.00082243764,-2147483647,6.0,1.0,31.0 +168,2412500.0,4127500.0,0.00096970645,-2147483647,9.0,1.0,32.0 diff --git a/tests/data/profiler/LF_lat_lon_UseCase/changrad.nc b/tests/data/profiler/LF_lat_lon_UseCase/changrad.nc new file mode 100644 index 0000000..62031f8 Binary files /dev/null and b/tests/data/profiler/LF_lat_lon_UseCase/changrad.nc differ diff --git a/tests/data/profiler/LF_lat_lon_UseCase/ldd.nc b/tests/data/profiler/LF_lat_lon_UseCase/ldd.nc new file mode 100644 index 0000000..017b64a Binary files /dev/null and b/tests/data/profiler/LF_lat_lon_UseCase/ldd.nc differ diff --git a/tests/data/profiler/LF_lat_lon_UseCase/reference/profiler.csv b/tests/data/profiler/LF_lat_lon_UseCase/reference/profiler.csv new file mode 100644 index 0000000..14166e9 --- /dev/null +++ b/tests/data/profiler/LF_lat_lon_UseCase/reference/profiler.csv @@ -0,0 +1,26 @@ +,lat,lon,changrad,ldd,Profile,order +153,52.65000000000212,-122.45000000000326,0.0008084648,2.0,1.0,1.0 +142,52.75000000000212,-122.45000000000326,0.00097223686,2.0,1.0,2.0 +131,52.85000000000211,-122.45000000000326,0.0015036418,2.0,1.0,3.0 +119,52.950000000002106,-122.55000000000325,0.0019222051,3.0,1.0,4.0 +108,53.0500000000021,-122.55000000000325,0.0018676079,2.0,1.0,5.0 +96,53.150000000002095,-122.65000000000325,0.0013966679,3.0,1.0,6.0 +95,53.150000000002095,-122.75000000000324,0.0008484858,6.0,1.0,7.0 +83,53.25000000000209,-122.85000000000323,0.002747498,3.0,1.0,8.0 +72,53.35000000000208,-122.85000000000323,0.0030551292,2.0,1.0,9.0 +73,53.35000000000208,-122.75000000000324,0.00051982753,4.0,1.0,10.0 +74,53.35000000000208,-122.65000000000325,0.0004815376,4.0,1.0,11.0 +63,53.45000000000208,-122.65000000000325,0.00052186823,2.0,1.0,12.0 +51,53.55000000000207,-122.75000000000324,0.0012226264,3.0,1.0,13.0 
+41,53.650000000002066,-122.65000000000325,0.0009240197,1.0,1.0,14.0 +29,53.75000000000206,-122.75000000000324,0.00074306637,3.0,1.0,15.0 +18,53.850000000002055,-122.75000000000324,0.0006388169,2.0,1.0,16.0 +7,53.95000000000205,-122.75000000000324,0.00071776606,2.0,1.0,17.0 +6,53.95000000000205,-122.85000000000323,0.001410268,6.0,1.0,18.0 +5,53.95000000000205,-122.95000000000323,0.0012500631,6.0,1.0,19.0 +15,53.850000000002055,-123.05000000000322,0.0012879174,9.0,1.0,20.0 +14,53.850000000002055,-123.15000000000322,0.000889672,6.0,1.0,21.0 +3,53.95000000000205,-123.15000000000322,0.000889672,2.0,1.0,22.0 +2,53.95000000000205,-123.25000000000321,0.00088967197,6.0,1.0,23.0 +1,53.95000000000205,-123.3500000000032,0.0007190692,6.0,1.0,24.0 +0,53.95000000000205,-123.4500000000032,0.0005076786,6.0,1.0,25.0 diff --git a/tests/test_profiler.py b/tests/test_profiler.py new file mode 100644 index 0000000..b5705d7 --- /dev/null +++ b/tests/test_profiler.py @@ -0,0 +1,60 @@ +import os +import shutil +import pandas as pd +import xarray as xr + +from lisfloodutilities.profiler.profiler import profiler + + +def mk_path_out(p): + 'make folder for storing output data' + path_out = os.path.join(os.path.dirname(__file__), p) + if os.path.exists(path_out): + shutil.rmtree(path_out, ignore_errors=True) + os.mkdir(path_out) + + +class TestProfiler(): + + case_dir = os.path.join(os.path.dirname(__file__), 'data', 'profiler') + + def run(self, x_coord, y_coord, coords_names, case_name): + 'generate the profiler data' + # setting + self.out_path_ref = os.path.join(self.case_dir, case_name, 'reference') + self.out_path_run = os.path.join(self.case_dir, case_name, 'out') + mk_path_out(self.out_path_run) + + input_file = os.path.join(self.case_dir, case_name, 'changrad.nc') + ldd_file = os.path.join(self.case_dir, case_name, 'ldd.nc') + outputfile = os.path.join(self.out_path_run, 'profiler.csv') + + profiler(input_file, ldd_file, x_coord, y_coord, coords_names='None') + + # generate the 
profiler + profiler_data = profiler(input_file, ldd_file, x_coord, y_coord, coords_names) + profiler_data.to_csv(outputfile) + + # compare the generated mask with the reference one + ref_file = self.out_path_ref+'/profiler.csv' + reference = pd.read_csv(ref_file) + out_file = self.out_path_run+'/profiler.csv' + generated = pd.read_csv(out_file) + all_equal = reference.compare(generated) + + if len(all_equal)!=0: + fail_message = f'Test profiler generation for {case_name} failed. Please check differences between' + fail_message += f' the generated mask "{out_file}" and the expected mask "{ref_file}".' + assert all_equal, fail_message + else: + # if equal just delete the out folder + shutil.rmtree(self.out_path_run, ignore_errors=True) + + +class TestProfilers(TestProfiler): + + def test_mctrivers_etrs89(self): + self.run(4127500, 2412500, 'None', 'LF_ETRS89_UseCase') + + def test_mctrivers_latlon(self): + self.run(-123.45, 53.95, ['lat', 'lon'], 'LF_lat_lon_UseCase') \ No newline at end of file