Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -980,6 +980,42 @@ mct_mask_ds = mct_mask(channels_slope_file='changrad.nc', ldd_file='ldd.nc', upa
```


## profiler

This tool extracts for all downstream cells from a user-defined point, all values for a selected spatial variable, and saves a csv file with the extracted points, their coordinates, the values of the variable, and the order of the points in the river flow.

### Usage

The tool requires the following mandatory input arguments:

- `-i`, `--inputfile`: Input nc file from where the values of the downstream areas will be extracted (e.g., changrad.nc)
- `-l`, `--LDDfile`: LISFLOOD local drain direction file (ldd.nc)
- `-X`, `--Location_x`: Longitude of the initial point (use the same projection system as the input nc files!)
- `-Y`, `--Location_y`: Longitude of the initial point (use the same projection system as the input nc files!)

The tool can take the following additional input arguments:

- `-E`, `--coordsnames`: Coordinates names for lat, lon (in this order with space!) used in the netcdf files. The function checks for 3 commonly used names (x, lon, rlon for longitudes, and y, lat, rlat for latitudes). Therefere, it is recommended to keep the default value.

The tool generates the following outputs (when called from command line as main script):

- `-O`, `--outputfilename`: Output file with the points, their location and order in the river flow, and the values of the variable of interest (default: profiler.csv)

It can be used either from command line, or also as a python function. Below follow some examples:

Example of command that will generate a profiler csv file for a point with coordinates 10.01, 24.01 lon lat, with coordinates names in the nc being 'x' and 'y' respectively:

```bash
profiler.py -i changrad.nc -l ec_ldd.nc -X 10.01 -Y 24.01 -O profiler.csv [-E y x]
```

```python
# no data are saved when called inside python
from lisfloodutilities.profiler.profiler import profiler
data_df = profiler(input_file='changrad.nc', ldd_file='ldd.nc', x_coord=10.01, y_coord=24.01, coords_names=['y' , 'x'])
```


## Using `lisfloodutilities` programmatically

You can use lisflood utilities in your python programs. As an example, the script below creates the mask map for a set of stations (stations.txt). The mask map is a boolean map with 1 and 0. 1 is used for all (and only) the pixels hydrologically connected to one of the stations. The resulting mask map is in pcraster format.
Expand Down
17 changes: 17 additions & 0 deletions src/lisfloodutilities/profiler/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""

Copyright 2019-2020 European Union

Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission subsequent versions of the EUPL (the "Licence");

You may not use this work except in compliance with the Licence.
You may obtain a copy of the Licence at:

https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt

Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the Licence for the specific language governing permissions and limitations under the Licence.

"""

189 changes: 189 additions & 0 deletions src/lisfloodutilities/profiler/profiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
# This is a script to retrieve from a selected point and variable the values for all downstream points, ordered from upstream to downstream
# It takes the netcdf of the variable of interest, the LDD (ldd.nc) and the coordinates of the point of interest
# It requires PCRaster.
#
# Usage:
# profiler.py -i changrad.nc -l ldd.nc -X 10.01 -Y 24.01 -E y x -O profiler.csv


import pcraster as pcr
import xarray as xr
import pandas as pd
import numpy as np

def getarg():
""" Get program arguments.

:return: args: namespace of program arguments
"""
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--Inputfile', type=str, required=True,
help='Input nc file from where the values of the downstream areas will be extracted')
parser.add_argument('-l', '--LDDfile', type=str, required=True,
help='LISFLOOD LDDD file (ldd.nc)')
parser.add_argument('-X', '--Location_x', type=str, required=True,
help='Longitude of the initial point [use the same projection system as the input nc files!]')
parser.add_argument('-Y', '--Location_y', type=str, required=True,
help='Latitude of the initial point [use the same projection system as the input nc files!]')
parser.add_argument('-E', '--coordsnames', type=str, nargs='+', required=False, default="None",
help='Coords names for lat, lon (in this order with space!) from the netcdf files used')
parser.add_argument('-O', '--outputfilename', type=str, required=False, default="profiler.csv",
help='Output file (profiler.csv)')
args = parser.parse_args() # assign namespace to args
return args


def profiler(input_file, ldd_file, x_coord, y_coord, coords_names='None'):
"""

Creates a dataframe with the values for all grid cells downstream of a user-defined point from a netcdf file specified by the user

Usage:
The tool requires the following input arguments:

Inputfile: Input nc file from where the values of the downstream areas will be extracted (e.g., chagrad.nc)
LDDfile: LISFLOOD local drain direction file (ldd.nc)
Location_x: LISFLOOD Uustream area file (upArea.nc)
Location_y: a mask nc file; if not given (default) all cells are considered valid.
coords_names: Coordinates names for lat, lon (in this order as list) used in the the netcdf files (default: 'None'; checks for commonly used names ['x', 'lon', 'rlon'], similar for lat names)
outputfile: Output file containing the extracted values of the downsteam points, their coordinates, and their order in the river network (default: profiler.csv)

Example:
profiler(input_file='changrad.nc', ldd_file='ldd.nc', Location_x=15.21, Location_y=25.14, coords_names=['y' , 'x'])
"""
# ---------------- Read LDD (Note that for EFAS5 there is small shift of values for CH)
LDD = xr.open_dataset(ldd_file)
input_file = xr.open_dataset(input_file)

# ---------------- Auxiliary variables
x_checks = ['lon', 'x', 'rlon']
y_checks = ['lat', 'y', 'rlat']
if coords_names == "None":
x_proj = set(list(LDD.coords)) & set(x_checks)
y_proj = set(list(LDD.coords)) & set(y_checks)

if len(x_proj)!=1 or len(y_proj)!=1:
print('Input dataset coords names for lat/lon are not as expected.')
print(f'The available coords are: {list(LDD.coords)}')
print(f'The checked names are {y_checks} and {x_checks} for lat and lon respectively.')
print('Please use -E argument and give the coords names !with space in between! in order: lan lon')
exit(1)

x_proj = list(x_proj)[0]
y_proj = list(y_proj)[0]
else:
y_proj, x_proj = coords_names

# assign values of coordinates in both dataset based on LDD, in case of small precision issues
input_file = input_file.assign_coords({y_proj: LDD[y_proj].values, x_proj: LDD[x_proj].values})

# ---------------- Process LDD
old_name = [i for i in list(LDD.data_vars) if sorted(LDD[i].dims)==sorted([x_proj, y_proj])]
LDD = LDD.rename({old_name[0]: "ldd"})['ldd'] # only 1 variable complies with above if

# sometimes the masked value is flagged not with NaN (e.g., with cutmaps it was flagged with 0)
# pcr.Ldd takes only integer values 1-9, so any other value needs to be masked
LDD = LDD.fillna(-1) # fill NaN so it can be converted to integer with no issues
LDD = LDD.astype('int')
LDD = LDD.where((LDD>0) & (LDD<10)).fillna(-1)

# convert the xarray to pcraster
LDD = LDD.transpose(y_proj, x_proj) # make sure dims order is as pcraster needs

# ---------------- Set clone map for pcraster
rows, cols = len(LDD[y_proj]), len(LDD[x_proj])
pcr.setclone(rows, cols, 1, 0, 0)

ldd_pcr = pcr.numpy2pcr(pcr.Ldd, LDD.values, -1) # missing values in the np are flagged as -1

# repair the ldd; needed in case ldd is created from cutmaps, so outlet is not flagged with 5 (pit)
ldd_pcr = pcr.lddrepair(ldd_pcr)

# ---------------- Get coordinates index for the point of interest
try:
final_point = LDD.sel({y_proj: y_coord, x_proj: x_coord} , method='nearest', tolerance=0.1)
except:
print('The provided coordinates are not valid, please check again!')
exit()

Y_index = np.argmax(LDD[y_proj].values==final_point[y_proj].values)
X_index = np.argmax(LDD[x_proj].values==final_point[x_proj].values)

# ---------------- Create a mask with the downstream points
profile_mask_pcr = LDD.fillna(0)*0
profile_mask_pcr[Y_index, X_index] = 1
profile_mask_pcr = profile_mask_pcr.astype('int')

profile_mask_pcr = pcr.numpy2pcr(pcr.Boolean, profile_mask_pcr.values, -1) # convert to Boolean with no NaN (-1 is not possible)
profile_mask_pcr = pcr.path(ldd_pcr, profile_mask_pcr) # get the actual paths

profile_mask_np = pcr.pcr2numpy(profile_mask_pcr, 0)
ProfPath = LDD.fillna(0)*0+profile_mask_np
ProfPath = ProfPath.where(ProfPath==1)
ProfPath.name = 'Profile'
total_points = int(ProfPath.sum().values)
print(f'There are {total_points} points from the point of interest until the end of the river network')

# ---------------- Subset data for keeping only the domain of interest, and speeding up the analysis
lats_used = ProfPath.sum(x_proj).where(ProfPath.sum(x_proj)!=0).dropna(y_proj)
lons_used = ProfPath.sum(y_proj).where(ProfPath.sum(y_proj)!=0).dropna(x_proj)

ProfPath = ProfPath.sel({y_proj:lats_used[y_proj].values, x_proj:lons_used[x_proj].values})

input_file = input_file.sel({y_proj: ProfPath[y_proj].values, x_proj: ProfPath[x_proj].values})
LDD = LDD.sel({y_proj: ProfPath[y_proj].values, x_proj: ProfPath[x_proj].values})

# define clone for pcraster for cropped data
rows, cols = len(ProfPath[y_proj]), len(ProfPath[x_proj])
pcr.setclone(rows, cols, 1, 0, 0)

rivers_mask_pcr = pcr.numpy2pcr(pcr.Scalar, ProfPath.values, 0)
ldd_pcr = pcr.numpy2pcr(pcr.Ldd, LDD.values, -1)
ldd_pcr = pcr.lddrepair(ldd_pcr) # need to repair again, because the outlet is (possible) lost due to subsetting

# ---------------- Find the order of the points in the river network
# generate initial data with the river mask
downstream_cells_pcr = rivers_mask_pcr
sum_rivers_pcr = rivers_mask_pcr

# modify data, so that the most downstream point is masked out (otherwise the below loop gives for all points the same value)
downstream_actual_mask_pcr = pcr.downstreamdist(ldd_pcr)
downstream_actual_mask_pcr = pcr.ifthenelse(downstream_actual_mask_pcr == 0, pcr.boolean(0), pcr.boolean(1))
downstream_actual_mask_pcr = pcr.scalar(downstream_actual_mask_pcr)

# Loop {number of cells -1} times and use downstream function to find out the order of the cells on the river flow
print('Calculating the order of the identified points')
for loops in range(total_points-1):
downstream_cells_pcr = pcr.downstream(ldd_pcr, downstream_cells_pcr)
downstream_cells_pcr = downstream_cells_pcr*downstream_actual_mask_pcr
sum_rivers_pcr = sum_rivers_pcr + downstream_cells_pcr

sum_rivers = (ProfPath.fillna(0)*0+pcr.pcr2numpy(sum_rivers_pcr, 0))
sum_rivers.name = 'order'

all_data = xr.merge([input_file, LDD, ProfPath, sum_rivers])
all_data_df = all_data.to_dataframe().reset_index()
all_data_df = all_data_df[~all_data_df.Profile.isna()]
all_data_df = all_data_df.sort_values('order')

return all_data_df


def main():
'function for running from command line'
# ---------------- Read settings
args = getarg()
input_file_arg = args.Inputfile
ldd_file_arg = args.LDDfile
x_coord_arg = args.Location_x
y_coord_arg = args.Location_y
coords_names_arg = args.coordsnames
outputfile_arg = args.outputfilename

data_df = profiler(input_file=input_file_arg, ldd_file=ldd_file_arg, x_coord=x_coord_arg, y_coord=y_coord_arg, coords_names=coords_names_arg)
data_df.to_csv(outputfile_arg)


if __name__ == "__main__":
main()
Binary file added tests/data/profiler/LF_ETRS89_UseCase/changrad.nc
Binary file not shown.
Binary file added tests/data/profiler/LF_ETRS89_UseCase/ldd.nc
Binary file not shown.
33 changes: 33 additions & 0 deletions tests/data/profiler/LF_ETRS89_UseCase/reference/profiler.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
,y,x,changrad,laea,ldd,Profile,order
83,2437500.0,4227500.0,0.00051658927,-2147483647,6.0,1.0,1.0
103,2432500.0,4222500.0,0.00078742416,-2147483647,9.0,1.0,2.0
82,2437500.0,4222500.0,0.0007874241,-2147483647,2.0,1.0,3.0
102,2432500.0,4217500.0,0.0006504565,-2147483647,9.0,1.0,4.0
81,2437500.0,4217500.0,0.0007635837,-2147483647,2.0,1.0,5.0
80,2437500.0,4212500.0,0.0008235728,-2147483647,6.0,1.0,6.0
59,2442500.0,4212500.0,0.0007947716,-2147483647,2.0,1.0,7.0
37,2447500.0,4207500.0,0.0006950562,-2147483647,3.0,1.0,8.0
15,2452500.0,4202500.0,0.0006950562,-2147483647,3.0,1.0,9.0
36,2447500.0,4202500.0,0.00077848596,-2147483647,8.0,1.0,10.0
35,2447500.0,4197500.0,0.0009760228,-2147483647,6.0,1.0,11.0
34,2447500.0,4192500.0,0.0009977153,-2147483647,6.0,1.0,12.0
13,2452500.0,4192500.0,0.0010435288,-2147483647,2.0,1.0,13.0
12,2452500.0,4187500.0,0.0010387731,-2147483647,6.0,1.0,14.0
11,2452500.0,4182500.0,0.0012380448,-2147483647,6.0,1.0,15.0
10,2452500.0,4177500.0,0.0012613096,-2147483647,6.0,1.0,16.0
9,2452500.0,4172500.0,0.0011043664,-2147483647,6.0,1.0,17.0
8,2452500.0,4167500.0,0.0012292609,-2147483647,6.0,1.0,18.0
7,2452500.0,4162500.0,0.0013680093,-2147483647,6.0,1.0,19.0
6,2452500.0,4157500.0,0.0012355647,-2147483647,6.0,1.0,20.0
5,2452500.0,4152500.0,0.0015237641,-2147483647,6.0,1.0,21.0
25,2447500.0,4147500.0,0.0012041999,-2147483647,9.0,1.0,22.0
24,2447500.0,4142500.0,0.0017168315,-2147483647,6.0,1.0,23.0
45,2442500.0,4142500.0,0.0023591225,-2147483647,8.0,1.0,24.0
44,2442500.0,4137500.0,0.0018403421,-2147483647,6.0,1.0,25.0
65,2437500.0,4137500.0,0.0017679577,-2147483647,8.0,1.0,26.0
86,2432500.0,4137500.0,0.0009146803,-2147483647,8.0,1.0,27.0
107,2427500.0,4137500.0,0.00016101284,-2147483647,8.0,1.0,28.0
128,2422500.0,4137500.0,0.0004378503,-2147483647,8.0,1.0,29.0
149,2417500.0,4137500.0,0.0007494286,-2147483647,8.0,1.0,30.0
148,2417500.0,4132500.0,0.00082243764,-2147483647,6.0,1.0,31.0
168,2412500.0,4127500.0,0.00096970645,-2147483647,9.0,1.0,32.0
Binary file not shown.
Binary file added tests/data/profiler/LF_lat_lon_UseCase/ldd.nc
Binary file not shown.
26 changes: 26 additions & 0 deletions tests/data/profiler/LF_lat_lon_UseCase/reference/profiler.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
,lat,lon,changrad,ldd,Profile,order
153,52.65000000000212,-122.45000000000326,0.0008084648,2.0,1.0,1.0
142,52.75000000000212,-122.45000000000326,0.00097223686,2.0,1.0,2.0
131,52.85000000000211,-122.45000000000326,0.0015036418,2.0,1.0,3.0
119,52.950000000002106,-122.55000000000325,0.0019222051,3.0,1.0,4.0
108,53.0500000000021,-122.55000000000325,0.0018676079,2.0,1.0,5.0
96,53.150000000002095,-122.65000000000325,0.0013966679,3.0,1.0,6.0
95,53.150000000002095,-122.75000000000324,0.0008484858,6.0,1.0,7.0
83,53.25000000000209,-122.85000000000323,0.002747498,3.0,1.0,8.0
72,53.35000000000208,-122.85000000000323,0.0030551292,2.0,1.0,9.0
73,53.35000000000208,-122.75000000000324,0.00051982753,4.0,1.0,10.0
74,53.35000000000208,-122.65000000000325,0.0004815376,4.0,1.0,11.0
63,53.45000000000208,-122.65000000000325,0.00052186823,2.0,1.0,12.0
51,53.55000000000207,-122.75000000000324,0.0012226264,3.0,1.0,13.0
41,53.650000000002066,-122.65000000000325,0.0009240197,1.0,1.0,14.0
29,53.75000000000206,-122.75000000000324,0.00074306637,3.0,1.0,15.0
18,53.850000000002055,-122.75000000000324,0.0006388169,2.0,1.0,16.0
7,53.95000000000205,-122.75000000000324,0.00071776606,2.0,1.0,17.0
6,53.95000000000205,-122.85000000000323,0.001410268,6.0,1.0,18.0
5,53.95000000000205,-122.95000000000323,0.0012500631,6.0,1.0,19.0
15,53.850000000002055,-123.05000000000322,0.0012879174,9.0,1.0,20.0
14,53.850000000002055,-123.15000000000322,0.000889672,6.0,1.0,21.0
3,53.95000000000205,-123.15000000000322,0.000889672,2.0,1.0,22.0
2,53.95000000000205,-123.25000000000321,0.00088967197,6.0,1.0,23.0
1,53.95000000000205,-123.3500000000032,0.0007190692,6.0,1.0,24.0
0,53.95000000000205,-123.4500000000032,0.0005076786,6.0,1.0,25.0
60 changes: 60 additions & 0 deletions tests/test_profiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import os
import shutil
import pandas as pd
import xarray as xr

from lisfloodutilities.profiler.profiler import profiler


def mk_path_out(p):
'make folder for storing output data'
path_out = os.path.join(os.path.dirname(__file__), p)
if os.path.exists(path_out):
shutil.rmtree(path_out, ignore_errors=True)
os.mkdir(path_out)


class TestProfiler():

case_dir = os.path.join(os.path.dirname(__file__), 'data', 'profiler')

def run(self, x_coord, y_coord, coords_names, case_name):
'generate the profiler data'
# setting
self.out_path_ref = os.path.join(self.case_dir, case_name, 'reference')
self.out_path_run = os.path.join(self.case_dir, case_name, 'out')
mk_path_out(self.out_path_run)

input_file = os.path.join(self.case_dir, case_name, 'changrad.nc')
ldd_file = os.path.join(self.case_dir, case_name, 'ldd.nc')
outputfile = os.path.join(self.out_path_run, 'profiler.csv')

profiler(input_file, ldd_file, x_coord, y_coord, coords_names='None')

# generate the profiler
profiler_data = profiler(input_file, ldd_file, x_coord, y_coord, coords_names)
profiler_data.to_csv(outputfile)

# compare the generated mask with the reference one
ref_file = self.out_path_ref+'/profiler.csv'
reference = pd.read_csv(ref_file)
out_file = self.out_path_run+'/profiler.csv'
generated = pd.read_csv(out_file)
all_equal = reference.compare(generated)

if len(all_equal)!=0:
fail_message = f'Test profiler generation for {case_name} failed. Please check differences between'
fail_message += f' the generated mask "{out_file}" and the expected mask "{ref_file}".'
assert all_equal, fail_message
else:
# if equal just delete the out folder
shutil.rmtree(self.out_path_run, ignore_errors=True)


class TestProfilers(TestProfiler):

def test_mctrivers_etrs89(self):
self.run(4127500, 2412500, 'None', 'LF_ETRS89_UseCase')

def test_mctrivers_latlon(self):
self.run(-123.45, 53.95, ['lat', 'lon'], 'LF_lat_lon_UseCase')