diff --git a/.gitignore b/.gitignore index 546468914..70a481901 100644 --- a/.gitignore +++ b/.gitignore @@ -125,3 +125,6 @@ pygeoapi/db.sqlite3 # Pycharm project files .idea + +# TRASH +TRASH \ No newline at end of file diff --git a/README.md b/README.md index 2f4441e11..d8bfb4311 100644 --- a/README.md +++ b/README.md @@ -7,3 +7,16 @@ [pygeoapi](https://pygeoapi.io) is a Python server implementation of the [OGC API](https://ogcapi.ogc.org) suite of standards. The project emerged as part of the next generation OGC API efforts in 2018 and provides the capability for organizations to deploy a RESTful OGC API endpoint using OpenAPI, GeoJSON, and HTML. pygeoapi is [open source](https://opensource.org/) and released under an [MIT license](https://github.com/geopython/pygeoapi/blob/master/LICENSE.md). Please read the docs at [https://docs.pygeoapi.io](https://docs.pygeoapi.io) for more information. + + +## AquaInfra case + +Please add these steps when installing pygeoapi for the AquaInfra case, as they are needed to run the `snap_to_network` process! + +* Please install `geojson` in the virtual environment that runs the service (most likely running `cd /home/.../pygeoapi/pygeoapi; + . ../bin/activate; pip install geojson` will do the job). +* Please create a directory for the input data, e.g. `/home/.../work/pygeo/data` +* Into that directory, add a directory `basin_481051` and into that, put the two input rasters `segment_481051.tif` and `accumulation_481051.tif`. The same for any other river basin you want to support (`481051` being the drainage basin id). +* Then, tell pygeoapi where to find the data, by executing: `export PYGEOAPI_DATA_DIR='/home/.../work/pygeo/data'`. + +(Merret, 2023-10-18) diff --git a/pygeoapi-config.yml b/pygeoapi-config.yml index 97e3f5f13..b821dca72 100644 --- a/pygeoapi-config.yml +++ b/pygeoapi-config.yml @@ -38,7 +38,6 @@ server: languages: # First language is the default language - en-US - - fr-CA # cors: true pretty_print: true limit: 10 @@ -55,250 +54,44 @@ server: # ogc_schemas_location: /opt/schemas.opengis.net logging: - level: ERROR + level: DEBUG #logfile: /tmp/pygeoapi.log metadata: identification: title: - en: pygeoapi default instance - fr: instance par défaut de pygeoapi + en: AquaInfra processing services description: - en: pygeoapi provides an API to geospatial data - fr: pygeoapi fournit une API aux données géospatiales + en: An OGC conformant API to AquaInfra processing services keywords: en: - geospatial - data - api - fr: - - géospatiale - - données - - api keywords_type: theme terms_of_service: https://creativecommons.org/licenses/by/4.0/ url: https://example.org license: - name: CC-BY 4.0 license + name: CC-BY 4.0 license (TODO check) url: https://creativecommons.org/licenses/by/4.0/ provider: - name: Organization Name - url: https://pygeoapi.io + name: Leibniz Institute for Freshwater Ecology and Inland Fisheries (FVB-IGB), Berlin + url: https://igb-berlin.de contact: - name: Lastname, Firstname - position: Position Title - address: Mailing Address - city: City - stateorprovince: Administrative Area - postalcode: Zip or Postal Code - country: Country - phone: +xx-xxx-xxx-xxxx - fax: +xx-xxx-xxx-xxxx - email: you@example.org - url: Contact URL - hours: Mo-Fr 08:00-17:00 - instructions: During hours of service. Off on weekends. + name: Buurman, Merret + position: Scientific Staff + address: Müggelseedamm 310 + city: Berlin + stateorprovince: Berlin + postalcode: 12587 + country: Germany + email: merret.buurman@igb-berlin.de + url: https://glowabio.org/ role: pointOfContact resources: - obs: - type: collection - title: Observations - description: My cool observations - keywords: - - observations - - monitoring - linked-data: - context: - - datetime: https://schema.org/DateTime - - vocab: https://example.com/vocab# - stn_id: "vocab:stn_id" - value: "vocab:value" - links: - - type: text/csv - rel: canonical - title: data - href: https://github.com/mapserver/mapserver/blob/branch-7-0/msautotest/wxs/data/obs.csv - hreflang: en-US - - type: text/csv - rel: alternate - title: data - href: https://raw.githubusercontent.com/mapserver/mapserver/branch-7-0/msautotest/wxs/data/obs.csv - hreflang: en-US - extents: - spatial: - bbox: [-180,-90,180,90] - crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 - temporal: - begin: 2000-10-30T18:24:39Z - end: 2007-10-30T08:57:29Z - providers: - - type: feature - name: CSV - data: tests/data/obs.csv - id_field: id - geometry: - x_field: long - y_field: lat - - lakes: - type: collection - title: - en: Large Lakes - fr: Grands Lacs - description: - en: lakes of the world, public domain - fr: lacs du monde, domaine public - keywords: - en: - - lakes - - water bodies - fr: - - lacs - - plans d'eau - links: - - type: text/html - rel: canonical - title: information - href: http://www.naturalearthdata.com/ - hreflang: en-US - extents: - spatial: - bbox: [-180,-90,180,90] - crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 - temporal: - begin: 2011-11-11T11:11:11Z - end: null # or empty (either means open ended) - providers: - - type: feature - name: GeoJSON - data: tests/data/ne_110m_lakes.geojson - id_field: id - title_field: name - - mapserver_world_map: - type: collection - title: MapServer demo WMS world map - description: MapServer demo WMS world map - keywords: - - MapServer - - world map - links: - - type: text/html - rel: canonical - title: information - href: https://demo.mapserver.org - hreflang: en-US - extents: - spatial: - bbox: [-180,-90,180,90] - crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 - providers: - - type: map - name: WMSFacade - data: https://demo.mapserver.org/cgi-bin/msautotest - options: - layer: world_latlong - style: default - format: - name: png - mimetype: image/png - - gdps-temperature: - type: collection - title: Global Deterministic Prediction System sample - description: Global Deterministic Prediction System sample - keywords: - - gdps - - global - extents: - spatial: - bbox: [-180,-90,180,90] - crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 - links: - - type: text/html - rel: canonical - title: information - href: https://eccc-msc.github.io/open-data/msc-data/nwp_gdps/readme_gdps_en - hreflang: en-CA - providers: - - type: coverage - name: rasterio - data: tests/data/CMC_glb_TMP_TGL_2_latlon.15x.15_2020081000_P000.grib2 - options: - DATA_ENCODING: COMPLEX_PACKING - format: - name: GRIB - mimetype: application/x-grib2 - - test-data: - type: stac-collection - title: pygeoapi test data - description: pygeoapi test data - keywords: - - poi - - portugal - links: - - type: text/html - rel: canonical - title: information - href: https://github.com/geopython/pygeoapi/tree/master/tests/data - hreflang: en-US - extents: - spatial: - bbox: [-180,-90,180,90] - crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 - providers: - - type: stac - name: FileSystem - data: tests/data - file_types: - - .gpkg - - .sqlite - - .csv - - .grib2 - - .tif - - .shp - - canada-metadata: - type: collection - title: - en: Open Canada sample data - fr: Exemple de donn\u00e9es Canada Ouvert - description: - en: Sample metadata records from open.canada.ca - fr: Exemples d'enregistrements de m\u00e9tadonn\u00e9es sur ouvert.canada.ca - keywords: - en: - - canada - - open data - fr: - - canada - - donn\u00e9es ouvertes - links: - - type: text/html - rel: canonical - title: information - href: https://open.canada.ca/en/open-data - hreflang: en-CA - - type: text/html - rel: alternate - title: informations - href: https://ouvert.canada.ca/fr/donnees-ouvertes - hreflang: fr-CA - extents: - spatial: - bbox: [-180,-90,180,90] - crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 - providers: - - type: record - name: TinyDBCatalogue - data: tests/data/open.canada.ca/sample-records.tinydb - id_field: externalId - time_field: recordCreated - title_field: title - hello-world: + snap-to-network: type: process processor: - name: HelloWorld + name: SnapToNetwork diff --git a/pygeoapi-config.yml.original b/pygeoapi-config.yml.original new file mode 100644 index 000000000..97e3f5f13 --- /dev/null +++ b/pygeoapi-config.yml.original @@ -0,0 +1,304 @@ +# ================================================================= +# +# Authors: Tom Kralidis +# +# Copyright (c) 2020 Tom Kralidis +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= + +server: + bind: + host: 0.0.0.0 + port: 5000 + url: http://localhost:5000 + mimetype: application/json; charset=UTF-8 + encoding: utf-8 + gzip: false + languages: + # First language is the default language + - en-US + - fr-CA + # cors: true + pretty_print: true + limit: 10 + # templates: + # path: /path/to/Jinja2/templates + # static: /path/to/static/folder # css/js/img + map: + url: https://tile.openstreetmap.org/{z}/{x}/{y}.png + attribution: '© OpenStreetMap contributors' +# manager: +# name: TinyDB +# connection: /tmp/pygeoapi-process-manager.db +# output_dir: /tmp/ + # ogc_schemas_location: /opt/schemas.opengis.net + +logging: + level: ERROR + #logfile: /tmp/pygeoapi.log + +metadata: + identification: + title: + en: pygeoapi default instance + fr: instance par défaut de pygeoapi + description: + en: pygeoapi provides an API to geospatial data + fr: pygeoapi fournit une API aux données géospatiales + keywords: + en: + - geospatial + - data + - api + fr: + - géospatiale + - données + - api + keywords_type: theme + terms_of_service: https://creativecommons.org/licenses/by/4.0/ + url: https://example.org + license: + name: CC-BY 4.0 license + url: https://creativecommons.org/licenses/by/4.0/ + provider: + name: Organization Name + url: https://pygeoapi.io + contact: + name: Lastname, Firstname + position: Position Title + address: Mailing Address + city: City + stateorprovince: Administrative Area + postalcode: Zip or Postal Code + country: Country + phone: +xx-xxx-xxx-xxxx + fax: +xx-xxx-xxx-xxxx + email: you@example.org + url: Contact URL + hours: Mo-Fr 08:00-17:00 + instructions: During hours of service. Off on weekends. + role: pointOfContact + +resources: + obs: + type: collection + title: Observations + description: My cool observations + keywords: + - observations + - monitoring + linked-data: + context: + - datetime: https://schema.org/DateTime + - vocab: https://example.com/vocab# + stn_id: "vocab:stn_id" + value: "vocab:value" + links: + - type: text/csv + rel: canonical + title: data + href: https://github.com/mapserver/mapserver/blob/branch-7-0/msautotest/wxs/data/obs.csv + hreflang: en-US + - type: text/csv + rel: alternate + title: data + href: https://raw.githubusercontent.com/mapserver/mapserver/branch-7-0/msautotest/wxs/data/obs.csv + hreflang: en-US + extents: + spatial: + bbox: [-180,-90,180,90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 2000-10-30T18:24:39Z + end: 2007-10-30T08:57:29Z + providers: + - type: feature + name: CSV + data: tests/data/obs.csv + id_field: id + geometry: + x_field: long + y_field: lat + + lakes: + type: collection + title: + en: Large Lakes + fr: Grands Lacs + description: + en: lakes of the world, public domain + fr: lacs du monde, domaine public + keywords: + en: + - lakes + - water bodies + fr: + - lacs + - plans d'eau + links: + - type: text/html + rel: canonical + title: information + href: http://www.naturalearthdata.com/ + hreflang: en-US + extents: + spatial: + bbox: [-180,-90,180,90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 2011-11-11T11:11:11Z + end: null # or empty (either means open ended) + providers: + - type: feature + name: GeoJSON + data: tests/data/ne_110m_lakes.geojson + id_field: id + title_field: name + + mapserver_world_map: + type: collection + title: MapServer demo WMS world map + description: MapServer demo WMS world map + keywords: + - MapServer + - world map + links: + - type: text/html + rel: canonical + title: information + href: https://demo.mapserver.org + hreflang: en-US + extents: + spatial: + bbox: [-180,-90,180,90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + providers: + - type: map + name: WMSFacade + data: https://demo.mapserver.org/cgi-bin/msautotest + options: + layer: world_latlong + style: default + format: + name: png + mimetype: image/png + + gdps-temperature: + type: collection + title: Global Deterministic Prediction System sample + description: Global Deterministic Prediction System sample + keywords: + - gdps + - global + extents: + spatial: + bbox: [-180,-90,180,90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + links: + - type: text/html + rel: canonical + title: information + href: https://eccc-msc.github.io/open-data/msc-data/nwp_gdps/readme_gdps_en + hreflang: en-CA + providers: + - type: coverage + name: rasterio + data: tests/data/CMC_glb_TMP_TGL_2_latlon.15x.15_2020081000_P000.grib2 + options: + DATA_ENCODING: COMPLEX_PACKING + format: + name: GRIB + mimetype: application/x-grib2 + + test-data: + type: stac-collection + title: pygeoapi test data + description: pygeoapi test data + keywords: + - poi + - portugal + links: + - type: text/html + rel: canonical + title: information + href: https://github.com/geopython/pygeoapi/tree/master/tests/data + hreflang: en-US + extents: + spatial: + bbox: [-180,-90,180,90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + providers: + - type: stac + name: FileSystem + data: tests/data + file_types: + - .gpkg + - .sqlite + - .csv + - .grib2 + - .tif + - .shp + + canada-metadata: + type: collection + title: + en: Open Canada sample data + fr: Exemple de donn\u00e9es Canada Ouvert + description: + en: Sample metadata records from open.canada.ca + fr: Exemples d'enregistrements de m\u00e9tadonn\u00e9es sur ouvert.canada.ca + keywords: + en: + - canada + - open data + fr: + - canada + - donn\u00e9es ouvertes + links: + - type: text/html + rel: canonical + title: information + href: https://open.canada.ca/en/open-data + hreflang: en-CA + - type: text/html + rel: alternate + title: informations + href: https://ouvert.canada.ca/fr/donnees-ouvertes + hreflang: fr-CA + extents: + spatial: + bbox: [-180,-90,180,90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + providers: + - type: record + name: TinyDBCatalogue + data: tests/data/open.canada.ca/sample-records.tinydb + id_field: externalId + time_field: recordCreated + title_field: title + + hello-world: + type: process + processor: + name: HelloWorld diff --git a/pygeoapi/plugin.py b/pygeoapi/plugin.py index 632d908fd..23d4527aa 100644 --- a/pygeoapi/plugin.py +++ b/pygeoapi/plugin.py @@ -66,7 +66,8 @@ 'CSV': 'pygeoapi.formatter.csv_.CSVFormatter' }, 'process': { - 'HelloWorld': 'pygeoapi.process.hello_world.HelloWorldProcessor' + 'HelloWorld': 'pygeoapi.process.hello_world.HelloWorldProcessor', + 'SnapToNetwork': 'pygeoapi.process.snap_to_network_pygeo.SnapToNetworkProcessor' }, 'process_manager': { 'Dummy': 'pygeoapi.process.manager.dummy.DummyManager', diff --git a/pygeoapi/plugin.py.original b/pygeoapi/plugin.py.original new file mode 100644 index 000000000..632d908fd --- /dev/null +++ b/pygeoapi/plugin.py.original @@ -0,0 +1,122 @@ +# ================================================================= +# +# Authors: Tom Kralidis +# +# Copyright (c) 2023 Tom Kralidis +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= +"""Plugin loader""" + +import importlib +import logging +from typing import Any + +LOGGER = logging.getLogger(__name__) + +#: Loads provider plugins to be used by pygeoapi,\ +#: formatters and processes available +PLUGINS = { + 'provider': { + 'AzureBlobStorage': 'pygeoapi.provider.azure_.AzureBlobStorageProvider', # noqa + 'CSV': 'pygeoapi.provider.csv_.CSVProvider', + 'Elasticsearch': 'pygeoapi.provider.elasticsearch_.ElasticsearchProvider', # noqa + 'ElasticsearchCatalogue': 'pygeoapi.provider.elasticsearch_.ElasticsearchCatalogueProvider', # noqa + 'ERDDAPTabledap': 'pygeoapi.provider.erddap.TabledapProvider', + 'ESRI': 'pygeoapi.provider.esri.ESRIServiceProvider', + 'FileSystem': 'pygeoapi.provider.filesystem.FileSystemProvider', + 'GeoJSON': 'pygeoapi.provider.geojson.GeoJSONProvider', + 'Hateoas': 'pygeoapi.provider.hateoas.HateoasProvider', + 'MapScript': 'pygeoapi.provider.mapscript_.MapScriptProvider', + 'MongoDB': 'pygeoapi.provider.mongo.MongoProvider', + 'MVT': 'pygeoapi.provider.mvt.MVTProvider', + 'OracleDB': 'pygeoapi.provider.oracle.OracleProvider', + 'OGR': 'pygeoapi.provider.ogr.OGRProvider', + 'PostgreSQL': 'pygeoapi.provider.postgresql.PostgreSQLProvider', + 'rasterio': 'pygeoapi.provider.rasterio_.RasterioProvider', + 'SensorThings': 'pygeoapi.provider.sensorthings.SensorThingsProvider', + 'SQLiteGPKG': 'pygeoapi.provider.sqlite.SQLiteGPKGProvider', + 'Socrata': 'pygeoapi.provider.socrata.SODAServiceProvider', + 'TinyDBCatalogue': 'pygeoapi.provider.tinydb_.TinyDBCatalogueProvider', + 'WMSFacade': 'pygeoapi.provider.wms_facade.WMSFacadeProvider', + 'xarray': 'pygeoapi.provider.xarray_.XarrayProvider', + 'xarray-edr': 'pygeoapi.provider.xarray_edr.XarrayEDRProvider' + }, + 'formatter': { + 'CSV': 'pygeoapi.formatter.csv_.CSVFormatter' + }, + 'process': { + 'HelloWorld': 'pygeoapi.process.hello_world.HelloWorldProcessor' + }, + 'process_manager': { + 'Dummy': 'pygeoapi.process.manager.dummy.DummyManager', + 'MongoDB': 'pygeoapi.process.manager.mongodb_.MongoDBManager', + 'TinyDB': 'pygeoapi.process.manager.tinydb_.TinyDBManager' + } +} + + +def load_plugin(plugin_type: str, plugin_def: dict) -> Any: + """ + loads plugin by name + + :param plugin_type: type of plugin (provider, formatter) + :param plugin_def: plugin definition + + :returns: plugin object + """ + + name = plugin_def['name'] + + if plugin_type not in PLUGINS.keys(): + msg = f'Plugin type {plugin_type} not found' + LOGGER.exception(msg) + raise InvalidPluginError(msg) + + plugin_list = PLUGINS[plugin_type] + + LOGGER.debug(f'Plugins: {plugin_list}') + + if '.' not in name and name not in plugin_list.keys(): + msg = f'Plugin {name} not found' + LOGGER.exception(msg) + raise InvalidPluginError(msg) + + if '.' in name: # dotted path + packagename, classname = name.rsplit('.', 1) + else: # core formatter + packagename, classname = plugin_list[name].rsplit('.', 1) + + LOGGER.debug(f'package name: {packagename}') + LOGGER.debug(f'class name: {classname}') + + module = importlib.import_module(packagename) + class_ = getattr(module, classname) + plugin = class_(plugin_def) + + return plugin + + +class InvalidPluginError(Exception): + """Invalid plugin""" + pass diff --git a/pygeoapi/process/snap_to_network.sh b/pygeoapi/process/snap_to_network.sh new file mode 100755 index 000000000..649ceb2db --- /dev/null +++ b/pygeoapi/process/snap_to_network.sh @@ -0,0 +1,271 @@ +#! /bin/bash + +# Source: +# https://github.com/glowabio/hydrographr/blob/main/inst/sh/snap_to_network.sh + +## file (e.g. txt or csv) that has been generated at the beginning +## of the R function, based on the data.frame table the user provided +export DATA=$1 + +## name of the unique id column +export ID=$2 + +## names of the lon and lat columns +export LON=$3 +export LAT=$4 + +## stream raster file (e.g. .tif file) +export STR=$5 + +## accumulation raster files +export ACC=$6 + +## What to calculate +export METHOD=$7 + +## radius distance +export rdist=$8 + +## accumulation threshold +export acct=$9 + +## Full path to output snap_points.txt file +export SNAP=${10} + +## Temporary folder +export DIR=${11} + + + +## Set random string +export RAND_STRING=$(xxd -l 8 -c 32 -p < /dev/random) + +## save name of file without extension +#b=$(echo $DAT | awk -F"." '{print $1}') + +# Note: Tmp output from R is already a .csv +# if the file is not csv, add the comma and make it .csv +#if [ "${DAT: -4}" != ".csv" ] +#then +# cat $DAT | tr -s '[:blank:]' ',' > ${b}.csv +# export DATC=$(echo ${b}.csv) +#fi + +## make the file a gpkg -- works only if EPSG is 4326 (WGS 84) +# ogr2ogr -f "GPKG" -overwrite -nln ref_points -nlt POINT -a_srs EPSG:4326 \ +# $DIR/ref_points_${RAND_STRING}.gpkg $DATA -oo X_POSSIBLE_NAMES=$LON \ +# -oo Y_POSSIBLE_NAMES=$LAT -oo AUTODETECT_TYPE=YES + +# how many points originally (save as reference) +# export op=$(ogrinfo -so -al $DIR/ref_points_${RAND_STRING}.gpkg \ +# | awk '/Feature/ {print $3}') + +# how many points originally (save as reference) +export op=$(tail -n +2 $DATA | wc -l) + +## do the snapping in GRASS +grass -f --gtext --tmp-location $STR <<'EOF' + +# read stream raster file +r.in.gdal input=$STR output=stream + +# read reference points +# v.in.ogr --o input=$DIR/ref_points_${RAND_STRING}.gpkg layer=ref_points output=ref_points \ +# type=point key=$ID + +v.in.ascii -z in=$DATA out=ref_points separator=comma \ + cat=1 x=2 y=3 z=3 skip=1 + +# if not then identify id of those left out + +if [ "$METHOD" = "distance" ] +then + + r.stream.snap --o input=ref_points output=snap_points stream_rast=stream \ + radius=$rdist + + # are all the original points in there? + np=$(v.info snap_points | awk '/points:/{print $5}') + + # if not, then identify the ids of the points left out (out of extention of + # stream raster file) + if [[ $op != $np ]] + then + lo=($(comm -3 \ + <(v.report -c map=ref_points option=coor | awk -F"|" '{print $1}') \ + <(v.report -c map=snap_points option=coor | awk -F"|" '{print $1}'))) + + for i in ${lo[@]} + do + echo "$i,out-bbox,out-bbox,,NA" > $DIR/stream_ID_${RAND_STRING}_d_tmp.txt + done + fi + + r.what --o -v map=stream points=snap_points separator=comma \ + null_value=NA >> $DIR/stream_ID_${RAND_STRING}_d_tmp.txt + + echo "lon_snap_dist lat_snap_dist occu_id" > $DIR/snap_coords_${RAND_STRING}_d.txt + + if [[ "${#lo[@]}" -gt 0 ]] + then + for i in ${lo[@]} + do + echo "out-bbox out-bbox $i" >> $DIR/snap_coords_${RAND_STRING}_d.txt + done + fi + + v.out.ascii -c input=snap_points separator=space | awk 'NR > 1' \ + >> $DIR/snap_coords_${RAND_STRING}_d.txt + + cat $DATA | tr -s ',' ' ' > $DIR/coords_${RAND_STRING}.txt + + paste -d" " \ + <(sort -gk1n $DIR/coords_${RAND_STRING}.txt) \ + <(sort -gk3n $DIR/snap_coords_${RAND_STRING}_d.txt) \ + <(printf "%s\n" subc_id_snap_dist $(awk -F, '{print $1, $5}' $DIR/stream_ID_${RAND_STRING}_d_tmp.txt | sort -gk1n | awk '{print $2}')) \ + > $DIR/final_tmp_${RAND_STRING}.txt + + awk '{print $1, $2, $3, $4, $5, $7}' \ + $DIR/final_tmp_${RAND_STRING}.txt \ + > $SNAP + + rm $DIR/snap_coords_${RAND_STRING}_d.txt $DIR/stream_ID_${RAND_STRING}_d_tmp.txt \ + $DIR/final_tmp_${RAND_STRING}.txt +fi +# +# +if [ "$METHOD" = "accumulation" ] +then + r.in.gdal input=$ACC output=accum + r.stream.snap --o input=ref_points output=snap_points stream_rast=stream \ + radius=$rdist accumulation=accum threshold=$acct + + # are all the original points in there? + np=$(v.info snap_points | awk '/points:/{print $5}') + + # if not, then identify the ids of the points left out (out of extention of + # stream raster file) + if [[ $op != $np ]] + then + lo=($(comm -3 \ + <(v.report -c map=ref_points option=coor | awk -F"|" '{print $1}') \ + <(v.report -c map=snap_points option=coor | awk -F"|" '{print $1}'))) + + for i in ${lo[@]} + do + echo "$i,out-bbox,out-bbox,,NA" > $DIR/stream_ID_${RAND_STRING}_a_tmp.txt + done + fi + + r.what --o -v map=stream points=snap_points separator=comma \ + null_value=NA >> $DIR/stream_ID_${RAND_STRING}_a_tmp.txt + + echo "lon_snap_accu lat_snap_accu occu_id" > $DIR/snap_coords_${RAND_STRING}_a.txt + + if [[ "${#lo[@]}" -gt 0 ]] + then + for i in ${lo[@]} + do + echo "out-bbox out-bbox $i" >> $DIR/snap_coords_${RAND_STRING}_a.txt + done + fi + + v.out.ascii -c input=snap_points separator=space | awk 'NR > 1' \ + >> $DIR/snap_coords_${RAND_STRING}_a.txt + + cat $DATA | tr -s ',' ' ' > $DIR/coords_${RAND_STRING}.txt + + paste -d" " \ + <(sort -gk1n $DIR/coords_${RAND_STRING}.txt) \ + <(sort -gk3n $DIR/snap_coords_${RAND_STRING}_a.txt) \ + <(printf "%s\n" subc_id_snap_accu $(awk -F, '{print $1, $5}' $DIR/stream_ID_${RAND_STRING}_a_tmp.txt | sort -gk1n | awk '{print $2}')) \ + > $DIR/final_tmp_${RAND_STRING}.txt + + awk '{print $1, $2, $3, $4, $5, $7}' \ + $DIR/final_tmp_${RAND_STRING}.txt \ + > $SNAP + + rm $DIR/snap_coords_${RAND_STRING}_*.txt $DIR/stream_ID_${RAND_STRING}_*.txt \ + $DIR/final_tmp_${RAND_STRING}.txt +fi + + +if [ "$METHOD" = "both" ] +then + r.stream.snap --o input=ref_points output=snap_points_d stream_rast=stream \ + radius=$rdist + + # are all the original points in there? + np=$(v.info snap_points_d | awk '/points:/{print $5}') + + # if not, then identify the ids of the points left out (out of extention of + # stream raster file) + if [[ $op != $np ]] + then + lo=($(comm -3 \ + <(v.report -c map=ref_points option=coor | awk -F"|" '{print $1}') \ + <(v.report -c map=snap_points_d option=coor | awk -F"|" '{print $1}'))) + + for i in ${lo[@]} + do + echo "$i,out-bbox,out-bbox,,NA" > $DIR/stream_ID_${RAND_STRING}_d_tmp.txt + echo "$i,out-bbox,out-bbox,,NA" > $DIR/stream_ID_${RAND_STRING}_a_tmp.txt + done + fi + + r.what --o -v map=stream points=snap_points_d separator=comma \ + null_value=NA >> $DIR/stream_ID_${RAND_STRING}_d_tmp.txt + + + r.in.gdal input=$ACC output=accum + r.stream.snap --o input=ref_points output=snap_points_a stream_rast=stream \ + radius=$rdist accumulation=accum threshold=$acct + + r.what --o -v map=stream points=snap_points_a separator=comma \ + null_value=NA >> $DIR/stream_ID_${RAND_STRING}_a_tmp.txt + + + echo "lon_snap_dist lat_snap_dist occu_id" > $DIR/snap_coords_${RAND_STRING}_d.txt + echo "lon_snap_accu lat_snap_accu occu_id" > $DIR/snap_coords_${RAND_STRING}_a.txt + + if [[ "${#lo[@]}" -gt 0 ]] + then + for i in ${lo[@]} + do + echo "out-bbox out-bbox $i" >> $DIR/snap_coords_${RAND_STRING}_d.txt + echo "out-bbox out-bbox $i" >> $DIR/snap_coords_${RAND_STRING}_a.txt + done + fi + + + v.out.ascii -c input=snap_points_d separator=space | awk 'NR > 1' \ + >> $DIR/snap_coords_${RAND_STRING}_d.txt + + + v.out.ascii -c input=snap_points_a separator=space | awk 'NR > 1' \ + >> $DIR/snap_coords_${RAND_STRING}_a.txt + + + cat $DATA | tr -s ',' ' ' > $DIR/coords_${RAND_STRING}.txt + + paste -d" " \ + <(sort -gk1n $DIR/coords_${RAND_STRING}.txt) \ + <(sort -gk3n $DIR/snap_coords_${RAND_STRING}_d.txt) \ + <(printf "%s\n" subc_id_snap_dist $(awk -F, '{print $1, $5}' $DIR/stream_ID_${RAND_STRING}_d_tmp.txt | sort -gk1n | awk '{print $2}')) \ + <(sort -gk3n $DIR/snap_coords_${RAND_STRING}_a.txt) \ + <(printf "%s\n" subc_id_snap_accu $(awk -F, '{print $1, $5}' $DIR/stream_ID_${RAND_STRING}_a_tmp.txt | sort -gk1n | awk '{print $2}')) \ + > $DIR/final_tmp_${RAND_STRING}.txt #$SNAP + + awk '{print $1, $2, $3, $4, $5, $7, $8, $9, $11}' \ + $DIR/final_tmp_${RAND_STRING}.txt \ + > $SNAP + + + rm $DIR/snap_coords_${RAND_STRING}_*.txt $DIR/stream_ID_${RAND_STRING}_*.txt \ + $DIR/final_tmp_${RAND_STRING}.txt +fi + + +EOF + +# rm $DIR/ref_points_${RAND_STRING}.gpkg diff --git a/pygeoapi/process/snap_to_network_pygeo.py b/pygeoapi/process/snap_to_network_pygeo.py new file mode 100644 index 000000000..d5aed41c2 --- /dev/null +++ b/pygeoapi/process/snap_to_network_pygeo.py @@ -0,0 +1,299 @@ + +import logging +from pygeoapi.process.base import BaseProcessor, ProcessorExecuteError +LOGGER = logging.getLogger(__name__) +# TODO Improve logging! + +import argparse +import subprocess +import geojson +import os +import sys +import tempfile +import random +import string + + +''' + +Written by Merret on 2023-10-06 - 2023-10-11 +Just for testing purposes - licenses not checked yet! + +It does the same as snap_to_network.R, +see https://github.com/glowabio/hydrographr/blob/HEAD/R/snap_to_network.R +Which calls: +https://github.com/glowabio/hydrographr/blob/a818de3e9165d108034fd342c2a85e0a3c7ec7ca/inst/sh/snap_to_network.sh +Done to to understand what that one is doing exactly! + +Documentation: +https://glowabio.github.io/hydrographr/reference/snap_to_network.html + +How to call this, once deployed in pygeoapi? +curl -X POST "http://localhost:5000/processes/snap-to-network/execution" -H "Content-Type: application/json" -d "{\"inputs\":{\"method\": \"distance\", \"basin_id\": \"481051\", \"accumulation\": 0.5, \"distance\": 500, \"coordinate_multipoint\":{\"coordinates\": [[-44.885825, -17.25355], [-43.595833, -13.763611]], \"type\": \"MultiPoint\"}}}" + + +How to add this service to an existing pygeoapi instance? +* Add to plugins.py +* Add to pygeoapi-config.yml +* Add code into directory process (bash scripts need to be executable!) +* Install any python dependencies into the pygeoapo virtualenv! + +Locally, the data dir is: +export PYGEOAPI_DATA_DIR="/home/mbuurman/work/hydro_casestudy_saofra/data" + +''' + + + +#: Process metadata and description +PROCESS_METADATA = { + 'version': '0.0.1', + 'id': 'snap-to-network', + 'title': {'en': 'Snap to network'}, + 'description': { + 'en': 'Trying to expose snap_to_network from hydrographr package as ogc process: ' + 'https://glowabio.github.io/hydrographr/reference/snap_to_network.html.' + }, + 'jobControlOptions': ['sync-execute', 'async-execute'], + 'keywords': ['hydrographr', 'example', 'echo'], + 'links': [{ + 'type': 'text/html', + 'rel': 'about', + 'title': 'information', + 'href': 'https://example.org/process', + 'hreflang': 'en-US' + }], + 'inputs': { + 'coordinate_multipoint': { + 'title': 'Coordinate Multipoint', + 'description': 'Dunno exactly.', + 'schema': { + 'type': 'object', + 'contentMediaType': 'application/json' + }, + 'minOccurs': 1, + 'maxOccurs': 1, + 'metadata': None, # TODO how to use the Metadata item? + 'keywords': ['coordinates'] + }, + 'method': { + 'title': 'Method', + 'description': '"distance", "accumulation", or "both". Defines if the points are snapped using the distance or flow accumulation.', + 'schema': {'type': 'string'}, + 'minOccurs': 1, + 'maxOccurs': 1, + 'metadata': None, + 'keywords': ['snapping'] + }, + 'distance': { + 'title': 'Distance', + 'description': 'Maximum radius in map pixels. The points will be snapped to the next stream within this radius.', + 'schema': {'type': 'string'}, + 'minOccurs': 1, + 'maxOccurs': 1, + 'metadata': None, + 'keywords': ['bla'] + }, + 'accumulation': { + 'title': 'Accumulation', + 'description': 'Minimum flow accumulation. Points will be snapped to the next stream with a flow accumulation equal or higher than the given value.', + 'schema': {'type': 'string'}, + 'minOccurs': 1, + 'maxOccurs': 1, + 'metadata': None, + 'keywords': ['bla'] + }, + 'basin_id': { + 'title': 'Basin Id', + 'description': 'TODO', + 'schema': {'type': 'string'}, + 'minOccurs': 1, + 'maxOccurs': 1, + 'metadata': None, + 'keywords': ['river basin'] + } + }, + 'outputs': { + 'snapped_points': { + 'title': 'Snapped Points', + 'description': 'Resulting coordinates as multipoint.', + 'schema': { + 'type': 'object', + 'contentMediaType': 'application/json' + } + } + }, + 'example': { + 'inputs': { + 'coordinate_multipoint': "{\"coordinates\": [[-17.25355, -44.885825], [-13.763611, -43.595833]], \"type\": \"MultiPoint\"}" + } + } +} + +class SnapToNetworkProcessor(BaseProcessor): + """Get Snap-to-Network Processor example""" + + def __init__(self, processor_def): + """ + Initialize object + + :param processor_def: provider definition + + :returns: pygeoapi.process.get_tide_id.SnapToNetworkProcessor + """ + + super().__init__(processor_def, PROCESS_METADATA) + + def execute(self, data): + LOGGER.info('Starting "snap_to_network as ogc_service!"') + + + # Defaut directories, if environment var not set: + PYGEOAPI_DATA_DIR = '/home/mbuurman/work/hydro_casestudy_saofra/data' # TODO in production remove this! + + # Get PYGEOAPI_DATA_DIR from environment: + if not 'PYGEOAPI_DATA_DIR' in os.environ: + err_msg = 'ERROR: Missing environment variable PYGEOAPI_DATA_DIR. We cannot find the input data!\nPlease run:\nexport PYGEOAPI_DATA_DIR="/.../"' + LOGGER.error(err_msg) + #LOGGER.error('Exiting...') + #sys.exit(1) # This leads to curl error: (52) Empty reply from server. TODO: Send error message back!!! + raise ValueError(err_msg) + #path_data = os.environ.get('PYGEOAPI_DATA_DIR') + path_data = os.environ.get('PYGEOAPI_DATA_DIR', PYGEOAPI_DATA_DIR) + + # Get input: + method = data.get('method') + distance = data.get('distance') + accumulation = data.get('accumulation') + basin_id = data.get("basin_id") + + # Check validity of argument: + if not method in ['distance', 'accumulation', 'both']: + LOGGER.error('Error: Wrong method: %s' % method) + + # Already in geoJSON I think, no need to run geojson.loads() + multipoint = data.get('coordinate_multipoint') + LOGGER.debug('Input multipoint: %s (%s)' % (multipoint, type(multipoint))) + + # Make a file out of the input geojson, as the bash file wants them as a file + # Otherwise, rewrite the bash file to python! + input_coord_file_path = tempfile.gettempdir()+os.sep+'__input_snappingtool.txt' + col_name_lat = 'lat' + col_name_lon = 'lon' + col_name_id = 'dummy_unused' # or so! I don't think is is really used, is it? TODO check this third column business + self.make_file_from_geojson(multipoint, input_coord_file_path, col_name_lat, col_name_lon) + + # Hardcoded paths on server: + # TODO: Do we have to cut them smaller for processing? + #path_accumul_tif = '/home/mbuurman/work/testing_hydrographr/data/basin_481051/accumulation_481051.tif' + #path_stream_tif = '/home/mbuurman/work/testing_hydrographr/data/basin_481051/segment_481051.tif' + # data dir: /home/mbuurman/work/hydro_casestudy_saofra/data/ + #path_accumul_tif = path_data+os.sep+'basin_481051/accumulation_481051.tif' + #path_stream_tif = path_data+os.sep+'basin_481051/segment_481051.tif' + path_accumul_tif = "%s/basin_%s/accumulation_%s.tif" % (path_data, basin_id, basin_id) + path_stream_tif = "%s/basin_%s/segment_%s.tif" % (path_data, basin_id, basin_id) + tmp_dir = tempfile.gettempdir() + randomstring = (''.join(random.sample(string.ascii_lowercase+string.digits, 5))) + snap_tmp_path = tempfile.gettempdir()+os.sep+'__output_snappingtool_'+randomstring+'.txt' # intermediate result storage used by GRASS! + + # Now call the tool: + snap_tmp_path = self.call_snap_to_network_sh(input_coord_file_path, + col_name_id, col_name_lon, col_name_lat, + path_stream_tif, path_accumul_tif, + method, distance, accumulation, + snap_tmp_path, tmp_dir) + + # Now make geojson from the tool: + result_multipoint = self.csv_to_geojson(snap_tmp_path) + LOGGER.debug('________________________________') + LOGGER.info('Result multipoint: %s' % result_multipoint) + #LOGGER.info('Result multipoint: %s (dumped)' % geosjon.dumps(result_multipoint)) + + # Remove file - not during debugging, but in production we should probably + #if os.path.exists(snap_tmp_path): + # os.remove(snap_tmp_path) + # LOGGER.debug('Intermediate file "%s" has been removed.' % snap_tmp_path) + + outputs = { + 'id': 'snapped_points', + 'value': result_multipoint + } + + mimetype = 'application/json' + return mimetype, outputs + + def __repr__(self): + return f' {self.name}' + + + def make_file_from_geojson(self, multipoint, input_coord_file_path, col_name_lat='lat', col_name_lon='lon', sep=','): + + LOGGER.debug('Writing input coordinates from geojson into "%s"...' % input_coord_file_path) + with open(input_coord_file_path, 'w') as inputfile: # Overwrite previous input file! + inputfile.write('%s%s%s%s%s\n' % ("foo", sep, col_name_lon, sep, col_name_lat)) + for coord_pair in multipoint['coordinates']: + lon = coord_pair[0] + lat = coord_pair[1] + coord_pair_str = '%s%s%s%s%s\n' % (99999, sep, lat, sep, lon) # comma separated + inputfile.write(coord_pair_str) + + # DEBUG TODO FIXME This is a dirty little fix + # This is what happens in R in this line: + # https://github.com/glowabio/hydrographr/blob/HEAD/R/snap_to_network.R#L193C3-L193C17 + # They just export the coordinates, no other columns! + # But the input line in grass seems to expect more lines! + # Line 73 in snap_to_network.sh: + # https://github.com/glowabio/hydrographr/blob/24e350f1606e60a02594b4d655501ca68bc3e846/inst/sh/snap_to_network.sh#L73 + # So I now added another dummy foo column in front. + + return input_coord_file_path + + + def call_snap_to_network_sh(self, path_coord_file, id_col_name, lon_col_name, lat_col_name, path_stream_tif, path_accumul_tif, method, distance, accumulation, snap_tmp_path, tmp_dir): + + with open(snap_tmp_path, 'w') as myfile: + pass # To empty the file! #- can be done more elegant I'm sure! + + LOGGER.debug('Now calling bash which calls grass/gdal...') + LOGGER.debug('Current directory: %s' % os.getcwd()) + bash_file = os.getcwd()+'/pygeoapi/process/snap_to_network.sh' + cmd =[bash_file, path_coord_file, id_col_name, lon_col_name, lat_col_name, + path_stream_tif, path_accumul_tif, method, str(distance), str(accumulation), snap_tmp_path, tmp_dir] + LOGGER.debug('GRASS command: %s' % cmd) + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE) + stdoutdata, stderrdata = p.communicate() + LOGGER.debug('Process exit code: %s' % p.returncode) + LOGGER.debug('Process output:\n____________\n%s\n_____(end of process stdout)_______' % stdoutdata.decode()) + if len(stderrdata.decode()) > 0: + LOGGER.debug('Process errors:\n____________\n%s\n_____(end of process stderr)_______' % stderrdata.decode()) + else: + LOGGER.debug('(Nothing written to stderr: "%s")' % stderrdata.decode()) + return snap_tmp_path + + + def csv_to_geojson(self, snap_tmp_path): + # Get results by reading them from result file! + # This is how GeoJSON Multipoints look: + # "{\"coordinates\": [[-17.25355, -44.885825], [-13.763611, -43.595833]], \"type\": \"MultiPoint\"}" + result_multipoint = {"type": "MultiPoint", "coordinates":[]} + LOGGER.debug('Reading snapped points from GRASS result file "%s" and writing them into a GeoJSON Multipoint...' % snap_tmp_path) + firstline = True + with open(snap_tmp_path, 'r') as resultfile: + for line in resultfile: + + # Ignore header line! + if firstline: + firstline = False + continue + + line = line.strip() + LOGGER.debug('Found coordinate line: %s' % line) # TODO Understand why we get so many columns here! + splitted = line.split() + + # Which columns? + coord1, coord2 = splitted[1], splitted[2] + LOGGER.debug('Extracted these coordinates %s, %s' % (coord1, coord2)) + + result_multipoint['coordinates'].append([float(coord2), float(coord1)]) + LOGGER.debug('Finished creating GeoJSON multipoint: %s' % geojson.dumps(result_multipoint)) + return result_multipoint