diff --git a/algorithm_catalog/fusets_mogpr.json b/algorithm_catalog/fusets_mogpr.json new file mode 100644 index 00000000..a4a44598 --- /dev/null +++ b/algorithm_catalog/fusets_mogpr.json @@ -0,0 +1,136 @@ +{ + "id": "fusets_mogpr", + "type": "Feature", + "conformsTo": [ + "http://www.opengis.net/spec/ogcapi-records-1/1.0/req/record-core" + ], + "geometry": null, + "properties": { + "created": "2025-01-09T00:00:00Z", + "updated": "2025-01-29T00:00:00Z", + "type": "apex_algorithm", + "title": "Multi output gaussian process regression", + "description": "Integrates timeseries in data cube using multi-output gaussian process regression. The service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other indicators that are correlated with each other.", + "cost_estimate": 12, + "cost_unit": "platform credits per km²", + "keywords": [ + "timeseries", + "Gaussian Process Regression (GPR)" + ], + "language": { + "code": "en-US", + "name": "English (United States)" + }, + "languages": [ + { + "code": "en-US", + "name": "English (United States)" + } + ], + "contacts": [ + { + "name": "Bram Janssen", + "position": "Researcher", + "organization": "VITO", + "links": [ + { + "href": "https://www.vito.be/", + "rel": "about", + "type": "text/html" + }, + { + "href": "https://github.com/JanssenBrm", + "rel": "about", + "type": "text/html" + } + ], + "contactInstructions": "Contact via VITO", + "roles": [ + "principal investigator" + ] + }, + { + "name": "Pratichhya Sharma", + "position": "Researcher", + "organization": "VITO", + "links": [ + { + "href": "https://www.vito.be/", + "rel": "about", + "type": "text/html" + }, + { + "href": "https://github.com/Pratichhya", + "rel": "about", + "type": "text/html" + } + ], + "contactInstructions": "Contact via VITO", + "roles": [ + "service provider" + ] + }, + { + 
"name": "VITO", + "links": [ + { + "href": "https://www.vito.be/", + "rel": "about", + "type": "text/html" + } + ], + "contactInstructions": "SEE WEBSITE", + "roles": [ + "processor" + ] + } + ], + "themes": [ + { + "concepts": [ + { + "id": "Normalised vegetation difference index (NDVI)" + }, + { + "id": "Radar Vegetation Index (RVI)" + }, + { + "id": "Multi-output Gaussian Process Regression (MOGPR)" + } + ], + "scheme": "https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/sciencekeywords" + } + ], + "formats": [ + { + "name": "JSON" + } + ], + "license": "other" + }, + "linkTemplates": [], + "links": [ + { + "rel": "openeo-process", + "type": "application/json", + "title": "openEO Process Definition", + "href": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/09413be3c27e0e695d426c9ffe5a0fe90beefe65/openeo_udp/fusets_mogpr/fusets_mogpr.json" + }, + { + "rel": "service", + "type": "application/json", + "title": "CDSE openEO federation", + "href": "https://openeofed.dataspace.copernicus.eu" + }, + { + "rel": "license", + "href": "https://apex.esa.int/license" + }, + { + "rel": "example", + "type": "application/json", + "title": "Example output", + "href": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/fusets_mogpr.nc" + } + ] +} \ No newline at end of file diff --git a/benchmark_scenarios/fusets_mogpr.json b/benchmark_scenarios/fusets_mogpr.json new file mode 100644 index 00000000..1abf4dcf --- /dev/null +++ b/benchmark_scenarios/fusets_mogpr.json @@ -0,0 +1,52 @@ +[ + { + "id": "fusets_mogpr", + "type": "openeo", + "description": "Multi output gaussian process regression example on NDVI timeseries", + "backend": "openeofed.dataspace.copernicus.eu", + "process_graph": { + "fusetsmogpr": { + "arguments": { + "s1_collection": "RVI", + "s2_collection": "NDVI", + "spatial_extent": { + "coordinates": [ + [ + [ + 5.178303838475193, + 51.252856237848164 + ], + [ + 5.178003609252369, + 51.25109194151486 + ], + [ + 
5.179280940922463, + 51.25103833409551 + ], + [ + 5.179565949577788, + 51.25278555186941 + ], + [ + 5.178303838475193, + 51.252856237848164 + ] + ] + ], + "type": "Polygon" + }, + "temporal_extent": [ + "2021-01-01", + "2021-12-15" + ] + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/09413be3c27e0e695d426c9ffe5a0fe90beefe65/openeo_udp/fusets_mogpr/fusets_mogpr.json", + "process_id": "fusets_mogpr" + } + }, + "reference_data": { + "timeseries.nc": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/fusets_mogpr.nc" + } + } +] \ No newline at end of file diff --git a/openeo_udp/fusets_mogpr/README.md b/openeo_udp/fusets_mogpr/README.md new file mode 100644 index 00000000..4ffb7a6d --- /dev/null +++ b/openeo_udp/fusets_mogpr/README.md @@ -0,0 +1,50 @@ +# Sentinel-1 and Sentinel-2 data fusion through Multi-output Gaussian process regression (MOGPR) + +This service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators. This service focuses on fusing Sentinel-1 and Sentinel-2 data, allowing the user to select one of the predefined data sources. 
+ +## Parameters + +The `fusets_mogpr` service requires the following parameters: + + +| Name | Description | Type | Default | +| --------------- | -------------------------------------------------------------- | ------- | ------- | +| spatial_extent | Polygon representing the AOI on which to apply the data fusion | GeoJSON | | +| temporal_extent | Date range for which to apply the data fusion | Array | | +| s1_collection | S1 data collection to use for the fusion | Text | RVI | +| s2_collection | S2 data collection to use for fusing the data | Text | NDVI | + +## Supported collections + +#### Sentinel-1 + +* RVI +* GRD + +#### Sentinel-2 + +* NDVI +* FAPAR +* LAI +* FCOVER +* EVI +* CCC +* CWC + +## Limitations + +The spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km). + +## Dependencies + +In addition to various Python libraries, the workflow utilizes the following libraries included in the User-Defined Function (UDF): + +* Biopar: The `biopar` package retrieves the biophysical parameter (FAPAR, LAI, FCOVER, CCC or CWC) selected through the `s2_collection` parameter. The biopar package is a Python package that calculates biophysical parameters from Sentinel-2 satellite images as described [here](https://step.esa.int/docs/extra/ATBD_S2ToolBox_L2B_V1.1.pdf). The `fusets_mogpr` UDP directly uses the biopar UDP shared in the APEX Algorithms repository. + +* FuseTS: The `fusets` library was developed to facilitate data fusion and time-series analytics using AI/ML to extract insights about land environments. It functions as a Time Series & Data Fusion toolbox integrated with openEO. For additional information, please refer to the [FuseTS documentation](https://open-eo.github.io/FuseTS/installation.html). + + + +## Output + +This User-Defined-Process (UDP) produces a datacube that contains a gap-filled time series for all pixels within the specified temporal and spatial range.
This datacube can be seamlessly integrated with other openEO processes. \ No newline at end of file diff --git a/openeo_udp/fusets_mogpr/fusets_mogpr.json b/openeo_udp/fusets_mogpr/fusets_mogpr.json new file mode 100644 index 00000000..3e403037 --- /dev/null +++ b/openeo_udp/fusets_mogpr/fusets_mogpr.json @@ -0,0 +1,900 @@ +{ + "process_graph": { + "biopar1": { + "process_id": "biopar", + "arguments": { + "biopar_type": "CWC", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" + } + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "biopar2": { + "process_id": "biopar", + "arguments": { + "biopar_type": "CCC", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" + } + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "loadcollection1": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "B02", + "B04", + "B08" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "loadcollection2": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "SCL" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "toscldilationmask1": { + "process_id": "to_scl_dilation_mask", + "arguments": { + "data": { + "from_node": "loadcollection2" + } + } + }, + "mask1": { + "process_id": "mask", + "arguments": { + "data": { + "from_node": "loadcollection1" + }, + "mask": { + "from_node": "toscldilationmask1" + } + } + }, + "reducedimension1": { + "process_id": "reduce_dimension", + "arguments": { + "data": { + 
"from_node": "mask1" + }, + "dimension": "bands", + "reducer": { + "process_graph": { + "arrayelement1": { + "process_id": "array_element", + "arguments": { + "data": { + "from_parameter": "data" + }, + "index": 2 + } + }, + "arrayelement2": { + "process_id": "array_element", + "arguments": { + "data": { + "from_parameter": "data" + }, + "index": 1 + } + }, + "subtract1": { + "process_id": "subtract", + "arguments": { + "x": { + "from_node": "arrayelement1" + }, + "y": { + "from_node": "arrayelement2" + } + } + }, + "multiply1": { + "process_id": "multiply", + "arguments": { + "x": 2.5, + "y": { + "from_node": "subtract1" + } + } + }, + "multiply2": { + "process_id": "multiply", + "arguments": { + "x": 6, + "y": { + "from_node": "arrayelement2" + } + } + }, + "add1": { + "process_id": "add", + "arguments": { + "x": { + "from_node": "arrayelement1" + }, + "y": { + "from_node": "multiply2" + } + } + }, + "arrayelement3": { + "process_id": "array_element", + "arguments": { + "data": { + "from_parameter": "data" + }, + "index": 1 + } + }, + "multiply3": { + "process_id": "multiply", + "arguments": { + "x": 7.5, + "y": { + "from_node": "arrayelement3" + } + } + }, + "subtract2": { + "process_id": "subtract", + "arguments": { + "x": { + "from_node": "add1" + }, + "y": { + "from_node": "multiply3" + } + } + }, + "add2": { + "process_id": "add", + "arguments": { + "x": { + "from_node": "subtract2" + }, + "y": 1 + } + }, + "divide1": { + "process_id": "divide", + "arguments": { + "x": { + "from_node": "multiply1" + }, + "y": { + "from_node": "add2" + } + }, + "result": true + } + } + } + } + }, + "adddimension1": { + "process_id": "add_dimension", + "arguments": { + "data": { + "from_node": "reducedimension1" + }, + "label": "EVI", + "name": "bands", + "type": "bands" + } + }, + "biopar3": { + "process_id": "biopar", + "arguments": { + "biopar_type": "FCOVER", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" + } + }, 
+ "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "biopar4": { + "process_id": "biopar", + "arguments": { + "biopar_type": "LAI", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" + } + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "biopar5": { + "process_id": "biopar", + "arguments": { + "biopar_type": "FAPAR", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" + } + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "loadcollection3": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "B04", + "B08" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "loadcollection4": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "SCL" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "toscldilationmask2": { + "process_id": "to_scl_dilation_mask", + "arguments": { + "data": { + "from_node": "loadcollection4" + } + } + }, + "mask2": { + "process_id": "mask", + "arguments": { + "data": { + "from_node": "loadcollection3" + }, + "mask": { + "from_node": "toscldilationmask2" + } + } + }, + "ndvi1": { + "process_id": "ndvi", + "arguments": { + "data": { + "from_node": "mask2" + }, + "nir": "B08", + "red": "B04", + "target_band": "NDVI" + } + }, + "filterbands1": { + "process_id": "filter_bands", + "arguments": { + "bands": [ + "NDVI" + ], + "data": { + "from_node": "ndvi1" + } + } + }, 
+ "eq1": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "ndvi" + } + }, + "if1": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "filterbands1" + }, + "reject": null, + "value": { + "from_node": "eq1" + } + } + }, + "eq2": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "fapar" + } + }, + "if2": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar5" + }, + "reject": { + "from_node": "if1" + }, + "value": { + "from_node": "eq2" + } + } + }, + "eq3": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "lai" + } + }, + "if3": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar4" + }, + "reject": { + "from_node": "if2" + }, + "value": { + "from_node": "eq3" + } + } + }, + "eq4": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "fcover" + } + }, + "if4": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar3" + }, + "reject": { + "from_node": "if3" + }, + "value": { + "from_node": "eq4" + } + } + }, + "eq5": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "evi" + } + }, + "if5": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "adddimension1" + }, + "reject": { + "from_node": "if4" + }, + "value": { + "from_node": "eq5" + } + } + }, + "eq6": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "ccc" + } + }, + "if6": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar2" + }, + "reject": { + "from_node": "if5" + }, + "value": { + "from_node": "eq6" + } + } + }, + "eq7": { + 
"process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "cwc" + } + }, + "if7": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar1" + }, + "reject": { + "from_node": "if6" + }, + "value": { + "from_node": "eq7" + } + } + }, + "loadcollection5": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "VV", + "VH" + ], + "id": "SENTINEL1_GRD", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "sarbackscatter1": { + "process_id": "sar_backscatter", + "arguments": { + "coefficient": "sigma0-ellipsoid", + "contributing_area": false, + "data": { + "from_node": "loadcollection5" + }, + "elevation_model": null, + "ellipsoid_incidence_angle": false, + "local_incidence_angle": false, + "mask": false, + "noise_removal": true + } + }, + "renamelabels1": { + "process_id": "rename_labels", + "arguments": { + "data": { + "from_node": "sarbackscatter1" + }, + "dimension": "bands", + "target": [ + "VV", + "VH" + ] + } + }, + "reducedimension2": { + "process_id": "reduce_dimension", + "arguments": { + "data": { + "from_node": "renamelabels1" + }, + "dimension": "bands", + "reducer": { + "process_graph": { + "arrayelement4": { + "process_id": "array_element", + "arguments": { + "data": { + "from_parameter": "data" + }, + "index": 1 + } + }, + "add3": { + "process_id": "add", + "arguments": { + "x": { + "from_node": "arrayelement4" + }, + "y": { + "from_node": "arrayelement4" + } + } + }, + "arrayelement5": { + "process_id": "array_element", + "arguments": { + "data": { + "from_parameter": "data" + }, + "index": 0 + } + }, + "add4": { + "process_id": "add", + "arguments": { + "x": { + "from_node": "arrayelement5" + }, + "y": { + "from_node": "arrayelement4" + } + } + }, + "divide2": { + "process_id": "divide", + "arguments": { + "x": { + "from_node": "add3" + }, + "y": { + "from_node": "add4" 
+ } + }, + "result": true + } + } + } + } + }, + "adddimension2": { + "process_id": "add_dimension", + "arguments": { + "data": { + "from_node": "reducedimension2" + }, + "label": "RVI", + "name": "bands", + "type": "bands" + } + }, + "loadcollection6": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "VV", + "VH" + ], + "id": "SENTINEL1_GRD", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "sarbackscatter2": { + "process_id": "sar_backscatter", + "arguments": { + "coefficient": "sigma0-ellipsoid", + "contributing_area": false, + "data": { + "from_node": "loadcollection6" + }, + "elevation_model": null, + "ellipsoid_incidence_angle": false, + "local_incidence_angle": false, + "mask": false, + "noise_removal": true + } + }, + "renamelabels2": { + "process_id": "rename_labels", + "arguments": { + "data": { + "from_node": "sarbackscatter2" + }, + "dimension": "bands", + "target": [ + "VV", + "VH" + ] + } + }, + "eq8": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s1_collection" + }, + "y": "grd" + } + }, + "if8": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "renamelabels2" + }, + "reject": null, + "value": { + "from_node": "eq8" + } + } + }, + "eq9": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s1_collection" + }, + "y": "rvi" + } + }, + "if9": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "adddimension2" + }, + "reject": { + "from_node": "if8" + }, + "value": { + "from_node": "eq9" + } + } + }, + "mergecubes1": { + "process_id": "merge_cubes", + "arguments": { + "cube1": { + "from_node": "if7" + }, + "cube2": { + "from_node": "if9" + } + } + }, + "applyneighborhood1": { + "process_id": "apply_neighborhood", + "arguments": { + "data": { + "from_node": "mergecubes1" + }, + "overlap": [], + "process": { + 
"process_graph": { + "runudf1": { + "process_id": "run_udf", + "arguments": { + "context": {}, + "data": { + "from_parameter": "data" + }, + "runtime": "Python", + "udf": "#%%\n\nimport os\nimport sys\nimport zipfile\nimport requests\nimport tempfile\nimport shutil\nimport functools\n\nfrom openeo.udf import inspect\n\ndef download_file(url, path):\n \"\"\"\n Downloads a file from the given URL to the specified path.\n \"\"\"\n response = requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\ndef extract_zip_to_temp(zip_path, temp_dir):\n \"\"\"\n Extracts a zip file into the given temporary directory.\n \"\"\"\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(temp_dir) # Use the existing temp_dir\n return temp_dir\n\ndef move_top_level_folder_to_destination(temp_dir, destination_dir):\n \"\"\"\n Moves each top-level folder from the temporary directory to the destination directory.\n Throws an error if the folder already exists at the destination.\n \"\"\"\n # Find the top-level folders inside the extracted zip\n for item in os.listdir(temp_dir):\n item_path = os.path.join(temp_dir, item)\n \n if os.path.isdir(item_path):\n # Check if the folder already exists at destination\n dest_path = os.path.join(destination_dir, item)\n\n if os.path.exists(dest_path):\n # Throw an error if the folder already exists\n raise FileExistsError(f\"Error: The folder '{item}' already exists in the destination directory: {dest_path}\")\n\n # Move the folder out of temp and into the destination directory\n shutil.move(item_path, dest_path)\n\n\ndef add_to_sys_path(folder_path):\n \"\"\"\n Adds the folder path to sys.path.\n \"\"\"\n if folder_path not in sys.path:\n sys.path.append(folder_path)\n\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url):\n \"\"\"\n Main function to download, unzip, move the top-level folder, and add it to sys.path.\n \"\"\"\n with tempfile.TemporaryDirectory() as 
temp_dir:\n # Step 1: Download the zip file\n zip_path = os.path.join(temp_dir, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n\n inspect(message=\"Extract dependencies to temp\")\n # Step 2: Extract the zip file to the temporary directory\n extracted_dir = extract_zip_to_temp(zip_path, temp_dir) \n\n # Step 3: Move the first top-level folder (dynamically) to the destination\n destination_dir = os.getcwd() # Current working directory\n inspect(message=\"Move top-level folder to destination\")\n moved_folder = move_top_level_folder_to_destination(extracted_dir, destination_dir)\n\n # Step 4: Add the folder to sys.path\n add_to_sys_path(moved_folder)\n inspect(message=\"Added to the sys path\") \n\n\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\")\nimport os\nimport sys\nfrom configparser import ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom openeo.udf import XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual environment to the system path if the folder `/tmp/venv_static` exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n cfg.close()\n return home\n\n\ndef apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the context of an 
apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" + }, + "result": true + } + } + }, + "size": [ + { + "dimension": "x", + "value": 32, + "unit": "px" + }, + { + "dimension": "y", + "value": 32, + "unit": "px" + } + ] + }, + "result": true + } + }, + "id": "fusets_mogpr", + "summary": "Integrate S1 and S2 timeseries using multi-output gaussian process regression", + "description": "# Sentinel-1 and Sentinel-2 data fusion through Multi-output Gaussian process regression (MOGPR)\n\nThis service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators. 
This service focuses on fusing Sentinel-1 and Sentinel-2 data, allowing the user to select one of the predefined data sources.\n\n## Parameters\n\nThe `fusets_mogpr` service requires the following parameters:\n\n\n| Name | Description | Type | Default |\n| --------------- | -------------------------------------------------------------- | ------- | ------- |\n| spatial_extent | Polygon representing the AOI on which to apply the data fusion | GeoJSON | |\n| temporal_extent | Date range for which to apply the data fusion | Array | |\n| s1_collection | S1 data collection to use for the fusion | Text | RVI |\n| s2_collection | S2 data collection to use for fusing the data | Text | NDVI |\n\n## Supported collections\n\n#### Sentinel-1\n\n* RVI\n* GRD\n\n#### Sentinel-2\n\n* NDVI\n* FAPAR\n* LAI\n* FCOVER\n* EVI\n* CCC\n* CWC\n\n## Limitations\n\nThe spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km).\n\n## Dependencies\n\nIn addition to various Python libraries, the workflow utilizes the following libraries included in the User-Defined Function (UDF):\n\n* Biopar: The `biopar` package retrieves the biophysical parameter (FAPAR, LAI, FCOVER, CCC or CWC) selected through the `s2_collection` parameter. The biopar package is a Python package that calculates biophysical parameters from Sentinel-2 satellite images as described [here](https://step.esa.int/docs/extra/ATBD_S2ToolBox_L2B_V1.1.pdf). The `fusets_mogpr` UDP directly uses the biopar UDP shared in the APEX Algorithms repository. \n\n* FuseTS: The `fusets` library was developed to facilitate data fusion and time-series analytics using AI/ML to extract insights about land environments. It functions as a Time Series & Data Fusion toolbox integrated with openEO. 
For additional information, please refer to the [FuseTS documentation](https://open-eo.github.io/FuseTS/installation.html).\n\n\n\n## Output\n\nThis User-Defined-Process (UDP) produces a datacube that contains a gap-filled time series for all pixels within the specified temporal and spatial range. This datacube can be seamlessly integrated with other openEO processes.", + "parameters": [ + { + "name": "spatial_extent", + "description": "Limits the data to process to the specified bounding box or polygons.\\n\\nFor raster data, the process loads the pixel into the data cube if the point at the pixel center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).\\nFor vector data, the process loads the geometry into the data cube if the geometry is fully within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). Empty geometries may only be in the data cube if no spatial extent has been provided.\\n\\nEmpty geometries are ignored.\\nSet this parameter to null to set no limit for the spatial extent.", + "schema": [ + { + "title": "Bounding Box", + "type": "object", + "subtype": "bounding-box", + "required": [ + "west", + "south", + "east", + "north" + ], + "properties": { + "west": { + "description": "West (lower left corner, coordinate axis 1).", + "type": "number" + }, + "south": { + "description": "South (lower left corner, coordinate axis 2).", + "type": "number" + }, + "east": { + "description": "East (upper right corner, coordinate axis 1).", + "type": "number" + }, + "north": { + "description": "North (upper right corner, coordinate axis 2).", + "type": "number" + }, + "base": { + "description": "Base (optional, lower left corner, coordinate axis 3).", + "type": [ + "number", + "null" + ], + "default": null + }, + "height": { + "description": "Height (optional, upper right corner, coordinate axis 3).", + "type": [ + "number", + "null" + ], + "default": null + 
}, + "crs": { + "description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system.", + "anyOf": [ + { + "title": "EPSG Code", + "type": "integer", + "subtype": "epsg-code", + "minimum": 1000, + "examples": [ + 3857 + ] + }, + { + "title": "WKT2", + "type": "string", + "subtype": "wkt2-definition" + } + ], + "default": 4326 + } + } + }, + { + "title": "Vector data cube", + "description": "Limits the data cube to the bounding box of the given geometries in the vector data cube. For raster data, all pixels inside the bounding box that do not intersect with any of the polygons will be set to no data (`null`). Empty geometries are ignored.", + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + } + ] + }, + { + "title": "No filter", + "description": "Don't filter spatially. All data is included in the data cube.", + "type": "null" + } + ] + }, + { + "name": "temporal_extent", + "description": "Temporal extent specified as two-element array with start and end date/date-time. 
\nThis is date range for which to apply the data fusion", + "schema": { + "type": "array", + "subtype": "temporal-interval", + "uniqueItems": true, + "minItems": 2, + "maxItems": 2, + "items": { + "anyOf": [ + { + "type": "string", + "subtype": "date-time", + "format": "date-time" + }, + { + "type": "string", + "subtype": "date", + "format": "date" + }, + { + "type": "null" + } + ] + } + } + }, + { + "name": "s1_collection", + "description": "S1 data collection to use for fusing the data", + "schema": { + "type": "string", + "enum": [ + "RVI", + "GRD" + ] + }, + "default": "RVI", + "optional": true + }, + { + "name": "s2_collection", + "description": "S2 data collection to use for fusing the data", + "schema": { + "type": "string", + "enum": [ + "NDVI", + "FAPAR", + "LAI", + "FCOVER", + "EVI", + "CCC", + "CWC" + ] + }, + "default": "NDVI", + "optional": true + } + ] +} \ No newline at end of file diff --git a/openeo_udp/fusets_mogpr/generate.py b/openeo_udp/fusets_mogpr/generate.py new file mode 100644 index 00000000..5fba4e02 --- /dev/null +++ b/openeo_udp/fusets_mogpr/generate.py @@ -0,0 +1,86 @@ +import json +from pathlib import Path +from typing import Union, Sequence + +import openeo +from openeo.api.process import Parameter +from openeo.processes import ProcessBuilder, apply_neighborhood +from openeo.rest.udp import build_process_dict + +from fusets.openeo import load_mogpr_udf + +from helpers import load_s1_collection, load_s2_collection + + +connection = openeo.connect("openeofed.dataspace.copernicus.eu") + +def get_mogpr_s1_s2( + polygon: Union[Parameter, dict] = None, + date: Union[Sequence[str], Parameter] = None, + s1_collection: Union[str, Parameter] = None, + s2_collection: Union[str, Parameter] = None, +) -> ProcessBuilder: + s1_input_cube = load_s1_collection(connection, s1_collection, polygon, date) + s2_input_cube = load_s2_collection(connection, s2_collection, polygon, date) + + # Merge the inputs to a single datacube + merged_cube = 
s2_input_cube.merge_cubes(s1_input_cube) + + return apply_neighborhood(merged_cube, + lambda data: data.run_udf(udf=Path("set_path.py").read_text()+"\n"+load_mogpr_udf(), runtime='Python', context=dict()), + size=[ + {'dimension': 'x', 'value': 32, 'unit': 'px'}, + {'dimension': 'y', 'value': 32, 'unit': 'px'} + ], overlap=[]) + + +def generate() -> dict: + + # define parameters + polygon = Parameter.spatial_extent( + name="spatial_extent", + description="Limits the data to process to the specified bounding box or polygons.\\n\\nFor raster data, the process loads the pixel into the data cube if the point at the pixel center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).\\nFor vector data, the process loads the geometry into the data cube if the geometry is fully within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). Empty geometries may only be in the data cube if no spatial extent has been provided.\\n\\nEmpty geometries are ignored.\\nSet this parameter to null to set no limit for the spatial extent." + ) + date = Parameter.temporal_interval( + name="temporal_extent", + description="Temporal extent specified as two-element array with start and end date/date-time. 
if __name__ == "__main__":
    # Serialize the generated process definition into fusets_mogpr.json next to this script.
    target_path = Path(__file__).parent / "fusets_mogpr.json"
    with open(target_path, "w") as handle:
        handle.write(json.dumps(generate(), indent=2))
+ :param connection: openEO connection + :param polygon: Area of interest + :param date: Time of interest + :param bands: Bands to load + :return: + """ + s1_grd = connection.load_collection('SENTINEL1_GRD', + spatial_extent=polygon, + temporal_extent=date, + bands=bands) + s1_grd = s1_grd.sar_backscatter(coefficient='sigma0-ellipsoid') + s1_grd = s1_grd.rename_labels(dimension="bands", target=bands) + return s1_grd + + +def _load_rvi(connection, polygon, date): + """ + Create an RVI datacube based on the S1 VV and VH bands. + :param connection: openEO connection + :param polygon: Area of interest + :param date: Time of interest + :return: + """ + base_s1 = _load_s1_grd_bands(connection, polygon, date, ['VV', 'VH']) + + VH = base_s1.band('VH') + VV = base_s1.band('VV') + rvi = (VH + VH) / (VV + VH) + return rvi.add_dimension(name="bands", label="RVI", type="bands") + + +####################################################################################################################### +# S2 collection implementation +####################################################################################################################### + +def _load_ndvi(connection, polygon, date): + """ + Create an NDVI datacube based on the SENTINEL2_L2A data collection. + :param connection: openEO connection + :param polygon: Area of interest + :param date: + :return: + """ + base_s2 = connection.load_collection('SENTINEL2_L2A', + spatial_extent=polygon, + temporal_extent=date, + bands=["B04", "B08"]) + scl = connection.load_collection('SENTINEL2_L2A', + spatial_extent=polygon, + temporal_extent=date, + bands=["SCL"]) + mask = mask = scl.process("to_scl_dilation_mask", data=scl) + masked_s2 = base_s2.mask(mask) + ndvi = masked_s2.ndvi(red="B04", nir="B08", target_band='NDVI') + ndvi_filtered = ndvi.filter_bands(bands=['NDVI']) + return ndvi_filtered + + +def _load_biopar(polygon, date, biopar): + """ + Create a BIOPAR datacube. 
This is done by using the existing BIOPAR service: + https://portal.terrascope.be/catalogue/app-details/21 + + :param polygon: Area of interest + :param date: Time of interest + :param biopar: BIOPAR type (see documentation of service on portal) + :return: + """ + base_biopar = process( + process_id="biopar", + namespace="https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json", + date=date, + polygon=polygon, + biopar_type=biopar + ) + return base_biopar + + +def _load_evi(connection, polygon, date): + """ + Create an EVI datacube. More information is available at https://en.wikipedia.org/wiki/Enhanced_vegetation_index + :param connection: openEO connection + :param polygon: Area of interest + :param date: Time of interest + :return: + """ + base_s2 = connection.load_collection( + collection_id='SENTINEL2_L2A', + spatial_extent=polygon, + temporal_extent=date, + bands=['B02', 'B04', 'B08'], + ) + scl = connection.load_collection('SENTINEL2_L2A', + spatial_extent=polygon, + temporal_extent=date, + bands=["SCL"]) + mask = mask = scl.process("to_scl_dilation_mask", data=scl) + masked_s2 = base_s2.mask(mask) + + B02 = masked_s2.band('B04') + B04 = masked_s2.band('B04') + B08 = masked_s2.band('B08') + + evi = (2.5 * (B08 - B04)) / ((B08 + 6.0 * B04 - 7.5 * B02) + 1.0) + return evi.add_dimension(name="bands", label="EVI", type="bands") + + +####################################################################################################################### +# OpenEO UDP implementation +####################################################################################################################### +def _build_collection_graph(collection, label, callable, reject): + """ + Helper function that will construct an if-else structure using the if_ openEO process. If the value of the + collection parameter matches with the given label, the callable is executed. 
def load_s1_collection(connection, collection, polygon, date):
    """
    Build the S1 input data cube for the collection chosen by the user.

    A cube is prepared for every supported option ('grd' and 'rvi') and the options are chained
    into a nested if-else structure through _build_collection_graph, so that the matching cube
    is selected when the UDP is executed. The innermost "else" branch is None.

    :param connection: openEO connection
    :param collection: One of the supported collections (S1_COLLECTIONS)
    :param polygon: Area of interest
    :param date: Time of interest
    :return: Nested if-else structure selecting between the prepared cubes
    """
    grd_cube = _load_s1_grd_bands(connection=connection, polygon=polygon, date=date, bands=['VV', 'VH'])
    rvi_cube = _load_rvi(connection=connection, polygon=polygon, date=date)

    selection = None
    # Later options wrap earlier ones, so the last option ends up as the outermost check.
    for label, cube in (('grd', grd_cube), ('rvi', rvi_cube)):
        selection = _build_collection_graph(collection=collection,
                                            label=label,
                                            callable=cube,
                                            reject=selection)
    return selection
+ + :param connection: openEO connection + :param collection: One of the supported collection (S2_COLLECTIONS) + :param polygon: Area of interest + :param date: Time of interest + :return: + """ + collections = None + for option in [ + { + 'label': 'ndvi', + 'function': _load_ndvi(connection=connection, polygon=polygon, date=date) + + }, + { + 'label': 'fapar', + 'function': _load_biopar(polygon=polygon, date=date, biopar='FAPAR') + }, + { + 'label': 'lai', + 'function': _load_biopar(polygon=polygon, date=date, biopar='LAI') + }, + { + 'label': 'fcover', + 'function': _load_biopar(polygon=polygon, date=date, biopar='FCOVER') + }, + { + 'label': 'evi', + 'function': _load_evi(connection=connection, polygon=polygon, date=date) + }, + { + 'label': 'ccc', + 'function': _load_biopar(polygon=polygon, date=date, biopar='CCC') + }, + { + 'label': 'cwc', + 'function': _load_biopar(polygon=polygon, date=date, biopar='CWC') + } + ]: + collections = _build_collection_graph(collection=collection, + label=option['label'], + callable=option['function'], + reject=collections) + return collections \ No newline at end of file diff --git a/openeo_udp/fusets_mogpr/requirements.txt b/openeo_udp/fusets_mogpr/requirements.txt new file mode 100644 index 00000000..fddacbd7 --- /dev/null +++ b/openeo_udp/fusets_mogpr/requirements.txt @@ -0,0 +1 @@ +fusets>=2.0.1 \ No newline at end of file diff --git a/openeo_udp/fusets_mogpr/set_path.py b/openeo_udp/fusets_mogpr/set_path.py new file mode 100644 index 00000000..7b06dc3a --- /dev/null +++ b/openeo_udp/fusets_mogpr/set_path.py @@ -0,0 +1,82 @@ +#%% + +import os +import sys +import zipfile +import requests +import tempfile +import shutil +import functools + +from openeo.udf import inspect + +def download_file(url, path): + """ + Downloads a file from the given URL to the specified path. 
@functools.lru_cache(maxsize=5)
def setup_dependencies(dependencies_url):
    """
    Download a zip of dependencies, unpack it, move its top-level folders into the current
    working directory and add those folders to sys.path.

    Results are memoized per URL through functools.lru_cache, so calling this again with the
    same URL does not repeat the download.

    :param dependencies_url: URL of the zip archive that holds the dependencies
    :return: None
    :raises FileExistsError: propagated from move_top_level_folder_to_destination when a
        top-level folder of the archive already exists in the working directory
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        # Step 1: Download the zip file
        zip_path = os.path.join(temp_dir, "temp.zip")
        download_file(dependencies_url, zip_path)

        inspect(message="Extract dependencies to temp")
        # Step 2: Extract the zip file to the temporary directory
        extracted_dir = extract_zip_to_temp(zip_path, temp_dir)

        # Step 3: Move the top-level folders to the destination.
        # The move helper returns nothing, so remember the folder names up front; otherwise
        # there is no path to register afterwards (previously None ended up on sys.path).
        destination_dir = os.getcwd()  # Current working directory
        top_level_folders = [
            entry for entry in os.listdir(extracted_dir)
            if os.path.isdir(os.path.join(extracted_dir, entry))
        ]
        inspect(message="Move top-level folder to destination")
        move_top_level_folder_to_destination(extracted_dir, destination_dir)

        # Step 4: Add every moved folder to sys.path
        for folder_name in top_level_folders:
            add_to_sys_path(os.path.join(destination_dir, folder_name))
        inspect(message="Added to the sys path")