diff --git a/.dvc/.gitignore b/.dvc/.gitignore new file mode 100644 index 000000000..528f30c71 --- /dev/null +++ b/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/.dvc/config b/.dvc/config new file mode 100644 index 000000000..023ff9857 --- /dev/null +++ b/.dvc/config @@ -0,0 +1,4 @@ +[core] + remote = storage +['remote "storage"'] + url = gdrive://1odHzrWi6yJ9tbwym_r4u-obxDD8gubEJ diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..15ec4f5a4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/ProbaV_UTM_LC100_biome_clusters_V3_global.tif +tmp_unzip_path/ \ No newline at end of file diff --git a/Data/Soil/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif b/Data/Soil/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif new file mode 100644 index 000000000..892d4a2d9 Binary files /dev/null and b/Data/Soil/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif differ diff --git a/ProbaV_UTM_LC100_biome_clusters_V3_global.tif.dvc b/ProbaV_UTM_LC100_biome_clusters_V3_global.tif.dvc new file mode 100644 index 000000000..ad684da8c --- /dev/null +++ b/ProbaV_UTM_LC100_biome_clusters_V3_global.tif.dvc @@ -0,0 +1,4 @@ +outs: +- md5: d845c3c867e5210ca4f1bcb7449d9d08 + size: 200695 + path: ProbaV_UTM_LC100_biome_clusters_V3_global.tif diff --git a/dvc.lock b/dvc.lock new file mode 100644 index 000000000..f953831ad --- /dev/null +++ b/dvc.lock @@ -0,0 +1,31 @@ +schema: '2.0' +stages: + clean: + cmd: rm -rf tmp_unzip_path + extract: + cmd: + - dvc pull + - python scripts/extract.py --url https://zenodo.org/record/5848610/files/biome_cluster_shapefile.zip?download=1 + deps: + - path: scripts/extract.py + md5: d830ef0dbfdf852086fdadc71c09b79d + size: 866 + transform: + cmd: + - python scripts/transform.py + deps: + - path: scripts/transform.py + md5: 970803f50e773433cc3b5332de572bdf + size: 1872 + - path: tmp_unzip_path + md5: 0bad863a229679b7c6715bb869e7dbef.dir + size: 43367177 + nfiles: 8 + load: + cmd: + - dvc push + deps: + - path: 
def ensure_url_is_accessible(URL, timeout=60):
    """Download the zip archive at *URL* and unpack it into ``tmp_unzip_path``.

    Args:
        URL: Address of the archive to fetch. A missing value (``None`` or an
            empty string, e.g. when ``--url`` was not supplied) is reported
            and treated as a failure instead of being handed to ``requests``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely and stall the
            pipeline stage.

    Returns:
        True when the archive was downloaded and unpacked, False otherwise.
    """
    if not URL:
        print("No download URL provided. Pass one with --url")
        return False

    try:
        r = requests.get(URL, timeout=timeout)
    except requests.RequestException as e:
        # DNS failures, refused connections, timeouts, malformed URLs, ...
        print(e)
        return False

    if not r.ok:
        print("Download link expired. Please update download link")
        return False

    return download_and_unzip_files(r.content)


def download_and_unzip_files(content):
    """Unpack an in-memory zip archive into ``<cwd>/tmp_unzip_path``.

    Args:
        content: Raw bytes of a zip archive.

    Returns:
        True when every member was extracted, False when extraction failed
        (the error is printed).
    """
    target_parent_dir = os.path.join(os.getcwd(), r"tmp_unzip_path")
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(target_parent_dir, exist_ok=True)

    try:
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(io.BytesIO(content)) as z:
            z.extractall(target_parent_dir)
    except Exception as e:
        # Script boundary: extraction can fail in many ways (corrupt archive,
        # unsupported compression, encrypted members, disk errors). Report
        # the error and signal failure rather than crashing.
        print(e)
        return False

    print("unzipped successfully")
    return True
def read_files(input_vector, ref_image):
    """Open the shapefile and the reference raster, then rasterise the shapefile.

    Each failure (missing shapefile, unreadable reference raster, shapefile
    that cannot be opened, error while rasterising) is printed and the
    function returns without producing output.

    Args:
        input_vector: Path of the shapefile to rasterise.
        ref_image: Path of the reference raster whose size, projection and
            geotransform the output copies.
    """
    if not os.path.exists(input_vector):
        print("Input vector does not exist")
        return

    image = gdal.Open(ref_image, gdal.GA_ReadOnly)
    if image is None:
        print("Unable to read the reference data file")
        return

    try:
        # Keep the data source bound to a local for the whole call: the layer
        # is obtained from it and is used inside convert_shp_to_tiff.
        Shapefile = ogr.Open(input_vector)
        if not Shapefile:
            print("Couldn't load shapefile")
            return
        Shapefile_layer = Shapefile.GetLayer()
        convert_shp_to_tiff(image, output_image, Shapefile_layer)
    except Exception as e:
        print(e)


def convert_shp_to_tiff(image, output_image, Shapefile_layer):
    """Burn *Shapefile_layer* into a new single-band raster aligned to *image*.

    The output has the same pixel dimensions, projection and geotransform as
    *image*, uses the module-level ``gdalformat`` driver and ``datatype``,
    is DEFLATE-compressed, has 0 as its nodata value and ``burn_val`` burned
    where the layer has features.

    Args:
        image: Opened reference raster dataset.
        output_image: Path of the raster file to create.
        Shapefile_layer: Vector layer to rasterise.

    Raises:
        RuntimeError: If the output raster cannot be created or the
            rasterisation reports a non-zero status.
    """
    print("Rasterising shapefile...")
    Output = gdal.GetDriverByName(gdalformat).Create(
        output_image,
        image.RasterXSize,
        image.RasterYSize,
        1,
        datatype,
        options=["COMPRESS=DEFLATE"],
    )
    if Output is None:
        # Without this check the failure shows up as an AttributeError on None.
        raise RuntimeError(
            "Could not create output raster {!r}".format(output_image)
        )

    Output.SetProjection(image.GetProjectionRef())
    Output.SetGeoTransform(image.GetGeoTransform())

    Band = Output.GetRasterBand(1)
    Band.SetNoDataValue(0)
    status = gdal.RasterizeLayer(
        Output, [1], Shapefile_layer, burn_values=[burn_val]
    )

    # Drop the references so GDAL flushes and closes the output file.
    Band = None
    Output = None

    if status != 0:
        raise RuntimeError(
            "Rasterising failed with GDAL status {}".format(status)
        )