Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .dvc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/config.local
/tmp
/cache
4 changes: 4 additions & 0 deletions .dvc/config
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[core]
remote = storage
['remote "storage"']
url = gdrive://1odHzrWi6yJ9tbwym_r4u-obxDD8gubEJ
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/ProbaV_UTM_LC100_biome_clusters_V3_global.tif
tmp_unzip_path/
Binary file not shown.
4 changes: 4 additions & 0 deletions ProbaV_UTM_LC100_biome_clusters_V3_global.tif.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
outs:
- md5: d845c3c867e5210ca4f1bcb7449d9d08
size: 200695
path: ProbaV_UTM_LC100_biome_clusters_V3_global.tif
31 changes: 31 additions & 0 deletions dvc.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
schema: '2.0'
stages:
clean:
cmd: rm -rf tmp_unzip_path
extract:
cmd:
- dvc pull
- python scripts/extract.py --url https://zenodo.org/record/5848610/files/biome_cluster_shapefile.zip?download=1
deps:
- path: scripts/extract.py
md5: d830ef0dbfdf852086fdadc71c09b79d
size: 866
transform:
cmd:
- python scripts/transform.py
deps:
- path: scripts/transform.py
md5: 970803f50e773433cc3b5332de572bdf
size: 1872
- path: tmp_unzip_path
md5: 0bad863a229679b7c6715bb869e7dbef.dir
size: 43367177
nfiles: 8
load:
cmd:
- dvc push
deps:
- path: tmp_unzip_path/biome_cluster_shapefile
md5: 55b51427163151378b68f034d49fbf9c.dir
size: 43367177
nfiles: 8
21 changes: 21 additions & 0 deletions dvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
stages:
extract:
cmd:
- dvc pull
- python scripts/extract.py --url https://zenodo.org/record/5848610/files/biome_cluster_shapefile.zip?download=1
deps:
- scripts/extract.py
transform:
cmd:
- python scripts/transform.py
deps:
- tmp_unzip_path
- scripts/transform.py
load:
cmd:
- dvc push
deps:
- tmp_unzip_path/biome_cluster_shapefile



3 changes: 3 additions & 0 deletions ref_data/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/WaterScarcity_GAEZ.tif
tmp_unzip_path/
Data/Soil/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif
4 changes: 4 additions & 0 deletions ref_data/WaterScarcity_GAEZ.tif.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
outs:
- md5: 82facb576f7ae1bb98ae366d7001147f
size: 482424
path: WaterScarcity_GAEZ.tif
33 changes: 33 additions & 0 deletions scripts/extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import argparse
import requests
import zipfile
import io
import os

# Command-line interface: the script accepts a single optional --url argument.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--url",
    help="url to download data",
)
# Expose the parsed options as a plain dict so they can be looked up by key.
cli_namespace = parser.parse_args()
args = vars(cli_namespace)

def ensure_url_is_accessible(URL, timeout=(10, 60)):
    """Download ``URL`` and, when the response is OK, unpack its body.

    Args:
        URL: Address of the zip archive to download.
        timeout: Value passed to ``requests.get`` as ``timeout``; by default
            a ``(connect, read)`` pair in seconds.

    Returns:
        None. A successful response body is handed to
        ``download_and_unzip_files``; a non-OK response only prints a message.

    Raises:
        requests.RequestException: Propagated unchanged from ``requests.get``
            (including a timeout), so a failed download is not silently
            reported as success.
    """
    # Without an explicit timeout requests waits indefinitely on a stalled
    # server, which would hang the whole pipeline stage.
    r = requests.get(URL, timeout=timeout)
    if not r.ok:
        print("Download link expired. Please update download link")
    else:
        download_and_unzip_files(r.content)


def download_and_unzip_files(content):
    """Extract an in-memory zip archive into ``tmp_unzip_path``.

    The target directory is ``tmp_unzip_path`` under the current working
    directory and is created when missing.

    Args:
        content: Raw bytes of a zip archive.

    Returns:
        None. Errors raised while opening or extracting the archive are
        printed rather than raised; on success a confirmation is printed.
    """
    target_parent_dir = os.path.join(os.getcwd(), r"tmp_unzip_path")
    # exist_ok avoids the check-then-create race of exists() followed by mkdir().
    os.makedirs(target_parent_dir, exist_ok=True)
    try:
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(io.BytesIO(content)) as z:
            z.extractall(target_parent_dir)
    except Exception as e:
        # Best-effort by design: report the problem and let the caller continue.
        print(e)
    else:
        print("unzipped successfully")


# Script entry: download and unpack the archive named by the --url option.
download_url = args.get("url")
ensure_url_is_accessible(download_url)
62 changes: 62 additions & 0 deletions scripts/transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Rasterise a shapefile to the same projection & pixel resolution as a reference image.
from osgeo import ogr, gdal
import subprocess
import os

# Have OGR report failures as Python exceptions.
ogr.UseExceptions()

# Settings used when creating and filling the output raster.
gdalformat = "GTiff"
datatype = gdal.GDT_Byte
burn_val = 1

# File locations; input paths are resolved against the working directory.
current_directory = os.getcwd()
ref_image = os.path.join(current_directory, r"ref_data/WaterScarcity_GAEZ.tif")
input_vector = os.path.join(
    current_directory,
    r"tmp_unzip_path/biome_cluster_shapefile/ProbaV_UTM_LC100_biome_clusters_V3_global.shp",
)
output_image = "ProbaV_UTM_LC100_biome_clusters_V3_global.tif"


def read_files(input_vector, ref_image):
    """Open the shapefile and reference raster, then rasterise the shapefile.

    Each failure is reported with a printed message and the function returns
    without rasterising. The output path comes from the module-level
    ``output_image``.

    Args:
        input_vector: Path to the shapefile to rasterise.
        ref_image: Path to the raster opened as the reference image.

    Returns:
        None.
    """
    if not os.path.exists(input_vector):
        print("Input vector does not exist")
        return

    image = gdal.Open(ref_image, gdal.GA_ReadOnly)
    # Identity test: None is a singleton and "==" could be overridden.
    if image is None:
        print("Unable to read the reference data file")
        return

    try:
        shapefile = ogr.Open(input_vector)
        if shapefile:
            shapefile_layer = shapefile.GetLayer()
            convert_shp_to_tiff(image, output_image, shapefile_layer)
        else:
            print("Couldn't load shapefile")
    except Exception as e:
        # Best-effort by design: report any OGR/GDAL failure and return.
        print(e)


def convert_shp_to_tiff(image, output_image, Shapefile_layer):
    """Burn a vector layer into a new single-band raster matching ``image``.

    The output raster takes its width, height, projection and geotransform
    from ``image``. It is created with the module-level ``gdalformat`` driver
    and ``datatype``, uses 0 as its no-data value, and features are burned in
    with the module-level ``burn_val``.

    Args:
        image: Open GDAL dataset used as the reference grid.
        output_image: Path of the raster file to create.
        Shapefile_layer: OGR layer whose features are rasterised.

    Returns:
        None.

    Raises:
        RuntimeError: If the output raster cannot be created or the
            rasterise call reports an error code.
    """
    print("Rasterising shapefile...")
    Output = gdal.GetDriverByName(gdalformat).Create(
        output_image,
        image.RasterXSize,
        image.RasterYSize,
        1,
        datatype,
        options=["COMPRESS=DEFLATE"],
    )
    # Create() returns None on failure when GDAL exceptions are not enabled;
    # fail with a clear message instead of an AttributeError further down.
    if Output is None:
        raise RuntimeError(f"Unable to create output raster: {output_image}")

    Output.SetProjection(image.GetProjectionRef())
    Output.SetGeoTransform(image.GetGeoTransform())

    Band = Output.GetRasterBand(1)
    Band.SetNoDataValue(0)
    err = gdal.RasterizeLayer(Output, [1], Shapefile_layer, burn_values=[burn_val])

    # Release the band and dataset references so GDAL's Python bindings
    # flush and close the output file.
    Band = None
    Output = None

    if err != gdal.CE_None:
        raise RuntimeError(f"Rasterising failed with GDAL error code {err}")


# Script entry: rasterise the configured shapefile against the reference image.
read_files(input_vector=input_vector, ref_image=ref_image)