@@ -14,8 +14,8 @@
from zipfile import ZipFile

import geopandas as gpd
import mgrs
import numpy as np
import pandas as pd
import rasterio
import rioxarray
import xarray as xr
@@ -31,12 +31,17 @@
from shapely.geometry import Polygon, box

# LULC data location
LULC_TILE_ROOT = os.environ.get("LULC_SHARED_DATA_ROOT", "/auxdata/lulc/lc2021/")
LULC_TILE_SHAPEFILE = os.environ.get("LULC_TILE_SHAPEFILE", "/auxdata/lulc/tiles.shp")
LAND_POLYGON_PATH = os.environ.get("LAND_POLYGON_PATH", "/auxdata/general/land_polygons.shp")
LULC_TILE_ROOT = os.environ.get("LULC_SHARED_DATA_ROOT", "/auxdata/lulc/")
LAND_POLYGON_PATH = os.environ.get(
"LAND_POLYGON_PATH", "/auxdata/general/land_polygons.shp"
)

# Cloud mask
S2_CLOUD_MASK = [3, 8, 9] # Cloud shadows, Cloud medium probability, Cloud high probability
S2_CLOUD_MASK = [
3,
8,
9,
] # Cloud shadows, Cloud medium probability, Cloud high probability
HLS_CLOUD_MASK = ["0", "0", "0", "0", "1", "1", "1", "1"]
CLOUD_VALUE = 999
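As a quick illustration of how these constants combine (a sketch with illustrative names like data, not code from this diff): masking against S2_CLOUD_MASK is an element-wise membership test on the SCL band, with flagged pixels set to CLOUD_VALUE.

scl = ids[fmask_index].values  # Sentinel-2 SCL band as a numpy array
cloudy = np.isin(scl, S2_CLOUD_MASK)  # True for classes 3, 8 and 9
data = np.where(cloudy, CLOUD_VALUE, data)  # flagged pixels become 999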

@@ -47,7 +52,9 @@

# Permanent water mask
S2_PERMANENT_WATER_MASK = [6] # Permanent water using S2 SCL
S2_LULC_PERMANENT_WATER_MASK = [1] # https://www.arcgis.com/home/item.html?id=cfcb7609de5f478eb7666240902d4d3d
S2_LULC_PERMANENT_WATER_MASK = [
1
] # https://www.arcgis.com/home/item.html?id=cfcb7609de5f478eb7666240902d4d3d
S2_LULC_PERMANENT_WATER_BAND_INDEX = 0
HLS_PERMANENT_WATER_MASK = ["0", "0", "1", "0", "0", "0", "0", "0"]
PERMANENT_WATER_VALUE = 997
@@ -80,13 +87,15 @@ def resize_image(image, input_image=None, bbox=None):
def __resize_image(input_image):
if input_image:
get_bbox_cmd = f"""gdalinfo -json {input_image} | jq '(.cornerCoordinates.upperLeft[0] | tostring) + " " + (.cornerCoordinates.upperLeft[1] | tostring ) + " " + (.cornerCoordinates.lowerRight[0] | tostring) + " " + (.cornerCoordinates.lowerRight[1] | tostring )'"""
input_image_bbox = check_output([get_bbox_cmd], shell=True, text=True).replace('"', "").replace("\n", "")
input_image_bbox = (
check_output([get_bbox_cmd], shell=True, text=True)
.replace('"', "")
.replace("\n", "")
)
os.system(f"cp {image} {image}-temp.tif")
command = f"gdal_translate -projwin {input_image_bbox} -of GTiff {image}-temp.tif {image}"
elif bbox:
command = (
f"gdal_translate -projwin {bbox[0]} {bbox[3]} {bbox[2]} {bbox[1]} -of GTiff {image}-temp.tif {image}"
)
command = f"gdal_translate -projwin {bbox[0]} {bbox[3]} {bbox[2]} {bbox[1]} -of GTiff {image}-temp.tif {image}"

os.system(command)
os.system(f"rm {image}-temp.tif")
@@ -118,8 +127,16 @@ def read_json_with_retries(filepath, max_retries=5, base_delay=0.5):
time.sleep(base_delay * (2 ** (attempt - 1)))
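With the defaults shown above (max_retries=5, base_delay=0.5), this backoff sleeps for 0.5, 1, 2, 4 and 8 seconds on successive retries:

delays = [0.5 * 2 ** (a - 1) for a in range(1, 6)]  # [0.5, 1.0, 2.0, 4.0, 8.0]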


def hls_masking(xds, ids, fmask_index=6, mask_bits=["0", "0", "0", "0", "1", "1", "1", "1"], mask_to_value=999):
logger.info(f">>>>>>>>>>>> Performing mask for scenes {mask_bits} with masked values set to {mask_to_value}")
def hls_masking(
xds,
ids,
fmask_index=6,
mask_bits=["0", "0", "0", "0", "1", "1", "1", "1"],
mask_to_value=999,
):
logger.info(
f">>>>>>>>>>>> Performing mask for scenes {mask_bits} with masked values set to {mask_to_value}"
)
qVals = list(np.unique(ids[fmask_index].astype(int)))
goodQuality = []
for v in qVals:
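The body of this loop is collapsed in the diff view. A plausible reading of the 8-entry mask_bits default (an assumption about the hidden lines, not the verbatim code): each entry lines up with one bit of the Fmask value rendered MSB-first, and a value counts as good quality only when every position flagged "1" is unset. That layout would match HLS Fmask semantics (bits 3..0 = cloud shadow, adjacent to cloud, cloud, cirrus; bit 5 = water) for both HLS_CLOUD_MASK and HLS_PERMANENT_WATER_MASK above.

# Hypothetical sketch of the collapsed loop body:
bits = np.binary_repr(v, width=8)  # e.g. 68 -> "01000100"
if all(b == "0" for b, flag in zip(bits, mask_bits) if flag == "1"):
    goodQuality.append(v)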
@@ -160,7 +177,9 @@ def s2_masking(xds, ids, fmask_index, scene_classification, mask_to_value):
logger.info(
f">>>>>>>>>>>> Performing mask for scenes {scene_classification} with masked values set to {mask_to_value}"
)
mask = ids[fmask_index].values == scene_classification[0] # initalise the mask array
mask = (
ids[fmask_index].values == scene_classification[0]
) # initialise the mask array
for scene_class in range(1, len(scene_classification)):
mask = np.logical_or(
mask, ids[fmask_index].values == scene_classification[scene_class]
@@ -182,12 +201,19 @@ def mask_and_set_to(xds, ids, masking_type, inference_dict, i=0):
mask_band = masking_config.get("band", None)
if mask_band is not None:
encoding_type = masking_config.get("encoding", None)
if encoding_type is not None and "bands" in inference_dict["model_input_data_spec"][i]:
model_input_data_spec_bands = inference_dict["model_input_data_spec"][i]["bands"]
if (
encoding_type is not None
and "bands" in inference_dict["model_input_data_spec"][i]
):
model_input_data_spec_bands = inference_dict["model_input_data_spec"][
i
]["bands"]
if encoding_type == "sentinel2_lulc":
mask_band_index = S2_LULC_PERMANENT_WATER_BAND_INDEX
else:
mask_band_index = search_band_dict(model_input_data_spec_bands, "band_name", f"{mask_band}")
mask_band_index = search_band_dict(
model_input_data_spec_bands, "band_name", f"{mask_band}"
)
if mask_band_index is not None:
mask_band_index = mask_band_index.get("index")
else:
@@ -278,7 +304,9 @@ def mask_from_url(mask_url: str, xds: object, buffer_size_m: float = 100):
ccgdf["geometry"] = ccgdf.buffer(buffer_size_m)
ccgdf.to_crs(crs=original_crs, inplace=True)

shapes = ((geom, value) for geom, value in zip(ccgdf.geometry, [90] * len(ccgdf.geometry)))
shapes = (
(geom, value) for geom, value in zip(ccgdf.geometry, [90] * len(ccgdf.geometry))
)

features.rasterize(shapes=shapes, fill=0, out=xds, transform=xds.rio.transform())
return xds
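The buffer above is in metres, which only behaves as intended if ccgdf was reprojected to a projected CRS in the collapsed lines (buffering in a geographic CRS would operate in degrees). A typical pattern for that hidden step, assuming geopandas >= 0.9 (a sketch, not the verbatim code):

original_crs = ccgdf.crs
ccgdf = ccgdf.to_crs(ccgdf.estimate_utm_crs())  # metric CRS so buffer_size_m is in metres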
@@ -305,11 +333,19 @@ def _make_rgb_single_file(model_input_original_image: str, i: int = 0):
nodata_value = xds.attrs.get("_FillValue", -9999)
xds.rio.write_nodata(nodata_value, inplace=True)

model_input_data_spec_bands = inference_dict["model_input_data_spec"][i]["bands"]
model_input_data_spec_bands = inference_dict["model_input_data_spec"][i][
"bands"
]
logger.info(f"model_input_data_spec_bands: {model_input_data_spec_bands}")
red_band_index = int(search_band_dict(model_input_data_spec_bands, "RGB_band", "R").get("index"))
green_band_index = int(search_band_dict(model_input_data_spec_bands, "RGB_band", "G").get("index"))
blue_band_index = int(search_band_dict(model_input_data_spec_bands, "RGB_band", "B").get("index"))
red_band_index = int(
search_band_dict(model_input_data_spec_bands, "RGB_band", "R").get("index")
)
green_band_index = int(
search_band_dict(model_input_data_spec_bands, "RGB_band", "G").get("index")
)
blue_band_index = int(
search_band_dict(model_input_data_spec_bands, "RGB_band", "B").get("index")
)
rgb_indexes = [red_band_index, green_band_index, blue_band_index]

rgb_xds = xds[rgb_indexes, :, :]
@@ -329,15 +365,6 @@ def _make_rgb_single_file(model_input_original_image: str, i: int = 0):
else:
return _make_rgb_single_file(model_input_original_image)


def get_tiles_list(bbox):
ldf = gpd.read_file(LULC_TILE_SHAPEFILE)
polygon = box(*bbox)
ldf = ldf.clip(polygon)
tiles_list = list(ldf["tile"].values)
return tiles_list


def get_lulc_tile_for_input(input_image_path):
"""
Find the file path for the LULC tile corresponding to a given input.
@@ -354,34 +381,88 @@ def get_lulc_tile_for_input(input_image_path):
# Setup output directory
outputs_dir = "/".join(input_image_path.split("/")[:-1])
Path(outputs_dir).mkdir(parents=True, exist_ok=True)
# Read LULC shapefile
gdf = gpd.read_file(LULC_TILE_SHAPEFILE)
# Get input image bbox
bbox = rdo.get_raster_bbox(input_image_path)
tile_list = get_tiles_list([bbox[0], bbox[1], bbox[2], bbox[3]])
# Prepare dataframe
df_tmp = pd.DataFrame({"file": [input_image_path]})
df_tmp["geometry"] = box(bbox.bottom, bbox.left, bbox.right, bbox.top)
gdf_bbox = gpd.GeoDataFrame(df_tmp, geometry="geometry", crs="EPSG:4326")
# Join lulc shapefile dataframe with model input image dataframe
# to create a dataframe of lulc tile paths, model input image path
# and model input bbox.
join = gpd.sjoin(left_df=gdf, right_df=gdf_bbox, how="right", predicate="intersects")
# Iterate through the list of relavent tiles, then reproject as required.
for j in range(len(tile_list)):
tile_j_value = join[join["tile"] == tile_list[j]]["tile"].values[0]
file_j_value = join[join["tile"] == tile_list[j]]["file"].values[0]
if tile_j_value == "nan" or not join["tile"].values[j]:
continue

m = mgrs.MGRS()
grid_zones = set()

# Sample points across bbox to find all intersecting grid zones
lat_range = bbox.top - bbox.bottom
lon_range = bbox.right - bbox.left
lat_samples = max(3, min(10, int(lat_range / 0.5)))
lon_samples = max(3, min(10, int(lon_range / 0.5)))

for lat in np.linspace(bbox.bottom, bbox.top, lat_samples):
for lon in np.linspace(bbox.left, bbox.right, lon_samples):
try:
# Get MGRS code for this point
mgrs_code = m.toMGRS(lat, lon, MGRSPrecision=0)
# Extract grid zone (first 3 characters: zone number + latitude band)
# Example: "37MBT12345678" -> "37M"
grid_zone = mgrs_code[:3]
grid_zones.add(grid_zone)
except Exception:
# Skip invalid coordinates (e.g., near poles)
continue
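# Note: with MGRSPrecision=0 the mgrs library typically returns just the
# zone, latitude band and 100 km square ID (e.g. "37MBT"), so the [:3]
# slice yields the grid zone either way. The 0.5-degree step above clamps
# the sampling grid to between 3x3 and 10x10 points.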

if not grid_zones:
logger.error("Failed to calculate MGRS grid zones for input image")
return None

# Find matching LULC tiles in the data directory
# Expected naming: [GridZone]_[StartDate]-[EndDate].tiff
# Example: 37N_20240101-20241231.tiff
tile_paths = []
missing_zones = []

# Get all available LULC files
available_files = glob.glob(os.path.join(LULC_TILE_ROOT, "*.tif"))
available_files.extend(glob.glob(os.path.join(LULC_TILE_ROOT, "*.tiff")))

for grid_zone in sorted(grid_zones):
found = False

# Find file matching this grid zone
for filepath in available_files:
filename = os.path.basename(filepath)

# Check if filename starts with the grid zone
# Handles: 37N_20240101-20241231.tiff, 37N.tif, etc.
if filename.startswith(grid_zone):
tile_paths.append(filepath)
found = True
logger.info(f"Found LULC tile for zone {grid_zone}: {filename}")
break

if not found:
missing_zones.append(grid_zone)

if missing_zones:
logger.warning(
f"Missing LULC tiles for {len(missing_zones)} grid zones: {missing_zones}. "
f"Download from: "
f"https://lulctimeseries.blob.core.windows.net/lulctimeseriesv003/lc2024/"
)

if not tile_paths:
logger.error(
f"No LULC tiles found for calculated grid zones: {sorted(grid_zones)}. "
f"Please download required tiles to {LULC_TILE_ROOT}"
)
return None

for j, tile_path in enumerate(tile_paths):
tmp_unique_id = str(uuid4())
# Match LULC tile to model input image
output_tmp_path = rdo.match_raster_to_target(
input_file=LULC_TILE_ROOT + tile_j_value,
target_file=file_j_value,
input_file=tile_path,
target_file=input_image_path,
output_suffix=f"_{tmp_unique_id}_padded",
)
os.system(f"mv {output_tmp_path} {outputs_dir}/lulc_tile{str(j)}.tif")
lulc_tiles = glob.glob(outputs_dir + "/lulc_tile*")
lulc_file_path = ""
if len(lulc_tiles) > 1:
print("mosaic")
lulc_file_path = rdo.create_mosaic(
@@ -400,7 +481,11 @@ def get_lulc_tile_for_input(input_image_path):


def search_band_dict(bands_list, search_key, search_value):
band_dict_match = (band_dict for band_dict in bands_list if band_dict.get(f"{search_key}") == f"{search_value}")
band_dict_match = (
band_dict
for band_dict in bands_list
if band_dict.get(f"{search_key}") == f"{search_value}"
)
band_dict = next(band_dict_match, None)
return band_dict
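For reference, a usage sketch with a hypothetical band spec (the keys mirror those used by _make_rgb_single_file and mask_and_set_to above):

bands = [
    {"band_name": "B04", "RGB_band": "R", "index": 0},
    {"band_name": "B03", "RGB_band": "G", "index": 1},
]
search_band_dict(bands, "RGB_band", "R")  # -> {"band_name": "B04", "RGB_band": "R", "index": 0}
search_band_dict(bands, "band_name", "B08")  # -> None (no match)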

@@ -423,10 +508,21 @@ def zip_inference_data(task_dir):
zip_location = f"{task_dir}/archive.zip"

directory = Path(task_dir)
geoserver_supported_extensions = ("*.tif", "*.gpkg", "*.shp", "*.shx", "*.dbf", "*.cpg", "*.prj", "*.nc")
geoserver_supported_extensions = (
"*.tif",
"*.gpkg",
"*.shp",
"*.shx",
"*.dbf",
"*.cpg",
"*.prj",
"*.nc",
)

with ZipFile(zip_location, mode="w") as archive:
for file_path in chain.from_iterable(directory.glob(ext) for ext in geoserver_supported_extensions):
for file_path in chain.from_iterable(
directory.glob(ext) for ext in geoserver_supported_extensions
):
archive.write(file_path, arcname=file_path.relative_to(directory))
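One behavioural note on the loop above (an observation, not a change made by this diff): Path.glob with these patterns only matches files directly inside task_dir. If inference outputs can land in subdirectories, rglob would be needed instead:

for file_path in chain.from_iterable(
    directory.rglob(ext) for ext in geoserver_supported_extensions
):
    archive.write(file_path, arcname=file_path.relative_to(directory))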


@@ -473,12 +569,18 @@ def regularize_by_technique(
threshold=0,
unique_attribute_value=False,
)
regularization_technique_to_run = REGULARIZATION_TECHNIQUES.get(technique, adaptive_regularization)
regularization_technique_to_run = REGULARIZATION_TECHNIQUES.get(
technique, adaptive_regularization
)

regularized_gdf = regularization_technique_to_run(building_polygons=rasterized_vector, **kwargs)
regularized_gdf = regularization_technique_to_run(
building_polygons=rasterized_vector, **kwargs
)

if attribute_name not in regularized_gdf.columns:
logger.warning(f" Regularized polgon lacks the attribute name {attribute_name}. Adding it back")
logger.warning(
f" Regularized polgon lacks the attribute name {attribute_name}. Adding it back"
)
regularized_gdf[attribute_name] = rasterized_vector[attribute_name]

return regularized_gdf
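Because REGULARIZATION_TECHNIQUES.get silently falls back to adaptive_regularization, a misspelled technique name is easy to miss. A small guard along these lines (a suggestion, not part of the diff) would surface it:

if technique not in REGULARIZATION_TECHNIQUES:
    logger.warning(
        f"Unknown regularization technique '{technique}'; "
        "falling back to adaptive_regularization"
    )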
@@ -498,7 +600,10 @@ def geojson_to_tiff(

# Rasterize using the existing raster's shape and transform
rasterized_data = features.rasterize(
((geom, value) for geom, value in zip(geodataframe.geometry, geodataframe[attribute_name])),
(
(geom, value)
for geom, value in zip(geodataframe.geometry, geodataframe[attribute_name])
),
# [(geom, val) for geom, val in zip(vector.geometry, vector["id"])],
out_shape=raster.shape,
transform=raster.transform,
1 change: 1 addition & 0 deletions pipelines/requirements.txt
@@ -28,6 +28,7 @@ terrakit==0.1.3
tqdm
wget
xarray
mgrs==1.5.4