diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst
index 72c55139c05..eefb9849f85 100644
--- a/docs/api/datasets.rst
+++ b/docs/api/datasets.rst
@@ -349,6 +349,11 @@ Million-AID
 
 .. autoclass:: MillionAID
 
+MMEarth
+^^^^^^^^
+
+.. autoclass:: MMEarth
+
 NASA Marine Debris
 ^^^^^^^^^^^^^^^^^^
 
diff --git a/docs/api/datasets/non_geo_datasets.csv b/docs/api/datasets/non_geo_datasets.csv
index abdd41cc1f8..5608694036a 100644
--- a/docs/api/datasets/non_geo_datasets.csv
+++ b/docs/api/datasets/non_geo_datasets.csv
@@ -28,6 +28,7 @@ Dataset,Task,Source,License,# Samples,# Classes,Size (px),Resolution (m),Bands
 `LoveDA`_,S,Google Earth,"CC-BY-NC-SA-4.0","5,987",7,"1,024x1,024",0.3,RGB
 `MapInWild`_,S,"Sentinel-1/2, ESA WorldCover, NOAA VIIRS DNB","CC-BY-4.0",1018,1,1920x1920,10--463.83,"SAR, MSI, 2020_Map, avg_rad"
 `Million-AID`_,C,Google Earth,-,1M,51--73,,0.5--153,RGB
+`MMEarth`_,"C, S","Aster, Sentinel, ERA5","CC-BY-4.0","100K--1M",,"128x128 or 64x64",10,MSI
 `NASA Marine Debris`_,OD,PlanetScope,"Apache-2.0",707,1,256x256,3,RGB
 `OSCD`_,CD,Sentinel-2,"CC-BY-4.0",24,2,"40--1,180",60,MSI
 `PASTIS`_,I,Sentinel-1/2,"CC-BY-4.0","2,433",19,128x128xT,10,MSI
diff --git a/tests/data/mmearth/data.py b/tests/data/mmearth/data.py
new file mode 100644
index 00000000000..45961fa1cfd
--- /dev/null
+++ b/tests/data/mmearth/data.py
@@ -0,0 +1,252 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import json
+import os
+import shutil
+from copy import deepcopy
+from datetime import datetime, timedelta
+
+import h5py
+import numpy as np
+
+meta_dummy_dict = {
+    'S2_DATE': '2018-07-16',
+    'S2_type': 'l1c',
+    'CRS': 'EPSG:32721',
+    'lat': -14.499441524746077,
+    'lon': -56.98355999998649,
+}
+
+num_tiles = 10
+
+meta_id_strings = [str(i) for i in range(num_tiles)]
+
+modalities = {
+    'aster': {'bands': 2, 'dtype': np.int16},
+    'biome': {'bands': 14, 'dtype': np.uint8},
+    'canopy_height_eth': {'bands': 2, 'dtype': np.int8},
+    'dynamic_world': {'bands': 1, 'dtype': np.uint8},
+    'eco_region': {'bands': 846, 'dtype': np.uint16},
+    'era5': {'bands': 12, 'dtype': np.float32},
+    'esa_worldcover': {'bands': 1, 'dtype': np.uint8},
+    'sentinel1': {'bands': 8, 'dtype': np.float32},
+    'sentinel2': {'bands': 13, 'dtype': np.uint16},
+    'sentinel2_cloudmask': {'bands': 1, 'dtype': np.uint16},
+    'sentinel2_cloudprod': {'bands': 1, 'dtype': np.uint16},
+    'sentinel2_scl': {'bands': 1, 'dtype': np.uint16},
+}
+
+all_modality_bands = {
+    'sentinel2': [
+        'B1',
+        'B2',
+        'B3',
+        'B4',
+        'B5',
+        'B6',
+        'B7',
+        'B8A',
+        'B8',
+        'B9',
+        'B10',
+        'B11',
+        'B12',
+    ],
+    'sentinel2_cloudmask': ['QA60'],
+    'sentinel2_cloudprod': ['MSK_CLDPRB'],
+    'sentinel2_scl': ['SCL'],
+    'sentinel1_asc': ['VV', 'VH', 'HH', 'HV'],
+    'sentinel1_desc': ['VV', 'VH', 'HH', 'HV'],
+    'aster': ['b1', 'slope'],  # elevation and slope
+    'era5': [
+        'prev_temperature_2m',  # previous month avg temp
+        'prev_temperature_2m_min',  # previous month min temp
+        'prev_temperature_2m_max',  # previous month max temp
+        'prev_total_precipitation_sum',  # previous month total precip
+        'curr_temperature_2m',  # current month avg temp
+        'curr_temperature_2m_min',  # current month min temp
+        'curr_temperature_2m_max',  # current month max temp
+        'curr_total_precipitation_sum',  # current month total precip
+        '0_temperature_2m_mean',  # year avg temp
+        '1_temperature_2m_min_min',  # year min temp
+        '2_temperature_2m_max_max',  # year max temp
+        '3_total_precipitation_sum_sum',  # year total precip
+    ],
+    'dynamic_world': ['label'],
+    'canopy_height_eth': ['height', 'std'],
+    'lat': ['sin', 'cos'],
+    'lon': ['sin', 'cos'],
+    'biome': ['biome'],
+    'eco_region': ['eco_region'],
+    'month': ['sin_month', 'cos_month'],
+    'esa_worldcover': ['Map'],
+}
+
+
+def create_hd5f(
+    dataset_name: str, px_dim: tuple[int, int]
+) -> dict[str, dict[str, object]]:
+    """Create a dummy MMEarth HDF5 file and return the metadata of its tiles.
+
+    Args:
+        dataset_name: path of the file to write, without the ``.h5`` extension
+        px_dim: spatial size of every image tile in pixels, e.g. ``(128, 128)``
+
+    Returns:
+        mapping from tile index (as a string) to the metadata of that tile,
+        in the layout written to the ``*_tile_info.json`` files
+    """
+    # Create the HDF5 file
+    with h5py.File(f'{dataset_name}.h5', 'w') as h5file:
+        # Create datasets for each modality
+        for modality, modal_info in modalities.items():
+            bands = modal_info['bands']
+            if modality in ['era5', 'eco_region', 'biome']:
+                h5file.create_dataset(
+                    modality, (num_tiles, bands), dtype=modal_info['dtype']
+                )
+            else:
+                h5file.create_dataset(
+                    modality, (num_tiles, bands, *px_dim), dtype=modal_info['dtype']
+                )
+
+        # Create datasets for metadata
+        h5file.create_dataset('lat', (num_tiles, 2), dtype=np.float32)
+        h5file.create_dataset('lon', (num_tiles, 2), dtype=np.float32)
+        h5file.create_dataset('month', (num_tiles, 2), dtype=np.int32)
+        h5file.create_dataset(
+            'metadata',
+            (num_tiles,),
+            dtype=np.dtype([('meta_id', 'S10'), ('S2_type', 'S3')]),
+        )
+
+        # Populate the datasets with sample data
+        tile_info = {}
+        for i in range(num_tiles):
+            for modality, modal_info in modalities.items():
+                # Look up the band count per modality; reusing the stale
+                # values left over from the creation loop above would give
+                # every modality the shape and dtype of the last one.
+                bands = modal_info['bands']
+                if modality == 'dynamic_world':
+                    old_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+                    data = np.random.choice(old_values, size=(bands, *px_dim))
+                elif modality == 'esa_worldcover':
+                    old_values = [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100, 255]
+                    data = np.random.choice(old_values, size=(bands, *px_dim))
+                elif modality == 'era5':
+                    # only vector not image data
+                    data = np.random.random(size=(bands,))
+                elif modality in ['biome', 'eco_region']:
+                    data = np.random.randint(0, 2, size=(bands,))
+                elif modality == 'sentinel2':
+                    data = np.random.randint(0, 65535, size=(bands, *px_dim))
+                elif modality in ['aster', 'canopy_height_eth', 'sentinel1']:
+                    data = np.random.random(size=(bands, *px_dim))
+                elif modality in [
+                    'sentinel2_cloudmask',
+                    'sentinel2_cloudprod',
+                    'sentinel2_scl',
+                ]:
+                    data = np.random.randint(0, 2, size=(bands, *px_dim))
+
+                data = data.astype(modal_info['dtype'])
+                h5file[modality][i] = data
+
+            # add other data for lat, lon, month
+            h5file['lat'][i] = np.random.random(size=(2,))
+            h5file['lon'][i] = np.random.random(size=(2,))
+            h5file['month'][i] = np.random.random(size=(2,))
+
+            # Assign S2_type and store in metadata
+            S2_type = np.random.choice(['l1c', 'l2a']).encode('utf-8')
+            meta_id = str(i).encode('utf-8')
+            h5file['metadata'][i] = (meta_id, S2_type)
+
+            # Collect tile info for JSON file
+            tile_meta = meta_dummy_dict.copy()
+
+            tile_meta['S2_type'] = S2_type.decode('utf-8')
+            # era5 band names in the tile info carry the YYYYMM month instead of the
+            # `prev` and `curr` prefixes used in all_modality_bands
+            date_str = tile_meta['S2_DATE']
+            date_obj = datetime.strptime(date_str, '%Y-%m-%d')
+            curr_month_str = date_obj.strftime('%Y%m')
+            prev_month_obj = date_obj.replace(day=1) - timedelta(days=1)
+            prev_month_str = prev_month_obj.strftime('%Y%m')
+            curr_sample_bands = deepcopy(all_modality_bands)
+            curr_sample_bands['era5'] = [
+                b.replace('curr', curr_month_str).replace('prev', prev_month_str)
+                for b in curr_sample_bands['era5']
+            ]
+            tile_meta['BANDS'] = curr_sample_bands
+            tile_info[str(i)] = tile_meta
+
+    return tile_info
+
+
+extra_band_stats = {
+    'sentinel2_l1c': {'bands': 13, 'dtype': np.uint16},
+    'sentinel2_l2a': {'bands': 13, 'dtype': np.uint16},
+    'lat': {'bands': 2, 'dtype': np.float32},
+    'lon': {'bands': 2, 'dtype': np.float32},
+    'month': {'bands': 2, 'dtype': np.float32},
+}
+
+band_modalities = {
+    k: v
+    for k, v in {**modalities, **extra_band_stats}.items()
+    if k not in {'biome', 'eco_region', 'dynamic_world', 'esa_worldcover'}
+}
+
+# Create JSON files for band stats and splits
+# sentinel 2 has l1c and l2a but there is only a common sentinel 2 data entry
+band_stats = {
+    modality: {
+        'mean': np.random.random(size=(mod_info['bands'])).tolist(),
+        'std': np.random.random(size=(mod_info['bands'])).tolist(),
+        'min': np.random.random(size=(mod_info['bands'])).tolist(),
+        'max': np.random.random(size=(mod_info['bands'])).tolist(),
+    }
+    for modality, mod_info in band_modalities.items()
+}
+
+train_split = num_tiles
+val_split = 0
+test_split = 0
+
+splits = {
+    'train': list(range(train_split)),
+    'val': list(range(train_split, train_split + val_split)),
+    'test': list(range(train_split + val_split, num_tiles)),
+}
+
+if __name__ == '__main__':
+    filenames = {
+        'MMEarth': {'dirname': 'data_1M_v001', 'px_dim': (128, 128)},
+        'MMEarth64': {'dirname': 'data_1M_v001_64', 'px_dim': (64, 64)},
+        'MMEarth100k': {'dirname': 'data_100k_v001', 'px_dim': (128, 128)},
+    }
+    for key, vals in filenames.items():
+        dirname = vals['dirname']
+        # remove existing files
+        if os.path.exists(dirname):
+            shutil.rmtree(dirname)
+
+        # create directory
+        os.makedirs(dirname)
+        tile_info = create_hd5f(os.path.join(dirname, dirname), vals['px_dim'])
+
+        print(f'{key} data file and JSON files created successfully.')
+
+        with open(os.path.join(dirname, f'{dirname}_splits.json'), 'w') as f:
+            json.dump(splits, f, indent=4)
+
+        with open(os.path.join(dirname, f'{dirname}_band_stats.json'), 'w') as f:
+            json.dump(band_stats, f, indent=4)
+
+        with open(os.path.join(dirname, f'{dirname}_tile_info.json'), 'w') as f:
+            json.dump(tile_info, f, indent=4)
diff --git a/tests/data/mmearth/data_100k_v001/data_100k_v001.h5 b/tests/data/mmearth/data_100k_v001/data_100k_v001.h5
new file mode 100644
index 00000000000..c485a0faa62
Binary files /dev/null and b/tests/data/mmearth/data_100k_v001/data_100k_v001.h5 differ
diff --git a/tests/data/mmearth/data_100k_v001/data_100k_v001_band_stats.json b/tests/data/mmearth/data_100k_v001/data_100k_v001_band_stats.json
new file mode 100644
index 00000000000..501667e4839
--- /dev/null
+++ b/tests/data/mmearth/data_100k_v001/data_100k_v001_band_stats.json
@@ -0,0
+1,420 @@ +{ + "aster": { + "mean": [ + 0.34133172608321716, + 0.3059512737624116 + ], + "std": [ + 0.3465348008910826, + 0.14108695274821736 + ], + "min": [ + 0.8418094294546998, + 0.4742174200974866 + ], + "max": [ + 0.56738806029585, + 0.0518313995381231 + ] + }, + "canopy_height_eth": { + "mean": [ + 0.854532719112457, + 0.48863801930320394 + ], + "std": [ + 0.5895142273813204, + 0.1380733622865845 + ], + "min": [ + 0.7537277848083938, + 0.20478855446904576 + ], + "max": [ + 0.5045161659636557, + 0.5376684828821884 + ] + }, + "era5": { + "mean": [ + 0.4417867806655783, + 0.18400642123926858, + 0.11974228279177279, + 0.9522889638018397, + 0.9273662674296557, + 0.8755178421266646, + 0.606034251540829, + 0.30760754028836534, + 0.6040509112467255, + 0.6765954694705612, + 0.6691595591399268, + 0.5760865666368172 + ], + "std": [ + 0.5142377087804115, + 0.2701723743576415, + 0.8413069700552763, + 0.23868021272203077, + 0.5615458693574323, + 0.7949644871571033, + 0.26212481323891657, + 0.7322482538861085, + 0.1995248437867745, + 0.42723767485667563, + 0.739198522837161, + 0.8092830064036739 + ], + "min": [ + 0.14533112908329815, + 0.23840001563382995, + 0.09261877533368601, + 0.10812791898965746, + 0.3602589294337053, + 0.41608271321516976, + 0.40824824209496946, + 0.4362332517942743, + 0.6458086696919946, + 0.2873520751891693, + 0.1946008373600201, + 0.3371402501790228 + ], + "max": [ + 0.9619147643696027, + 0.6002844111029695, + 0.34438509909726867, + 0.5211044855925113, + 0.249727288970654, + 0.07768059753391432, + 0.8934236930498343, + 0.8550867273916366, + 0.34905292318622505, + 0.07599362043189295, + 0.3695837636892234, + 0.8599690826993232 + ] + }, + "sentinel1": { + "mean": [ + 0.4602361303699314, + 0.9803602949980195, + 0.6286630558858189, + 0.8546244471280615, + 0.3908955820387353, + 0.15722620842791302, + 0.5954830179122328, + 0.8116450473795687 + ], + "std": [ + 0.03964016383304825, + 0.2701027934269321, + 0.3164522549613331, + 0.09860183113067111, + 
0.1335076195305025, + 0.6380811967697871, + 0.5940489208142838, + 0.90153692977137 + ], + "min": [ + 0.44493594515658574, + 0.18478926184346423, + 0.2860240951390637, + 0.9376102612207217, + 0.9249907883844413, + 0.7000425768046851, + 0.3974535731475711, + 0.2996108322023431 + ], + "max": [ + 0.6430863691662376, + 0.9639089581632254, + 0.11634161184104996, + 0.753747780295231, + 0.4158525831196007, + 0.5988102320036879, + 0.10986853662090668, + 0.0600516168930747 + ] + }, + "sentinel2": { + "mean": [ + 0.572429320063415, + 0.15567923224572222, + 0.18809706032097528, + 0.8513440458791045, + 0.4678999223480048, + 0.050053414311246325, + 0.03783582407238084, + 0.2677522946476404, + 0.05453320208593193, + 0.5979956410404416, + 0.49602815159537084, + 0.988465511898549, + 0.6396682346061375 + ], + "std": [ + 0.788144262779709, + 0.8657320673010912, + 0.5279649775889855, + 0.3519159907818131, + 0.42634341564905587, + 0.7545521069496844, + 0.1962002041789851, + 0.7059625691340591, + 0.5931227904116899, + 0.9725044299059084, + 0.5405521502367713, + 0.2843034778768231, + 0.31920824614985277 + ], + "min": [ + 0.2720562009507226, + 0.5899353156966084, + 0.3934572906331085, + 0.44543431690993573, + 0.7278364898053944, + 0.02060665070965617, + 0.38574185899879954, + 0.6467951673496654, + 0.09562009477216771, + 0.7774338666717099, + 0.8432355577315033, + 0.4368636724686574, + 0.43488985400118574 + ], + "max": [ + 0.5900761314218557, + 0.36518105262763567, + 0.025620224680206638, + 0.5735969386962791, + 0.7634711203974548, + 0.1736244550922521, + 0.6024088499995152, + 0.9342662339896931, + 0.03710445086723202, + 0.1890352011946118, + 0.28380920040594426, + 0.08168516136465487, + 0.13526257707976375 + ] + }, + "sentinel2_cloudmask": { + "mean": [ + 0.6570709089318469 + ], + "std": [ + 0.5657620804780292 + ], + "min": [ + 0.9670225671155827 + ], + "max": [ + 0.5486983844030023 + ] + }, + "sentinel2_cloudprod": { + "mean": [ + 0.6891626967636988 + ], + "std": [ + 0.4094519969523073 + 
], + "min": [ + 0.18725260491655094 + ], + "max": [ + 0.07180021957746674 + ] + }, + "sentinel2_scl": { + "mean": [ + 0.6780711668782042 + ], + "std": [ + 0.4943563461327216 + ], + "min": [ + 0.72302837101946 + ], + "max": [ + 0.28749332478382883 + ] + }, + "sentinel2_l1c": { + "mean": [ + 0.21099016187905117, + 0.5890058125196053, + 0.3870387069065061, + 0.40632422729999684, + 0.09220072185564243, + 0.05179158725809463, + 0.3472011267218935, + 0.27714371744503874, + 0.8667033333340239, + 0.42299347757834715, + 0.21100068056443366, + 0.9402893951577577, + 0.3890143754610127 + ], + "std": [ + 0.9129275727157, + 0.27695516423511546, + 0.6574105342764129, + 0.3857889836668025, + 0.4733288194932791, + 0.7763859293169395, + 0.969951792165023, + 0.7683755050895299, + 0.7736738677488465, + 0.6231553439174615, + 0.8681139667570541, + 0.693870549161861, + 0.07153957606497696 + ], + "min": [ + 0.7774302874038522, + 0.5237210940430268, + 0.48160697988637924, + 0.40412832766833284, + 0.49783101469118285, + 0.1676681532899118, + 0.8610056792509986, + 0.2652839446267331, + 0.9325651272132277, + 0.563023094265321, + 0.2869457262128843, + 0.6022487049661519, + 0.13539449396850844 + ], + "max": [ + 0.3394905584222998, + 0.6912694198479455, + 0.9365463758014783, + 0.026939601415270298, + 0.5290840296268874, + 0.38007307086114506, + 0.8005140940419264, + 0.7775367379319111, + 0.5736020267695333, + 0.9672861900139044, + 0.5859121986439549, + 0.8918748335743096, + 0.8098629367248834 + ] + }, + "sentinel2_l2a": { + "mean": [ + 0.4319213025299248, + 0.47014764209420445, + 0.10854844936417318, + 0.3565311102195149, + 0.035159148875477664, + 0.9947423748438694, + 0.6998282309520572, + 0.7089475988524567, + 0.6559450071993304, + 0.5583110883126653, + 0.9159743145429701, + 0.8343679900271499, + 0.7655093634482485 + ], + "std": [ + 0.12423175444317092, + 0.9912849566181509, + 0.3951297176601042, + 0.8104237474502085, + 0.7201051485011062, + 0.13586708888652077, + 0.7374687030638306, + 
0.18741797127758675, + 0.16046499702755812, + 0.2749311810960794, + 0.13799794859023207, + 0.852581184239024, + 0.2925724204650476 + ], + "min": [ + 0.6710722460441257, + 0.6301086524595431, + 0.7368624384973665, + 0.13933868140865313, + 0.3705067827935764, + 0.7957615986693085, + 0.16723862032125847, + 0.20743892979117518, + 0.6662554693908289, + 0.9305180256466181, + 0.6165542799694995, + 0.9436576994737303, + 0.5915822101257956 + ], + "max": [ + 0.5530057895839687, + 0.12760001304721147, + 0.4562998709662902, + 0.04654611423548116, + 0.2540205560580904, + 0.15138539441364263, + 0.26367052218377185, + 0.6596795765749286, + 0.27285099411653047, + 0.47125521126252945, + 0.5939409956768125, + 0.2847412892997587, + 0.11228964358173976 + ] + }, + "lat": { + "mean": [ + 0.35663113563250803, + 0.9664439016211125 + ], + "std": [ + 0.5843606387551367, + 0.20984876015034148 + ], + "min": [ + 0.09962346810982947, + 0.8432152033355034 + ], + "max": [ + 0.16308304708635868, + 0.22022458984219218 + ] + }, + "lon": { + "mean": [ + 0.08778981307315648, + 0.5574407869891105 + ], + "std": [ + 0.6788585171009821, + 0.9327195921283604 + ], + "min": [ + 0.037812728275171015, + 0.7791613393176342 + ], + "max": [ + 0.45824364356139435, + 0.282148611369736 + ] + }, + "month": { + "mean": [ + 0.6768511662230008, + 0.020069115332411624 + ], + "std": [ + 0.40045110232925263, + 0.8656439167267811 + ], + "min": [ + 0.5073524073801928, + 0.0917181048136515 + ], + "max": [ + 0.6822690079185049, + 0.01508976602253198 + ] + } +} \ No newline at end of file diff --git a/tests/data/mmearth/data_100k_v001/data_100k_v001_splits.json b/tests/data/mmearth/data_100k_v001/data_100k_v001_splits.json new file mode 100644 index 00000000000..dffb6c43b69 --- /dev/null +++ b/tests/data/mmearth/data_100k_v001/data_100k_v001_splits.json @@ -0,0 +1,16 @@ +{ + "train": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "val": [], + "test": [] +} \ No newline at end of file diff --git 
a/tests/data/mmearth/data_100k_v001/data_100k_v001_tile_info.json b/tests/data/mmearth/data_100k_v001/data_100k_v001_tile_info.json new file mode 100644 index 00000000000..3700c94d789 --- /dev/null +++ b/tests/data/mmearth/data_100k_v001/data_100k_v001_tile_info.json @@ -0,0 +1,912 @@ +{ + "0": { + "S2_DATE": "2018-07-16", + "S2_type": "l1c", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "1": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + 
"sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "2": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + 
"esa_worldcover": [ + "Map" + ] + } + }, + "3": { + "S2_DATE": "2018-07-16", + "S2_type": "l1c", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "4": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + 
"201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "5": { + "S2_DATE": "2018-07-16", + "S2_type": "l1c", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "6": { + "S2_DATE": "2018-07-16", + "S2_type": "l1c", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + 
"B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "7": { + "S2_DATE": "2018-07-16", + "S2_type": "l1c", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + 
"dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "8": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "9": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + 
"HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + } +} \ No newline at end of file diff --git a/tests/data/mmearth/data_1M_v001/data_1M_v001.h5 b/tests/data/mmearth/data_1M_v001/data_1M_v001.h5 new file mode 100644 index 00000000000..1e37c8a005d Binary files /dev/null and b/tests/data/mmearth/data_1M_v001/data_1M_v001.h5 differ diff --git a/tests/data/mmearth/data_1M_v001/data_1M_v001_band_stats.json b/tests/data/mmearth/data_1M_v001/data_1M_v001_band_stats.json new file mode 100644 index 00000000000..501667e4839 --- /dev/null +++ b/tests/data/mmearth/data_1M_v001/data_1M_v001_band_stats.json @@ -0,0 +1,420 @@ +{ + "aster": { + "mean": [ + 0.34133172608321716, + 0.3059512737624116 + ], + "std": [ + 0.3465348008910826, + 0.14108695274821736 + ], + "min": [ + 0.8418094294546998, + 0.4742174200974866 + ], + "max": [ + 0.56738806029585, + 0.0518313995381231 + ] + }, + "canopy_height_eth": { + "mean": [ + 0.854532719112457, + 0.48863801930320394 + ], + "std": [ + 0.5895142273813204, + 0.1380733622865845 + ], + "min": [ + 0.7537277848083938, + 0.20478855446904576 + ], + "max": [ + 0.5045161659636557, + 0.5376684828821884 + ] + }, + "era5": { + "mean": [ + 0.4417867806655783, + 0.18400642123926858, + 0.11974228279177279, + 
0.9522889638018397, + 0.9273662674296557, + 0.8755178421266646, + 0.606034251540829, + 0.30760754028836534, + 0.6040509112467255, + 0.6765954694705612, + 0.6691595591399268, + 0.5760865666368172 + ], + "std": [ + 0.5142377087804115, + 0.2701723743576415, + 0.8413069700552763, + 0.23868021272203077, + 0.5615458693574323, + 0.7949644871571033, + 0.26212481323891657, + 0.7322482538861085, + 0.1995248437867745, + 0.42723767485667563, + 0.739198522837161, + 0.8092830064036739 + ], + "min": [ + 0.14533112908329815, + 0.23840001563382995, + 0.09261877533368601, + 0.10812791898965746, + 0.3602589294337053, + 0.41608271321516976, + 0.40824824209496946, + 0.4362332517942743, + 0.6458086696919946, + 0.2873520751891693, + 0.1946008373600201, + 0.3371402501790228 + ], + "max": [ + 0.9619147643696027, + 0.6002844111029695, + 0.34438509909726867, + 0.5211044855925113, + 0.249727288970654, + 0.07768059753391432, + 0.8934236930498343, + 0.8550867273916366, + 0.34905292318622505, + 0.07599362043189295, + 0.3695837636892234, + 0.8599690826993232 + ] + }, + "sentinel1": { + "mean": [ + 0.4602361303699314, + 0.9803602949980195, + 0.6286630558858189, + 0.8546244471280615, + 0.3908955820387353, + 0.15722620842791302, + 0.5954830179122328, + 0.8116450473795687 + ], + "std": [ + 0.03964016383304825, + 0.2701027934269321, + 0.3164522549613331, + 0.09860183113067111, + 0.1335076195305025, + 0.6380811967697871, + 0.5940489208142838, + 0.90153692977137 + ], + "min": [ + 0.44493594515658574, + 0.18478926184346423, + 0.2860240951390637, + 0.9376102612207217, + 0.9249907883844413, + 0.7000425768046851, + 0.3974535731475711, + 0.2996108322023431 + ], + "max": [ + 0.6430863691662376, + 0.9639089581632254, + 0.11634161184104996, + 0.753747780295231, + 0.4158525831196007, + 0.5988102320036879, + 0.10986853662090668, + 0.0600516168930747 + ] + }, + "sentinel2": { + "mean": [ + 0.572429320063415, + 0.15567923224572222, + 0.18809706032097528, + 0.8513440458791045, + 0.4678999223480048, + 
0.050053414311246325, + 0.03783582407238084, + 0.2677522946476404, + 0.05453320208593193, + 0.5979956410404416, + 0.49602815159537084, + 0.988465511898549, + 0.6396682346061375 + ], + "std": [ + 0.788144262779709, + 0.8657320673010912, + 0.5279649775889855, + 0.3519159907818131, + 0.42634341564905587, + 0.7545521069496844, + 0.1962002041789851, + 0.7059625691340591, + 0.5931227904116899, + 0.9725044299059084, + 0.5405521502367713, + 0.2843034778768231, + 0.31920824614985277 + ], + "min": [ + 0.2720562009507226, + 0.5899353156966084, + 0.3934572906331085, + 0.44543431690993573, + 0.7278364898053944, + 0.02060665070965617, + 0.38574185899879954, + 0.6467951673496654, + 0.09562009477216771, + 0.7774338666717099, + 0.8432355577315033, + 0.4368636724686574, + 0.43488985400118574 + ], + "max": [ + 0.5900761314218557, + 0.36518105262763567, + 0.025620224680206638, + 0.5735969386962791, + 0.7634711203974548, + 0.1736244550922521, + 0.6024088499995152, + 0.9342662339896931, + 0.03710445086723202, + 0.1890352011946118, + 0.28380920040594426, + 0.08168516136465487, + 0.13526257707976375 + ] + }, + "sentinel2_cloudmask": { + "mean": [ + 0.6570709089318469 + ], + "std": [ + 0.5657620804780292 + ], + "min": [ + 0.9670225671155827 + ], + "max": [ + 0.5486983844030023 + ] + }, + "sentinel2_cloudprod": { + "mean": [ + 0.6891626967636988 + ], + "std": [ + 0.4094519969523073 + ], + "min": [ + 0.18725260491655094 + ], + "max": [ + 0.07180021957746674 + ] + }, + "sentinel2_scl": { + "mean": [ + 0.6780711668782042 + ], + "std": [ + 0.4943563461327216 + ], + "min": [ + 0.72302837101946 + ], + "max": [ + 0.28749332478382883 + ] + }, + "sentinel2_l1c": { + "mean": [ + 0.21099016187905117, + 0.5890058125196053, + 0.3870387069065061, + 0.40632422729999684, + 0.09220072185564243, + 0.05179158725809463, + 0.3472011267218935, + 0.27714371744503874, + 0.8667033333340239, + 0.42299347757834715, + 0.21100068056443366, + 0.9402893951577577, + 0.3890143754610127 + ], + "std": [ + 0.9129275727157, + 
0.27695516423511546, + 0.6574105342764129, + 0.3857889836668025, + 0.4733288194932791, + 0.7763859293169395, + 0.969951792165023, + 0.7683755050895299, + 0.7736738677488465, + 0.6231553439174615, + 0.8681139667570541, + 0.693870549161861, + 0.07153957606497696 + ], + "min": [ + 0.7774302874038522, + 0.5237210940430268, + 0.48160697988637924, + 0.40412832766833284, + 0.49783101469118285, + 0.1676681532899118, + 0.8610056792509986, + 0.2652839446267331, + 0.9325651272132277, + 0.563023094265321, + 0.2869457262128843, + 0.6022487049661519, + 0.13539449396850844 + ], + "max": [ + 0.3394905584222998, + 0.6912694198479455, + 0.9365463758014783, + 0.026939601415270298, + 0.5290840296268874, + 0.38007307086114506, + 0.8005140940419264, + 0.7775367379319111, + 0.5736020267695333, + 0.9672861900139044, + 0.5859121986439549, + 0.8918748335743096, + 0.8098629367248834 + ] + }, + "sentinel2_l2a": { + "mean": [ + 0.4319213025299248, + 0.47014764209420445, + 0.10854844936417318, + 0.3565311102195149, + 0.035159148875477664, + 0.9947423748438694, + 0.6998282309520572, + 0.7089475988524567, + 0.6559450071993304, + 0.5583110883126653, + 0.9159743145429701, + 0.8343679900271499, + 0.7655093634482485 + ], + "std": [ + 0.12423175444317092, + 0.9912849566181509, + 0.3951297176601042, + 0.8104237474502085, + 0.7201051485011062, + 0.13586708888652077, + 0.7374687030638306, + 0.18741797127758675, + 0.16046499702755812, + 0.2749311810960794, + 0.13799794859023207, + 0.852581184239024, + 0.2925724204650476 + ], + "min": [ + 0.6710722460441257, + 0.6301086524595431, + 0.7368624384973665, + 0.13933868140865313, + 0.3705067827935764, + 0.7957615986693085, + 0.16723862032125847, + 0.20743892979117518, + 0.6662554693908289, + 0.9305180256466181, + 0.6165542799694995, + 0.9436576994737303, + 0.5915822101257956 + ], + "max": [ + 0.5530057895839687, + 0.12760001304721147, + 0.4562998709662902, + 0.04654611423548116, + 0.2540205560580904, + 0.15138539441364263, + 0.26367052218377185, + 
0.6596795765749286, + 0.27285099411653047, + 0.47125521126252945, + 0.5939409956768125, + 0.2847412892997587, + 0.11228964358173976 + ] + }, + "lat": { + "mean": [ + 0.35663113563250803, + 0.9664439016211125 + ], + "std": [ + 0.5843606387551367, + 0.20984876015034148 + ], + "min": [ + 0.09962346810982947, + 0.8432152033355034 + ], + "max": [ + 0.16308304708635868, + 0.22022458984219218 + ] + }, + "lon": { + "mean": [ + 0.08778981307315648, + 0.5574407869891105 + ], + "std": [ + 0.6788585171009821, + 0.9327195921283604 + ], + "min": [ + 0.037812728275171015, + 0.7791613393176342 + ], + "max": [ + 0.45824364356139435, + 0.282148611369736 + ] + }, + "month": { + "mean": [ + 0.6768511662230008, + 0.020069115332411624 + ], + "std": [ + 0.40045110232925263, + 0.8656439167267811 + ], + "min": [ + 0.5073524073801928, + 0.0917181048136515 + ], + "max": [ + 0.6822690079185049, + 0.01508976602253198 + ] + } +} \ No newline at end of file diff --git a/tests/data/mmearth/data_1M_v001/data_1M_v001_splits.json b/tests/data/mmearth/data_1M_v001/data_1M_v001_splits.json new file mode 100644 index 00000000000..dffb6c43b69 --- /dev/null +++ b/tests/data/mmearth/data_1M_v001/data_1M_v001_splits.json @@ -0,0 +1,16 @@ +{ + "train": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "val": [], + "test": [] +} \ No newline at end of file diff --git a/tests/data/mmearth/data_1M_v001/data_1M_v001_tile_info.json b/tests/data/mmearth/data_1M_v001/data_1M_v001_tile_info.json new file mode 100644 index 00000000000..f89fae376e3 --- /dev/null +++ b/tests/data/mmearth/data_1M_v001/data_1M_v001_tile_info.json @@ -0,0 +1,912 @@ +{ + "0": { + "S2_DATE": "2018-07-16", + "S2_type": "l1c", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], 
+ "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "1": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + 
], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "2": { + "S2_DATE": "2018-07-16", + "S2_type": "l1c", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "3": { + "S2_DATE": "2018-07-16", + "S2_type": "l1c", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + 
"201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "4": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "5": { + "S2_DATE": "2018-07-16", + "S2_type": "l1c", + "CRS": "EPSG:32721", + "lat": 
-14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "6": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + 
"0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "7": { + "S2_DATE": "2018-07-16", + "S2_type": "l1c", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "8": { + "S2_DATE": "2018-07-16", + "S2_type": "l1c", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + 
], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "9": { + "S2_DATE": "2018-07-16", + "S2_type": "l1c", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": 
[ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + } +} \ No newline at end of file diff --git a/tests/data/mmearth/data_1M_v001_64/data_1M_v001_64.h5 b/tests/data/mmearth/data_1M_v001_64/data_1M_v001_64.h5 new file mode 100644 index 00000000000..1e4908001b2 Binary files /dev/null and b/tests/data/mmearth/data_1M_v001_64/data_1M_v001_64.h5 differ diff --git a/tests/data/mmearth/data_1M_v001_64/data_1M_v001_64_band_stats.json b/tests/data/mmearth/data_1M_v001_64/data_1M_v001_64_band_stats.json new file mode 100644 index 00000000000..501667e4839 --- /dev/null +++ b/tests/data/mmearth/data_1M_v001_64/data_1M_v001_64_band_stats.json @@ -0,0 +1,420 @@ +{ + "aster": { + "mean": [ + 0.34133172608321716, + 0.3059512737624116 + ], + "std": [ + 0.3465348008910826, + 0.14108695274821736 + ], + "min": [ + 0.8418094294546998, + 0.4742174200974866 + ], + "max": [ + 0.56738806029585, + 0.0518313995381231 + ] + }, + "canopy_height_eth": { + "mean": [ + 0.854532719112457, + 0.48863801930320394 + ], + "std": [ + 0.5895142273813204, + 0.1380733622865845 + ], + "min": [ + 0.7537277848083938, + 0.20478855446904576 + ], + "max": [ + 0.5045161659636557, + 0.5376684828821884 + ] + }, + "era5": { + "mean": [ + 0.4417867806655783, + 0.18400642123926858, + 0.11974228279177279, + 0.9522889638018397, + 0.9273662674296557, + 0.8755178421266646, + 0.606034251540829, + 0.30760754028836534, + 0.6040509112467255, + 0.6765954694705612, + 0.6691595591399268, + 0.5760865666368172 + ], + "std": [ + 0.5142377087804115, + 0.2701723743576415, + 0.8413069700552763, + 0.23868021272203077, + 0.5615458693574323, + 0.7949644871571033, + 0.26212481323891657, + 0.7322482538861085, + 0.1995248437867745, + 0.42723767485667563, + 0.739198522837161, + 0.8092830064036739 + ], + "min": [ + 0.14533112908329815, + 0.23840001563382995, + 0.09261877533368601, + 0.10812791898965746, + 
0.3602589294337053, + 0.41608271321516976, + 0.40824824209496946, + 0.4362332517942743, + 0.6458086696919946, + 0.2873520751891693, + 0.1946008373600201, + 0.3371402501790228 + ], + "max": [ + 0.9619147643696027, + 0.6002844111029695, + 0.34438509909726867, + 0.5211044855925113, + 0.249727288970654, + 0.07768059753391432, + 0.8934236930498343, + 0.8550867273916366, + 0.34905292318622505, + 0.07599362043189295, + 0.3695837636892234, + 0.8599690826993232 + ] + }, + "sentinel1": { + "mean": [ + 0.4602361303699314, + 0.9803602949980195, + 0.6286630558858189, + 0.8546244471280615, + 0.3908955820387353, + 0.15722620842791302, + 0.5954830179122328, + 0.8116450473795687 + ], + "std": [ + 0.03964016383304825, + 0.2701027934269321, + 0.3164522549613331, + 0.09860183113067111, + 0.1335076195305025, + 0.6380811967697871, + 0.5940489208142838, + 0.90153692977137 + ], + "min": [ + 0.44493594515658574, + 0.18478926184346423, + 0.2860240951390637, + 0.9376102612207217, + 0.9249907883844413, + 0.7000425768046851, + 0.3974535731475711, + 0.2996108322023431 + ], + "max": [ + 0.6430863691662376, + 0.9639089581632254, + 0.11634161184104996, + 0.753747780295231, + 0.4158525831196007, + 0.5988102320036879, + 0.10986853662090668, + 0.0600516168930747 + ] + }, + "sentinel2": { + "mean": [ + 0.572429320063415, + 0.15567923224572222, + 0.18809706032097528, + 0.8513440458791045, + 0.4678999223480048, + 0.050053414311246325, + 0.03783582407238084, + 0.2677522946476404, + 0.05453320208593193, + 0.5979956410404416, + 0.49602815159537084, + 0.988465511898549, + 0.6396682346061375 + ], + "std": [ + 0.788144262779709, + 0.8657320673010912, + 0.5279649775889855, + 0.3519159907818131, + 0.42634341564905587, + 0.7545521069496844, + 0.1962002041789851, + 0.7059625691340591, + 0.5931227904116899, + 0.9725044299059084, + 0.5405521502367713, + 0.2843034778768231, + 0.31920824614985277 + ], + "min": [ + 0.2720562009507226, + 0.5899353156966084, + 0.3934572906331085, + 0.44543431690993573, + 
0.7278364898053944, + 0.02060665070965617, + 0.38574185899879954, + 0.6467951673496654, + 0.09562009477216771, + 0.7774338666717099, + 0.8432355577315033, + 0.4368636724686574, + 0.43488985400118574 + ], + "max": [ + 0.5900761314218557, + 0.36518105262763567, + 0.025620224680206638, + 0.5735969386962791, + 0.7634711203974548, + 0.1736244550922521, + 0.6024088499995152, + 0.9342662339896931, + 0.03710445086723202, + 0.1890352011946118, + 0.28380920040594426, + 0.08168516136465487, + 0.13526257707976375 + ] + }, + "sentinel2_cloudmask": { + "mean": [ + 0.6570709089318469 + ], + "std": [ + 0.5657620804780292 + ], + "min": [ + 0.9670225671155827 + ], + "max": [ + 0.5486983844030023 + ] + }, + "sentinel2_cloudprod": { + "mean": [ + 0.6891626967636988 + ], + "std": [ + 0.4094519969523073 + ], + "min": [ + 0.18725260491655094 + ], + "max": [ + 0.07180021957746674 + ] + }, + "sentinel2_scl": { + "mean": [ + 0.6780711668782042 + ], + "std": [ + 0.4943563461327216 + ], + "min": [ + 0.72302837101946 + ], + "max": [ + 0.28749332478382883 + ] + }, + "sentinel2_l1c": { + "mean": [ + 0.21099016187905117, + 0.5890058125196053, + 0.3870387069065061, + 0.40632422729999684, + 0.09220072185564243, + 0.05179158725809463, + 0.3472011267218935, + 0.27714371744503874, + 0.8667033333340239, + 0.42299347757834715, + 0.21100068056443366, + 0.9402893951577577, + 0.3890143754610127 + ], + "std": [ + 0.9129275727157, + 0.27695516423511546, + 0.6574105342764129, + 0.3857889836668025, + 0.4733288194932791, + 0.7763859293169395, + 0.969951792165023, + 0.7683755050895299, + 0.7736738677488465, + 0.6231553439174615, + 0.8681139667570541, + 0.693870549161861, + 0.07153957606497696 + ], + "min": [ + 0.7774302874038522, + 0.5237210940430268, + 0.48160697988637924, + 0.40412832766833284, + 0.49783101469118285, + 0.1676681532899118, + 0.8610056792509986, + 0.2652839446267331, + 0.9325651272132277, + 0.563023094265321, + 0.2869457262128843, + 0.6022487049661519, + 0.13539449396850844 + ], + "max": [ + 
0.3394905584222998, + 0.6912694198479455, + 0.9365463758014783, + 0.026939601415270298, + 0.5290840296268874, + 0.38007307086114506, + 0.8005140940419264, + 0.7775367379319111, + 0.5736020267695333, + 0.9672861900139044, + 0.5859121986439549, + 0.8918748335743096, + 0.8098629367248834 + ] + }, + "sentinel2_l2a": { + "mean": [ + 0.4319213025299248, + 0.47014764209420445, + 0.10854844936417318, + 0.3565311102195149, + 0.035159148875477664, + 0.9947423748438694, + 0.6998282309520572, + 0.7089475988524567, + 0.6559450071993304, + 0.5583110883126653, + 0.9159743145429701, + 0.8343679900271499, + 0.7655093634482485 + ], + "std": [ + 0.12423175444317092, + 0.9912849566181509, + 0.3951297176601042, + 0.8104237474502085, + 0.7201051485011062, + 0.13586708888652077, + 0.7374687030638306, + 0.18741797127758675, + 0.16046499702755812, + 0.2749311810960794, + 0.13799794859023207, + 0.852581184239024, + 0.2925724204650476 + ], + "min": [ + 0.6710722460441257, + 0.6301086524595431, + 0.7368624384973665, + 0.13933868140865313, + 0.3705067827935764, + 0.7957615986693085, + 0.16723862032125847, + 0.20743892979117518, + 0.6662554693908289, + 0.9305180256466181, + 0.6165542799694995, + 0.9436576994737303, + 0.5915822101257956 + ], + "max": [ + 0.5530057895839687, + 0.12760001304721147, + 0.4562998709662902, + 0.04654611423548116, + 0.2540205560580904, + 0.15138539441364263, + 0.26367052218377185, + 0.6596795765749286, + 0.27285099411653047, + 0.47125521126252945, + 0.5939409956768125, + 0.2847412892997587, + 0.11228964358173976 + ] + }, + "lat": { + "mean": [ + 0.35663113563250803, + 0.9664439016211125 + ], + "std": [ + 0.5843606387551367, + 0.20984876015034148 + ], + "min": [ + 0.09962346810982947, + 0.8432152033355034 + ], + "max": [ + 0.16308304708635868, + 0.22022458984219218 + ] + }, + "lon": { + "mean": [ + 0.08778981307315648, + 0.5574407869891105 + ], + "std": [ + 0.6788585171009821, + 0.9327195921283604 + ], + "min": [ + 0.037812728275171015, + 0.7791613393176342 + ], + 
"max": [ + 0.45824364356139435, + 0.282148611369736 + ] + }, + "month": { + "mean": [ + 0.6768511662230008, + 0.020069115332411624 + ], + "std": [ + 0.40045110232925263, + 0.8656439167267811 + ], + "min": [ + 0.5073524073801928, + 0.0917181048136515 + ], + "max": [ + 0.6822690079185049, + 0.01508976602253198 + ] + } +} \ No newline at end of file diff --git a/tests/data/mmearth/data_1M_v001_64/data_1M_v001_64_splits.json b/tests/data/mmearth/data_1M_v001_64/data_1M_v001_64_splits.json new file mode 100644 index 00000000000..dffb6c43b69 --- /dev/null +++ b/tests/data/mmearth/data_1M_v001_64/data_1M_v001_64_splits.json @@ -0,0 +1,16 @@ +{ + "train": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "val": [], + "test": [] +} \ No newline at end of file diff --git a/tests/data/mmearth/data_1M_v001_64/data_1M_v001_64_tile_info.json b/tests/data/mmearth/data_1M_v001_64/data_1M_v001_64_tile_info.json new file mode 100644 index 00000000000..ad0abfb43c7 --- /dev/null +++ b/tests/data/mmearth/data_1M_v001_64/data_1M_v001_64_tile_info.json @@ -0,0 +1,912 @@ +{ + "0": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + 
"3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "1": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "2": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + 
"sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "3": { + "S2_DATE": "2018-07-16", + "S2_type": "l1c", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" 
+ ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "4": { + "S2_DATE": "2018-07-16", + "S2_type": "l1c", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "5": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + 
"201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "6": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "7": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + 
"sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "8": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + 
"3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + }, + "9": { + "S2_DATE": "2018-07-16", + "S2_type": "l2a", + "CRS": "EPSG:32721", + "lat": -14.499441524746077, + "lon": -56.98355999998649, + "BANDS": { + "sentinel2": [ + "B1", + "B2", + "B3", + "B4", + "B5", + "B6", + "B7", + "B8A", + "B8", + "B9", + "B10", + "B11", + "B12" + ], + "sentinel2_cloudmask": [ + "QA60" + ], + "sentinel2_cloudprod": [ + "MSK_CLDPRB" + ], + "sentinel2_scl": [ + "SCL" + ], + "sentinel1_asc": [ + "VV", + "VH", + "HH", + "HV" + ], + "sentinel1_desc": [ + "VV", + "VH", + "HH", + "HV" + ], + "aster": [ + "b1", + "slope" + ], + "era5": [ + "201806_temperature_2m", + "201806_temperature_2m_min", + "201806_temperature_2m_max", + "201806_total_precipitation_sum", + "201807_temperature_2m", + "201807_temperature_2m_min", + "201807_temperature_2m_max", + "201807_total_precipitation_sum", + "0_temperature_2m_mean", + "1_temperature_2m_min_min", + "2_temperature_2m_max_max", + "3_total_precipitation_sum_sum" + ], + "dynamic_world": [ + "label" + ], + "canopy_height_eth": [ + "height", + "std" + ], + "lat": [ + "sin", + "cos" + ], + "lon": [ + "sin", + "cos" + ], + "biome": [ + "biome" + ], + "eco_region": [ + "eco_region" + ], + "month": [ + "sin_month", + "cos_month" + ], + "esa_worldcover": [ + "Map" + ] + } + } +} \ No newline at end of file diff --git a/tests/datasets/test_mmearth.py b/tests/datasets/test_mmearth.py new file mode 100644 index 00000000000..c25c2a1dece --- /dev/null +++ b/tests/datasets/test_mmearth.py @@ -0,0 +1,144 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
import os
import shutil
from pathlib import Path

import pytest
import torch
import torch.nn as nn
from _pytest.fixtures import SubRequest

from torchgeo.datasets import DatasetNotFoundError, MMEarth

pytest.importorskip('h5py', minversion='3.6')

# Location of the miniature test fixture for every supported subset.
data_dir_dict = {
    'MMEarth': os.path.join('tests', 'data', 'mmearth', 'data_1M_v001'),
    'MMEarth64': os.path.join('tests', 'data', 'mmearth', 'data_1M_v001_64'),
    'MMEarth100k': os.path.join('tests', 'data', 'mmearth', 'data_100k_v001'),
}


class TestMMEarth:
    """Tests for the MMEarth dataset, run once per dataset subset."""

    @pytest.fixture(params=['MMEarth', 'MMEarth64', 'MMEarth100k'])
    def dataset(self, tmp_path: Path, request: SubRequest) -> MMEarth:
        """Copy the fixture of the requested subset and open it as a dataset."""
        root = tmp_path
        subset = request.param
        shutil.copytree(data_dir_dict[subset], root / Path(data_dir_dict[subset]).name)
        transforms = nn.Identity()
        return MMEarth(root, subset=subset, transforms=transforms)

    @staticmethod
    def _sample_key(dataset: MMEarth, modality: str) -> str:
        """Return the sample key under which *modality* is stored."""
        return dataset.modality_category_name.get(modality, '') + modality

    def _assert_only_selected_bands(
        self, dataset: MMEarth, modality_bands: dict[str, list[str]]
    ) -> None:
        """Check that a sample holds exactly the requested modalities and bands."""
        x = dataset[0]
        assert isinstance(x, dict)

        for modality in dataset.modalities:
            key = self._sample_key(dataset, modality)
            if modality in modality_bands:
                assert key in x
                assert x[key].shape[0] == len(modality_bands[modality])
            else:
                assert key not in x

    def test_getitem(self, dataset: MMEarth) -> None:
        x = dataset[0]
        assert isinstance(x, dict)
        for modality in dataset.modalities:
            key = self._sample_key(dataset, modality)
            assert key in x
            assert isinstance(x[key], torch.Tensor)
            assert x[key].shape[0] == len(dataset.modality_bands[modality])

    def test_subset_modalities(self, dataset: MMEarth) -> None:
        specified_modalities = ['sentinel2', 'dynamic_world']
        ds = MMEarth(
            dataset.root, subset=dataset.subset, modalities=specified_modalities
        )
        x = ds[0]
        assert isinstance(x, dict)

        for modality in ds.modalities:
            key = self._sample_key(ds, modality)
            if modality in specified_modalities:
                assert key in x
            else:
                assert key not in x

    def test_dataset_not_found(self, tmp_path: Path) -> None:
        with pytest.raises(DatasetNotFoundError, match='Dataset not found'):
            MMEarth(tmp_path)

    def test_invalid_modalities(self, dataset: MMEarth) -> None:
        with pytest.raises(ValueError, match='is an invalid modality'):
            MMEarth(dataset.root, subset=dataset.subset, modalities=['invalid'])

    def test_invalid_modality_bands_modality_name(self, dataset: MMEarth) -> None:
        with pytest.raises(ValueError, match='is an invalid modality name'):
            MMEarth(
                dataset.root,
                subset=dataset.subset,
                modality_bands={'invalid': ['invalid']},
            )

    def test_invalid_modality_bands(self, dataset: MMEarth) -> None:
        with pytest.raises(ValueError, match='is an invalid band name for modality'):
            MMEarth(
                dataset.root,
                subset=dataset.subset,
                modality_bands={'sentinel2': ['invalid']},
            )

    @pytest.mark.parametrize(
        'modality_bands, modalities',
        [
            ({'sentinel2': ['B2', 'B3']}, ['sentinel2']),
            (
                {'sentinel1_asc': ['VV'], 'sentinel1_desc': ['VH']},
                ['sentinel1_asc', 'sentinel1_desc'],
            ),
        ],
    )
    def test_subset_modality_bands(
        self,
        dataset: MMEarth,
        modality_bands: dict[str, list[str]],
        modalities: list[str],
    ) -> None:
        ds = MMEarth(
            dataset.root,
            subset=dataset.subset,
            modalities=modalities,
            modality_bands=modality_bands,
        )
        self._assert_only_selected_bands(ds, modality_bands)

    def test_sentinel1_asc_desc(self, dataset: MMEarth) -> None:
        # ascending and descending orbits live in one HDF5 array, so make sure
        # each of them can be requested with its own band selection
        modality_bands = {'sentinel1_asc': ['VV'], 'sentinel1_desc': ['VH']}
        ds = MMEarth(
            dataset.root,
            subset=dataset.subset,
            modalities=['sentinel1_asc', 'sentinel1_desc'],
            modality_bands=modality_bands,
        )
        self._assert_only_selected_bands(ds, modality_bands)

    @pytest.mark.parametrize('normalization_mode', ['z-score', 'min-max'])
    def test_normalization_mode(
        self, dataset: MMEarth, normalization_mode: str
    ) -> None:
        ds = MMEarth(
            dataset.root, subset=dataset.subset, normalization_mode=normalization_mode
        )
        x = ds[0]
        assert isinstance(x, dict)

    def test_len(self, dataset: MMEarth) -> None:
        assert len(dataset) >= 2
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

"""MMEarth Dataset."""

import json
import os
from collections.abc import Callable, Sequence
from datetime import datetime, timedelta
from typing import Any, ClassVar, cast

import numpy as np
import torch
from torch import Tensor

from .errors import DatasetNotFoundError
from .geo import NonGeoDataset
from .utils import Path, lazy_import


class MMEarth(NonGeoDataset):
    """MMEarth dataset.

    There are three different versions of the dataset, that vary in image size
    and the number of tiles:

    * MMEarth: 128x128 px, 1.2M tiles, 579 GB
    * MMEarth64: 64x64 px, 1.2M tiles, 162 GB
    * MMEarth100k: 128x128 px, 100K tiles, 48 GB

    The dataset consists of 12 modalities:

    * Aster: elevation and slope
    * Biome: 14 terrestrial ecosystem categories
    * ETH Canopy Height: Canopy height and standard deviation
    * Dynamic World: 9 landcover categories
    * Ecoregion: 846 ecoregion categories
    * ERA5: Climate reanalysis data for temperature mean, min, and max of [year, month, previous month]
      and precipitation total of [year, month, previous month] (counted as separate modalities)
    * ESA World Cover: 11 landcover categories
    * Sentinel-1: VV, VH, HV, HH for ascending/descending orbit
    * Sentinel-2: multi-spectral B1-B12 for L1C/L2A products
    * Geolocation: cyclic encoding of latitude and longitude
    * Date: cyclic encoding of month

    Additionally, there are three masks available as modalities:

    * Sentinel-2 Cloudmask: Sentinel-2 cloud mask
    * Sentinel-2 Cloud probability: Sentinel-2 cloud probability
    * Sentinel-2 SCL: Sentinel-2 scene classification

    that are synchronized across tiles.

    Dataset format:

    * Dataset in single HDF5 file
    * JSON files for band statistics, splits, and tile information

    For additional information, as well as bash scripts to
    download the data, please refer to the
    `official repository <https://github.com/vishalned/MMEarth-train>`_.

    If you use this dataset in your research, please cite the following paper:

    * https://arxiv.org/abs/2405.02771

    .. note::

       This dataset requires the following additional library to be installed:

       * `h5py <https://pypi.org/project/h5py/>`_ to load the dataset

    .. versionadded:: 0.7
    """

    subsets = ('MMEarth', 'MMEarth64', 'MMEarth100k')

    # directory name (and file name prefix) of every subset
    filenames: ClassVar[dict[str, str]] = {
        'MMEarth': 'data_1M_v001',
        'MMEarth64': 'data_1M_v001_64',
        'MMEarth100k': 'data_100k_v001',
    }

    # modalities that can be requested by the user
    all_modalities = (
        'aster',
        'biome',
        'canopy_height_eth',
        'dynamic_world',
        'eco_region',
        'era5',
        'esa_worldcover',
        'sentinel1_asc',
        'sentinel1_desc',
        'sentinel2',
        'sentinel2_cloudmask',
        'sentinel2_cloudprod',
        'sentinel2_scl',
    )

    # See https://github.com/vishalned/MMEarth-train/blob/8d6114e8e3ccb5ca5d98858e742dac24350b64fd/MODALITIES.py#L108C1-L160C2
    # The list order of each modality is used to compute band indices
    # (see ``_select_indices_for_modality``), so it must not be changed.
    all_modality_bands: ClassVar[dict[str, list[str]]] = {
        'sentinel2': [
            'B1',
            'B2',
            'B3',
            'B4',
            'B5',
            'B6',
            'B7',
            'B8A',
            'B8',
            'B9',
            'B10',
            'B11',
            'B12',
        ],
        'sentinel2_cloudmask': ['QA60'],
        'sentinel2_cloudprod': ['MSK_CLDPRB'],
        'sentinel2_scl': ['SCL'],
        'sentinel1_asc': ['VV', 'VH', 'HH', 'HV'],
        'sentinel1_desc': ['VV', 'VH', 'HH', 'HV'],
        'aster': ['b1', 'slope'],  # elevation and slope
        'era5': [
            'prev_temperature_2m',  # previous month avg temp
            'prev_temperature_2m_min',  # previous month min temp
            'prev_temperature_2m_max',  # previous month max temp
            'prev_total_precipitation_sum',  # previous month total precip
            'curr_temperature_2m',  # current month avg temp
            'curr_temperature_2m_min',  # current month min temp
            'curr_temperature_2m_max',  # current month max temp
            'curr_total_precipitation_sum',  # current month total precip
            '0_temperature_2m_mean',  # year avg temp
            '1_temperature_2m_min_min',  # year min temp
            '2_temperature_2m_max_max',  # year max temp
            '3_total_precipitation_sum_sum',  # year total precip
        ],
        'dynamic_world': ['label'],
        'canopy_height_eth': ['height', 'std'],
        'lat': ['sin', 'cos'],
        'lon': ['sin', 'cos'],
        'biome': ['biome'],
        'eco_region': ['eco_region'],
        'month': ['sin_month', 'cos_month'],
        'esa_worldcover': ['Map'],
    }

    # See https://github.com/vishalned/MMEarth-train/blob/8d6114e8e3ccb5ca5d98858e742dac24350b64fd/MODALITIES.py#L36
    no_data_vals: ClassVar[dict[str, int | float]] = {
        'sentinel2': 0,
        'sentinel2_cloudmask': 65535,
        'sentinel2_cloudprod': 65535,
        'sentinel2_scl': 255,
        'sentinel1_asc': float('-inf'),
        'sentinel1_desc': float('-inf'),
        'aster': float('-inf'),
        'canopy_height_eth': 255,
        'dynamic_world': 0,
        'esa_worldcover': 255,
        'lat': float('-inf'),
        'lon': float('-inf'),
        'month': float('-inf'),
        'era5': float('inf'),
        'biome': 255,
        'eco_region': 65535,
    }

    norm_modes = ('z-score', 'min-max')

    # prefix prepended to the modality name to build the key in the sample
    # dictionary; modalities that are not listed get no prefix
    modality_category_name: ClassVar[dict[str, str]] = {
        'sentinel1_asc': 'image_',
        'sentinel1_desc': 'image_',
        'sentinel2': 'image_',
        'sentinel2_cloudmask': 'mask_',
        'sentinel2_cloudprod': 'mask_',
        'sentinel2_scl': 'mask_',
        'aster': 'image_',
        'era5': '',
        'canopy_height_eth': 'image_',
        'dynamic_world': 'mask_',
        'esa_worldcover': 'mask_',
    }

    def __init__(
        self,
        root: Path = 'data',
        subset: str = 'MMEarth',
        modalities: Sequence[str] = all_modalities,
        modality_bands: dict[str, list[str]] | None = None,
        normalization_mode: str = 'z-score',
        transforms: Callable[[dict[str, Tensor]], dict[str, Tensor]] | None = None,
    ) -> None:
        """Initialize the MMEarth dataset.

        Args:
            root: root directory where dataset can be found
            subset: one of "MMEarth", "MMEarth64", or "MMEarth100k"
            modalities: list of modalities to load
            modality_bands: dictionary mapping a modality name to the list of
                its bands to load, see ``all_modality_bands`` for the valid
                names. If None, all bands of the chosen *modalities* are used
            normalization_mode: one of "z-score" or "min-max"
            transforms: a function/transform that takes input sample dictionary
                and returns a transformed version

        Raises:
            AssertionError: if *normalization_mode* or *subset* is invalid, or
                if *modalities* / *modality_bands* have the wrong container type
            ValueError: if *modalities* or *modality_bands* contain an invalid
                modality or band name
            DatasetNotFoundError: If dataset is not found.
        """
        lazy_import('h5py')

        assert (
            normalization_mode in self.norm_modes
        ), f'Invalid normalization mode: {normalization_mode}, please choose from {self.norm_modes}'
        assert (
            subset in self.subsets
        ), f'Invalid dataset version: {subset}, please choose from {self.subsets}'

        self._validate_modalities(modalities)
        self.modalities = modalities
        if modality_bands is None:
            modality_bands = {
                modality: self.all_modality_bands[modality] for modality in modalities
            }
        # relies on ``self.modalities`` being set above
        self._validate_modality_bands(modality_bands)
        self.modality_bands = modality_bands

        self.root = root
        self.subset = subset
        self.normalization_mode = normalization_mode
        self.split = 'train'
        self.transforms = transforms

        prefix = self.filenames[subset]
        self.dataset_filename = f'{prefix}.h5'
        self.band_stats_filename = f'{prefix}_band_stats.json'
        self.splits_filename = f'{prefix}_splits.json'
        self.tile_info_filename = f'{prefix}_tile_info.json'

        self._verify()

        self.indices = self._load_indices()
        self.band_stats = self._load_normalization_stats()
        self.tile_info = self._load_tile_info()

    def _verify(self) -> None:
        """Verify that all files of the chosen subset exist.

        Raises:
            DatasetNotFoundError: if the HDF5 file or one of the JSON files
                is missing
        """
        data_dir = os.path.join(self.root, self.filenames[self.subset])
        required = (
            self.dataset_filename,
            self.band_stats_filename,
            self.splits_filename,
            self.tile_info_filename,
        )
        if not all(os.path.exists(os.path.join(data_dir, f)) for f in required):
            raise DatasetNotFoundError(self)

    def _load_json(self, filename: str) -> Any:
        """Load a JSON file located in the directory of the chosen subset.

        Args:
            filename: name of the JSON file

        Returns:
            the decoded JSON content
        """
        path = os.path.join(self.root, self.filenames[self.subset], filename)
        # JSON is UTF-8 by specification, do not depend on the locale encoding
        with open(path, encoding='utf-8') as f:
            return json.load(f)

    def _load_indices(self) -> list[int]:
        """Load the indices for the dataset split.

        Returns:
            list of indices
        """
        split_indices: dict[str, list[int]] = self._load_json(self.splits_filename)
        return split_indices[self.split]

    def _load_normalization_stats(self) -> dict[str, dict[str, float]]:
        """Load normalization statistics for each band.

        Returns:
            dictionary containing the normalization statistics
        """
        band_stats = self._load_json(self.band_stats_filename)
        return cast(dict[str, dict[str, float]], band_stats)

    def _load_tile_info(self) -> dict[str, dict[str, str]]:
        """Load tile information.

        Returns:
            dictionary containing tile information
        """
        tile_info = self._load_json(self.tile_info_filename)
        return cast(dict[str, dict[str, str]], tile_info)

    def _validate_modalities(self, modalities: Sequence[str]) -> None:
        """Validate list of modalities.

        Args:
            modalities: user-provided sequence of modalities to load

        Raises:
            AssertionError: if ``modalities`` is not a sequence
            ValueError: if an invalid modality name is provided
        """
        assert isinstance(modalities, Sequence), "'modalities' must be a sequence"
        if not set(modalities) <= set(self.all_modalities):
            raise ValueError(
                f'{set(modalities) - set(self.all_modalities)} is an invalid modality.'
            )

    def _validate_modality_bands(self, modality_bands: dict[str, list[str]]) -> None:
        """Validate modality bands.

        Args:
            modality_bands: user-provided dictionary of modality bands

        Raises:
            AssertionError: if ``modality_bands`` is not a dictionary
            ValueError: if an invalid modality name is provided
            ValueError: if modality bands are invalid
        """
        assert isinstance(modality_bands, dict), "'modality_bands' must be a dictionary"
        for key, vals in modality_bands.items():
            # check that the modality name is also specified in modalities
            if key not in self.modalities:
                raise ValueError(f"'{key}' is an invalid modality name.")
            for val in vals:
                if val not in self.all_modality_bands[key]:
                    raise ValueError(
                        f"'{val}' is an invalid band name for modality '{key}'."
                    )

    def __getitem__(self, index: int) -> dict[str, Any]:
        """Return a sample from the dataset.

        Normalization is applied to the data with chosen ``normalization_mode``.
        In addition to the modalities, the sample contains the following raw metadata:

        * lat: latitude
        * lon: longitude
        * date: date
        * crs: coordinate reference system
        * tile_id: tile identifier
        * avail_bands: dictionary of the bands that were loaded per modality

        Args:
            index: index to return

        Returns:
            dictionary containing the modalities and metadata
            of the sample
        """
        ds_index = self.indices[index]

        # expose sample retrieval to separate function to allow for different
        # index sampling strategies in subclasses
        sample = self._retrieve_sample(ds_index)

        if self.transforms is not None:
            sample = self.transforms(sample)

        return sample

    def get_sample_specific_band_names(
        self, tile_info: dict[str, Any]
    ) -> dict[str, list[str]]:
        """Retrieve the sample specific band names.

        The band names are taken from the requested ``modality_bands``. For
        ERA5, the dated month prefixes of the sample (``YYYYMM`` of the
        Sentinel-2 acquisition month and of the month before) are replaced
        by the generic ``curr`` and ``prev`` prefixes.

        Args:
            tile_info: tile information for a sample

        Returns:
            dictionary containing the specific band names for each modality
        """
        # NOTE(review): only 'S2_DATE' is read here, the bands listed for the
        # tile itself (``tile_info['BANDS']``) are not consulted - confirm
        # that this is intended.
        date_obj = datetime.strptime(tile_info['S2_DATE'], '%Y-%m-%d')
        curr_month_str = date_obj.strftime('%Y%m')
        # set to first day of month and subtract one day to get previous month
        prev_month_str = (date_obj.replace(day=1) - timedelta(days=1)).strftime('%Y%m')

        specific_modality_bands: dict[str, list[str]] = {}
        for modality, bands in self.modality_bands.items():
            if modality == 'era5':
                # replace date with the 'prev' and 'curr' strings for generality
                bands = [
                    band.replace(prev_month_str, 'prev').replace(curr_month_str, 'curr')
                    for band in bands
                ]
            specific_modality_bands[modality] = bands

        return specific_modality_bands

    def get_intersection_dict(self, tile_info: dict[str, Any]) -> dict[str, list[str]]:
        """Get intersection of requested and available bands.

        The bands of every modality are returned in the order in which they
        are listed in ``all_modality_bands``, independent of the order in
        which they were requested. Modalities without any band are dropped.

        Args:
            tile_info: tile information for a sample

        Returns:
            Dictionary with intersected keys and lists.
        """
        sample_specific_band_names = self.get_sample_specific_band_names(tile_info)
        intersection_dict: dict[str, list[str]] = {}
        for modality in self.all_modalities:
            if modality not in sample_specific_band_names:
                continue
            requested = sample_specific_band_names[modality]
            intersected_list = [
                band for band in self.all_modality_bands[modality] if band in requested
            ]
            if intersected_list:
                intersection_dict[modality] = intersected_list

        return intersection_dict

    def _retrieve_sample(self, ds_index: int) -> dict[str, Any]:
        """Retrieve a sample from the dataset.

        Args:
            ds_index: index inside the hdf5 dataset file

        Returns:
            dictionary containing the modalities and metadata
            of the sample
        """
        h5py = lazy_import('h5py')
        sample: dict[str, Any] = {}
        h5_path = os.path.join(
            self.root, self.filenames[self.subset], self.dataset_filename
        )
        with h5py.File(h5_path, 'r') as f:
            name = f['metadata'][ds_index][0].decode('utf-8')
            tile_info: dict[str, Any] = self.tile_info[name]
            # need to find the intersection of requested and available bands
            intersection_dict = self.get_intersection_dict(tile_info)
            for modality, bands in intersection_dict.items():
                # ascending and descending orbit share one 'sentinel1' array
                h5_key = 'sentinel1' if 'sentinel1' in modality else modality
                data = f[h5_key][ds_index][:]

                tensor = self._preprocess_modality(data, modality, tile_info, bands)
                modality_name = self.modality_category_name.get(modality, '') + modality
                sample[modality_name] = tensor

        # add the sensor and bands actually available
        sample['avail_bands'] = intersection_dict

        # add additional metadata to the sample
        sample['lat'] = tile_info['lat']
        sample['lon'] = tile_info['lon']
        sample['date'] = tile_info['S2_DATE']
        sample['crs'] = tile_info['CRS']
        sample['tile_id'] = name

        return sample

    def _select_indices_for_modality(
        self, modality: str, bands: list[str]
    ) -> list[int]:
        """Select bands for a modality.

        Args:
            modality: modality name
            bands: bands available for the modality

        Returns:
            list of band indices
        """
        # need to handle sentinel1 descending separately, because ascending
        # and descending are stored under the same modality: the descending
        # bands follow the four ascending ones
        if modality == 'sentinel1_desc':
            return [
                self.all_modality_bands['sentinel1_desc'].index(band) + 4
                for band in bands
            ]
        # the modality is called sentinel2 but has different bands stats for
        # l1c and l2a but common indices
        if modality in ['sentinel2_l1c', 'sentinel2_l2a']:
            modality = 'sentinel2'
        return [self.all_modality_bands[modality].index(band) for band in bands]

    def _preprocess_modality(
        self,
        data: 'np.typing.NDArray[Any]',
        modality: str,
        tile_info: dict[str, Any],
        bands: list[str],
    ) -> Tensor:
        """Preprocess a single modality.

        Selects the requested bands, remaps categorical labels to 0-indexed
        classes and normalizes continuous modalities.

        Args:
            data: data to process
            modality: modality name
            tile_info: tile information
            bands: available bands for the modality

        Returns:
            processed data

        Raises:
            ValueError: if no preprocessing is defined for *modality*
        """
        # band selection for modality
        indices = self._select_indices_for_modality(modality, bands)
        data = data[indices, ...]

        # See https://github.com/vishalned/MMEarth-train/blob/8d6114e8e3ccb5ca5d98858e742dac24350b64fd/mmearth_dataset.py#L69
        if modality == 'dynamic_world':
            # first replace 0 with nan then assign new labels to have 0-index classes
            data = np.where(data == self.no_data_vals[modality], np.nan, data)
            # the labels are shifted in ascending order, so a freshly written
            # label is never picked up by a later iteration; nan is left as is
            old_values = [1, 2, 3, 4, 5, 6, 7, 8, 9]
            new_values = [0, 1, 2, 3, 4, 5, 6, 7, 8]
            for old, new in zip(old_values, new_values):
                data = np.where(data == old, new, data)

            # need to replace nan with a no-data value and get long tensor
            # maybe also 255 like esa_worldcover
            tensor = torch.from_numpy(data)

        elif modality == 'esa_worldcover':
            old_values = [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
            new_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
            for old, new in zip(old_values, new_values):
                data = np.where(data == old, new, data)

            # currently no-data value is still 255
            tensor = torch.from_numpy(data).long()

        elif modality in [
            'aster',
            'canopy_height_eth',
            'sentinel1_asc',
            'sentinel1_desc',
            'sentinel2',
            'era5',
            'lat',
            'lon',
            'month',
        ]:
            data = data.astype(np.float32)
            # See https://github.com/vishalned/MMEarth-train/blob/8d6114e8e3ccb5ca5d98858e742dac24350b64fd/mmearth_dataset.py#L88
            # the modality is called sentinel2 but has different bands stats for l1c and l2a
            if modality == 'sentinel2':
                modality_ = (
                    'sentinel2_l2a'
                    if tile_info['S2_type'] == 'l2a'
                    else 'sentinel2_l1c'
                )
            else:
                modality_ = modality
            data = self._normalize_modality(data, modality_, bands)
            # NOTE(review): the no-data value is compared against the already
            # normalized data. That only works reliably for the +/-inf
            # sentinels; for finite ones (sentinel2: 0, canopy_height_eth: 255)
            # it looks like the mask should be taken before normalization.
            # Kept as is, because it changes the returned values - confirm
            # against the reference implementation before changing.
            data = np.where(data == self.no_data_vals[modality], np.nan, data)
            tensor = torch.from_numpy(data).float()

        elif modality in ['biome', 'eco_region']:
            data = data.astype(np.int32)
            # no data value also 255 for biome and 65535 for eco_region
            tensor = torch.from_numpy(data).long()

        elif modality in [
            'sentinel2_cloudmask',
            'sentinel2_cloudprod',
            'sentinel2_scl',
        ]:
            tensor = torch.from_numpy(data.astype(np.int32)).long()

        else:
            # fail with a clear message instead of an UnboundLocalError below
            raise ValueError(f"No preprocessing defined for modality '{modality}'.")

        # TODO: tensor might still contain nans, how to handle this?
        return tensor

    def _normalize_modality(
        self, data: 'np.typing.NDArray[Any]', modality: str, bands: list[str]
    ) -> 'np.typing.NDArray[np.float64]':
        """Normalize a single modality.

        Args:
            data: data to normalize, with the bands in the first dimension
            modality: modality name, for Sentinel-2 the name of the product
                specific statistics (``sentinel2_l1c`` or ``sentinel2_l2a``)
            bands: available bands for the modality

        Returns:
            normalized data
        """
        # has to be computed with the original name, the descending
        # Sentinel-1 bands are offset in the statistics as well
        indices = self._select_indices_for_modality(modality, bands)

        # statistics of both orbits are stored under a common key
        if 'sentinel1' in modality:
            modality = 'sentinel1'

        stats = self.band_stats[modality]
        # image modalities need the per-band statistics broadcast over the two
        # trailing dimensions, vector modalities are used as is
        is_image = data.ndim == 3

        if self.normalization_mode == 'z-score':
            mean = np.array(stats['mean'])[indices, ...]
            std = np.array(stats['std'])[indices, ...]
            if is_image:
                data = (data - mean[:, None, None]) / std[:, None, None]
            else:
                data = (data - mean) / std
        elif self.normalization_mode == 'min-max':
            min_val = np.array(stats['min'])[indices, ...]
            max_val = np.array(stats['max'])[indices, ...]
            if is_image:
                data = (data - min_val[:, None, None]) / (
                    max_val[:, None, None] - min_val[:, None, None]
                )
            else:
                data = (data - min_val) / (max_val - min_val)

        return data

    def __len__(self) -> int:
        """Return the length of the dataset.

        Returns:
            length of the dataset
        """
        return len(self.indices)