diff --git a/.gitignore b/.gitignore index f894c4c5..455024f8 100644 --- a/.gitignore +++ b/.gitignore @@ -67,6 +67,7 @@ studio-pf.log # Credentials and secrets .geostudio_config_file +geostudio_config_file *.env .env.local secrets.yml diff --git a/workshop/docs/notebooks/lab1-getting-started.ipynb b/workshop/docs/notebooks/lab1-getting-started.ipynb index f4f76271..46056af7 100644 --- a/workshop/docs/notebooks/lab1-getting-started.ipynb +++ b/workshop/docs/notebooks/lab1-getting-started.ipynb @@ -7,6 +7,12 @@ "source": [ "# Lab 1\n", "\n", + "\n", + "\n", + " \"Open\n", + "\n", + "\n", + "\n", "**Getting Started with IBM Geospatial Studio**\n", "\n", "⏱️ **Estimated Duration:** 10 minutes \n", @@ -228,6 +234,8 @@ "BASE_STUDIO_UI_URL=https://localhost:4180\n", "\"\"\"\n", "\n", + "# If you are using jupyter lab, create a normal file to view it on the fileviewer. `geostudio_config_file`\n", + "\n", "# Write to file\n", "with open('.geostudio_config_file', 'w') as f:\n", " f.write(config_content)\n", diff --git a/workshop/docs/notebooks/lab2-onboarding-examples.ipynb b/workshop/docs/notebooks/lab2-onboarding-examples.ipynb index 2f7eddd8..76a64652 100644 --- a/workshop/docs/notebooks/lab2-onboarding-examples.ipynb +++ b/workshop/docs/notebooks/lab2-onboarding-examples.ipynb @@ -7,6 +7,12 @@ "source": [ "# Lab 2\n", "\n", + "\n", + "\n", + " \"Open\n", + "\n", + "\n", + "\n", "**Onboarding Pre-computed Examples**\n", "\n", "⏱️ **Estimated Duration:** 20 minutes \n", @@ -116,6 +122,8 @@ "outputs": [], "source": [ "# Initialize the client\n", + "# If you are using jupyter lab, create a normal file to view it on the fileviewer. `geostudio_config_file`\n", + "\n", "client = Client(geostudio_config_file=\".geostudio_config_file\")\n", "\n", "print(\"✅ Connected to Geospatial Studio!\")" @@ -403,7 +411,7 @@ "\n", "2. 
**Navigate to the Inference Lab:**\n", " - Click on \"Inference Lab\" in the left sidebar\n", - " - Or go directly to: `https://localhost:4180/inference`\n", + " - Or go directly to: `https://localhost:4180/inference#inference`\n", "\n", "3. **Find Your Example:**\n", " - Look for \"My Examples\" section\n", diff --git a/workshop/docs/notebooks/lab3-running-inference.ipynb b/workshop/docs/notebooks/lab3-running-inference.ipynb index c5e196fa..712e6453 100644 --- a/workshop/docs/notebooks/lab3-running-inference.ipynb +++ b/workshop/docs/notebooks/lab3-running-inference.ipynb @@ -7,6 +7,11 @@ "source": [ "# Lab 3\n", "\n", + "\n", + "\n", + " \"Open\n", + "\n", + "\n", "**Upload Model Checkpoints and Run Inference**\n", "\n", "⏱️ **Estimated Duration:** 30 minutes \n", @@ -132,6 +137,8 @@ "outputs": [], "source": [ "# Initialize the client\n", + "# If you are using jupyter lab, create a normal file to view it on the fileviewer. `geostudio_config_file`\n", + "\n", "client = Client(geostudio_config_file=\".geostudio_config_file\")\n", "\n", "print(\"✅ Connected to Geospatial Studio\")" diff --git a/workshop/docs/notebooks/lab4-burnscars-workflow.ipynb b/workshop/docs/notebooks/lab4-burnscars-workflow.ipynb index 8f46d9fa..db4b2302 100644 --- a/workshop/docs/notebooks/lab4-burnscars-workflow.ipynb +++ b/workshop/docs/notebooks/lab4-burnscars-workflow.ipynb @@ -7,6 +7,12 @@ "source": [ "# Lab 4\n", "\n", + "\n", + "\n", + " \"Open\n", + "\n", + "\n", + "\n", "**Training a Custom Model for Wildfire Burn Scar Detection**\n", "\n", "⏱️ **Estimated Duration:** 60-90 minutes \n", @@ -127,6 +133,8 @@ "outputs": [], "source": [ "# Initialize the client\n", + "# If you are using jupyter lab, create a normal file to view it on the fileviewer. 
`geostudio_config_file`\n", + "\n", "client = Client(geostudio_config_file=\".geostudio_config_file\")\n", "\n", "print(\"✅ Connected to Geospatial Studio!\")" @@ -199,8 +207,18 @@ "outputs": [], "source": [ "# Load the backbone configuration\n", - "with open(\"../../../populate-studio/payloads/backbones/backbone-Prithvi_EO_V2_300M.json\", \"r\") as f:\n", - " backbone = json.load(f)\n", + "try:\n", + " with open(\"../populate-studio/payloads/backbones/backbone-Prithvi_EO_V2_300M.json\", \"r\") as f:\n", + " backbone = json.load(f)\n", + " \n", + "except FileNotFoundError:\n", + " backbone = {\n", + " \"name\" : \"Prithvi_EO_V2_300M\",\n", + " \"description\" : \"Geospatial pre-traineed foundation model Prithvi-EO-V2 (Prithvi_EO_V2_300M)\",\n", + " \"checkpoint_filename\" : \"prithvi_eo_v2_300\\/Prithvi_EO_V2_300M.pt\",\n", + " \"model_params\" : {\"backbone\": \"prithvi_eo_v2_300\", \"embed_dim\": 768, \"num_heads\": 12, \"tile_size\": 1, \"num_layers\": 12, \"patch_size\": 16, \"tubelet_size\": 1, \"model_category\": \"prithvi\"}\n", + " }\n", + "\n", "\n", "print(\"📋 Backbone Configuration:\")\n", "print(json.dumps(backbone, indent=2))" @@ -255,8 +273,54 @@ "outputs": [], "source": [ "# Load the dataset configuration\n", - "with open(\"../../../populate-studio/payloads/datasets/dataset-burn_scars.json\", \"r\") as f:\n", - " wild_fire_dataset = json.load(f)\n", + "try:\n", + " with open(\"../populate-studio/payloads/datasets/dataset-burn_scars.json\", \"r\") as f:\n", + " wild_fire_dataset = json.load(f)\n", + "except FileNotFoundError:\n", + " wild_fire_dataset = {\n", + " \"dataset_name\": \"Wildfire burn scars\",\n", + " \"dataset_url\": \"https://s3.us-east.cloud-object-storage.appdomain.cloud/geospatial-studio-example-data/burn-scar-training-data.zip\",\n", + " \"label_suffix\": \".mask.tif\",\n", + " \"description\": \"A set of wildfire burn scar extent using HLS data. 
This dataset contains Harmonized Landsat and Sentinel-2 imagery of burn scars and the associated masks for the years 2018-2021 over the contiguous United States. There are 804 512x512 scenes. Its primary purpose is for training geospatial machine learning models. Each tiff file contains a 512x512 pixel tiff file. Scenes contain six bands, and masks have one band. For satellite scenes, each band has already been converted to reflectance.\",\n", + " \"purpose\": \"Segmentation\",\n", + " \"data_sources\": [\n", + " {\n", + " \"bands\": [\n", + " {\"index\": \"0\", \"band_name\": \"Blue\", \"scaling_factor\": \"0.0001\", \"RGB_band\": \"B\"},\n", + " {\"index\": \"1\", \"band_name\": \"Green\", \"scaling_factor\": \"0.0001\", \"RGB_band\": \"G\"},\n", + " {\"index\": \"2\", \"band_name\": \"Red\", \"scaling_factor\": \"0.0001\", \"RGB_band\": \"R\"},\n", + " {\"index\": \"3\", \"band_name\": \"NIR_Narrow\", \"scaling_factor\": \"0.0001\"},\n", + " {\"index\": \"4\", \"band_name\": \"SWIR1\", \"scaling_factor\": \"0.0001\"},\n", + " {\"index\": \"5\", \"band_name\": \"SWIR2\", \"scaling_factor\": \"0.0001\"}\n", + " ],\n", + " \"connector\": \"sentinelhub\",\n", + " \"collection\": \"hls_l30\",\n", + " \"file_suffix\": \"_merged.tif\",\n", + " \"modality_tag\": \"HLS_L30\"\n", + " }\n", + " ],\n", + " \"label_categories\": [\n", + " {\n", + " \"id\": \"1\",\n", + " \"name\": \"Fire Scar\",\n", + " \"color\": \"#ab4f4f\",\n", + " \"opacity\": 1\n", + " },\n", + " {\n", + " \"id\": \"0\",\n", + " \"name\": \"No data\",\n", + " \"color\": \"#000000\",\n", + " \"opacity\": \"0\"\n", + " },\n", + " {\n", + " \"id\": \"-1\",\n", + " \"name\": \"Ignore\",\n", + " \"color\": \"#000000\",\n", + " \"opacity\": \"0\"\n", + " }\n", + " ],\n", + " \"version\": \"v2\"\n", + " }\n", "\n", "print(\"📋 Dataset Configuration:\")\n", "print(json.dumps(wild_fire_dataset, indent=2))" @@ -371,14 +435,388 @@ "\n", "# Load the template configuration\n", "try:\n", - " with 
open('../../../populate-studio/payloads/templates/template-seg.json', 'r') as f:\n", + " with open('../populate-studio/payloads/templates/template-seg.json', 'r') as f:\n", " segmentation_template = json.load(f)\n", " \n", - " print(\"📋 Segmentation Task Template:\")\n", - " print(json.dumps(segmentation_template, indent=2))\n", "except FileNotFoundError:\n", - " print(\"⚠️ Template file not found. Please check populate-studio/payloads/templates/\")\n", - " segmentation_template = None" + " segmentation_template = {\n", + " \"name\": \"Segmentation\",\n", + " \"description\": \"Generic template v1 and v2 models: Segmentation\",\n", + " \"purpose\": \"Segmentation\",\n", + " \"model_params\": {\n", + " \"$uri\": \"https://ibm.com/watsonx.ai.geospatial.finetune.segmentation.json\",\n", + " \"type\": \"object\",\n", + " \"title\": \"Finetune\",\n", + " \"$schema\": \"https://json-schema.org/draft/2020-12/schema\",\n", + " \"properties\": {\n", + " \"data\": {\n", + " \"type\": \"object\",\n", + " \"default\": {\n", + " \"batch_size\": 4,\n", + " \"constant_multiply\": 1,\n", + " \"workers_per_gpu\": 2,\n", + " \"check_stackability\": False\n", + " },\n", + " \"properties\": {\n", + " \"batch_size\": {\n", + " \"type\": \"int\",\n", + " \"default\": 4,\n", + " \"description\": \"Batch size\",\n", + " \"studio_name\": \"Batch size\"\n", + " },\n", + " \"constant_multiply\": {\n", + " \"type\": \"float\",\n", + " \"default\": 1,\n", + " \"description\": \"Constant Scale\",\n", + " \"studio_name\": \"Constant Scale\"\n", + " },\n", + " \"workers_per_gpu\": {\n", + " \"studio_name\": \"Workers per GPU\",\n", + " \"description\": \"Workers per GPU\",\n", + " \"type\": \"int\",\n", + " \"default\": 2\n", + " },\n", + " \"check_stackability\": {\n", + " \"studio_name\": \"Check Stackability\",\n", + " \"description\": \"Check Stackability\",\n", + " \"type\": \"bool\",\n", + " \"default\": False\n", + " }\n", + " },\n", + " \"studio_name\": \"Data loading\"\n", + " },\n", + " 
\"model\": {\n", + " \"type\": \"object\",\n", + " \"default\": {\n", + " \"decode_head\": {\n", + " \"channels\": 256,\n", + " \"num_convs\": 4,\n", + " \"decoder\": \"UNetDecoder\",\n", + " \"loss_decode\": {\n", + " \"type\": \"CrossEntropyLoss\",\n", + " \"avg_non_ignore\": True\n", + " }\n", + " },\n", + " \"frozen_backbone\": False,\n", + " \"tiled_inference_parameters\": {\n", + " \"h_crop\": 224,\n", + " \"h_stride\": 196,\n", + " \"w_crop\": 224,\n", + " \"w_stride\": 196,\n", + " \"average_patches\": False\n", + " }\n", + " },\n", + " \"properties\": {\n", + " \"decode_head\": {\n", + " \"type\": \"object\",\n", + " \"default\": {\n", + " \"channels\": 256,\n", + " \"num_convs\": 4,\n", + " \"decoder\": \"UperNetDecoder\",\n", + " \"loss_decode\": {\n", + " \"type\": \"CrossEntropyLoss\",\n", + " \"avg_non_ignore\": True\n", + " }\n", + " },\n", + " \"properties\": {\n", + " \"channels\": {\n", + " \"type\": \"int\",\n", + " \"default\": 256,\n", + " \"description\": \"Channels at each block of the decode head, except the final one\",\n", + " \"studio_name\": \"Channels\"\n", + " },\n", + " \"num_convs\": {\n", + " \"type\": \"int\",\n", + " \"default\": 4,\n", + " \"description\": \"Number of convolutional blocks in the head (except the final one)\",\n", + " \"studio_name\": \"Blocks\"\n", + " },\n", + " \"decoder\": {\n", + " \"enum\": [\n", + " \"UperNetDecoder\",\n", + " \"UNetDecoder\"\n", + " ],\n", + " \"type\": \"string\",\n", + " \"default\": \"Fixed\",\n", + " \"description\": \"Decoder type\",\n", + " \"studio_name\": \"Decoder type\"\n", + " },\n", + " \"loss_decode\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"type\": {\n", + " \"enum\": [\n", + " \"CrossEntropyLoss\"\n", + " ],\n", + " \"type\": \"string\",\n", + " \"default\": \"CrossEntropyLoss\",\n", + " \"description\": \"Type of loss function\",\n", + " \"studio_name\": \"Loss function\"\n", + " },\n", + " \"avg_non_ignore\": {\n", + " \"type\": \"bool\",\n", + 
" \"default\": True,\n", + " \"description\": \"The loss is only averaged over non-ignored targets (ignored targets are usually where labels are missing in the dataset) if this is True\"\n", + " }\n", + " },\n", + " \"description\": \"Loss function to be used\",\n", + " \"studio_name\": \"Loss\"\n", + " }\n", + " },\n", + " \"description\": \"Architecture of the decode head\",\n", + " \"studio_name\": \"Head\"\n", + " },\n", + " \"auxiliary_head\": {\n", + " \"type\": \"object\",\n", + " \"default\": {},\n", + " \"properties\": {\n", + " \"decoder\": {\n", + " \"type\": \"string\",\n", + " \"default\": \"FCNDecoder\",\n", + " \"description\": \"Decoder function to use\",\n", + " \"studio_name\": \"Decoder\"\n", + " },\n", + " \"channels\": {\n", + " \"type\": \"int\",\n", + " \"default\": 256,\n", + " \"description\": \"Channels at each block of the decode head, except the final one\",\n", + " \"studio_name\": \"Channels\"\n", + " },\n", + " \"num_convs\": {\n", + " \"type\": \"int\",\n", + " \"default\": 2,\n", + " \"description\": \"Number of convolutional blocks in the head (except the final one)\",\n", + " \"studio_name\": \"Blocks\"\n", + " },\n", + " \"in_index\": {\n", + " \"type\": \"int\",\n", + " \"default\": -1,\n", + " \"description\": \"Index of the input list to take. Defaults to -1\",\n", + " \"studio_name\": \"In index\"\n", + " },\n", + " \"dropout\": {\n", + " \"type\": \"int\",\n", + " \"default\": 0,\n", + " \"description\": \"Dropout value to apply. 
Defaults to 0\",\n", + " \"studio_name\": \"Dropout\"\n", + " },\n", + " \"loss_decode\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"type\": {\n", + " \"enum\": [\n", + " \"CrossEntropyLoss\"\n", + " ],\n", + " \"type\": \"string\",\n", + " \"default\": \"CrossEntropyLoss\",\n", + " \"description\": \"Type of loss function\",\n", + " \"studio_name\": \"Loss function\"\n", + " },\n", + " \"loss_weight\": {\n", + " \"type\": \"float\",\n", + " \"default\": 1,\n", + " \"description\": \"Multiplicative weight of the loss of the auxiliary head in the loss. The loss is calculated as aux_head_weight * aux_head_loss + decode_head_loss\",\n", + " \"studio_name\": \"Loss weight\"\n", + " },\n", + " \"avg_non_ignore\": {\n", + " \"type\": \"bool\",\n", + " \"default\": True,\n", + " \"description\": \"The loss is only averaged over non-ignored targets (ignored targets are usually where labels are missing in the dataset) if this is True\"\n", + " }\n", + " },\n", + " \"description\": \"Loss function to be used\",\n", + " \"studio_name\": \"Loss\"\n", + " }\n", + " },\n", + " \"description\": \"Architecture of the auxiliary head\"\n", + " },\n", + " \"frozen_backbone\": {\n", + " \"type\": \"bool\",\n", + " \"default\": False,\n", + " \"description\": \"Freeze the weights of the backbone when set to True\",\n", + " \"studio_name\": \"Freeze backbone\"\n", + " },\n", + " \"tiled_inference_parameters\": {\n", + " \"type\": \"object\",\n", + " \"default\": {\n", + " \"h_crop\": 224,\n", + " \"h_stride\": 196,\n", + " \"w_crop\": 224,\n", + " \"w_stride\": 196,\n", + " \"average_patches\": False\n", + " },\n", + " \"properties\": {\n", + " \"h_crop\": {\n", + " \"type\": \"int\",\n", + " \"default\": 224,\n", + " \"description\": \"h_crop values for tilling images\",\n", + " \"studio_name\": \"h_crop\"\n", + " },\n", + " \"h_stride\": {\n", + " \"type\": \"int\",\n", + " \"default\": 196,\n", + " \"description\": \"h_stride values for tilling images\",\n", 
+ " \"studio_name\": \"h_stride\"\n", + " },\n", + " \"w_crop\": {\n", + " \"type\": \"int\",\n", + " \"default\": 224,\n", + " \"description\": \"w_crop values for tilling images\",\n", + " \"studio_name\": \"w_crop\"\n", + " },\n", + " \"w_stride\": {\n", + " \"type\": \"int\",\n", + " \"default\": 196,\n", + " \"description\": \"w_stride values for tilling images\",\n", + " \"studio_name\": \"w_stride\"\n", + " },\n", + " \"average_patches\": {\n", + " \"type\": \"bool\",\n", + " \"default\": False,\n", + " \"description\": \"Whether to use average_patches\",\n", + " \"studio_name\": \"average_patches\"\n", + " }\n", + " }\n", + " }\n", + " },\n", + " \"description\": \"Model architecture definition\",\n", + " \"studio_name\": \"Architecture\"\n", + " },\n", + " \"runner\": {\n", + " \"type\": \"object\",\n", + " \"default\": {\n", + " \"max_epochs\": 10,\n", + " \"early_stopping_patience\": 20,\n", + " \"early_stopping_monitor\": \"val/loss\"\n", + " },\n", + " \"properties\": {\n", + " \"max_epochs\": {\n", + " \"type\": \"int\",\n", + " \"default\": 10,\n", + " \"description\": \"Training epochs\",\n", + " \"studio_name\": \"Training epochs\"\n", + " },\n", + " \"early_stopping_patience\": {\n", + " \"type\": \"int\",\n", + " \"default\": 20,\n", + " \"description\": \"Early stopping patience\",\n", + " \"studio_name\": \"Early stopping patience\"\n", + " },\n", + " \"early_stopping_monitor\": {\n", + " \"type\": \"string\",\n", + " \"default\": \"val/loss\",\n", + " \"description\": \"Monitoring value to determine early stopping\",\n", + " \"studio_name\": \"Early stopping monitor\"\n", + " }\n", + " },\n", + " \"studio_name\": \"Runner\"\n", + " },\n", + " \"lr_config\": {\n", + " \"type\": \"object\",\n", + " \"default\": {\n", + " \"policy\": \"Fixed\"\n", + " },\n", + " \"required\": [\n", + " \"policy\"\n", + " ],\n", + " \"properties\": {\n", + " \"policy\": {\n", + " \"enum\": [\n", + " \"Fixed\",\n", + " \"CosineAnnealing\"\n", + " ],\n", + " 
\"type\": \"string\",\n", + " \"default\": \"Fixed\",\n", + " \"description\": \"Policy type\",\n", + " \"studio_name\": \"Policy type\"\n", + " },\n", + " \"warmup_iters\": {\n", + " \"type\": \"int\",\n", + " \"default\": 0,\n", + " \"description\": \"LR warmup iterations. Valid for some policies\",\n", + " \"studio_name\": \"Learning rate warmup iterations\"\n", + " },\n", + " \"warmup_ratio\": {\n", + " \"type\": \"float\",\n", + " \"default\": 1,\n", + " \"description\": \"Initial lr at warmup will be learning_rate * warmup_ratio\",\n", + " \"studio_name\": \"LR warmup initialization ratio\"\n", + " }\n", + " },\n", + " \"description\": \"Learning rate policy\",\n", + " \"studio_name\": \"Learning rate policy\"\n", + " },\n", + " \"optimizer\": {\n", + " \"type\": \"object\",\n", + " \"default\": {\n", + " \"lr\": 6e-05,\n", + " \"type\": \"Adam\"\n", + " },\n", + " \"properties\": {\n", + " \"lr\": {\n", + " \"type\": \"float\",\n", + " \"default\": 6e-05,\n", + " \"description\": \"Learning rate\",\n", + " \"studio_name\": \"Learning rate\"\n", + " },\n", + " \"type\": {\n", + " \"enum\": [\n", + " \"Adam\",\n", + " \"SGD\",\n", + " \"AdamW\",\n", + " \"RMSProp\"\n", + " ],\n", + " \"default\": \"Adam\",\n", + " \"description\": \"Optimizer to be used\",\n", + " \"studio_name\": \"Optimizer type\"\n", + " },\n", + " \"weight_decay\": {\n", + " \"type\": \"float\",\n", + " \"default\": 0,\n", + " \"description\": \"L2 weight regularization (weight decay)\",\n", + " \"studio_name\": \"L2 regularization weight\"\n", + " }\n", + " },\n", + " \"description\": \"Optimizer\",\n", + " \"studio_name\": \"Optimizer\"\n", + " },\n", + " \"dataset_id\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"ID of dataset to use for this finetuning\",\n", + " \"studio_name\": \"Dataset\"\n", + " },\n", + " \"evaluation\": {\n", + " \"type\": \"object\",\n", + " \"default\": {\n", + " \"interval\": 1\n", + " },\n", + " \"properties\": {\n", + " \"interval\": {\n", + " 
\"type\": \"int\",\n", + " \"default\": 1,\n", + " \"description\": \"Frequency of epochs with which to perform validation\",\n", + " \"studio_name\": \"Epoch interval\"\n", + " }\n", + " },\n", + " \"studio_name\": \"Validation\"\n", + " },\n", + " \"backbone_model_id\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"ID of the pretrained backbone\"\n", + " }\n", + " },\n", + " \"description\": \"A request sent to the finetuning service to start a finetune task for segmentation\"\n", + " },\n", + " \"extra_info\": {\n", + " \"runtime_image\": \"quay.io/geospatial-studio/terratorch:latest\",\n", + " \"model_category\": \"prithvi\",\n", + " \"model_framework\": \"terratorch-v2\"\n", + " },\n", + " \"content\": \"################################################################
# Licensed Materials - Property of IBM
# "Restricted Materials of IBM"
# Copyright IBM Corp. 2025 ALL RIGHTS RESERVED
################################################################


# lightning.pytorch==2.1.1
seed_everything: 0
trainer:
  accelerator: auto
  strategy: auto
  devices: auto
  num_nodes: 1
  precision: 16-mixed
  logger:
    class_path: lightning.pytorch.loggers.mlflow.MLFlowLogger
    init_args:
      experiment_name: {{ tune_id }} # Future version, change this to user / email
      run_name: "Train"    # Future version, change this to tune_id
      tracking_uri: {{ mlflow_tracking_url }}
      save_dir: {{ mount_root + 'tune-tasks/' + tune_id + '/mlflow' }}
      {% if mlflow_tags -%}
      tags:
        {% for key, value in mlflow_tags.items() -%}
        {{ key }}: {{ value }}
        {% endfor %}
      {%- endif %}       
  callbacks:
    - class_path: RichProgressBar
    - class_path: LearningRateMonitor
      init_args:
        logging_interval: epoch
    # ---- Early stop if ----
    {% if runner["early_stopping_patience"] -%}
    - class_path: EarlyStopping
      init_args:
        monitor: {{ runner["early_stopping_monitor"] }}
        patience: {{ runner["early_stopping_patience"] }}
    {%- endif %}
     # ---- Early stop endif ----
    - class_path: ModelCheckpoint
      init_args:
        dirpath: {{ mount_root + 'tune-tasks/' + tune_id  + '/' }}
        mode: min
        monitor: val/loss
        filename: {{ 'best-state_dict-{epoch:02d}' }}
        save_weights_only: True
      
  max_epochs: {{ runner["max_epochs"] }}
  check_val_every_n_epoch: {{ evaluation["interval"] }}
  log_every_n_steps: 50
  enable_checkpointing: true
  default_root_dir: {{ mount_root + 'tune-tasks/' + tune_id }}

data:
  class_path: terratorch.datamodules.GenericNonGeoSegmentationDataModule
  init_args:
    batch_size: {{ data["batch_size"] }}
    num_workers: {{ data["workers_per_gpu"] }}
    no_label_replace: {{ label_nodata }}
    no_data_replace: {{ image_nodata_replace }}
    constant_scale: {{ constant_multiply }}
    dataset_bands:
      {{ bands.values() | list | first | to_yaml | indent(6) }}
    output_bands:
      {{ output_bands.values() | list | first | to_yaml | indent(6) }}
    rgb_indices:
      {{ rgb_band_indices | to_yaml | indent(6) }}
    train_data_root: {{ data_root }}{{ train_data_dir.values() | list | first }}
    train_label_data_root: {{ data_root + train_labels_dir }}
    val_data_root: {{ data_root }}{{ val_data_dir.values() | list | first }}
    val_label_data_root: {{ data_root + val_labels_dir }}
    test_data_root: {{ data_root }}{{ test_data_dir.values() | list | first }}
    test_label_data_root: {{ data_root + test_labels_dir }}
    {% if train_split_path -%}
    train_split: {{ data_root + train_split_path }}
    {% endif -%}
    {% if test_split_path -%}
    test_split: {{ data_root + test_split_path }}
    {% endif -%}
    {% if val_split_path -%}
    val_split: {{ data_root + val_split_path }}
    {% endif -%}
    {% if img_suffix -%}
    img_grep:  {{ img_suffix.values() | list | first | tojson }}
    {% endif -%}
    {% if seg_map_suffix -%}
    label_grep: "{{ seg_map_suffix }}"
    {% endif -%}
    means: 
      {{ norm_means.values() | list | first| to_yaml | indent(6) }}
    stds: 
      {{ norm_stds.values() | list | first | to_yaml | indent(6) }}
    num_classes: {{ classes|length }}
    {% if data["expand_temporal_dimension"] is not none -%}
    expand_temporal_dimension: {{ data["expand_temporal_dimension"] }}
    {% endif -%}
    {% if data["drop_last"] is not none -%}
    drop_last: {{ data["drop_last"] }}
    {% endif -%}
    # ---- train_transform if ----
    {% if data["train_transform"] -%}
    train_transform:
    {% for transform in data["train_transform"] %}
    - class_path: {{ transform["class_path"] }}
      init_args:
        {% if transform["height"] is not none -%}
        height: {{ transform["height"] }}
        {% endif -%}
        {% if transform["width"] is not none -%}
        width: {{ transform["width"] }}
        {% endif -%}
        {% if transform["always_apply"] is not none -%}
        always_apply: {{ transform["always_apply"] }}
        {% endif -%}
        {% if transform["transpose_mask"] is not none -%}
        transpose_mask: {{ transform["transpose_mask"] }}
        {% endif -%}
        {% if transform["p"] is not none -%}
        p: {{ transform["p"] }}
        {% endif -%}
    {% endfor %}
    {% endif -%}
    # ---- train_transform endif ----

    # if backbone is prithvi-EO-v2
    test_transform:
      - class_path: ToTensorV2
model:
  class_path: terratorch.tasks.SemanticSegmentationTask
  init_args:
    model_args: 
      {%- if pretrained_model_name == "prithvi_eo_v1_100" or pretrained_model_name == "prithvi_eo_v2_300" or pretrained_model_name == "prithvi_eo_v2_300_tl" or pretrained_model_name == "prithvi_eo_v2_600" or pretrained_model_name == "prithvi_eo_v2_600_tl" %}
      backbone_pretrained: true 
      backbone: {{ pretrained_model_name }}
      backbone_drop_path: 0.1 
      backbone_bands:
        {{ output_bands.values() | list | first | to_yaml | indent(8) }}
      necks: 
        - name: SelectIndices
          {%- if pretrained_model_name == "prithvi_eo_v1_100" %}
          indices: [2, 5, 8, 11] # 100M models
          {%- elif pretrained_model_name == "prithvi_eo_v2_300" or pretrained_model_name == "prithvi_eo_v2_300_tl" %}
          indices: [5, 11, 17, 23]  # 300M models
          {%- elif pretrained_model_name == "prithvi_eo_v2_600" or pretrained_model_name == "prithvi_eo_v2_600_tl" %}
          indices: [7, 15, 23, 31] # 600M models    
          {% endif %}
        - name: ReshapeTokensToImage # required
        - name: LearnedInterpolateToPyramidal
      {%- else %}
      # Old model version configurations
      pretrained: true
      backbone: {{ pretrained_model_name }}
      # backbone_temporal_encoding: true
      backbone_drop_path_rate: 0.3
      backbone_window_size: 7
      num_frames: {{ num_frames }}
      head_channel_list:
        {{ head_channel_list | to_yaml | indent(10) }}
      bands:
        {{ output_bands.values() | list | first | to_yaml | indent(8) }}
      {% endif %} 
      decoder: {{ model["decode_head"]["decoder"] }}
      {% if  model["decode_head"]["decoder"] == "UperNetDecoder" -%}
      decoder_channels: 256
      {% elif  model["decode_head"]["decoder"] == "UNetDecoder" -%}
      #TODO user provided channels
      decoder_channels: [512, 256, 128, 64]
      {% else %}
      decoder_channels: {{ model["decode_head"]["channels"] }}
      {% endif -%}
      num_classes: {{ classes|length }}
      head_dropout: 0.1
    {% if pretrained_model_name == "prithvi_eo_v1_100" or pretrained_model_name == "prithvi_eo_v2_300" or pretrained_model_name == "prithvi_eo_v2_300_tl" or pretrained_model_name == "prithvi_eo_v2_600" or pretrained_model_name == "prithvi_eo_v2_600_tl" -%}
    model_factory: EncoderDecoderFactory
    {%- else %}
    model_factory: PrithviModelFactory
    {% endif %} 
    loss: {{ model["decode_head"]["loss_decode"]["type"] }}
    plot_on_val: {{ runner["plot_on_val"] }}
    {% if model["auxiliary_head"] -%}
    aux_heads:
      - name: aux_head
        decoder: {{ model["auxiliary_head"]["decoder"] }}
        decoder_args:
          decoder_channels: {{ model["auxiliary_head"]["channels"] }}
          decoder_in_index: {{ model["auxiliary_head"]["in_index"] }}
          decoder_num_convs: {{ model["auxiliary_head"]["num_convs"] }}
          head_dropout:  {{ model["auxiliary_head"]["dropout"] }}
          # head_channel_list:
          #   - 64
    aux_loss:
      aux_head: {{ model["auxiliary_head"]["loss_decode"]["loss_weight"] }}
    {% endif -%}
    ignore_index: {{ ignore_index }}
    freeze_backbone: {{ model["frozen_backbone"] | lower }}
    freeze_decoder: false

    # ---- optimizer start ----
    {% if model["optimizer"] -%}
    optimizer: {{ model["optimizer"]["type"] }}
    lr: {{ model["optimizer"]["lr"] | float }}
    {% endif -%}
    # ---- optimizer end ----
    {% if model["tiled_inference_parameters"] %}
    tiled_inference_parameters: 
      h_crop: {{ model["tiled_inference_parameters"]["h_crop"] | int}}
      h_stride: {{ model["tiled_inference_parameters"]["h_stride"] | int }}
      w_crop: {{ model["tiled_inference_parameters"]["w_crop"] | int}}
      w_stride: {{ model["tiled_inference_parameters"]["w_stride"] | int }}
      average_patches: {{ model["tiled_inference_parameters"]["average_patches"] }}
    {% else %}
    # ToDo: Remove the tiled_inference if user not provided. 
    tiled_inference_parameters: 
      h_crop: 512
      # stride logic = would be h_crop - h_crop * 0.125
      h_stride: 448
      w_crop: 512
      # stride logic = would be w_crop - w_crop * 0.125
      w_stride: 448
      average_patches: true
    {% endif %}
optimizer:
  class_path: {{ 'torch.optim.' + optimizer["type"] }}
  init_args:
    # ---- Optimizer start if ----
    {% if optimizer["lr"] -%}
    lr: {{ optimizer["lr"] | float }}
    {% else %}
    lr: 1.e-4
    {% endif -%}
    {% if optimizer["weight_decay"] -%}
    weight_decay: {{ optimizer["weight_decay"] }}
    {% else %}
    weight_decay: 0.05
    {% endif -%}
    # ---- Optimizer stop if ----
lr_scheduler:
  class_path: ReduceLROnPlateau
  init_args:
    monitor: val/loss
\"\n", + " }\n", + "\n", + " print(\"📋 Segmentation Task Template:\")\n", + "print(json.dumps(segmentation_template, indent=2))" ] }, { diff --git a/workshop/docs/notebooks/lab5-gesa-burnscars-workflow.ipynb b/workshop/docs/notebooks/lab5-gesa-burnscars-workflow.ipynb index 2bbc56af..2bdd595a 100644 --- a/workshop/docs/notebooks/lab5-gesa-burnscars-workflow.ipynb +++ b/workshop/docs/notebooks/lab5-gesa-burnscars-workflow.ipynb @@ -8,12 +8,12 @@ "# Lab 5\n", "\n", "\n", - "\n", + "\n", " \"Open\n", "\n", "\n", "\n", - "![Geospatial Studio banner](https://raw.githubusercontent.com/terrastackai/geospatial-studio/gesa-workshop-notebooks/workshop/docs/assets/banner.png)\n", + "![Geospatial Studio banner](https://raw.githubusercontent.com/terrastackai/geospatial-studio/main/workshop/docs/assets/banner.png)\n", "\n", "## 🌍 Geospatial Exploration and Orchestration Studio \n", "\n", @@ -142,17 +142,17 @@ "Raw satellite data showing the area affected by wildfire:\n", "\n", "\n", - "\"RGB\n", + "\"RGB\n", "\n", "#### Model Prediction\n", "The fine-tuned model identifies burn scar areas:\n", "\n", - "\"Fire\n", + "\"Fire\n", "\n", "#### Prediction Overlay\n", "Burn scar predictions overlaid on the original imagery for validation:\n", "\n", - "\"Fire\n", + "\"Fire\n", "\n", "\n", "\n", @@ -259,7 +259,9 @@ "#############################################################\n", "# Initialize Geostudio client using a geostudio config file\n", "#############################################################\n", - "gfm_client = Client(geostudio_config_file=\".geostudio_config_file\")" + "# If you are using jupyter lab, create a normal file to view it on the fileviewer. `geostudio_config_file`\n", + "\n", + "gfm_client = Client(geostudio_config_file=\"geostudio_config_file\")" ] }, { @@ -280,12 +282,6 @@ "In this section, we'll explore how to review datasets that have already been prepared and onboarded to the Geospatial Studio. 
This is useful for understanding what data is available before starting a fine-tuning task." ] }, - { - "cell_type": "markdown", - "id": "ae7d34b9", - "metadata": {}, - "source": [] - }, { "cell_type": "code", "execution_count": null, @@ -309,10 +305,16 @@ "source": [ "# Get detailed information about a specific burn scars dataset\n", "# If you know the dataset name or ID, you can retrieve its details\n", - "burn_scars_dataset_id = \"geodata-e8hwqfrvtsgwfgsx7agvgk\"\n", + "burn_scars_dataset_id = \"select-dataset_id-from-DataFrame-above\"\n", "burn_scars_dataset_id" ] }, + { + "cell_type": "markdown", + "id": "31fee87f", + "metadata": {}, + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -381,10 +383,18 @@ "source": [ "# List all the base models onboarded in the cluster\n", "base_models = gfm_client.list_base_models()['results']\n", - "display(DataFrame(base_models))\n", - "\n", + "display(DataFrame(base_models))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06dfa760", + "metadata": {}, + "outputs": [], + "source": [ "# Select Prithvi_EO_V2_300M model\n", - "prithvi_eo_v2_300m_base_model_id = \"28ad8b4f-13ba-4567-b28f-bb16758324c3\"" + "prithvi_eo_v2_300m_base_model_id = \"select-base_model-id-from-DataFrame-above\"" ] }, { @@ -429,10 +439,18 @@ "source": [ "# List all the templates onboarded in the cluster\n", "tune_templates = gfm_client.list_tune_templates()['results']\n", - "display(DataFrame(tune_templates))\n", - "\n", + "display(DataFrame(tune_templates))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7aecd8db", + "metadata": {}, + "outputs": [], + "source": [ "# Select segmentation template\n", - "segmentation_template_id = \"8573acf1-5ee7-4291-bcfe-357408080445\"" + "segmentation_template_id = \"select-template-id-from-DataFrame-above\"" ] }, { @@ -512,6 +530,8 @@ "5. 
**Completion**: Finalizing and storing the trained model\n", "\n", "**Expected Duration:**\n", + "\n", + "These times are dependent on the tuning config e.g. number of epochs, batch size, dataset size, etc.\n", "- Modern GPU (V100/A100): 30-45 minutes\n", "- Older GPU: 45-90 minutes\n", "- CPU: Not recommended (2 hrs + Using Terramind tiny )\n", @@ -727,7 +747,7 @@ " \"visible_by_default\": \"True\"\n", " }\n", " ],\n", - " \"tune_id\" : \"geotune-k9xhhztv6rqub8usa4xjdp\"\n", + " \"tune_id\" : tune_id\n", "}\n", "\n", "print(\"📋 Inference Configuration:\")\n", @@ -745,7 +765,6 @@ "source": [ "# Submit the inference request\n", "print(\"\\n🚀 Submitting inference request...\\n\")\n", - "tune_id = \"geotune-k9xhhztv6rqub8usa4xjdp\"\n", "\n", "inference_response = gfm_client.try_out_tune(tune_id=tune_id, data=inference_payload)\n", "\n", @@ -824,20 +843,72 @@ "outputs": [], "source": [ "# The best model already onboarded in the cluster\n", - "best_burnscars_model = \"geotune-mgmshlc8b9mwksedrghray\"\n", + "best_burnscars_model = \"replace-with-best-burnscars_model_id\" \n", "# Run an inference on the best model\n", - "gfm_client.try_out_tune(tune_id=tune_id, data={**inference_payload, \"description\": \"Best Model Inference\", \"tune_id\":best_burnscars_model})\n", + "gfm_client.try_out_tune(tune_id=tune_id, data={**inference_payload, \"description\": \"Burns Best Model Inference\", \"tune_id\":best_burnscars_model})\n", "\n", "\n", "# Use 5.1 instructions to view the results in the UI" ] }, + { + "cell_type": "markdown", + "id": "44a8b962", + "metadata": {}, + "source": [ + "## 7.0 Download artifacts : Download our trained model artifacts\n", + "\n", + "With the trained model, we will download the checkpoint and config that we can re-use for running inferences. \n", + "\n", + "We will use wget/curl to download the artifacts. 
Otherwise, you can grab the links and download directly to your local machine.\n", + "\n", + "---" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "33e73e49", + "metadata": {}, + "outputs": [], + "source": [ + "# Get the download URLs for the trained model artifacts\n", + "artifact_urls = gfm_client.download_tune(tune_id=tune_id)\n", + "artifact_urls" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "download-artifacts", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract URLs\n", + "config_url = artifact_urls['config_url']\n", + "checkpoint_url = artifact_urls['checkpoint_url']\n", + "\n", + "print(f\"Config URL: {config_url}\")\n", + "print(f\"Checkpoint URL: {checkpoint_url}\")\n", + "print(\"\\n--- Download Commands ---\\n\")\n", + "\n", + "# Using wget\n", + "print(\"Using wget:\")\n", + "print(f\"wget -O config_deploy.yaml '{config_url}'\")\n", + "print(f\"wget -O model_checkpoint.ckpt '{checkpoint_url}'\")\n", + "\n", + "# Using curl\n", + "print(\"\\nUsing curl:\")\n", + "print(f\"curl -o config_deploy.yaml '{config_url}'\")\n", + "print(f\"curl -o model_checkpoint.ckpt '{checkpoint_url}'\")" + ] + }, { "cell_type": "markdown", "id": "summary-section-2", "metadata": {}, "source": [ - "## 7. Summary and Key Takeaways\n", + "## 8. Summary and Key Takeaways\n", "\n", "Congratulations! You've completed an end-to-end machine learning workflow for geospatial AI with a burnscars example. 
🎉\n", "\n", diff --git a/workshop/docs/notebooks/lab6-gesa-floods-workflow.ipynb b/workshop/docs/notebooks/lab6-gesa-floods-workflow.ipynb index 8e264513..ac97dc6b 100644 --- a/workshop/docs/notebooks/lab6-gesa-floods-workflow.ipynb +++ b/workshop/docs/notebooks/lab6-gesa-floods-workflow.ipynb @@ -7,7 +7,7 @@ "source": [ "# Lab 6\n", "\n", - "\n", + "\n", " \"Open\n", "\n" ] @@ -17,7 +17,7 @@ "id": "d61fa1ff", "metadata": {}, "source": [ - "![Geospatial Studio banner](https://raw.githubusercontent.com/terrastackai/geospatial-studio/gesa-workshop-notebooks/workshop/docs/assets/banner.png)\n", + "![Geospatial Studio banner](https://raw.githubusercontent.com/terrastackai/geospatial-studio/main/workshop/docs/assets/banner.png)\n", "\n", "## 🌍 Geospatial Exploration and Orchestration Studio \n", "\n", @@ -27,36 +27,36 @@ "\n", " License\n", " \n", - " \n", + " \"Apache\n", " \n", "\n", "\n", " TerraStackAI\n", " \n", - " \n", - " \n", - " \n", + " \"TerraTorch\"\n", + " \"TerraKit\"\n", + " \"Iterate\"\n", " \n", "\n", "\n", " Built With\n", " \n", - " \n", - " \n", - " \n", + " \"Python\n", + " \"Code\n", + " \"Pre-commit\n", "\n", " \n", "\n", "\n", " Deployment\n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \"Helm\"\n", + " \"Red\n", + " \"Kubernetes\"\n", + " \"OAuth\n", + " \"PostgreSQL\"\n", + " \"Keycloak\"\n", + " \"MinIO\"\n", " \n", "\n", "\n", @@ -145,17 +145,17 @@ "#### Input Satellite Imagery\n", "Raw satellite data showing the area affected by wildfire:\n", "\n", - "![Fire Input](https://raw.githubusercontent.com/terrastackai/geospatial-studio/gesa-workshop-notebooks/workshop/docs/assets/floods-input.png)\n", + "![Fire Input](https://raw.githubusercontent.com/terrastackai/geospatial-studio/main/workshop/docs/assets/floods-input.png)\n", "\n", "#### Model Prediction\n", "The fine-tuned model identifies floods areas:\n", "\n", - "![Fire 
Prediction](https://raw.githubusercontent.com/terrastackai/geospatial-studio/gesa-workshop-notebooks/workshop/docs/assets/floods-prediction.png)\n", + "![Fire Prediction](https://raw.githubusercontent.com/terrastackai/geospatial-studio/main/workshop/docs/assets/floods-prediction.png)\n", "\n", "#### Prediction Overlay\n", "floods predictions overlaid on the original imagery for validation:\n", "\n", - "![Fire Prediction Overlayed](https://raw.githubusercontent.com/terrastackai/geospatial-studio/gesa-workshop-notebooks/workshop/docs/assets/floods-prediction-overlayed.png)\n", + "![Fire Prediction Overlayed](https://raw.githubusercontent.com/terrastackai/geospatial-studio/main/workshop/docs/assets/floods-prediction-overlayed.png)\n", "\n", "This workflow demonstrates the power of geospatial foundation models in identifying and mapping wildfire-affected areas, which is crucial for disaster response, environmental monitoring, and recovery planning.\n", "\n", @@ -260,6 +260,8 @@ "#############################################################\n", "# Initialize Geostudio client using a geostudio config file\n", "#############################################################\n", + "# If you are using jupyter lab, create a normal file to view it on the fileviewer. 
`geostudio_config_file`\n", + "\n", "gfm_client = Client(geostudio_config_file=\".geostudio_config_file\")" ] }, @@ -310,7 +312,7 @@ "source": [ "# Get detailed information about a specific floodss dataset\n", "# If you know the dataset name or ID, you can retrieve its details\n", - "floods_dataset_id = \"geodata-4vdk3x6pyfvr59zxki6kcc\"\n", + "floods_dataset_id = \"select-dataset_id-from-above\"\n", "floods_dataset_id" ] }, @@ -396,10 +398,18 @@ "source": [ "# List all the base models onboarded in the cluster\n", "base_models = gfm_client.list_base_models()['results']\n", - "display(DataFrame(base_models))\n", - "\n", + "display(DataFrame(base_models))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d04ca019", + "metadata": {}, + "outputs": [], + "source": [ "# Select Terramind Tiny model\n", - "terramind_tiny_base_model_id = \"fdf47e1e-aa27-42c2-b5da-b82e8f608c9d\"" + "terramind_tiny_base_model_id = \"select-base_model-id-from-DataFrame-above\"" ] }, { @@ -444,10 +454,18 @@ "source": [ "# List all the templates onboarded in the cluster\n", "tune_templates = gfm_client.list_tune_templates()['results']\n", - "display(DataFrame(tune_templates))\n", - "\n", + "display(DataFrame(tune_templates))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "956e3be2", + "metadata": {}, + "outputs": [], + "source": [ "# Select segmentation template\n", - "terramind_segmentation_template_id = \"ccf91f72-0a17-41f0-b835-5be3c901fe34\"" + "terramind_segmentation_template_id = \"select-template-id-from-DataFrame-above\"" ] }, { @@ -527,6 +545,8 @@ "5. **Completion**: Finalizing and storing the trained model\n", "\n", "**Expected Duration:**\n", + "\n", + "These times are dependent on the tuning config e.g. 
number of epochs, batch size, dataset size, etc.\n", "- Modern GPU (V100/A100): 30-45 minutes\n", "- Older GPU: 45-90 minutes\n", "- CPU: Not recommended (2 hrs + Using Terramind tiny )\n", @@ -606,7 +626,7 @@ " \"polygons\": []\n", " },\n", " \"temporal_domain\": [\n", - " \"2024-04-30,2024-05-07\"\n", + " \"2024-04-30_2024-05-07\"\n", " ],\n", " \"pipeline_steps\": [\n", " {\n", @@ -904,6 +924,58 @@ "---" ] }, + { + "cell_type": "markdown", + "id": "6b9f8e09", + "metadata": {}, + "source": [ + "## 7.0 Download artifacts : Download our trained model artifacts\n", + "\n", + "With the trained model, we will download the checkpoint and config that we can re-use for running inferences. \n", + "\n", + "We will use wget/curl to download the artifacts. Otherwise, you can grab the links and download directly to your local machine.\n", + "\n", + "---" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f02b97ce", + "metadata": {}, + "outputs": [], + "source": [ + "# Get the download URLs for the trained model artifacts\n", + "artifact_urls = gfm_client.download_tune(tune_id=tune_id)\n", + "artifact_urls" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbb4b98a", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract URLs\n", + "config_url = artifact_urls['config_url']\n", + "checkpoint_url = artifact_urls['checkpoint_url']\n", + "\n", + "print(f\"Config URL: {config_url}\")\n", + "print(f\"Checkpoint URL: {checkpoint_url}\")\n", + "print(\"\\n--- Download Commands ---\\n\")\n", + "\n", + "# Using wget\n", + "print(\"Using wget:\")\n", + "print(f\"wget -O config_deploy.yaml '{config_url}'\")\n", + "print(f\"wget -O model_checkpoint.ckpt '{checkpoint_url}'\")\n", + "\n", + "# Using curl\n", + "print(\"\\nUsing curl:\")\n", + "print(f\"curl -o config_deploy.yaml '{config_url}'\")\n", + "print(f\"curl -o model_checkpoint.ckpt '{checkpoint_url}'\")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -912,9 +984,9 @@ 
"outputs": [], "source": [ "# The best model already onboarded in the cluster\n", - "best_floods_model = \"geotune-mgmshlc8b9mwksedrghray\"\n", + "best_floods_model = \"replace-with-best-floods_model_id\"\n", "# Run an inference on the best model\n", - "gfm_client.try_out_tune(tune_id=tune_id, data={**inference_payload, \"description\": \"Best Model Inference\",\"tune_id\":best_floods_model})\n", + "gfm_client.try_out_tune(tune_id=tune_id, data={**inference_payload, \"description\": \"Floods Best Model Inference\",\"tune_id\":best_floods_model})\n", "\n", "\n", "# Use 5.1 instructions to view the results in the UI" @@ -925,7 +997,7 @@ "id": "summary-section-2", "metadata": {}, "source": [ - "## 7. Summary and Key Takeaways\n", + "## 8. Summary and Key Takeaways\n", "\n", "Congratulations! You've completed an end-to-end machine learning workflow for geospatial AI with a Multimodal Floods example. 🎉\n", "\n", @@ -950,8 +1022,8 @@ "---\n", "### Next Steps\n", "\n", - "- **Lab 6**: Complete end-to-end workflow with floods usecase\n", - "- **Explore**: Using Multimodal models for predicting floods. \n", + "- **Lab 7**: Complete end-to-end workflow with buildings usecase\n", + "- **Explore**: Using convnext model for building detection. \n", "\n", "\n", "Thank you for participating! 
We hope you found this workshop valuable and are excited to build geospatial AI applications with IBM Geospatial Studio.\n", diff --git a/workshop/docs/notebooks/lab7-gesa-buildings-workflow.ipynb b/workshop/docs/notebooks/lab7-gesa-buildings-workflow.ipynb index 05860557..cb6e05fc 100644 --- a/workshop/docs/notebooks/lab7-gesa-buildings-workflow.ipynb +++ b/workshop/docs/notebooks/lab7-gesa-buildings-workflow.ipynb @@ -7,9 +7,9 @@ "source": [ "# Lab 7\n", "\n", - "\n", + "\n", "\n", - "\n", + "\n", " \"Open\n", "\n" ] @@ -19,7 +19,7 @@ "id": "d61fa1ff", "metadata": {}, "source": [ - "![Geospatial Studio banner](https://raw.githubusercontent.com/terrastackai/geospatial-studio/gesa-workshop-notebooks/workshop/docs/assets/banner.png)\n", + "![Geospatial Studio banner](https://raw.githubusercontent.com/terrastackai/geospatial-studio/main/workshop/docs/assets/banner.png)\n", "## 🌍 Geospatial Exploration and Orchestration Studio \n", "\n", "## Training a Custom Model for Buildings Detection Detection\n", @@ -145,18 +145,18 @@ "#### Input Satellite Imagery\n", "Raw satellite data showing labelled buildings:\n", "\n", - "\"RGB\n", + "\"RGB\n", "\n", "#### Model Prediction\n", "The fine-tuned model identifies buildings :\n", "\n", "\n", - "\"RGB\n", + "\"RGB\n", "\n", "#### Prediction Overlay\n", "Buildings predictions overlaid on the original imagery for validation:\n", "\n", - "\"Building\n", + "\"Building\n", "\n", "This workflow demonstrates the power of geospatial foundation models in identifying buildings which is crucial for disaster response, environmental monitoring, and recovery planning.\n", "\n", @@ -261,6 +261,8 @@ "#############################################################\n", "# Initialize Geostudio client using a geostudio config file\n", "#############################################################\n", + "# If you are using jupyter lab, create a normal file to view it on the fileviewer. 
`geostudio_config_file`\n", + "\n", "gfm_client = Client(geostudio_config_file=\".geostudio_config_file\")" ] }, @@ -312,7 +314,7 @@ "source": [ "# Get detailed information about a specific buildings detections dataset\n", "# If you know the dataset name or ID, you can retrieve its details\n", - "buildings_detection_dataset_id = \"geodata-mtst5e5yh8zdg3azhcj6he\"\n", + "buildings_detection_dataset_id = \"select-dataset_id-from-DataFrame-above\"\n", "buildings_detection_dataset_id" ] }, @@ -394,9 +396,18 @@ "source": [ "# List all the base models onboarded in the cluster\n", "base_models = gfm_client.list_base_models(output='df')\n", - "base_models\n", + "display(DataFrame(base_models))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "668204eb", + "metadata": {}, + "outputs": [], + "source": [ "# Select Prithvi_EO_V2_300M model\n", - "convnext_base_model_id = \"69791dd1-cb10-440a-a34f-f4c00bf74697\"" + "convnext_base_model_id = \"select-base_model-id-from-DataFrame-above\"" ] }, { @@ -441,10 +452,18 @@ "source": [ "# List all the templates onboarded in the cluster\n", "tune_templates = gfm_client.list_tune_templates(output='df')\n", - "tune_templates\n", - "\n", + "display(DataFrame(tune_templates))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "777f2c45", + "metadata": {}, + "outputs": [], + "source": [ "# Select segmentation template\n", - "convnext_segmentation_template_id = \"83fd6d96-2f25-4293-b6a7-f47d86724549\"" + "convnext_segmentation_template_id = \"select-template_id-from-DataFrame-above\"" ] }, { @@ -524,6 +543,8 @@ "5. **Completion**: Finalizing and storing the trained model\n", "\n", "**Expected Duration:**\n", + "\n", + "These times are dependent on the tuning config e.g. 
number of epochs, batch size, dataset size, etc.\n", "- Modern GPU (V100/A100): 30-45 minutes\n", "- Older GPU: 45-90 minutes\n", "- CPU: Not recommended (2 hrs + Using Terramind tiny )\n", @@ -835,20 +856,72 @@ "outputs": [], "source": [ "# The best model already onboarded in the cluster\n", - "best_floods_model = \"geotune-mgmshlc8b9mwksedrghray\"\n", + "best_floods_model = \"replace-with-best-buildings_model_id\"\n", "# Run an inference on the best model\n", - "gfm_client.try_out_tune(tune_id=tune_id, data={**inference_payload, \"description\": \"Best Model Inference\"})\n", + "gfm_client.try_out_tune(tune_id=tune_id, data={**inference_payload, \"description\": \"Buildings Best Model Inference\"})\n", "\n", "\n", "# Use 5.1 instructions to view the results in the UI" ] }, + { + "cell_type": "markdown", + "id": "0bda7a5a", + "metadata": {}, + "source": [ + "## 7.0 Download artifacts : Download our trained model artifacts\n", + "\n", + "With the trained model, we will download the checkpoint and config that we can re-use for running inferences. \n", + "\n", + "We will use wget/curl to download the artifacts. 
Otherwise, you can grab the links and download directly to your local machine.\n", + "\n", + "---" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f8cf9e3", + "metadata": {}, + "outputs": [], + "source": [ + "# Get the download URLs for the trained model artifacts\n", + "artifact_urls = gfm_client.download_tune(tune_id=tune_id)\n", + "artifact_urls" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aed0836a", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract URLs\n", + "config_url = artifact_urls['config_url']\n", + "checkpoint_url = artifact_urls['checkpoint_url']\n", + "\n", + "print(f\"Config URL: {config_url}\")\n", + "print(f\"Checkpoint URL: {checkpoint_url}\")\n", + "print(\"\\n--- Download Commands ---\\n\")\n", + "\n", + "# Using wget\n", + "print(\"Using wget:\")\n", + "print(f\"wget -O config_deploy.yaml '{config_url}'\")\n", + "print(f\"wget -O model_checkpoint.ckpt '{checkpoint_url}'\")\n", + "\n", + "# Using curl\n", + "print(\"\\nUsing curl:\")\n", + "print(f\"curl -o config_deploy.yaml '{config_url}'\")\n", + "print(f\"curl -o model_checkpoint.ckpt '{checkpoint_url}'\")" + ] + }, { "cell_type": "markdown", "id": "summary-section-2", "metadata": {}, "source": [ - "## 7. Summary and Key Takeaways\n", + "## 8. Summary and Key Takeaways\n", "\n", "Congratulations! You've completed an end-to-end machine learning workflow for geospatial AI with a burnscars example. 🎉\n", "\n",