diff --git a/modules/data.remote/inst/download_updated.R b/modules/data.remote/inst/download_updated.R
new file mode 100644
index 00000000000..5075e8dc98f
--- /dev/null
+++ b/modules/data.remote/inst/download_updated.R
@@ -0,0 +1,34 @@
+# DAAC_Set_Credential(replace = TRUE)
+
+# California bounding box:
+# ul_lat <- 42.0095082699265845
+# ul_lon <- -124.4820168611238245
+# lr_lat <- 32.5288367369123748
+# lr_lon <- -114.1312224747231312
+
+ul_lat <- 42.0095082699265845 # y = 4651894 in crs
+ul_lon <- -124.4820168611238245 # x = 377279.7 in crs
+lr_lat <- 32.5288367369123748 # y = 3633946 in crs
+lr_lon <- -114.1312224747231312 # x = 1334269 in crs
+
+from <- "2019-01-01"
+to <- "2019-12-31"
+doi <- "10.5067/HLS/HLSS30.002"
+outdir <- "//projectnb/dietzelab/XinyuanJi/State_of_California_HLSS/2019_Fmask"
+# SWIR bands are B06/B07 for Landsat (HLSL30) and B11/B12 for Sentinel
+# (HLSS30); this run downloads the Fmask QA layer instead.
+band <- "Fmask"
+credential.folder <- "~/projectnb/XinyuanJi"
+paths <- NASA_DAAC_download(ul_lat = ul_lat,
+                            ul_lon = ul_lon,
+                            lr_lat = lr_lat,
+                            lr_lon = lr_lon,
+                            ncore = 16,
+                            from = from,
+                            to = to,
+                            outdir = outdir,
+                            band = band,
+                            credential.folder = credential.folder,
+                            doi = doi,
+                            just_path = FALSE)
+
+provider_conceptID <- NASA_CMR_finder("10.5067/HLS/HLSS30.002")
diff --git a/modules/data.remote/inst/hlsp-scene-explorer.qmd b/modules/data.remote/inst/hlsp-scene-explorer.qmd
new file mode 100644
index 00000000000..19d224920ea
--- /dev/null
+++ b/modules/data.remote/inst/hlsp-scene-explorer.qmd
@@ -0,0 +1,384 @@
+---
+title: "HLSP Scene Explorer"
+format: html
+editor: visual
+execute:
+  echo: true
+  message: false
+  warning: false
+---
+
+## 1. Setup Environment
+
+```{r}
+librarian::shelf(terra, ncdf4, stringr, ggplot2, maps, readr, DT, knitr, sf,
+                 leaflet, dplyr, tidyr)
+
+terraOptions(threads = 16)
+
+# Define HLSP data directory
+hls_dir <- "/projectnb/dietzelab/malmborg/CARB/HLS_data/"
+hls_files <- list.files(hls_dir, pattern = "MSLSP.*\\.nc$", full.names = TRUE)
+```
+
+## 2. Extract Variables + Scaling Info
+
+```{r}
+# Extract years and tile IDs
+years_available <- sort(unique(stringr::str_extract(hls_files, "(?<=_)\\d{4}(?=\\.nc$)")))
+
+cat("Available years:", years_available, "\n")
+```
+
+```{r}
+tile_ids <- sort(unique(stringr::str_extract(basename(hls_files), "(?<=MSLSP_)\\w{5}(?=_)")))
+
+cat(length(tile_ids), "available tile IDs:", tile_ids, "\n")
+# The HLS bounding box contains 110 tiles; only the 70 listed below fall
+# within California:
+# '10SDH 10SDJ 10SEF 10SEG 10SEH 10SEJ 10SFE 10SFF
+#  10SFG 10SFH 10SFJ 10SGD 10SGE 10SGF 10SGG 10SGH
+#  10SGJ 10TCK 10TCL 10TCM 10TDK 10TDL 10TDM 10TEK
+#  10TEL 10TEM 10TFK 10TFL 10TFM 10TGK 10TGL 10TGM
+#  11SKA 11SKB 11SKC 11SKD 11SKT 11SKU 11SKV 11SLA
+#  11SLB 11SLC 11SLT 11SLU 11SLV 11SMA 11SMB 11SMR
+#  11SMS 11SMT 11SMU 11SMV 11SNA 11SNS 11SNT 11SNU
+#  11SNV 11SPS 11SPT 11SPU 11SPV 11SQS 11SQT 11SQU
+#  11SQV 11TKE 11TKF 11TKG 12STC 12STD'
+```
+
+```{r}
+# ---- 1. Load Centroids from GeoJSON ----
+sf_points <- st_read("/projectnb/dietzelab/skanee/ccmmf-phenology/MSLSP_tileCentroids.geojson")
+
+# ---- 2. Load Grid from GeoJSON ----
+grid <- st_read("/projectnb/dietzelab/skanee/ccmmf-phenology/MSLSP_tileGrid.geojson")
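+
+# leaflet expects WGS84 (EPSG:4326) coordinates; the MSLSP GeoJSONs should
+# already be in 4326, but an explicit transform is a harmless safeguard.
+grid <- sf::st_transform(grid, 4326)
+sf_points <- sf::st_transform(sf_points, 4326)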
+
+# ---- 3. Leaflet Map ----
+leaflet() %>%
+  addProviderTiles("Esri.WorldImagery") %>%
+
+  # Add tile grid outlines
+  addPolylines(data = grid, color = "red", weight = 1, opacity = 0.8) %>%
+
+  # Add centroid circles
+  addCircleMarkers(
+    data = sf_points,
+    radius = 3,
+    color = "white",
+    stroke = FALSE,
+    fillOpacity = 0.7
+  ) %>%
+
+  # Add centroid labels
+  addLabelOnlyMarkers(
+    data = sf_points,
+    label = ~tileID,
+    labelOptions = labelOptions(
+      noHide = TRUE,
+      direction = "top",
+      textOnly = TRUE,
+      textsize = "14px",
+      style = list(
+        "font-weight" = "bold",
+        "color" = "white",
+        "text-shadow" = "0px 0px 5px black",
+        "padding" = "2px"
+      )
+    )
+  )
+```
+
+```{r}
+# Open one sample file
+sample_file <- hls_files[1]
+nc <- ncdf4::nc_open(sample_file)
+
+# Extract variable names and their long_name attributes
+var_names <- names(nc$var)
+cat("Available variables:\n")
+
+# Print each variable with its long name
+for (v in var_names) {
+  longname <- ncatt_get(nc, v, "long_name")$value
+  cat(v, "→", longname, "\n")
+}
+ncdf4::nc_close(nc)
+```
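+
+The scale factors hard-coded later in this document (e.g., 0.0001 for `EVImax`) presumably come from the MSLSP product documentation. As a cross-check, the chunk below looks for `scale_factor` attributes stored in the NetCDF itself; this is a sketch, and assumes the attribute is present in the files.
+
+```{r}
+nc <- ncdf4::nc_open(sample_file)
+for (v in names(nc$var)) {
+  att <- ncatt_get(nc, v, "scale_factor")
+  if (att$hasatt) cat(v, "scale_factor:", att$value, "\n")
+}
+ncdf4::nc_close(nc)
+```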
+
+## 3. Load Tile and Cropland Shapefiles
+
+```{r}
+# Set target tile and year
+target_vars <- c("NumCycles", "OGI", "50PCGI", "Peak", "50PCGD", "OGMn",
+                 "EVImax", "EVIamp", "EVIarea",
+                 "OGI_2", "50PCGI_2", "Peak_2", "50PCGD_2", "OGMn_2",
+                 "EVImax_2", "EVIamp_2", "EVIarea_2")
+
+target_tile <- "10SGD"
+target_year <- "2018"
+
+# Match raster file
+selected_file <- hls_files[
+  stringr::str_detect(hls_files, target_tile) &
+  stringr::str_detect(hls_files, target_year)
+][1]
+
+# Load + reproject raster
+r <- terra::rast(selected_file, subds = target_vars)
+r <- terra::project(r, "EPSG:4326")
+
+cat("Loaded:", basename(selected_file), "\n")
+cat("Loaded layers:", names(r), "\n")
+```
+
+```{r}
+# --- Set paths and year ---
+landiq_dir <- "/projectnb/dietzelab/ccmmf/LandIQ_data"
+pft_table <- read_csv(file.path(landiq_dir, "CARB_PFTs_table.csv"))
+
+# --- Load the LandIQ shapefile with geometry ---
+shp_path <- paste0(
+  landiq_dir, "/LandIQ_shapefiles/i15_Crop_Mapping_",
+  target_year, "_SHP/i15_Crop_Mapping_",
+  target_year, ".shp"
+)
+plots_sf <- sf::st_read(shp_path, quiet = TRUE)
+
+# --- Load crops_all, filter for target year, and add PFT columns ---
+load(file.path(landiq_dir, "crops_all_years.RData")) # provides crops_all
+crops_2018 <- crops_all %>%
+  filter(year == target_year) %>%
+  left_join(pft_table %>%
+              select(CLASS = crop_type, SUBCLASS = crop_code,
+                     crop_desc, pft_group),
+            by = c("CLASS", "SUBCLASS"))
+
+# --- Ensure ID types match ---
+crops_2018$UniqueID <- as.character(crops_2018$UniqueID)
+plots_sf$UniqueID <- as.character(plots_sf$UniqueID)
+
+# --- Join geometry to the long-format attribute table ---
+crops_sf <- crops_2018 %>%
+  left_join(plots_sf %>% select(UniqueID, geometry), by = "UniqueID") %>%
+  sf::st_as_sf() # ensures the result is an sf object
+
+# Result: long-format sf object with geometry per season
+print(crops_sf)
+```
+
+```{r}
+# Convert sf to terra
+crops_vect <- terra::vect(crops_sf)
+
+# Reproject to match raster CRS if needed
+crops_vect <- terra::project(crops_vect, terra::crs(r))
+
+# Crop to tile extent
+plots_tile <- terra::crop(crops_vect, r)
+
+# Print result
+cat("\nNumber of cropland polygons in", target_tile, ":", nrow(plots_tile), "\n")
+cat("\nAvailable LandIQ Metadata:\n")
+head(plots_tile)
+```
+
+### **Identification & Location**
+
+| Column | Description |
+|----|----|
+| `UniqueID` | Unique polygon identifier for the cropland parcel |
+| `year` | **Water year** associated with this record (e.g., 2018 means Oct 2017–Sep 2018) |
+| `centx` | X-coordinate (longitude) of the polygon centroid |
+| `centy` | Y-coordinate (latitude) of the polygon centroid |
+
+------------------------------------------------------------------------
+
+### **Administrative / Regional**
+
+| Column | Description |
+|----|----|
+| `REGION` | Broad geographic region (e.g., for water agency or policy zone) |
+| `COUNTY` | California county name where the polygon is located |
+| `HYDRORGN` | Hydrologic region (used for water planning) |
+
+------------------------------------------------------------------------
+
+### **Cropping Information**
+
+| Column | Description |
+|----|----|
+| `season` | Season number (1–4) for this crop instance (based on peak NDVI) |
+| `CLASS` | Primary crop classification code (e.g., P = processing tomato) |
+| `SUBCLASS` | Subtype or secondary classification, often numeric |
+| `SPECOND` | Special condition flag: `"Y"` = yes, `"F"` = fallow, `NA` = none |
+| `crop_desc` | Human-readable crop name or description (derived from CLASS) |
+| `pft_group` | Plant Functional Type group (e.g., for ecological modeling) |
+
+------------------------------------------------------------------------
+
+### **Temporal Information**
+
+| Column | Description |
+|----|----|
+| `ADOY` | Adjusted DOY — day of planting relative to the water year (Oct–Sep); can be negative |
+| `DOY` | **Calendar-year DOY** — converted from `ADOY`; represents the true planting date (1–365) |
+| `YRPLANTED` | Calendar year planting occurred (used when multiple years are covered) |
+
+------------------------------------------------------------------------
+
+### **Other Attributes**
+
+| Column | Description |
+|----|----|
+| `MULTIUSE` | Flag for mixed-use land (e.g., pasture + crop) or multi-crop parcels |
+| `PCNT` | Percent of the field occupied by this crop (in multicrop settings) |
+
+## 4. Static Plot
+
+```{r}
+plot(r[["OGI"]], main = paste("Planting DOY -", target_tile, target_year))
+plot(plots_tile, add = TRUE, border = "red", lwd = 0.5)
+```
+
+```{r}
+plot(r[["OGMn"]], main = paste("Harvesting DOY -", target_tile, target_year))
+plot(plots_tile, add = TRUE, border = "red", lwd = 0.5)
+```
+
+## 5. Interactive Map (Leaflet)
+
+```{r}
+# Downsample and prepare a color palette for each variable
+r_down_list <- lapply(target_vars, function(var) {
+  r_down <- terra::aggregate(r[[var]], fact = 4)
+  rng <- as.vector(terra::minmax(r_down))
+  list(
+    raster = r_down,
+    palette = colorNumeric("viridis", domain = rng, na.color = "transparent")
+  )
+})
+names(r_down_list) <- target_vars
+
+# Start leaflet map with satellite basemap
+m <- leaflet() |>
+  addProviderTiles("Esri.WorldImagery", group = "Satellite") |>
+  addProviderTiles("CartoDB.Positron", group = "Basemap") # Optional: switchable basemap
+
+# Add each raster layer
+for (var in target_vars) {
+  m <- m |>
+    addRasterImage(
+      r_down_list[[var]]$raster,
+      colors = r_down_list[[var]]$palette,
+      opacity = 0.7,
+      group = var
+    )
+}
+
+# Add cropland polygons
+m <- m |>
+  addPolygons(data = plots_tile, color = "red", weight = 0.7, group = "Cropland") |>
+  addLayersControl(
+    baseGroups = c("Satellite", "Basemap"),
+    overlayGroups = c(target_vars, "Cropland"),
+    options = layersControlOptions(collapsed = FALSE)
+  ) |>
+  addMiniMap(toggleDisplay = TRUE)
+
+m
+```
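+
+With seventeen overlay rasters stacked at 0.7 opacity, the layers occlude one another; one option (a sketch using leaflet's `hideGroup()`) is to start with only the first metric visible and toggle the rest from the layer control:
+
+```{r}
+m |> hideGroup(setdiff(target_vars, target_vars[1]))
+```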
+
+## 6. Extract Phenology Metrics per Polygon
+
+```{r}
+# Convert to data.frame to inspect values
+plots_df <- as.data.frame(plots_tile)
+
+# Remove any stale HLSP columns if present
+hlsp_raw_vars <- c("OGI", "50PCGI", "Peak", "50PCGD", "OGMn",
+                   "EVImax", "EVIamp", "EVIarea",
+                   "OGI_2", "50PCGI_2", "Peak_2", "50PCGD_2", "OGMn_2",
+                   "EVImax_2", "EVIamp_2", "EVIarea_2")
+plots_df <- plots_df[, !(names(plots_df) %in% hlsp_raw_vars)]
+
+# Extract mean values for all HLSP layers
+hlsp_means <- terra::extract(r, plots_tile, fun = mean, na.rm = TRUE)
+hlsp_means$ID <- NULL
+names(hlsp_means) <- paste0(names(hlsp_means), "_mean")
+
+# Extract standard deviation values for all HLSP layers
+hlsp_sds <- terra::extract(r, plots_tile, fun = sd, na.rm = TRUE)
+hlsp_sds$ID <- NULL
+names(hlsp_sds) <- paste0(names(hlsp_sds), "_sd")
+
+# Merge both mean and sd columns into plots_df
+plots_df <- cbind(plots_df, hlsp_means, hlsp_sds)
+
+# Rescale vegetation index values
+scale_vars <- list(
+  EVImax = 0.0001,
+  EVIamp = 0.0001,
+  EVIarea = 0.01,
+  EVImax_2 = 0.0001,
+  EVIamp_2 = 0.0001,
+  EVIarea_2 = 0.01
+)
+
+# Apply scaling to both mean and sd columns
+for (var in names(scale_vars)) {
+  sf <- scale_vars[[var]]
+  mean_col <- paste0(var, "_mean")
+  sd_col <- paste0(var, "_sd")
+
+  if (mean_col %in% names(plots_df)) plots_df[[mean_col]] <- plots_df[[mean_col]] * sf
+  if (sd_col %in% names(plots_df)) plots_df[[sd_col]] <- plots_df[[sd_col]] * sf
+}
+
+# (Optional) Drop any columns where all values are NA
+na_cols <- sapply(plots_df, function(col) all(is.na(col)))
+plots_df <- plots_df[, !(names(plots_df) %in% names(na_cols[na_cols]))]
+```
+
+```{r}
+head(plots_df)
+```
+
+```{r}
+doy_to_iso <- function(doy, year) {
+  # DOY < 1 maps into the previous calendar year; DOY > 365 into the next.
+  # Extracted means are fractional DOYs, so round to the nearest whole day.
+  base_date <- as.Date(paste0(year, "-01-01"))
+  as.Date(round(doy) - 1, origin = base_date)
+}
+
+date_vars <- c("OGI_mean", "50PCGI_mean", "Peak_mean", "50PCGD_mean", "OGMn_mean",
+               "OGI_2_mean", "50PCGI_2_mean", "Peak_2_mean", "50PCGD_2_mean", "OGMn_2_mean")
+
+# Loop through each variable and add an ISO version
+for (var in date_vars) {
+  new_col <- paste0(var, "_date")
+  plots_df[[new_col]] <- doy_to_iso(plots_df[[var]], plots_df$year)
+}
+
+# 1. Create ADOY_ISO as Date (set NA for ADOY == 0 or NA)
+plots_df$ADOY_ISO <- doy_to_iso(
+  ifelse(plots_df$ADOY == 0 | is.na(plots_df$ADOY), NA, plots_df$ADOY),
+  plots_df$year
+)
+
+# 2. Insert ADOY_ISO right after ADOY (without duplication)
+adoy_index <- which(names(plots_df) == "ADOY")
+col_names <- names(plots_df)
+
+# Reorder: insert ADOY_ISO after ADOY
+new_order <- append(
+  col_names[1:adoy_index],
+  c("ADOY_ISO", col_names[(adoy_index + 1):(ncol(plots_df) - 1)])
+)
+
+# Apply reordered column list
+plots_df <- plots_df[, new_order]
+```
+
+```{r}
+head(plots_df, 400)
+```
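+
+The table built above is the phenology side of the mastersheet join described in `landiq-file-explorer.qmd`. A minimal sketch for persisting it (the output path is illustrative):
+
+```{r}
+#| eval: false
+readr::write_csv(plots_df, file.path(hls_dir, "hlsp_metrics_10SGD_2018.csv"))
+```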
diff --git a/modules/data.remote/inst/landiq-file-explorer.qmd b/modules/data.remote/inst/landiq-file-explorer.qmd
new file mode 100644
index 00000000000..d3d57ba2d29
--- /dev/null
+++ b/modules/data.remote/inst/landiq-file-explorer.qmd
@@ -0,0 +1,246 @@
+---
+title: "LandIQ File Explorer"
+format: html
+editor: visual
+execute:
+  echo: true
+  message: false
+  warning: false
+---
+
+## Context and Purpose
+
+This script supports a broader workflow to estimate crop phenology from HLS data. While `hlsp-scene-explorer.qmd` focuses on loading HLS tiles and extracting phenology metrics (e.g. EVImax, OGI, OGMn), this script analyzes the LandIQ cropland dataset to:
+
+- Understand crop types, cropping intensity (single, double, etc.), and seasonal timing.
+
+- Clarify the use of adjusted day-of-year (ADOY) and seasonal labels.
+
+- Build a 2018-only **mastersheet** for integration with phenology metrics (see the sketch at the end of this document).
+
+## 1. Setup
+
+```{r}
+librarian::shelf(dplyr, tidyr, readr, stringr, ggplot2, terra, sf, DT, knitr)
+landiq_dir <- "/projectnb/dietzelab/ccmmf/LandIQ_data"
+```
+
+## 2. Load Data
+
+### 2.1 Load Long-Format Crop Metadata (2018 only)
+
+```{r}
+load(file.path(landiq_dir, "crops_all_years.RData")) # provides `crops_all`
+crops_2018 <- crops_all %>% filter(year == 2018)
+```
+
+### 2.2 Load Crop Type Descriptions (PFTs)
+
+```{r}
+pft_table <- read_csv(file.path(landiq_dir, "CARB_PFTs_table.csv"))
+
+# Join with 2018 crops for description
+crops_2018_joined <- crops_2018 %>%
+  left_join(pft_table %>% select(CLASS = crop_type, SUBCLASS = crop_code, crop_desc, pft_group),
+            by = c("CLASS", "SUBCLASS"))
+```
+
+## 3. Explore Cropping Intensity and Seasonal Use
+
+```{r}
+# Define cropping-type labels
+multiuse_labels <- c(
+  S = "Single Crop",
+  D = "Double Crop",
+  T = "Triple Crop",
+  Q = "Quadruple Crop",
+  I = "Intercropped",
+  M = "Mixed Use"
+)
+
+plot_data <- crops_2018_joined %>%
+  filter(MULTIUSE %in% names(multiuse_labels)) %>%
+  filter(!is.na(CLASS), PCNT > 0) %>% # Keep only actual planted crops
+  select(season, MULTIUSE) %>%
+  ungroup() %>%
+  count(season, MULTIUSE) %>%
+  mutate(
+    MULTIUSE = factor(MULTIUSE, levels = names(multiuse_labels), labels = multiuse_labels),
+    season = factor(season, levels = 1:4)
+  )
+```
+
+### 3.1 Stacked Crop Records by Season
+
+```{r}
+ggplot(plot_data, aes(x = season, y = n, fill = MULTIUSE)) +
+  geom_col(position = "stack", color = "black") +
+  scale_fill_brewer(palette = "Set2") +
+  theme_minimal() +
+  labs(
+    title = "Stacked Crop Records by Season (2018)",
+    x = "Season",
+    y = "Number of Crop Records",
+    fill = "Cropping Type"
+  )
+```
+
+### 3.2 Crop Records by Season Faceted by Cropping Type
+
+```{r}
+ggplot(plot_data, aes(x = season, y = n, fill = MULTIUSE)) +
+  geom_col(color = "black") +
+  scale_fill_brewer(palette = "Set2") +
+  facet_wrap(~ MULTIUSE, scales = "free_y") +
+  theme_minimal() +
+  labs(
+    title = "Crop Records by Season Faceted by Cropping Type (2018)",
+    x = "Season",
+    y = "Number of Crop Records"
+  )
+```
+
+### 3.3 Summary Table
+
+```{r}
+# Pivot summary table (wide format)
+summary_table <- plot_data %>%
+  arrange(season, desc(n)) %>%
+  pivot_wider(
+    names_from = season,
+    values_from = n,
+    values_fill = 0
+  ) %>%
+  rename(`Cropping Type` = MULTIUSE)
+
+# Add row totals
+summary_table_totals <- summary_table %>%
+  mutate(Total = `1` + `2` + `3` + `4`)
+
+# Add grand total row
+grand_total <- summary_table_totals %>%
+  summarise(across(where(is.numeric), sum)) %>%
+  mutate(`Cropping Type` = "Total") %>%
+  select(`Cropping Type`, everything())
+
+# Combine
+summary_table_final <- bind_rows(summary_table_totals, grand_total)
+
+# Display DT table without search box or paging controls
+datatable(
+  summary_table_final,
+  caption = "Crop Record Counts by Season and Cropping Type (2018)",
+  options = list(
+    dom = 't', # Only show the table (no search box etc.)
+    ordering = FALSE,
+    autoWidth = TRUE
+  )
+) %>%
+  formatCurrency(columns = colnames(summary_table_final)[-1], currency = "", digits = 0)
+```
+
+## 4. ADOY for Crop Timing
+
+#### Understanding ADOY and Seasonal Mapping
+
+Below is a reference for how LandIQ's crop seasons map to ADOY (Adjusted Day of Year) within the [water year calendar]{.underline}:
+
+| **Season** | **Typical Timing** | **ADOY Range** | **Notes** |
+|----|----|----|----|
+| 1 | October–December (previous CY) | `-92 to -1` | Fall crops starting the water year (e.g., Oct–Dec 2017 for 2018) |
+| 2 | January–May | `1 to ~150` | Main winter/spring growing season |
+| 3 | May–August | `~151 to ~240` | Typical summer crops |
+| 4 | August–September | `~241 to ~275+` | Late or intercropped season |
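+
+As a concrete example of the mapping above, ADOY counts days relative to January 1 of the labeled year, so negative values fall in October–December of the previous calendar year. A minimal conversion sketch (hypothetical helper, mirroring `doy_to_iso()` in `hlsp-scene-explorer.qmd`):
+
+```{r}
+adoy_to_date <- function(adoy, year) {
+  as.Date(paste0(year, "-01-01")) + (adoy - 1)
+}
+adoy_to_date(c(-92, 1, 180), 2018) # "2017-09-30" "2018-01-01" "2018-06-29"
+```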
+
+```{r}
+crops_2018_joined %>%
+  filter(!is.na(ADOY), PCNT > 0) %>%
+  ggplot(aes(x = ADOY)) +
+  geom_histogram(binwidth = 10, fill = "#66c2a5", color = "black") +
+  theme_minimal() +
+  labs(
+    title = "Distribution of Adjusted DOY (ADOY) for Crop Records (2018)",
+    x = "Adjusted Day of Year (ADOY)",
+    y = "Count of Crop Records"
+  )
+```
+
+## 5. Crop Types
+
+### 5.1 View Unique Values by Column Name
+
+```{r}
+get_unique_values <- function(data, multiuse_codes, column_name) {
+  data %>%
+    ungroup() %>%
+    filter(MULTIUSE %in% multiuse_codes, !is.na(.data[[column_name]]), PCNT > 0) %>%
+    distinct(.data[[column_name]]) %>%
+    arrange(.data[[column_name]]) %>%
+    pull()
+}
+```
+
+```{r}
+# All crop types (crop descriptions)
+get_unique_values(crops_2018_joined, unique(crops_2018_joined$MULTIUSE), "crop_desc")
+```
+
+Note that every crop type present under Q (quadruple) is also present under T (triple):
+
+```{r}
+get_unique_values(crops_2018_joined, c("T", "Q"), "crop_desc") # Triple or Quadruple
+```
+
+```{r}
+get_unique_values(crops_2018_joined, c("M", "I"), "crop_desc") # Mixed or Intercropped
+```
+
+```{r}
+# PFT groups across all cropping types
+get_unique_values(crops_2018_joined, unique(crops_2018_joined$MULTIUSE), "pft_group")
+```
+
+### 5.2 Table of Crops by Season
+
+Note the discrepancies in, for example, rows 2, 4, and 10.
+
+```{r}
+# Base cleaned data
+crop_summary <- crops_2018_joined %>%
+  filter(!is.na(crop_desc), PCNT > 0) %>%
+  distinct(crop_desc, MULTIUSE, season, pft_group)
+
+# Wide format with season indicators
+crop_summary_wide <- crop_summary %>%
+  mutate(present = 1) %>%
+  pivot_wider(
+    names_from = season,
+    values_from = present,
+    values_fill = 0
+  )
+
+# Collapse MULTIUSE and finalize
+final_crop_table <- crop_summary_wide %>%
+  group_by(crop_desc, pft_group) %>%
+  summarise(
+    MULTIUSE = paste(sort(unique(MULTIUSE)), collapse = ", "),
+    `1` = max(`1`, na.rm = TRUE),
+    `2` = max(`2`, na.rm = TRUE),
+    `3` = max(`3`, na.rm = TRUE),
+    `4` = max(`4`, na.rm = TRUE),
+    .groups = "drop"
+  ) %>%
+  arrange(crop_desc)
+
+DT::datatable(
+  final_crop_table,
+  caption = "All Crop Types by PFT, MULTIUSE and Seasonal Presence (2018)",
+  options = list(
+    pageLength = 10,
+    autoWidth = TRUE,
+    scrollX = TRUE
+  )
+)
+```
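+
+## 6. 2018 Mastersheet
+
+The purpose statement above promises a 2018-only mastersheet for integration with phenology metrics. A minimal sketch of that table (the column choice is an assumption; `UniqueID` is the join key against the HLSP extraction):
+
+```{r}
+mastersheet_2018 <- crops_2018_joined %>%
+  filter(PCNT > 0) %>%
+  select(UniqueID, year, season, MULTIUSE, CLASS, SUBCLASS,
+         crop_desc, pft_group, ADOY, PCNT)
+head(mastersheet_2018)
+# readr::write_csv(mastersheet_2018, file.path(landiq_dir, "mastersheet_2018.csv"))
+```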
diff --git a/modules/data.remote/inst/ndti_evi_timeseries_example.R b/modules/data.remote/inst/ndti_evi_timeseries_example.R
new file mode 100644
index 00000000000..092296b89b6
--- /dev/null
+++ b/modules/data.remote/inst/ndti_evi_timeseries_example.R
@@ -0,0 +1,481 @@
+# --- STEP 1. Setup ---
+# Core libraries
+library(zoo)
+librarian::shelf(
+  readr, dplyr, stringr, tidyr, purrr, ggplot2,
+  leaflet, sf, terra, progressr, lubridate
+)
+
+# parallel & progress
+terraOptions(threads = 16)
+handlers("txtprogressbar")
+
+
+
+# --- STEP 2. Load Anchor CSV & Pick a Site ---
+# Read and clean the CSV
+path <- "/projectnb/dietzelab/XinyuanJi/anchor_sites_locations.csv"
+raw <- read_lines(path)
+cleaned <- str_replace_all(raw,
+                           ",c\\(([0-9\\.-]+), *([0-9\\.-]+)\\),",
+                           ",\"c(\\1,\\2)\",")
+anchors <- read_csv(paste(cleaned, collapse = "\n"), show_col_types = FALSE) %>%
+  select(id, site_name,
+         upper_left_lon, upper_left_lat,
+         lower_right_lon, lower_right_lat) %>%
+  mutate(across(starts_with(c("upper_left", "lower_right")), as.numeric),
+         center_lon = (upper_left_lon + lower_right_lon) / 2,
+         center_lat = (upper_left_lat + lower_right_lat) / 2) %>%
+  filter(!is.na(center_lon), !is.na(center_lat))
+
+
+
+# ── CHUNK 1 : read tillage sheet, keep Treatment column ─────────────────────
+# NOTE: this overwrites the `anchors` table built in STEP 2; the tillage
+# sheet is the version used by the rest of the script.
+path <- "/projectnb/dietzelab/XinyuanJi/till_treatment_polygons.csv"
+# path <- "/projectnb/dietzelab/XinyuanJi/anchor_sites_locations.csv"
+
+raw <- readr::read_lines(path)
+cleaned <- stringr::str_replace_all(
+  raw,
+  ",c\\(([0-9\\.-]+), *([0-9\\.-]+)\\),",
+  ",\"c(\\1,\\2)\"," # quote the "c(x,y)" blobs (same fix as before)
+)
+
+anchors <- readr::read_csv(paste(cleaned, collapse = "\n"),
+                           show_col_types = FALSE) %>%
+
+  # keep only the 2018 and 2019 records
+  dplyr::filter(Year %in% c(2018, 2019)) %>%
+
+  # ── 1 standardise column names so downstream code is unchanged
+  dplyr::rename(
+    id = SampleID,            # ← tillage file's ID
+    site_name = ProjectName.x # ← adjust if your column is ProjectName
+  ) %>%
+
+  # ── 2 include Treatment_Control
+  dplyr::select(
+    id, site_name, Treatment_Control,
+    upper_left_lon, upper_left_lat,
+    lower_right_lon, lower_right_lat
+  ) %>%
+
+  dplyr::mutate(
+    dplyr::across(
+      dplyr::starts_with(c("upper_left", "lower_right")),
+      as.numeric
+    ),
+    center_lon = (upper_left_lon + lower_right_lon) / 2,
+    center_lat = (upper_left_lat + lower_right_lat) / 2
+  ) %>%
+  dplyr::filter(!is.na(center_lon), !is.na(center_lat))
+
+
+
+# Wrap the per-site processing in a function
+process_ndti_for_site <- function(selected_anchor) {
+  site_id <- selected_anchor$id
+  site_name <- selected_anchor$site_name
+
+
+
+  # --- STEP 3. Visualize All Anchor Sites ---
+  # Please refer to Step 3 in:
+  # //projectnb/dietzelab/XinyuanJi/ndti-evi-timeseries(Sarah)-L+S.qmd
+
+
+
+  # --- STEP 4. Build Exact tile-polygon ROI ---
+  # A) Build a closed polygon in WGS84 from the CSV bbox
+  corner_df <- data.frame(
+    lon = c(
+      selected_anchor$upper_left_lon,
+      selected_anchor$upper_left_lon,
+      selected_anchor$lower_right_lon,
+      selected_anchor$lower_right_lon,
+      selected_anchor$upper_left_lon # close ring
+    ),
+    lat = c(
+      selected_anchor$upper_left_lat,
+      selected_anchor$lower_right_lat,
+      selected_anchor$lower_right_lat,
+      selected_anchor$upper_left_lat,
+      selected_anchor$upper_left_lat # close ring
+    )
+  )
+
+  # B) Make an sf and then a terra SpatVector
+  bbox_sf <- st_sfc(st_polygon(list(as.matrix(corner_df))), crs = 4326)
+  roi_vect_wgs <- terra::vect(bbox_sf)
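+
+  # Equivalent one-liner for the corner construction above (a sketch; both
+  # produce the same WGS84 rectangle):
+  # bbox_sf <- sf::st_as_sfc(sf::st_bbox(
+  #   c(xmin = selected_anchor$upper_left_lon,
+  #     ymin = selected_anchor$lower_right_lat,
+  #     xmax = selected_anchor$lower_right_lon,
+  #     ymax = selected_anchor$upper_left_lat),
+  #   crs = sf::st_crs(4326)
+  # ))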
+
+
+
+  # --- STEP 5. Determine Which HLS Tile Covers This Site ---
+  ## (robust: ROI polygon → largest overlap → nearest-feature fallback)
+
+  # ── Load the MS-LSP grid (already in WGS-84 / EPSG:4326) ───────────────────
+  grid <- st_read(
+    "/projectnb/dietzelab/skanee/ccmmf-phenology/MSLSP_tileGrid.geojson",
+    quiet = TRUE
+  )
+
+  # ── Build the anchor-site polygon in the grid CRS (4326) -------------------
+  bbox_anchor <- st_transform(bbox_sf, st_crs(grid)) # bbox_sf from STEP 4
+
+  # ── 1) tiles whose polygons intersect the ROI -----------------------------
+  hits <- st_intersects(bbox_anchor, grid) |> unlist()
+
+  if (length(hits) == 1) {
+
+    tileID <- grid$tileID[hits] # exactly one tile
+
+  } else if (length(hits) > 1) {
+
+    # 2) choose the one with the largest overlapping area
+    overlap_areas <- st_area(st_intersection(grid[hits, ], bbox_anchor))
+    tileID <- grid$tileID[hits[which.max(overlap_areas)]]
+
+  } else {
+
+    # 3) ROI outside every grid polygon - pick the nearest grid feature
+    idx <- st_nearest_feature(st_centroid(bbox_anchor), grid)
+    tileID <- grid$tileID[idx]
+  }
+
+  crop_type <- site_name # site_id and site_name are already set above
+
+  cat("Selected site:", site_id, "-", site_name, "\n")
+  cat("Using HLS tile:", tileID, "\n")
+
+
+
+  # --- STEP 6. List & Filter HLS NetCDFs for That Tile ---
+  hls_dir <- "/projectnb/dietzelab/malmborg/CARB/HLS_data"
+  hls_files <- list.files(hls_dir, pattern = "MSLSP.*\\.nc$", full.names = TRUE)
+
+  # Filter by tileID in filename (fast, no raster I/O)
+  hls_tile_files <- hls_files[str_detect(basename(hls_files), fixed(tileID))]
+
+  cat("Found", length(hls_tile_files), "HLS files for tile", tileID, "\n")
+
+
+
+  # --- STEP 7. Precompute Projected ROI Extent ---
+  # Load one file to get its CRS
+  metrics <- c("50PCGI", "50PCGD") # ,"OGMn", "OGI_2", "Peak_2", "OGMn_2"
+  template_r <- rast(hls_tile_files[1], subds = metrics)
+  roi_proj <- project(roi_vect_wgs, crs(template_r))
+  roi_ext <- ext(roi_proj)
+
+
+
+  # --- STEP 8. Extract phenology metrics ---
+  extract_one <- function(nc) {
+    yr <- as.integer(str_extract(basename(nc), "\\d{4}"))
+    # On failure, return a single row of NAs for the requested metrics
+    na_row <- as_tibble(setNames(as.list(rep(NA_real_, length(metrics))), metrics)) %>%
+      mutate(year = yr, .before = 1)
+    r <- try(rast(nc, subds = metrics), silent = TRUE)
+    if (inherits(r, "try-error")) {
+      message("Skipping load: ", basename(nc))
+      return(na_row)
+    }
+    cr <- try(crop(r, roi_ext), silent = TRUE)
+    if (inherits(cr, "try-error")) {
+      message("  Crop failed for ", basename(nc))
+      return(na_row)
+    }
+    ms <- mask(cr, roi_proj)
+    vals <- global(ms, mean, na.rm = TRUE)
+    out <- as_tibble(t(vals)) %>% setNames(names(ms))
+    out %>% mutate(year = yr, .before = 1)
+  }
+
+  # Sequential extraction with a text progress bar
+  ts_list <- vector("list", length(hls_tile_files))
+  pb <- txtProgressBar(0, length(hls_tile_files), style = 3)
+  for (i in seq_along(hls_tile_files)) {
+    ts_list[[i]] <- extract_one(hls_tile_files[i])
+    setTxtProgressBar(pb, i)
+  }
+  close(pb)
+
+  ts_raw <- bind_rows(ts_list) %>% arrange(year)
+
+
+
+  # --- STEP 9. Pivot long & plot phenology ---
+  ts_long <- ts_raw %>%
+    pivot_longer(
+      cols = all_of(metrics),
+      names_to = "metric",
+      values_to = "doy"
+    ) %>%
+    # ensure doy is integer, then add to the first of each year
+    mutate(
+      doy = as.integer(doy),
+      date = as.Date(paste0(year, "-01-01")) + (doy - 1)
+    )
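+
+  # A minimal plot of the phenology dates assembled above (a sketch; this
+  # prints one figure per site when run over all anchors):
+  print(
+    ggplot(ts_long, aes(x = date, y = metric, colour = metric)) +
+      geom_point(na.rm = TRUE) +
+      labs(title = paste("Phenology dates -", site_name), x = NULL, y = NULL) +
+      theme_minimal()
+  )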
+
+
+
+  ## --- STEP 10. Extract Mean NDTI over the same tile polygon ---
+
+  # .tif directories
+  tif_dir <- c("/projectnb/dietzelab/XinyuanJi/State_of_California_HLSL/2018",
+               "/projectnb/dietzelab/XinyuanJi/State_of_California_HLSL/2019",
+               "/projectnb/dietzelab/XinyuanJi/State_of_California_HLSS/2018",
+               "/projectnb/dietzelab/XinyuanJi/State_of_California_HLSS/2019"
+  )
+  # tif_dir <- "/projectnb/dietzelab/XinyuanJi/anchor_sites/anchor_site_1"
+
+  # .Fmask directories (for reference; the loops below rebuild these per scene)
+  fmask_folder <- c("~/projectnb/XinyuanJi/State_of_California_HLSL/2018_Fmask",
+                    "~/projectnb/XinyuanJi/State_of_California_HLSL/2019_Fmask",
+                    "~/projectnb/XinyuanJi/State_of_California_HLSS/2018_Fmask",
+                    "~/projectnb/XinyuanJi/State_of_California_HLSS/2019_Fmask"
+  )
+
+  # quick check that SWIR bands (Landsat B06/B07, Sentinel B11/B12) exist
+  ref_file <- list.files(
+    tif_dir,
+    pattern = "B(06|07|11|12).*\\.tif$",
+    full.names = TRUE
+  )[1]
+
+  # set up the ROI in the imagery CRS
+  r0 <- rast(list.files(tif_dir, pattern = "B11.*\\.tif$", full.names = TRUE)[1])
+  roi_ndti <- project(roi_vect_wgs, crs(r0)) |> ext()
+
+  # --------------------------------------------------------------------------
+  # 1) KEEP ONLY THIS SITE'S TILE (e.g. T10SGF), *not* every B11 in 2018
+  # --------------------------------------------------------------------------
+  # find relevant files
+  b11_all <- list.files(
+    tif_dir,
+    pattern = paste0("T", tileID, ".*B11.*\\.tif$"),
+    full.names = TRUE
+  )
+  # ...file cleaning, etc (copy your existing cleaning/overlap code here) ...
+
+  # extract acquisition dates (YYYYDDD) from the file names
+  b11_dates <- as.Date(stringr::str_extract(b11_all, "\\d{7}"), "%Y%j")
+
+  # Date filtering & NA-removal
+  keep_idx <- !is.na(b11_dates) &
+    b11_dates >= as.Date("2018-01-01") &
+    b11_dates <= as.Date("2019-12-31")
+
+  b11_sub <- b11_all[keep_idx]
+  b11_dates <- b11_dates[keep_idx]
+  # ...date filtering etc as before...
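+
+  # The Sentinel and Landsat loops below share their masking logic and differ
+  # only in band pair and Fmask directory. A sketch of a common helper
+  # (assumptions: the file layout used in this script, and Fmask bits
+  # 1/3/4 = cloud / cloud shadow / snow, exactly as in the loops below):
+  masked_ndti_mean <- function(band_a_file, band_b_file, fmask_file, roi) {
+    ra <- crop(rast(band_a_file), roi)
+    rb <- crop(rast(band_b_file), roi)
+    fm <- crop(rast(fmask_file), roi)
+    ndti <- (ra - rb) / (ra + rb) # NDTI = (SWIR1 - SWIR2) / (SWIR1 + SWIR2)
+    fmv <- terra::values(fm)[, 1]
+    bad <- (bitwAnd(fmv, bitwShiftL(1, 1)) != 0) |
+      (bitwAnd(fmv, bitwShiftL(1, 3)) != 0) |
+      (bitwAnd(fmv, bitwShiftL(1, 4)) != 0)
+    ndti[bad] <- NA
+    global(ndti, fun = mean, na.rm = TRUE)[1, 1]
+  }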
+
+  # Pre-allocate result vectors
+  n_sen <- length(b11_sub)
+  dates_sen <- as.Date(rep(NA, n_sen))
+  ndvals_sen <- numeric(n_sen)
+
+  # Loop over Sentinel-2 scenes
+  pb <- txtProgressBar(0, n_sen, style = 3)
+  for (i in seq_along(b11_sub)) {
+    b11 <- b11_sub[i]
+    b12 <- sub("B11", "B12", b11)
+    scene <- basename(b11)
+    scene_year <- stringr::str_extract(scene, "\\d{4}")
+    fmask_dir <- file.path("/projectnb/dietzelab/XinyuanJi/State_of_California_HLSS",
+                           paste0(scene_year, "_Fmask"))
+    fmask_file <- file.path(fmask_dir, sub("B11.tif$", "Fmask.tif", scene))
+
+    # Skip if the matching B12 or Fmask scene is missing
+    if (!file.exists(b12) || !file.exists(fmask_file)) {
+      setTxtProgressBar(pb, i)
+      next
+    }
+
+    r11 <- crop(rast(b11), roi_ndti)
+    r12 <- crop(rast(b12), roi_ndti)
+    fmask <- crop(rast(fmask_file), roi_ndti)
+
+    # calculate NDTI
+    ndti <- (r11 - r12) / (r11 + r12)
+
+    # cloud/shadow masking via Fmask bits:
+    # bit 1 = cloud, bit 3 = cloud shadow, bit 4 = snow/ice
+    badmask <- (bitwAnd(values(fmask), bitwShiftL(1, 1)) != 0) |
+      (bitwAnd(values(fmask), bitwShiftL(1, 3)) != 0) |
+      (bitwAnd(values(fmask), bitwShiftL(1, 4)) != 0)
+    ndti[badmask] <- NA
+
+    # mean of all unmasked pixels in the ROI
+    nd_mean <- global(ndti, fun = mean, na.rm = TRUE)[1, 1]
+
+    # store the result
+    if (is.na(nd_mean)) { setTxtProgressBar(pb, i); next }
+    ndvals_sen[i] <- nd_mean
+    dates_sen[i] <- b11_dates[i]
+    setTxtProgressBar(pb, i)
+  }
+  close(pb)
+
+  ndti_sent <- tibble(
+    date = dates_sen,
+    mean_ndti = ndvals_sen
+  ) %>%
+    filter(!is.na(date) & !is.na(mean_ndti)) %>% # remove invalid rows
+    arrange(date) # sort the final result by date
+
+  # --------------------------------------------------------------------------
+  # LANDSAT: B06/B07; same logic as above but for Bands 6 & 7
+  # --------------------------------------------------------------------------
+  b06_all <- list.files(
+    tif_dir,
+    pattern = paste0("T", tileID, ".*B06.*\\.tif$"),
+    full.names = TRUE
+  )
+  # ...copy file cleaning, overlap check code from above, but with B06 instead of B11 ...
+
+  b06_dates <- as.Date(stringr::str_extract(b06_all, "\\d{7}"), "%Y%j")
+  keep_idx <- !is.na(b06_dates) &
+    b06_dates >= as.Date("2018-01-01") &
+    b06_dates <= as.Date("2019-12-31")
+  b06_sub <- b06_all[keep_idx]
+  b06_dates <- b06_dates[keep_idx]
+  # ...date filtering...
+
+  n_lan <- length(b06_sub)
+  dates_lan <- as.Date(rep(NA, n_lan))
+  ndvals_lan <- numeric(n_lan)
+
+  pb <- txtProgressBar(0, n_lan, style = 3)
+  for (i in seq_along(b06_sub)) {
+    b06 <- b06_sub[i]
+    b07 <- sub("B06", "B07", b06)
+
+    scene <- basename(b06)
+    scene_year <- stringr::str_extract(scene, "\\d{4}")
+    fmask_dir <- file.path("/projectnb/dietzelab/XinyuanJi/State_of_California_HLSL",
+                           paste0(scene_year, "_Fmask"))
+    fmask_file <- file.path(fmask_dir, sub("B06.tif$", "Fmask.tif", scene))
+
+    # Skip if the matching B07 or Fmask scene is missing
+    if (!file.exists(b07) || !file.exists(fmask_file)) {
+      setTxtProgressBar(pb, i)
+      next
+    }
+
+    r6 <- crop(rast(b06), roi_ndti)
+    r7 <- crop(rast(b07), roi_ndti)
+    fmask <- crop(rast(fmask_file), roi_ndti)
+
+    # calculate NDTI
+    ndti <- (r6 - r7) / (r6 + r7)
+
+    badmask <- (bitwAnd(values(fmask), bitwShiftL(1, 1)) != 0) |
+      (bitwAnd(values(fmask), bitwShiftL(1, 3)) != 0) |
+      (bitwAnd(values(fmask), bitwShiftL(1, 4)) != 0)
+    ndti[badmask] <- NA
+
+    nd_mean <- global(ndti, fun = mean, na.rm = TRUE)[1, 1]
+
+    if (is.na(nd_mean)) { setTxtProgressBar(pb, i); next }
+    ndvals_lan[i] <- nd_mean
+    dates_lan[i] <- b06_dates[i]
+    setTxtProgressBar(pb, i)
+  }
+  close(pb)
+
+  ndti_land <- tibble(
+    date = dates_lan,
+    mean_ndti = ndvals_lan
+  ) %>%
+    filter(!is.na(date) & !is.na(mean_ndti)) %>%
+    arrange(date)
+
+  # --------------------------------------------------------------------------
+  # COMBINE THE TWO SENSORS (sorted, with a sensor label)
+  # --------------------------------------------------------------------------
+  ndti_df <- bind_rows(list(Landsat = ndti_land, Sentinel = ndti_sent),
+                       .id = "sensor") %>%
+    arrange(date) # na.approx() below requires an ordered time axis
+
+  # fill gaps by linear interpolation
+  ndti_df$mean_ndti_filled <- na.approx(ndti_df$mean_ndti, x = ndti_df$date, na.rm = FALSE)
+  # smooth with a 4-observation moving average
+  w <- 4
+  k <- rep(1 / w, w)
+  ndti_df$smoothed <- as.numeric(stats::filter(ndti_df$mean_ndti_filled, k, sides = 2))
+
+  # find the minimum of the smoothed series
+  ndti_smooth <- ndti_df[!is.na(ndti_df$smoothed), ]
+  min_row <- ndti_smooth[which.min(ndti_smooth$smoothed), ]
+  min_val <- min_row$smoothed
+  min_date <- min_row$date
+
+  # find the maximum before the minimum occurred
+  before_min <- ndti_smooth[ndti_smooth$date < min_date, ]
+
+  if (nrow(before_min) > 0 && !all(is.na(before_min$smoothed))) {
+    max_before_min_row <- before_min[which.max(before_min$smoothed), ]
+    max_before_min <- max_before_min_row$smoothed
+    max_before_min_date <- max_before_min_row$date
+  } else {
+    max_before_min <- NA
+    max_before_min_date <- NA
+  }
+
+  # fractional drop from the pre-minimum maximum to the minimum
+  ratio <- if (!is.na(max_before_min) && max_before_min != 0) {
+    (max_before_min - min_val) / max_before_min
+  } else NA
+
+  # Return one summary row per site, keeping extra info such as treatment
+  tibble(
+    id = selected_anchor$id,
+    site_name = selected_anchor$site_name,
+    Treatment_Control = selected_anchor$Treatment_Control,
+    min_ndti_smooth = min_val,
+    min_ndti_date = min_date,
+    max_before_min = max_before_min,
+    max_before_min_date = max_before_min_date,
+    ndti_smooth_drop = ratio
+  )
+}
+
+
+
+# summarise all sites: one row per anchor
+summary_table <- anchors %>%
+  split(.$id) %>%
+  map_dfr(process_ndti_for_site)
+
+# save the result
+# write.csv(summary_table, "/projectnb/dietzelab/XinyuanJi/ndti_all_sites_summary.csv", row.names = FALSE)