Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
9d5a89d
updated process satellite data on Ursa or other machines
mingchen-NOAA Jan 22, 2026
a74c750
combine processed satellite data in monthly
mingchen-NOAA Jan 22, 2026
da0e052
data-availability-based interpolation capability
mingchen-NOAA Jan 22, 2026
90500af
Combine data for retrotests and gfsv16 on Ursa
mingchen-NOAA Jan 22, 2026
a0814aa
add plot settings and scripts worked on Ursa
mingchen-NOAA Jan 22, 2026
5655fff
add test case for Ursa
mingchen-NOAA Jan 22, 2026
ea1b3b4
Add machine-specific configs for Orion and Ursa; move output path fro…
mingchen-NOAA Jan 28, 2026
aea7013
Add machine definition support for interpolation
mingchen-NOAA Jan 28, 2026
5f8d75d
update combine netcdf files to fit all machines
mingchen-NOAA Jan 28, 2026
807afda
remove all *_ursa scripts
mingchen-NOAA Feb 13, 2026
a6e4e20
move data processing scripts to src folder
mingchen-NOAA Feb 13, 2026
4343c06
add run script to ush folder
mingchen-NOAA Feb 13, 2026
67c2210
update makecombinemonthly.sh with multi-satellite support and WORKDIR…
mingchen-NOAA Feb 13, 2026
e39e77c
move scripts in ush, config files in parm
mingchen-NOAA Feb 13, 2026
40d3473
add WORKDIR in combineSatInterpOut.py
mingchen-NOAA Feb 18, 2026
55f6f4e
fix a filename error in makesubmitinterp.py
mingchen-NOAA Feb 19, 2026
13aeec5
adding copy run_all_jobs.sh to jobdir
mingchen-NOAA Feb 19, 2026
7aeedd5
update run_all_jobs to limit submissions to 20 jobs at a time
mingchen-NOAA Feb 19, 2026
50bf9ba
fix OOM when SBATCH_EXCLUSIVE = False
mingchen-NOAA Feb 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
493 changes: 291 additions & 202 deletions hr-eval/CombineSatInterpOut.py

Large diffs are not rendered by default.

303 changes: 214 additions & 89 deletions hr-eval/InterpModel2Sat/makesubmitinterpgfsv16.py
Original file line number Diff line number Diff line change
@@ -1,92 +1,217 @@
import datetime as dt
from dateutil.relativedelta import relativedelta
import os

# Root directory where the generated Slurm jobcards are written.
rootdir = os.path.join('/work2/noaa/marine/jmeixner/processsatdata', 'jobinterp')


# Evaluation seasons and the altimeter satellites to interpolate against.
season=['summer', 'hurricane']
satelites=['JASON3', 'CRYOSAT2', 'SARAL', 'SENTINEL3A'] #JASON3,JASON2,CRYOSAT2,JASON1,HY2,SARAL,SENTINEL3A,ENVISAT,ERS1,ERS2,GEOSAT,GFO,TOPEX,SENTINEL3B,CFOSAT
#model=['multi1', 'GFSv16', 'HR1', 'HR2', 'HR3a']
model='GFSv16'

# One pass per season: build the list of cycle dates, then emit one
# Slurm jobcard per date.
for k in range(len(season)):
    # Season-specific date windows; hurricane season uses a daily stride,
    # the other seasons a 3-day stride.
    if season[k] == "winter":
        startdate = dt.datetime(2019,12,3)
        enddate = dt.datetime(2020,2,26)
        datestride = 3
    elif season[k] == "summer":
        startdate = dt.datetime(2020,6,1)
        #enddate = dt.datetime(2020,8,30) #nooverlap needed
        enddate = dt.datetime(2020,7,19)
        datestride = 3
    elif season[k] == "hurricane":
        startdate = dt.datetime(2020,7,20)
        enddate = dt.datetime(2020,11,20)
        datestride = 1

    # Enumerate cycle dates (YYYYMMDDHH strings) from startdate through
    # enddate inclusive, stepping by datestride days.
    nowdate = startdate
    dates1 = []
    while nowdate <= enddate:
        dates1.append(nowdate.strftime('%Y%m%d%H'))
        #nowdate = (nowdate + dt.timedelta(days=datestride)).strftime('%Y%m%d')
        nowdate = nowdate + dt.timedelta(days=datestride)

    print(dates1)
    # Write one Slurm batch script per cycle date.
    for i in range(len(dates1)):
        outfile = os.path.join(rootdir, f"job_{model}_p16_{dates1[i]}.sh")
        with open(outfile, 'w') as f:
            f.write('#!/bin/bash\n')
            # Slurm header plus module/environment setup for this cycle.
            # NOTE(review): paths and partition are hard-coded for Orion.
            sbatch = f"""#SBATCH --nodes=1
#SBATCH -q batch
#SBATCH -t 08:00:00
#SBATCH -A marine-cpu
#SBATCH -J procsat_{model}_p16_{dates1[i]}
#SBATCH -o run_{model}_p16_{dates1[i]}.o%j
#SBATCH --partition=orion
#SBATCH --exclusive


module use /work2/noaa/marine/jmeixner/general/modulefiles
module load ww3tools

ThisDir=/work2/noaa/marine/jmeixner/processsatdata
PathToWW3TOOLS=/work2/noaa/marine/jmeixner/processsatdata/ww3-tools/ww3tools

set -x

SEASON={season[k]}
MODEL={model}
CDATE={dates1[i]}
OUTDIR=/work2/noaa/marine/jmeixner/processsatdata/outinterp/{model}

"""

            f.write(sbatch)
            # Shell-side split of the cycle: DATE=YYYYMMDD, TZ=HH.
            f.write('DATE=${CDATE:0:8} \n')
            f.write('TZ=${CDATE:8:2} \n')
            f.write('MODEL_DATA_DIR="/work/noaa/marine/jmeixner/Data/${MODEL}/gfs.${DATE}/${TZ}/wave/gridded" \n')
            # One interpolation invocation per satellite.
            for j in range(len(satelites)):
                satvalue = f"""

SAT={satelites[j]}

"""
                f.write(satvalue)

                # Seasonal satellite file for the current SAT.
                f.write('SATELLITE_FILE=/work/noaa/marine/jmeixner/Data/processedsatdata/Altimeter_${SAT}_HR${SEASON}.nc \n')
                #grids=['global.0p25','global.0p16']
                grids=['global.0p16']
                # Per-grid GRIB2 input pattern and output file name, then the
                # interpolation command itself.
                for g in range(len(grids)):
                    if grids[g] == 'global.0p25':
                        f.write("MODEL_DATA_PATTERN='gfswave.t00z.global.0p25.f*.grib2'\n")
                        f.write('OUTPUT_FILE="${MODEL}_global.0p25_${CDATE}_${SAT}.nc" \n')
                    elif grids[g] == 'global.0p16':
                        f.write("MODEL_DATA_PATTERN='gfswave.t00z.global.0p16.f*.grib2'\n")
                        f.write('OUTPUT_FILE="${MODEL}_global.0p16_${CDATE}_${SAT}.nc" \n')
                    elif grids[g] == 'arctic.9km':
                        f.write("MODEL_DATA_PATTERN='gfswave.t00z.arctic.9km.f*.grib2'\n")
                        f.write('OUTPUT_FILE="${MODEL}_arctic.9km_${CDATE}_${SAT}.nc" \n')
                    f.write('python ${PathToWW3TOOLS}/ProcSat_interpolation.py -t grib2 -d $MODEL_DATA_DIR -p $MODEL_DATA_PATTERN -s $SATELLITE_FILE -o $OUTDIR -f $OUTPUT_FILE -m ${MODEL} \n')


import glob
import re
import sys

## ===================== Setting (modified as needed) =========================
MACHINE = "ursa" # machine name ursa/orion/hercules

# directory settings
rootdir = os.path.join('/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/processsatdata', 'jobinterp')  # where jobcards are written
MODEL_BASE = "/scratch3/NCEPDEV/climate/Jessica.Meixner/Data/gfsv16"  # model output root (gfs.YYYYMMDD folders)
SAT_BASE = "/scratch3/NCEPDEV/climate/Jessica.Meixner/WaveEvaluation/processsatdata/combineoutmonthly"  # monthly Altimeter_<sat>_<YYYYMM>.nc files
OUTDIR_BASE = "/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/processsatdata/outinterp/GFSv16"  # interpolation output directory

# satellite and model settings
satellites=['JASON3', 'CRYOSAT2', 'SARAL', 'SENTINEL3A', 'SENTINEL3B', 'SENTINEL6A']
model='GFSv16' # now only support model of GFSv16 and retrov17_01
tz_list = ["00","06","12","18"]  # forecast cycles (hours) per day
grid = "global.0p25"
MODEL_DATA_PATTERN_TEMPLATE = "gfswave.t{tz}z.{grid}.f*.grib2"  # GRIB2 glob, filled per cycle/grid

# process script
PROC_SCRIPT = "/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/WW3-tools/ww3tools/ProcSat_interpolation.py"

# Slurm settings
SBATCH_ACCOUNT = "marine-cpu"
SBATCH_QUEUE = "batch"
SBATCH_TIME = "08:00:00"
SBATCH_NODES = 1
SBATCH_NTASKS = 1
SBATCH_CPUS_PER_TASK = 4
SBATCH_MEM = "16G"  # explicit memory request avoids OOM when not running --exclusive
SET_THREAD_ENVS = True  # export OMP/MKL thread counts in each jobcard
## --------------- Machine-specific configuration ------------------

if MACHINE == "ursa":
MODULE_USE_PATH = "/scratch4/NCEPDEV/marine/Saeideh.Banihashemi/installs/python-modules/"
MODULE_LOAD = "Ursa_ENV"
elif MACHINE in ("orion", "hercules"):
MODULE_USE_PATH = "/work2/noaa/marine/jmeixner/general/modulefiles"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

orion and hercules probably shouldn't be sharing a module.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It works on Orion and Ursa. After maintenance, I will try Hercules.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm pretty sure i have a specific hercules module, i'll check after maintenance.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The module works on Hercules

MODULE_LOAD = "ww3tools"
else:
print(f"ERROR: Unsupported MACHINE='{MACHINE}'. Use Ursa, Orion, or Hercules.", file=sys.stderr)
sys.exit(1)

## --------------- Checking inputs and settings --------------------

# Jobcards for this model go under <rootdir>/<model>.
jobdir = os.path.join(rootdir, model)

# makedirs(..., exist_ok=True) already tolerates pre-existing directories,
# so the former `if not os.path.isdir(...)` pre-checks were redundant
# (and race-prone); create unconditionally.
os.makedirs(rootdir, exist_ok=True)
os.makedirs(jobdir, exist_ok=True)
os.makedirs(OUTDIR_BASE, exist_ok=True)

# Precompiled patterns: model date folders (gfs.YYYYMMDD, date captured) and
# monthly satellite files (Altimeter_<sat>_<YYYYMM>.nc).
re_gfs = re.compile(r"^gfs\.(\d{8})$")
re_sat = re.compile(r"^Altimeter_(.+)_(\d{6})\.nc$")

def discover_model_dates(model_base: str) -> list:
    """
    Discover available model dates from folders named gfs.YYYYMMDD
    directly under *model_base*.

    Returns a sorted list of YYYYMMDD strings.
    """
    # Compile once (the old version re-compiled the pattern on every
    # iteration via re.match) and use the capture group instead of
    # split(".").  A single sort of the matched dates suffices; the
    # previous sorted(os.listdir(...)) pass was redundant.
    pattern = re.compile(r"^gfs\.(\d{8})$")
    matches = (pattern.match(name) for name in os.listdir(model_base))
    return sorted(m.group(1) for m in matches if m)

def sat_month_available_all(sat_base: str, satellites, yyyymm: str) -> bool:
    """
    Check monthly satellite coverage for one month.

    Returns True only when every satellite in *satellites* has its monthly
    file Altimeter_{sat}_{yyyymm}.nc present under *sat_base*; False as
    soon as any file is missing.
    """
    expected = (
        os.path.join(sat_base, f"Altimeter_{sat}_{yyyymm}.nc")
        for sat in satellites
    )
    # all() short-circuits on the first missing file, mirroring the
    # early-return behaviour of an explicit loop.
    return all(os.path.isfile(path) for path in expected)


cdates = discover_model_dates(MODEL_BASE)

# Persist the discovered model dates (one YYYYMMDD per line) for reference.
with open("cdates.txt", "w") as fh:
    fh.writelines(date + "\n" for date in cdates)

# Count how many model dates have complete monthly satellite coverage.
total = len(cdates)
covered = sum(
    1
    for date in cdates
    if sat_month_available_all(SAT_BASE, satellites, date[:6])
)
missing = total - covered

print("Satellite coverage verification:")
print(f" Total model dates : {total}")
print(f" Dates fully covered : {covered}")
print(f" Dates missing satellites : {missing}")

# ------------------- Write jobcards -----------------------------------
# Counters for the end-of-run summary.
written = 0
skipped_no_sat_all = 0
skipped_no_model_gribs = 0
missing_cycles = []  # "YYYYMMDDHH" cycles skipped for missing model data

for cdate in cdates:
    yyyymm = cdate[:6]

    # Require the monthly file for EVERY satellite before writing any
    # jobcard for this date (same check as sat_month_available_all).
    sat_ok = True
    for sat in satellites:
        fname = os.path.join(SAT_BASE, f"Altimeter_{sat}_{yyyymm}.nc")
        if not os.path.isfile(fname):
            sat_ok = False
            break
    if not sat_ok:
        skipped_no_sat_all += 1
        continue

    # One jobcard per forecast cycle (00/06/12/18Z).
    for tz in tz_list:
        # Model output directory layout differs between GFSv16 and the
        # v17 retrospective run.
        if model == "GFSv16":
            model_gridded_dir = os.path.join(MODEL_BASE, f"gfs.{cdate}", tz, "wave", "gridded")
        elif model == "retrov17_01":
            model_gridded_dir = os.path.join(MODEL_BASE, f"gfs.{cdate}", tz, "products", "wave", "gridded", "global.0p25")
        else:
            print(f"ERROR: Unsupported Model.", file=sys.stderr)
            sys.exit(1)

        if not os.path.isdir(model_gridded_dir):
            skipped_no_model_gribs += 1
            missing_cycles.append(f"{cdate}{tz}")
            continue

        # Skip cycles whose gridded GRIB2 output is absent.
        pattern = MODEL_DATA_PATTERN_TEMPLATE.format(tz=tz, grid=grid)
        gribs = glob.glob(os.path.join(model_gridded_dir, pattern))
        if len(gribs) == 0:
            skipped_no_model_gribs += 1
            missing_cycles.append(f"{cdate}{tz}")
            continue

        cdate_full = f"{cdate}{tz}"
        outfile = os.path.join(jobdir, f"job_{model}_{grid}_{cdate_full}.sh")
        outlog = os.path.join(jobdir, f"run_{model}_{grid}_{cdate_full}.o%j")

        with open(outfile, "w") as f:
            # Slurm resource header.
            f.write("#!/bin/bash\n")
            f.write(f"#SBATCH --nodes={SBATCH_NODES}\n")
            f.write(f"#SBATCH --ntasks={SBATCH_NTASKS}\n")
            f.write(f"#SBATCH --cpus-per-task={SBATCH_CPUS_PER_TASK}\n")
            f.write(f"#SBATCH --mem={SBATCH_MEM}\n")
            f.write(f"#SBATCH -q {SBATCH_QUEUE}\n")
            f.write(f"#SBATCH -t {SBATCH_TIME}\n")
            f.write(f"#SBATCH -A {SBATCH_ACCOUNT}\n")
            f.write(f"#SBATCH -J procsat_{model}_{grid}_{cdate_full}\n")
            f.write(f"#SBATCH -o {outlog}\n")

            # Machine-specific environment (set in the machine config above).
            f.write(f"module use {MODULE_USE_PATH}\n")
            f.write(f"module load {MODULE_LOAD}\n\n")

            f.write("set -euo pipefail\n")
            f.write("set -x\n\n")

            # Pin OpenMP/MKL threads to the allocated CPUs; presumably this
            # prevents oversubscription/OOM on shared nodes — TODO confirm.
            if SET_THREAD_ENVS:
                f.write("export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK\n")
                f.write("export MKL_NUM_THREADS=$SLURM_CPUS_PER_TASK\n\n")

            # Shell variables consumed by the interpolation command below.
            f.write(f"MODEL={model}\n")
            f.write(f"GRID={grid}\n")
            f.write(f"DATE={cdate}\n")
            f.write(f"TZ={tz}\n")
            f.write(f"YYYYMM={yyyymm}\n")
            f.write(f"OUTDIR={OUTDIR_BASE}\n")
            f.write("mkdir -p ${OUTDIR}\n\n")

            # Same layout distinction as the directory check above.
            if model == "GFSv16":
                f.write(f"MODEL_DATA_DIR={MODEL_BASE}/gfs.${{DATE}}/${{TZ}}/wave/gridded\n")
            elif model == "retrov17_01":
                f.write(f"MODEL_DATA_DIR={MODEL_BASE}/gfs.${{DATE}}/${{TZ}}/products/wave/gridded/global.0p25\n")
            else:
                print(f"ERROR: Unsupported Model.", file=sys.stderr)
                sys.exit(1)

            f.write(f"MODEL_DATA_PATTERN='{pattern}'\n\n")

            # One ProcSat_interpolation.py invocation per satellite.
            for sat in satellites:
                f.write(f"SAT={sat}\n")
                f.write(f"SATELLITE_FILE={SAT_BASE}/Altimeter_{sat}_${{YYYYMM}}.nc\n")
                f.write(f"OUTPUT_FILE=${{MODEL}}_${{GRID}}_{cdate_full}_{sat}.nc\n")
                f.write(
                    f"python {PROC_SCRIPT} "
                    f"-t grib2 -d $MODEL_DATA_DIR -p $MODEL_DATA_PATTERN "
                    f"-s $SATELLITE_FILE -o $OUTDIR -f $OUTPUT_FILE -m $MODEL\n\n"
                )

        # Make the jobcard executable (rwxr-x---).
        os.chmod(outfile, 0o750)
        written += 1

print("\nJobcard generation summary:")
print(f" Jobcards written : {written}")
print(f" Dates skipped (month missing ≥1 satellite) : {skipped_no_sat_all}")
print(f" Cycles skipped (missing model dir or GRIBs) : {skipped_no_model_gribs}")

if missing_cycles:
    for c in sorted(missing_cycles):
        print(f" {c}")

print(f" Jobcards directory : {rootdir}")
Loading