diff --git a/hr-eval/CombineSatInterpOut.py b/hr-eval/CombineSatInterpOut.py index de54637..28f4b72 100644 --- a/hr-eval/CombineSatInterpOut.py +++ b/hr-eval/CombineSatInterpOut.py @@ -1,219 +1,308 @@ import numpy as np -import matplotlib.pyplot as plt import netCDF4 as nc -import argparse - import datetime as dt -from dateutil.relativedelta import relativedelta import os import xarray as xr - +import glob ''' -Create combined NetCDF files for easier post processing. +Create combined NetCDF files for easier post processing. ''' -def main(): +# ================================================ +# =========== User-Defined Variables ============= +# ================================================ + +models = ['retrov17_01', 'GFSv16'] + +INPUTDIR_BASE = "/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/processsatdata_ursa/outinterp" +OUTDIR = "/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/processsatdata/outcombine" + +satellites = ['JASON3', 'CRYOSAT2', 'SARAL', 'SENTINEL3A'] + +# Custom date range (only used if use_seasons = False) +startdate = dt.datetime(2024, 11, 15, 12) # yyyy,m,d,h +enddate = dt.datetime(2024, 12, 17, 18) +interval_hours = 6 # interval between cycles in HOURS (e.g. 
6, 12, 24, 72) + +max_forecast_day = 16 + +force_season = "winter" # None = auto from data; or user override season as winter, summer, hurricane + +selected_years = ['2024'] # ['2024'], ['2024','2025'], [] = all - ap = argparse.ArgumentParser() - ap.add_argument('-m', '--model', help="String Identifier of Model 'multi1', 'GFSv16', 'HR1', 'HR2', 'HR3a', 'HR3b'", required=True) - ap.add_argument('-o', '--outdir', help="Output directory for files", default='./') - MyArgs = ap.parse_args() - - model = MyArgs.model - OUTDIR = MyArgs.outdir - - - #Check output directory exists: - INPUTDIR=f"/work2/noaa/marine/jmeixner/processsatdata/outinterp/{model}" - if not os.path.isdir(INPUTDIR): - INPUTDIR=f"/scratch1/NCEPDEV/climate/Jessica.Meixner/processsatdata/outinterp/{model}" - if not os.path.isdir(INPUTDIR): - print('INPUTDIR ({INPUTDIR}) does not exist!!!!') - exit(1) - - #create OUTDIR directory if it does not exist: - if not os.path.isdir(OUTDIR): - os.makedirs(OUTDIR) - - - - satelites=['JASON3', 'CRYOSAT2', 'SARAL', 'SENTINEL3A'] #JASON3,JASON2,CRYOSAT2,JASON1,HY2,SARAL,SENTINEL3A,ENVISAT,ERS1,ERS2,GEOSAT,GFO,TOPEX,SENTINEL3B,CFOSAT - - if model == "GFSv16": - season=['summer', 'hurricane'] - else: - season=['winter', 'summer', 'hurricane'] - - for k in range(len(season)): - if season[k] == "winter": - startdate = dt.datetime(2019,12,3) - enddate = dt.datetime(2020,2,26) - datestride = 3 - endday = 16 - elif season[k] == "summer": - startdate = dt.datetime(2020,6,1) - enddate = dt.datetime(2020,8,30) - datestride = 3 - endday = 16 - elif season[k] == "hurricane": - startdate = dt.datetime(2020,7,20) - enddate = dt.datetime(2020,11,20) - datestride = 1 - endday = 7 - - #if multi-1 end at day 7 +output_all_in_one = True +output_per_day = True + +# ================================================= + +def determine_season(dt_obj): + month = dt_obj.month + day = dt_obj.day + + # Atlantic hurricane season (NOAA): June 1 – Nov 30 + if (month == 6 and day >= 1) or month in 
[7,8,9,10] or (month == 11 and day <= 30): + return "hurricane" + + # Meteorological winter: Dec–Feb + if month in [12, 1, 2]: + return "winter" + + # Meteorological summer: Jun–Aug + if month in [6, 7, 8]: + return "summer" + + return "other" + +def get_inputdir(model): + return os.path.join(INPUTDIR_BASE, model) + +def get_grids_for_model(model): if model == "multi1": - endday = 7 - - - nowdate = startdate - dates1 = [] - while nowdate <= enddate: - dates1.append(nowdate.strftime('%Y%m%d%H')) - nowdate = nowdate + dt.timedelta(days=datestride) - - for j in range(len(satelites)): - time = []; lats = []; lons = [] - fhrs = []; fhrsall = []; - obs_hs = []; obs_wnd = [] - model_hs = []; model_wnd = [] - obs_hs_cal = []; obs_wnd_cal = [] - for i in range(len(dates1)): - #list of grids for each model. First should be "global" or the base, followed by high resolution inserts in the order - #of lower(global) to higher(regional) resolution. - if model == "multi1": - grids=['global.0p50', 'alaska.0p16', 'atlocn.0p16', 'epacif.0p16', 'wcoast.0p16', 'alaska.0p06', 'atlocn.0p06', 'wcoast.0p06'] - elif model == "GFSv16": - grids=['global.0p25', 'global.0p16'] - else: - grids=['global.0p25'] - - for g in range(len(grids)): - - if model == "multi1": - INPUT_FILE=f"{model}_{grids[g]}_{dates1[i]}_{satelites[j]}.nc" - elif model == "GFSv16": - INPUT_FILE=f"{model}_{grids[g]}_{dates1[i]}_{satelites[j]}.nc" - else: - INPUT_FILE=f"{model}_{grids[g]}_{season[k]}_{dates1[i]}_{satelites[j]}.nc" - - datapath = INPUTDIR + "/" + INPUT_FILE - datanc = nc.Dataset(datapath) - if g == 0: - #this is the global/base grid: - time_tmpbase = np.array(datanc.variables['time'][:]) - fhrs_tmpbase = np.array(datanc.variables['fcst_hr'][:]) - lats_tmpbase = np.array(datanc.variables['latitude'][:]) - lons_tmpbase = np.array(datanc.variables['longitude'][:]) - obs_hs_tmpbase = np.array(datanc.variables['obs_hs'][:]) - obs_hs_cal_tmpbase = np.array(datanc.variables['obs_hs_cal'][:]) - obs_wnd_tmpbase = 
np.array(datanc.variables['obs_wnd'][:]) - obs_wnd_cal_tmpbase = np.array(datanc.variables['obs_wnd_cal'][:]) - model_hs_tmpbase = np.array(datanc.variables['model_hs'][:]) - model_wnd_tmpbase = np.array(datanc.variables['model_wnd'][:]) - initial_condition_time = datanc.getncattr('initial_condition_time') - fhrsall_tmpbase = (time_tmpbase - initial_condition_time)/3600 - else: - #this is s a higher resolution sub-grid - time_tmphigh = np.array(datanc.variables['time'][:]) - fhrs_tmphigh = np.array(datanc.variables['fcst_hr'][:]) - lats_tmphigh = np.array(datanc.variables['latitude'][:]) - lons_tmphigh = np.array(datanc.variables['longitude'][:]) - obs_hs_tmphigh = np.array(datanc.variables['obs_hs'][:]) - obs_hs_cal_tmphigh = np.array(datanc.variables['obs_hs_cal'][:]) - obs_wnd_tmphigh = np.array(datanc.variables['obs_wnd'][:]) - obs_wnd_cal_tmphigh = np.array(datanc.variables['obs_wnd_cal'][:]) - model_hs_tmphigh = np.array(datanc.variables['model_hs'][:]) - model_wnd_tmphigh = np.array(datanc.variables['model_wnd'][:]) - #Check that obs values are the same for sanity check and if so, - #replace model values with high res inserts where HS is not nan - if ((obs_hs_tmphigh == obs_hs_tmpbase).all()): - np.where(~np.isnan(model_hs_tmphigh), model_hs_tmpbase, model_hs_tmphigh) - np.where(~np.isnan(model_hs_tmphigh), model_wnd_tmpbase, model_wnd_tmphigh) - - - time = np.append(time, time_tmpbase) - lats = np.append(lats, lats_tmpbase) - lons = np.append(lons, lons_tmpbase) - fhrs = np.append(fhrs, fhrs_tmpbase) - fhrsall = np.append(fhrsall, fhrsall_tmpbase) - - obs_hs = np.append(obs_hs, obs_hs_tmpbase) - obs_wnd = np.append(obs_wnd, obs_wnd_tmpbase) - obs_hs_cal = np.append(obs_hs_cal, obs_hs_cal_tmpbase) - obs_wnd_cal = np.append(obs_wnd_cal, obs_wnd_cal_tmpbase) - - model_hs = np.append(model_hs, model_hs_tmpbase) - model_wnd = np.append(model_wnd, model_wnd_tmpbase) - - #remove values we should not use for wind due to zero at fhr0 - if model == "HR1": - 
model_wnd[fhrs<3]=np.nan - elif model == "HR2": - model_wnd[fhrs<3]=np.nan - elif model == "HR3a": - model_wnd[fhrs<1]=np.nan - elif model == "HR3b": - model_wnd[fhrs<1]=np.nan - - #Call function to write out netcdf file with all forecast hours - outfilename=f"combined_{model}_{season[k]}_{satelites[j]}.nc" - OUTFILE = OUTDIR + '/' + outfilename - write_netcdf_file(OUTFILE, model, satelites[j], time,lats, lons, fhrs, obs_hs, obs_hs_cal, obs_wnd, obs_wnd_cal, model_hs, model_wnd) - - day0=0 - day=1 - while day <= endday: - f0 = day0*24 - f1 = day*24 - #it will likely be easier to match all models up if we don't filter nans out here... - #indx=np.where(( fhrs < f1 ) & ( fhrs > f0 ) & (~np.isnan(model_hs))) - indx=np.where(( fhrsall <= f1 ) & ( fhrsall > f0 )) - time_day = time[indx] - lats_day = lats[indx] - lons_day = lons[indx] - fhrs_day = fhrs[indx] - obs_hs_day = obs_hs[indx] - obs_wnd_day = obs_wnd[indx] - obs_hs_cal_day = obs_hs_cal[indx] - obs_wnd_cal_day = obs_wnd_cal[indx] - model_hs_day = model_hs[indx] - model_wnd_day = model_wnd[indx] - - #Call function to write out netcdf file for each day - outfilename=f"combined_day{day:02d}_{model}_{season[k]}_{satelites[j]}.nc" - OUTFILE = OUTDIR + '/' + outfilename - write_netcdf_file(OUTFILE, model, satelites[j], time_day,lats_day, lons_day, fhrs_day, obs_hs_day, obs_hs_cal_day, obs_wnd_day, obs_wnd_cal_day, model_hs_day, model_wnd_day) - - day0 = day - day = day + 1 - -def write_netcdf_file(nameoffile, nameofmodel,nameofsat, val_time, val_lats, val_lons, val_fhrs, val_obs_hs, val_obs_hs_cal, val_obs_wnd, val_obs_wnd_cal, val_model_hs, val_model_wnd): - - time_dataarray = xr.DataArray(val_time, dims=['time'], name='time', attrs={ - 'standard_name': 'time', - 'units': 'seconds since 1970-01-01 00:00:00', - 'calendar': 'standard', - 'axis': 'T' - }) + return ['global.0p50', 'alaska.0p16', 'atlocn.0p16', 'epacif.0p16', 'wcoast.0p16', + 'alaska.0p06', 'atlocn.0p06', 'wcoast.0p06'] + elif model == "GFSv16": + return 
['global.0p25'] + elif model == "retrov17_01": + return ['global.0p25'] + else: + return ['global.0p25'] + +def get_max_forecast_days(model, season): + return max_forecast_day - interpolated_dataset = xr.Dataset({ - 'time': time_dataarray, - 'latitude': xr.DataArray(val_lats, coords={'time': val_time}, dims=['time'], name='latitude').assign_attrs(units='degree_north'), - 'longitude': xr.DataArray(val_lons, coords={'time': val_time}, dims=['time'], name='longitude').assign_attrs(units='degree_east'), - 'model_hs': xr.DataArray(val_model_hs, coords={'time': val_time}, dims=['time'], name='model_hs').assign_attrs(units='m'), - 'model_wnd': xr.DataArray(val_model_wnd, coords={'time': val_time}, dims=['time'], name='model_wnd').assign_attrs(units='m'), - 'obs_hs': xr.DataArray(val_obs_hs, coords={'time': val_time}, dims=['time'], name='obs_hs').assign_attrs(units='m'), - 'obs_hs_cal': xr.DataArray(val_obs_hs_cal, coords={'time': val_time}, dims=['time'], name='obs_hs_cal').assign_attrs(units='m'), - 'obs_wnd': xr.DataArray(val_obs_wnd, coords={'time': val_time}, dims=['time'], name='obs_wnd').assign_attrs(units='m/s'), - 'obs_wnd_cal': xr.DataArray(val_obs_wnd_cal, coords={'time': val_time}, dims=['time'], name='obs_wnd_cal').assign_attrs(units='m/s'), - 'fcst_hr': xr.DataArray(val_fhrs,coords={'time': val_time}, dims=['time'], name='fcst_hr').assign_attrs(description="Forecast hour relative to initial condition time", units='hours') +def main(): + if not os.path.isdir(OUTDIR): + os.makedirs(OUTDIR) + + now = startdate + all_cycles = [] + while now <= enddate: + all_cycles.append(now) + now += dt.timedelta(hours=interval_hours) + + if selected_years: + cycles_to_process = [c for c in all_cycles if str(c.year) in selected_years] + else: + cycles_to_process = all_cycles + + if not cycles_to_process: + print("No cycles after year filter.") + return + + print(f"Processing {len(cycles_to_process)} cycles (interval {interval_hours}h)") + + for model in models: + print(f"\n=== 
Model: {model} ===") + grids = get_grids_for_model(model) + inputdir = get_inputdir(model) + + if not os.path.isdir(inputdir): + print(f" Directory not found: {inputdir}") + continue + + cycles_by_season = {} + for cycle_dt in cycles_to_process: + season = force_season if force_season else determine_season(cycle_dt) + cycles_by_season.setdefault(season, []).append(cycle_dt.strftime('%Y%m%d%H')) + + for season_name, date_strs in cycles_by_season.items(): + print(f" → {season_name}: {len(date_strs)} cycles") + + endday = get_max_forecast_days(model, season_name) + + years = sorted(set(d[:4] for d in date_strs)) + + for sat in satellites: + if len(years) == 1: + y = years[0] + y_dates = date_strs + data = collect_data(model, sat, y_dates, grids, inputdir, season_name) + if not data.get('time'): + continue + arr = {k: np.array(v) for k, v in data.items()} + adjust_wind(model, arr['fhrs'], arr['model_wnd']) + write_outputs(arr, model, sat, season_name, endday, year=y) + else: + for y in years: + y_dates = [d for d in date_strs if d.startswith(y)] + data = collect_data(model, sat, y_dates, grids, inputdir, season_name) + if not data.get('time'): + continue + arr = {k: np.array(v) for k, v in data.items()} + adjust_wind(model, arr['fhrs'], arr['model_wnd']) + write_outputs(arr, model, sat, season_name, endday, year=y) + + +def collect_data(model, sat, date_list, grids, inputdir, season_name): + collected = { + 'time':[], 'lats':[], 'lons':[], 'fhrs':[], 'fhrsall':[], + 'obs_hs':[], 'obs_hs_cal':[], 'obs_wnd':[], 'obs_wnd_cal':[], + 'model_hs':[], 'model_wnd':[] + } + for dstr in date_list: + cycle = process_one_cycle(model, sat, dstr, grids, inputdir, season_name) + if cycle: + for k in collected: + collected[k].extend(cycle[k]) + return collected + + +def process_one_cycle(model, sat, date_str, grids, inputdir, season_name): + base = None + for g_idx, grid in enumerate(grids): + # Adjust filename pattern if your files include season name or not + # Here assuming no 
season in filename: +        fname = f"{model}_{grid}_{date_str}_{sat}.nc" + +        path = os.path.join(inputdir, fname) +        if not os.path.exists(path): +            continue + +        with nc.Dataset(path) as ds: +            t = np.array(ds.variables['time'][:]) +            fh = np.array(ds.variables['fcst_hr'][:]) +            la = np.array(ds.variables['latitude'][:]) +            lo = np.array(ds.variables['longitude'][:]) +            ohs = np.array(ds.variables['obs_hs'][:]) +            ohc = np.array(ds.variables['obs_hs_cal'][:]) +            ow = np.array(ds.variables['obs_wnd'][:]) +            owc = np.array(ds.variables['obs_wnd_cal'][:]) +            mhs = np.array(ds.variables['model_hs'][:]) +            mw = np.array(ds.variables['model_wnd'][:]) + +            ict = ds.getncattr('initial_condition_time') +            fha = (t - ict) / 3600.0 + +            if g_idx == 0: +                base = { +                    'time':t, 'lats':la, 'lons':lo, +                    'fhrs':fh, 'fhrsall':fha, +                    'obs_hs':ohs, 'obs_hs_cal':ohc, +                    'obs_wnd':ow, 'obs_wnd_cal':owc, +                    'model_hs':mhs, 'model_wnd':mw + } + else: + if base is None: continue + base['model_hs'] = np.where(~np.isnan(mhs), mhs, base['model_hs']) + base['model_wnd'] = np.where(~np.isnan(mw), mw, base['model_wnd']) + + return base + + +def adjust_wind(model, fhrs, model_wnd): + if model in ["HR1", "HR2"]: + model_wnd[fhrs < 3] = np.nan + elif model in ["HR3a", "HR3b"]: + model_wnd[fhrs < 1] = np.nan + + +def write_outputs(arr, model, sat, season, endday, year): + year_str = f"_{year}" if year else "" + + def save(filename_suffix, t, la, lo, ohs, ohc, ow, owc, mhs, mw, fh): + # NOTE(review): parameter order fixed to match both call sites, which pass + # (time, lats, lons, obs_hs, obs_hs_cal, obs_wnd, obs_wnd_cal, model_hs, model_wnd, fhrs); + # the previous signature put fh (fcst_hr) 5th and scrambled every array after lons. + # All variables as double (float64) + ds = xr.Dataset({ + 'time': xr.DataArray( + t.astype(np.float64), + dims=['time'], + name='time', + attrs={ + 'standard_name': 'time', + 'units': 'seconds since 1970-01-01 00:00:00', + 'calendar': 'standard', + 'axis': 'T' + } + ), + 'latitude': xr.DataArray( + la.astype(np.float64), + dims=['time'], + attrs={'units': 'degree_north'} + ), + 'longitude': xr.DataArray( + lo.astype(np.float64), + dims=['time'], + attrs={'units': 'degree_east'} + ), + 'model_hs': xr.DataArray( + mhs.astype(np.float64), + 
dims=['time'], + attrs={'units': 'm'} + ), + 'model_wnd': xr.DataArray( + mw.astype(np.float64), + dims=['time'], + attrs={'units': 'm'} + ), + 'obs_hs': xr.DataArray( + ohs.astype(np.float64), + dims=['time'], + attrs={'units': 'm'} + ), + 'obs_hs_cal': xr.DataArray( + ohc.astype(np.float64), + dims=['time'], + attrs={'units': 'm'} + ), + 'obs_wnd': xr.DataArray( + ow.astype(np.float64), + dims=['time'], + attrs={'units': 'm/s'} + ), + 'obs_wnd_cal': xr.DataArray( + owc.astype(np.float64), + dims=['time'], + attrs={'units': 'm/s'} + ), + 'fcst_hr': xr.DataArray( + fh.astype(np.float64), + dims=['time'], + attrs={ + 'description': 'Forecast hour relative to initial condition time', + 'units': 'hours' + } + ), }) - interpolated_dataset.attrs['satellite_name'] = f"{nameofsat}" - interpolated_dataset.attrs['model_name'] = f"{nameofmodel}" - interpolated_dataset.to_netcdf(nameoffile, format='NETCDF4') + # Explicitly set _FillValue = NaN for all variables + encoding = { + var: {'_FillValue': np.nan, 'dtype': 'float64'} + for var in ds.variables + } + + # Global attributes + ds.attrs['satellite_name'] = sat + ds.attrs['model_name'] = model + + fullpath = os.path.join(OUTDIR, filename_suffix) + ds.to_netcdf( + fullpath, + format='NETCDF4', + encoding=encoding, + engine='netcdf4' + ) + print(f" Wrote: {os.path.basename(fullpath)} ({len(t)} points)") + + if output_all_in_one: + fname = f"combined_{model}_{season}_{sat}.nc" + save(fname, arr['time'], arr['lats'], arr['lons'], + arr['obs_hs'], arr['obs_hs_cal'], arr['obs_wnd'], arr['obs_wnd_cal'], + arr['model_hs'], arr['model_wnd'], arr['fhrs']) + + if output_per_day: + day0 = 0 + day = 1 + while day <= endday: + idx = (arr['fhrsall'] > day0*24) & (arr['fhrsall'] <= day*24) + if np.any(idx): + fname = f"combined_day{day:02d}_{model}_{season}_{sat}.nc" + save(fname, + arr['time'][idx], arr['lats'][idx], arr['lons'][idx], + arr['obs_hs'][idx], arr['obs_hs_cal'][idx], + arr['obs_wnd'][idx], arr['obs_wnd_cal'][idx], + 
arr['model_hs'][idx], arr['model_wnd'][idx], arr['fhrs'][idx]) + day0 = day + day += 1 if __name__ == '__main__': diff --git a/hr-eval/InterpModel2Sat/makesubmitinterpgfsv16.py b/hr-eval/InterpModel2Sat/makesubmitinterpgfsv16.py index ab445ec..09609db 100644 --- a/hr-eval/InterpModel2Sat/makesubmitinterpgfsv16.py +++ b/hr-eval/InterpModel2Sat/makesubmitinterpgfsv16.py @@ -1,92 +1,217 @@ import datetime as dt from dateutil.relativedelta import relativedelta import os - -rootdir = os.path.join('/work2/noaa/marine/jmeixner/processsatdata', 'jobinterp') - - -season=['summer', 'hurricane'] -satelites=['JASON3', 'CRYOSAT2', 'SARAL', 'SENTINEL3A'] #JASON3,JASON2,CRYOSAT2,JASON1,HY2,SARAL,SENTINEL3A,ENVISAT,ERS1,ERS2,GEOSAT,GFO,TOPEX,SENTINEL3B,CFOSAT -#model=['multi1', 'GFSv16', 'HR1', 'HR2', 'HR3a'] -model='GFSv16' - -for k in range(len(season)): - if season[k] == "winter": - startdate = dt.datetime(2019,12,3) - enddate = dt.datetime(2020,2,26) - datestride = 3 - elif season[k] == "summer": - startdate = dt.datetime(2020,6,1) - #enddate = dt.datetime(2020,8,30) #nooverlap needed - enddate = dt.datetime(2020,7,19) - datestride = 3 - elif season[k] == "hurricane": - startdate = dt.datetime(2020,7,20) - enddate = dt.datetime(2020,11,20) - datestride = 1 - - nowdate = startdate - dates1 = [] - while nowdate <= enddate: - dates1.append(nowdate.strftime('%Y%m%d%H')) - #nowdate = (nowdate + dt.timedelta(days=datestride)).strftime('%Y%m%d') - nowdate = nowdate + dt.timedelta(days=datestride) - - print(dates1) - for i in range(len(dates1)): - outfile = os.path.join(rootdir, f"job_{model}_p16_{dates1[i]}.sh") - with open(outfile, 'w') as f: - f.write('#!/bin/bash\n') - sbatch = f"""#SBATCH --nodes=1 -#SBATCH -q batch -#SBATCH -t 08:00:00 -#SBATCH -A marine-cpu -#SBATCH -J procsat_{model}_p16_{dates1[i]} -#SBATCH -o run_{model}_p16_{dates1[i]}.o%j -#SBATCH --partition=orion -#SBATCH --exclusive - - -module use /work2/noaa/marine/jmeixner/general/modulefiles -module load ww3tools - 
-ThisDir=/work2/noaa/marine/jmeixner/processsatdata -PathToWW3TOOLS=/work2/noaa/marine/jmeixner/processsatdata/ww3-tools/ww3tools - -set -x - -SEASON={season[k]} -MODEL={model} -CDATE={dates1[i]} -OUTDIR=/work2/noaa/marine/jmeixner/processsatdata/outinterp/{model} - -""" - - f.write(sbatch) - f.write('DATE=${CDATE:0:8} \n') - f.write('TZ=${CDATE:8:2} \n') - f.write('MODEL_DATA_DIR="/work/noaa/marine/jmeixner/Data/${MODEL}/gfs.${DATE}/${TZ}/wave/gridded" \n') - for j in range(len(satelites)): - satvalue = f""" - -SAT={satelites[j]} - -""" - f.write(satvalue) - - f.write('SATELLITE_FILE=/work/noaa/marine/jmeixner/Data/processedsatdata/Altimeter_${SAT}_HR${SEASON}.nc \n') - #grids=['global.0p25','global.0p16'] - grids=['global.0p16'] - for g in range(len(grids)): - if grids[g] == 'global.0p25': - f.write("MODEL_DATA_PATTERN='gfswave.t00z.global.0p25.f*.grib2'\n") - f.write('OUTPUT_FILE="${MODEL}_global.0p25_${CDATE}_${SAT}.nc" \n') - elif grids[g] == 'global.0p16': - f.write("MODEL_DATA_PATTERN='gfswave.t00z.global.0p16.f*.grib2'\n") - f.write('OUTPUT_FILE="${MODEL}_global.0p16_${CDATE}_${SAT}.nc" \n') - elif grids[g] == 'arctic.9km': - f.write("MODEL_DATA_PATTERN='gfswave.t00z.arctic.9km.f*.grib2'\n") - f.write('OUTPUT_FILE="${MODEL}_arctic.9km_${CDATE}_${SAT}.nc" \n') - f.write('python ${PathToWW3TOOLS}/ProcSat_interpolation.py -t grib2 -d $MODEL_DATA_DIR -p $MODEL_DATA_PATTERN -s $SATELLITE_FILE -o $OUTDIR -f $OUTPUT_FILE -m ${MODEL} \n') - - +import re +import glob + +## ===================== Setting (modified as needed) ========================= +MACHINE = "ursa" # machine name ursa/orion/hercules + +# directory settings +rootdir = os.path.join('/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/processsatdata', 'jobinterp') +MODEL_BASE = "/scratch3/NCEPDEV/climate/Jessica.Meixner/Data/gfsv16" +SAT_BASE = "/scratch3/NCEPDEV/climate/Jessica.Meixner/WaveEvaluation/processsatdata/combineoutmonthly" +OUTDIR_BASE = 
"/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/processsatdata/outinterp/GFSv16" + +# satellite and model settings +satellites=['JASON3', 'CRYOSAT2', 'SARAL', 'SENTINEL3A', 'SENTINEL3B', 'SENTINEL6A'] +model='GFSv16' # now only support model of GFSv16 and retrov17_01 +tz_list = ["00","06","12","18"] +grid = "global.0p25" +MODEL_DATA_PATTERN_TEMPLATE = "gfswave.t{tz}z.{grid}.f*.grib2" + +# process script +PROC_SCRIPT = "/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/WW3-tools/ww3tools/ProcSat_interpolation.py" + +# Slurm settings +SBATCH_ACCOUNT = "marine-cpu" +SBATCH_QUEUE = "batch" +SBATCH_TIME = "08:00:00" +SBATCH_NODES = 1 +SBATCH_NTASKS = 1 +SBATCH_CPUS_PER_TASK = 4 +SBATCH_MEM = "16G" +SET_THREAD_ENVS = True + +## --------------- Machine-specific configuration ------------------ + +if MACHINE == "ursa": + MODULE_USE_PATH = "/scratch4/NCEPDEV/marine/Saeideh.Banihashemi/installs/python-modules/" + MODULE_LOAD = "Ursa_ENV" +elif MACHINE in ("orion", "hercules"): + MODULE_USE_PATH = "/work2/noaa/marine/jmeixner/general/modulefiles" + MODULE_LOAD = "ww3tools" +else: + print(f"ERROR: Unsupported MACHINE='{MACHINE}'. Use Ursa, Orion, or Hercules.", file=sys.stderr) + sys.exit(1) + +## --------------- Checking inputs and settings -------------------- + +jobdir = os.path.join(rootdir, model) + +if not os.path.isdir(rootdir): + os.makedirs(rootdir, exist_ok=True) + +if not os.path.isdir(jobdir): + os.makedirs(jobdir, exist_ok=True) + +if not os.path.isdir(OUTDIR_BASE): + os.makedirs(OUTDIR_BASE, exist_ok=True) + +re_gfs = re.compile(r"^gfs\.(\d{8})$") +re_sat = re.compile(r"^Altimeter_(.+)_(\d{6})\.nc$") + +def discover_model_dates(model_base: str): + """ + Discover available model dates from folders: + gfs.YYYYMMDD + Returns a sorted list of YYYYMMDD strings. 
+ """ + dates = [] + for name in sorted(os.listdir(model_base)): + if re.match(r"^gfs\.\d{8}$", name): + yyyymmdd = name.split(".")[1] + dates.append(yyyymmdd) + return sorted(dates) + +def sat_month_available_all(sat_base: str, satellites, yyyymm: str) -> bool: + """ + Return True only if ALL satellites have monthly files for yyyymm: + Altimeter_{sat}_{yyyymm}.nc + """ + for sat in satellites: + fname = os.path.join(sat_base, f"Altimeter_{sat}_{yyyymm}.nc") + if not os.path.isfile(fname): + return False + return True + + +cdates = discover_model_dates(MODEL_BASE) + +with open("cdates.txt", "w") as f: + for c in cdates: + f.write(c + "\n") + +total = len(cdates) +covered = 0 +missing = 0 + +for cdate in cdates: + yyyymm = cdate[:6] + if sat_month_available_all(SAT_BASE, satellites, yyyymm): + covered += 1 + else: + missing += 1 + +print("Satellite coverage verification:") +print(f" Total model dates : {total}") +print(f" Dates fully covered : {covered}") +print(f" Dates missing satellites : {missing}") + +# ------------------- Write jobcards ----------------------------------- +written = 0 +skipped_no_sat_all = 0 +skipped_no_model_gribs = 0 +missing_cycles = [] + +for cdate in cdates: + yyyymm = cdate[:6] + + sat_ok = True + for sat in satellites: + fname = os.path.join(SAT_BASE, f"Altimeter_{sat}_{yyyymm}.nc") + if not os.path.isfile(fname): + sat_ok = False + break + if not sat_ok: + skipped_no_sat_all += 1 + continue + + for tz in tz_list: + if model == "GFSv16": + model_gridded_dir = os.path.join(MODEL_BASE, f"gfs.{cdate}", tz, "wave", "gridded") + elif model == "retrov17_01": + model_gridded_dir = os.path.join(MODEL_BASE, f"gfs.{cdate}", tz, "products", "wave", "gridded","global.0p25") + else: + print(f"ERROR: Unsupported Model.", file=sys.stderr) + sys.exit(1) + + if not os.path.isdir(model_gridded_dir): + skipped_no_model_gribs += 1 + missing_cycles.append(f"{cdate}{tz}") + continue + + pattern = MODEL_DATA_PATTERN_TEMPLATE.format(tz=tz, grid=grid) + gribs = 
glob.glob(os.path.join(model_gridded_dir, pattern)) + if len(gribs) == 0: + skipped_no_model_gribs += 1 + missing_cycles.append(f"{cdate}{tz}") + continue + + cdate_full = f"{cdate}{tz}" + outfile = os.path.join(jobdir, f"job_{model}_{grid}_{cdate_full}.sh") + outlog = os.path.join(jobdir, f"run_{model}_{grid}_{cdate_full}.o%j") + + with open(outfile, "w") as f: + f.write("#!/bin/bash\n") + f.write(f"#SBATCH --nodes={SBATCH_NODES}\n") + f.write(f"#SBATCH --ntasks={SBATCH_NTASKS}\n") + f.write(f"#SBATCH --cpus-per-task={SBATCH_CPUS_PER_TASK}\n") + f.write(f"#SBATCH --mem={SBATCH_MEM}\n") + f.write(f"#SBATCH -q {SBATCH_QUEUE}\n") + f.write(f"#SBATCH -t {SBATCH_TIME}\n") + f.write(f"#SBATCH -A {SBATCH_ACCOUNT}\n") + f.write(f"#SBATCH -J procsat_{model}_{grid}_{cdate_full}\n") + f.write(f"#SBATCH -o {outlog}\n") + + f.write(f"module use {MODULE_USE_PATH}\n") + f.write(f"module load {MODULE_LOAD}\n\n") + + f.write("set -euo pipefail\n") + f.write("set -x\n\n") + + if SET_THREAD_ENVS: + f.write("export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK\n") + f.write("export MKL_NUM_THREADS=$SLURM_CPUS_PER_TASK\n\n") + + f.write(f"MODEL={model}\n") + f.write(f"GRID={grid}\n") + f.write(f"DATE={cdate}\n") + f.write(f"TZ={tz}\n") + f.write(f"YYYYMM={yyyymm}\n") + f.write(f"OUTDIR={OUTDIR_BASE}\n") + f.write("mkdir -p ${OUTDIR}\n\n") + + if model == "GFSv16": + f.write(f"MODEL_DATA_DIR={MODEL_BASE}/gfs.${{DATE}}/${{TZ}}/wave/gridded\n") + elif model == "retrov17_01": + f.write(f"MODEL_DATA_DIR={MODEL_BASE}/gfs.${{DATE}}/${{TZ}}/products/wave/gridded/global.0p25\n") + else: + print(f"ERROR: Unsupported Model.", file=sys.stderr) + sys.exit(1) + + f.write(f"MODEL_DATA_PATTERN='{pattern}'\n\n") + + for sat in satellites: + f.write(f"SAT={sat}\n") + f.write(f"SATELLITE_FILE={SAT_BASE}/Altimeter_{sat}_${{YYYYMM}}.nc\n") + f.write(f"OUTPUT_FILE=${{MODEL}}_${{GRID}}_{cdate_full}_{sat}.nc\n") + f.write( + f"python {PROC_SCRIPT} " + f"-t grib2 -d $MODEL_DATA_DIR -p $MODEL_DATA_PATTERN " + f"-s 
$SATELLITE_FILE -o $OUTDIR -f $OUTPUT_FILE -m $MODEL\n\n" + ) + + os.chmod(outfile, 0o750) + written += 1 + +print("\nJobcard generation summary:") +print(f" Jobcards written : {written}") +print(f" Dates skipped (month missing ≥1 satellite) : {skipped_no_sat_all}") +print(f" Cycles skipped (missing model dir or GRIBs) : {skipped_no_model_gribs}") + +if missing_cycles: + for c in sorted(missing_cycles): + print(f" {c}") + +print(f" Jobcards directory : {rootdir}") diff --git a/hr-eval/InterpModel2Sat/makesubmitinterpretrotest.py b/hr-eval/InterpModel2Sat/makesubmitinterpretrotest.py new file mode 100644 index 0000000..b5de251 --- /dev/null +++ b/hr-eval/InterpModel2Sat/makesubmitinterpretrotest.py @@ -0,0 +1,217 @@ +import datetime as dt +from dateutil.relativedelta import relativedelta +import os +import re +import glob + +## ===================== Setting (modified as needed) ========================= +MACHINE = "ursa" # machine name ursa/orion/hercules + +# directory settings +rootdir = os.path.join('/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/processsatdata', 'jobinterp') +MODEL_BASE = "/scratch3/NCEPDEV/climate/Jessica.Meixner/Data/retrov17_01" +SAT_BASE = "/scratch3/NCEPDEV/climate/Jessica.Meixner/WaveEvaluation/processsatdata/combineoutmonthly" +OUTDIR_BASE = "/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/processsatdata/outinterp/retrov17_01" + +# satellite and model settings +satellites=['JASON3', 'CRYOSAT2', 'SARAL', 'SENTINEL3A', 'SENTINEL3B', 'SENTINEL6A'] +model='retrov17_01' # now only support model of GFSv16 and retrov17_01 +tz_list = ["00","06","12","18"] +grid = "global.0p25" +MODEL_DATA_PATTERN_TEMPLATE = "gfs.t{tz}z.{grid}.f*.grib2" + +# process script +PROC_SCRIPT = "/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/WW3-tools/ww3tools/ProcSat_interpolation.py" + +# Slurm settings +SBATCH_ACCOUNT = "marine-cpu" +SBATCH_QUEUE = "batch" +SBATCH_TIME = "08:00:00" +SBATCH_NODES = 1 +SBATCH_NTASKS = 1 +SBATCH_CPUS_PER_TASK = 4 +SBATCH_MEM = "16G" 
+SET_THREAD_ENVS = True + +## --------------- Machine-specific configuration ------------------ + +if MACHINE == "ursa": + MODULE_USE_PATH = "/scratch4/NCEPDEV/marine/Saeideh.Banihashemi/installs/python-modules/" + MODULE_LOAD = "Ursa_ENV" +elif MACHINE in ("orion", "hercules"): + MODULE_USE_PATH = "/work2/noaa/marine/jmeixner/general/modulefiles" + MODULE_LOAD = "ww3tools" +else: + print(f"ERROR: Unsupported MACHINE='{MACHINE}'. Use Ursa, Orion, or Hercules.", file=sys.stderr) + sys.exit(1) + +## --------------- Checking inputs and settings -------------------- + +jobdir = os.path.join(rootdir, model) + +if not os.path.isdir(rootdir): + os.makedirs(rootdir, exist_ok=True) + +if not os.path.isdir(jobdir): + os.makedirs(jobdir, exist_ok=True) + +if not os.path.isdir(OUTDIR_BASE): + os.makedirs(OUTDIR_BASE, exist_ok=True) + +re_gfs = re.compile(r"^gfs\.(\d{8})$") +re_sat = re.compile(r"^Altimeter_(.+)_(\d{6})\.nc$") + +def discover_model_dates(model_base: str): + """ + Discover available model dates from folders: + gfs.YYYYMMDD + Returns a sorted list of YYYYMMDD strings. 
+ """ + dates = [] + for name in sorted(os.listdir(model_base)): + if re.match(r"^gfs\.\d{8}$", name): + yyyymmdd = name.split(".")[1] + dates.append(yyyymmdd) + return sorted(dates) + +def sat_month_available_all(sat_base: str, satellites, yyyymm: str) -> bool: + """ + Return True only if ALL satellites have monthly files for yyyymm: + Altimeter_{sat}_{yyyymm}.nc + """ + for sat in satellites: + fname = os.path.join(sat_base, f"Altimeter_{sat}_{yyyymm}.nc") + if not os.path.isfile(fname): + return False + return True + + +cdates = discover_model_dates(MODEL_BASE) + +with open("cdates.txt", "w") as f: + for c in cdates: + f.write(c + "\n") + +total = len(cdates) +covered = 0 +missing = 0 + +for cdate in cdates: + yyyymm = cdate[:6] + if sat_month_available_all(SAT_BASE, satellites, yyyymm): + covered += 1 + else: + missing += 1 + +print("Satellite coverage verification:") +print(f" Total model dates : {total}") +print(f" Dates fully covered : {covered}") +print(f" Dates missing satellites : {missing}") + +# ------------------- Write jobcards ----------------------------------- +written = 0 +skipped_no_sat_all = 0 +skipped_no_model_gribs = 0 +missing_cycles = [] + +for cdate in cdates: + yyyymm = cdate[:6] + + sat_ok = True + for sat in satellites: + fname = os.path.join(SAT_BASE, f"Altimeter_{sat}_{yyyymm}.nc") + if not os.path.isfile(fname): + sat_ok = False + break + if not sat_ok: + skipped_no_sat_all += 1 + continue + + for tz in tz_list: + if model == "GFSv16": + model_gridded_dir = os.path.join(MODEL_BASE, f"gfs.{cdate}", tz, "wave", "gridded") + elif model == "retrov17_01": + model_gridded_dir = os.path.join(MODEL_BASE, f"gfs.{cdate}", tz, "products", "wave", "gridded","global.0p25") + else: + print(f"ERROR: Unsupported Model.", file=sys.stderr) + sys.exit(1) + + if not os.path.isdir(model_gridded_dir): + skipped_no_model_gribs += 1 + missing_cycles.append(f"{cdate}{tz}") + continue + + pattern = MODEL_DATA_PATTERN_TEMPLATE.format(tz=tz, grid=grid) + gribs = 
glob.glob(os.path.join(model_gridded_dir, pattern)) + if len(gribs) == 0: + skipped_no_model_gribs += 1 + missing_cycles.append(f"{cdate}{tz}") + continue + + cdate_full = f"{cdate}{tz}" + outfile = os.path.join(jobdir, f"job_{model}_{grid}_{cdate_full}.sh") + outlog = os.path.join(jobdir, f"run_{model}_{grid}_{cdate_full}.o%j") + + with open(outfile, "w") as f: + f.write("#!/bin/bash\n") + f.write(f"#SBATCH --nodes={SBATCH_NODES}\n") + f.write(f"#SBATCH --ntasks={SBATCH_NTASKS}\n") + f.write(f"#SBATCH --cpus-per-task={SBATCH_CPUS_PER_TASK}\n") + f.write(f"#SBATCH --mem={SBATCH_MEM}\n") + f.write(f"#SBATCH -q {SBATCH_QUEUE}\n") + f.write(f"#SBATCH -t {SBATCH_TIME}\n") + f.write(f"#SBATCH -A {SBATCH_ACCOUNT}\n") + f.write(f"#SBATCH -J procsat_{model}_{grid}_{cdate_full}\n") + f.write(f"#SBATCH -o {outlog}\n") + + f.write(f"module use {MODULE_USE_PATH}\n") + f.write(f"module load {MODULE_LOAD}\n\n") + + f.write("set -euo pipefail\n") + f.write("set -x\n\n") + + if SET_THREAD_ENVS: + f.write("export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK\n") + f.write("export MKL_NUM_THREADS=$SLURM_CPUS_PER_TASK\n\n") + + f.write(f"MODEL={model}\n") + f.write(f"GRID={grid}\n") + f.write(f"DATE={cdate}\n") + f.write(f"TZ={tz}\n") + f.write(f"YYYYMM={yyyymm}\n") + f.write(f"OUTDIR={OUTDIR_BASE}\n") + f.write("mkdir -p ${OUTDIR}\n\n") + + if model == "GFSv16": + f.write(f"MODEL_DATA_DIR={MODEL_BASE}/gfs.${{DATE}}/${{TZ}}/wave/gridded\n") + elif model == "retrov17_01": + f.write(f"MODEL_DATA_DIR={MODEL_BASE}/gfs.${{DATE}}/${{TZ}}/products/wave/gridded/global.0p25\n") + else: + print(f"ERROR: Unsupported Model.", file=sys.stderr) + sys.exit(1) + + f.write(f"MODEL_DATA_PATTERN='{pattern}'\n\n") + + for sat in satellites: + f.write(f"SAT={sat}\n") + f.write(f"SATELLITE_FILE={SAT_BASE}/Altimeter_{sat}_${{YYYYMM}}.nc\n") + f.write(f"OUTPUT_FILE=${{MODEL}}_${{GRID}}_{cdate_full}_{sat}.nc\n") + f.write( + f"python {PROC_SCRIPT} " + f"-t grib2 -d $MODEL_DATA_DIR -p $MODEL_DATA_PATTERN " + f"-s 
$SATELLITE_FILE -o $OUTDIR -f $OUTPUT_FILE -m $MODEL\n\n" + ) + + os.chmod(outfile, 0o750) + written += 1 + +print("\nJobcard generation summary:") +print(f" Jobcards written : {written}") +print(f" Dates skipped (month missing ≥1 satellite) : {skipped_no_sat_all}") +print(f" Cycles skipped (missing model dir or GRIBs) : {skipped_no_model_gribs}") + +if missing_cycles: + for c in sorted(missing_cycles): + print(f" {c}") + +print(f" Jobcards directory : {rootdir}") diff --git a/hr-eval/configs/CRYOSAT2.yaml b/hr-eval/configs_orion/CRYOSAT2.yaml similarity index 91% rename from hr-eval/configs/CRYOSAT2.yaml rename to hr-eval/configs_orion/CRYOSAT2.yaml index 25a82d1..2d5856b 100755 --- a/hr-eval/configs/CRYOSAT2.yaml +++ b/hr-eval/configs_orion/CRYOSAT2.yaml @@ -3,11 +3,9 @@ # text string for identification. This will be included in the name of the output files. ftag: ww3tools -# output path, where results will be saved -path_out: /work2/noaa/marine/jmeixner/processsatdata/out/CRYOSAT2 # Data paths where the observations are saved -path_alt: /work/noaa/marine/ricardo.campos/data/AODN/altimeter +path_alt: /work/noaa/marine/jmeixner/Data/AODN/altimeter path_ndbc: /work/noaa/marine/ricardo.campos/data/buoys/NDBC/ncformat/wparam path_copernicus: /work/noaa/marine/ricardo.campos/data/buoys/Copernicus/wtimeseries diff --git a/hr-eval/configs/JASON3.yaml b/hr-eval/configs_orion/JASON3.yaml similarity index 91% rename from hr-eval/configs/JASON3.yaml rename to hr-eval/configs_orion/JASON3.yaml index 01cc7de..7832dc3 100755 --- a/hr-eval/configs/JASON3.yaml +++ b/hr-eval/configs_orion/JASON3.yaml @@ -3,11 +3,9 @@ # text string for identification. This will be included in the name of the output files. 
ftag: ww3tools -# output path, where results will be saved -path_out: /work2/noaa/marine/jmeixner/processsatdata/out/JASON3 # Data paths where the observations are saved -path_alt: /work/noaa/marine/ricardo.campos/data/AODN/altimeter +path_alt: /work/noaa/marine/jmeixner/Data/AODN/altimeter path_ndbc: /work/noaa/marine/ricardo.campos/data/buoys/NDBC/ncformat/wparam path_copernicus: /work/noaa/marine/ricardo.campos/data/buoys/Copernicus/wtimeseries diff --git a/hr-eval/configs/SARAL.yaml b/hr-eval/configs_orion/SARAL.yaml similarity index 91% rename from hr-eval/configs/SARAL.yaml rename to hr-eval/configs_orion/SARAL.yaml index 9b1f0be..73b75c0 100755 --- a/hr-eval/configs/SARAL.yaml +++ b/hr-eval/configs_orion/SARAL.yaml @@ -3,11 +3,9 @@ # text string for identification. This will be included in the name of the output files. ftag: ww3tools -# output path, where results will be saved -path_out: /work2/noaa/marine/jmeixner/processsatdata/out/SARAL # Data paths where the observations are saved -path_alt: /work/noaa/marine/ricardo.campos/data/AODN/altimeter +path_alt: /work/noaa/marine/jmeixner/Data/AODN/altimeter path_ndbc: /work/noaa/marine/ricardo.campos/data/buoys/NDBC/ncformat/wparam path_copernicus: /work/noaa/marine/ricardo.campos/data/buoys/Copernicus/wtimeseries diff --git a/hr-eval/configs/SENTINEL3A.yaml b/hr-eval/configs_orion/SENTINEL3A.yaml similarity index 91% rename from hr-eval/configs/SENTINEL3A.yaml rename to hr-eval/configs_orion/SENTINEL3A.yaml index 66157e2..8b2e1e1 100755 --- a/hr-eval/configs/SENTINEL3A.yaml +++ b/hr-eval/configs_orion/SENTINEL3A.yaml @@ -3,11 +3,9 @@ # text string for identification. This will be included in the name of the output files. 
ftag: ww3tools -# output path, where results will be saved -path_out: /work2/noaa/marine/jmeixner/processsatdata/out/SENTINEL3A # Data paths where the observations are saved -path_alt: /work/noaa/marine/ricardo.campos/data/AODN/altimeter +path_alt: /work/noaa/marine/jmeixner/Data/AODN/altimeter path_ndbc: /work/noaa/marine/ricardo.campos/data/buoys/NDBC/ncformat/wparam path_copernicus: /work/noaa/marine/ricardo.campos/data/buoys/Copernicus/wtimeseries diff --git a/hr-eval/configs_ursa/CRYOSAT2.yaml b/hr-eval/configs_ursa/CRYOSAT2.yaml new file mode 100755 index 0000000..2dae6b2 --- /dev/null +++ b/hr-eval/configs_ursa/CRYOSAT2.yaml @@ -0,0 +1,56 @@ +# Configuration file for WW3-tools +# https://github.com/NOAA-EMC/WW3-tools + +# text string for identification. This will be included in the name of the output files. +ftag: ww3tools + +# Data paths where the observations are saved +path_alt: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/AODN/altimeter +path_ndbc: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/NDBC/ncformat/wparam +path_copernicus: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/Copernicus/wtimeseries + +# WAVEWATCHIII field output, for the scripts to obtain the latitude and longitude +# grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ww3gfs_met5_2022061600.nc +grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ops.glwu.20220601.t19z.nc +# Grid format: Unstructured (0) or Structured (1) +grid_format: 1 + +# General quality control parameters. Basic range test +# Significant wave height (m) min and max: +hsmin: 0. +hsmax: 50. +# Wind speed (m/s) min and max: +wspmin: 0. +wspmax: 100. + +# netcdf format when saving outputs +fnetcdf: 'NETCDF4' + +# Parameters for ProcSat_Altimeter.py +# target space for collocation. 
Model (1), Altimeter (2), I don't know (0) +tspace: 2 +# Maximum distance (m) for pooling and averaging the data, See: https://doi.org/10.3390/rs15082203 +dlim: 5000.0 +# Maximum temporal distance (s) for pooling and averaging the data +maxti: 1 +# lat lon interval to read AODN altimeter data files (bins). Smaller areas read faster. +latmin: -88.0 +latmax: 88.0 +lonmin: 0. +lonmax: 360. +# Apply Quality Control. 1(yes), 0(no) +qc: 1 +# Minimum water depth, in meters +mindepth: 10. +# Minimum distance from the coast, in Km +mindfc: 5. +# Max RMS of the band significant wave height +max_swh_rms: 1.5 +# Max RMS of the backscatter coefficient +max_sig0_rms: 0.8 +# Max SWH Ku band quality control +max_swh_qc: 2.0 +# Processing. Number of procs for parallelization +npcs: 40 +# power of initial array 10**pia (size) that will be used to allocate satellite data (faster than append) +pia: 10 diff --git a/hr-eval/configs_ursa/JASON3.yaml b/hr-eval/configs_ursa/JASON3.yaml new file mode 100755 index 0000000..670bc37 --- /dev/null +++ b/hr-eval/configs_ursa/JASON3.yaml @@ -0,0 +1,56 @@ +# Configuration file for WW3-tools +# https://github.com/NOAA-EMC/WW3-tools + +# text string for identification. This will be included in the name of the output files. +ftag: ww3tools + +# Data paths where the observations are saved +path_alt: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/AODN/altimeter +path_ndbc: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/NDBC/ncformat/wparam +path_copernicus: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/Copernicus/wtimeseries + +# WAVEWATCHIII field output, for the scripts to obtain the latitude and longitude +# grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ww3gfs_met5_2022061600.nc +grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ops.glwu.20220601.t19z.nc +# Grid format: Unstructured (0) or Structured (1) +grid_format: 1 + +# General quality control parameters. 
Basic range test +# Significant wave height (m) min and max: +hsmin: 0. +hsmax: 50. +# Wind speed (m/s) min and max: +wspmin: 0. +wspmax: 100. + +# netcdf format when saving outputs +fnetcdf: 'NETCDF4' + +# Parameters for ProcSat_Altimeter.py +# target space for collocation. Model (1), Altimeter (2), I don't know (0) +tspace: 2 +# Maximum distance (m) for pooling and averaging the data, See: https://doi.org/10.3390/rs15082203 +dlim: 5000.0 +# Maximum temporal distance (s) for pooling and averaging the data +maxti: 1 +# lat lon interval to read AODN altimeter data files (bins). Smaller areas read faster. +latmin: -66.15 +latmax: 66.15 +lonmin: 0. +lonmax: 360. +# Apply Quality Control. 1(yes), 0(no) +qc: 1 +# Minimum water depth, in meters +mindepth: 10. +# Minimum distance from the coast, in Km +mindfc: 5. +# Max RMS of the band significant wave height +max_swh_rms: 1.5 +# Max RMS of the backscatter coefficient +max_sig0_rms: 0.8 +# Max SWH Ku band quality control +max_swh_qc: 2.0 +# Processing. Number of procs for parallelization +npcs: 40 +# power of initial array 10**pia (size) that will be used to allocate satellite data (faster than append) +pia: 10 diff --git a/hr-eval/configs_ursa/SARAL.yaml b/hr-eval/configs_ursa/SARAL.yaml new file mode 100755 index 0000000..279330c --- /dev/null +++ b/hr-eval/configs_ursa/SARAL.yaml @@ -0,0 +1,56 @@ +# Configuration file for WW3-tools +# https://github.com/NOAA-EMC/WW3-tools + +# text string for identification. This will be included in the name of the output files. 
+ftag: ww3tools + +# Data paths where the observations are saved +path_alt: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/AODN/altimeter +path_ndbc: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/NDBC/ncformat/wparam +path_copernicus: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/Copernicus/wtimeseries + +# WAVEWATCHIII field output, for the scripts to obtain the latitude and longitude +# grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ww3gfs_met5_2022061600.nc +grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ops.glwu.20220601.t19z.nc +# Grid format: Unstructured (0) or Structured (1) +grid_format: 1 + +# General quality control parameters. Basic range test +# Significant wave height (m) min and max: +hsmin: 0. +hsmax: 50. +# Wind speed (m/s) min and max: +wspmin: 0. +wspmax: 100. + +# netcdf format when saving outputs +fnetcdf: 'NETCDF4' + +# Parameters for ProcSat_Altimeter.py +# target space for collocation. Model (1), Altimeter (2), I don't know (0) +tspace: 2 +# Maximum distance (m) for pooling and averaging the data, See: https://doi.org/10.3390/rs15082203 +dlim: 5000.0 +# Maximum temporal distance (s) for pooling and averaging the data +maxti: 1 +# lat lon interval to read AODN altimeter data files (bins). Smaller areas read faster. +latmin: -81.49 +latmax: 81.45 +lonmin: 0. +lonmax: 360. +# Apply Quality Control. 1(yes), 0(no) +qc: 1 +# Minimum water depth, in meters +mindepth: 10. +# Minimum distance from the coast, in Km +mindfc: 5. +# Max RMS of the band significant wave height +max_swh_rms: 1.5 +# Max RMS of the backscatter coefficient +max_sig0_rms: 0.8 +# Max SWH Ku band quality control +max_swh_qc: 2.0 +# Processing. 
Number of procs for parallelization +npcs: 40 +# power of initial array 10**pia (size) that will be used to allocate satellite data (faster than append) +pia: 10 diff --git a/hr-eval/configs_ursa/SENTINEL3A.yaml b/hr-eval/configs_ursa/SENTINEL3A.yaml new file mode 100755 index 0000000..dcfd7a9 --- /dev/null +++ b/hr-eval/configs_ursa/SENTINEL3A.yaml @@ -0,0 +1,56 @@ +# Configuration file for WW3-tools +# https://github.com/NOAA-EMC/WW3-tools + +# text string for identification. This will be included in the name of the output files. +ftag: ww3tools + +# Data paths where the observations are saved +path_alt: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/AODN/altimeter +path_ndbc: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/NDBC/ncformat/wparam +path_copernicus: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/Copernicus/wtimeseries + +# WAVEWATCHIII field output, for the scripts to obtain the latitude and longitude +# grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ww3gfs_met5_2022061600.nc +grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ops.glwu.20220601.t19z.nc +# Grid format: Unstructured (0) or Structured (1) +grid_format: 1 + +# General quality control parameters. Basic range test +# Significant wave height (m) min and max: +hsmin: 0. +hsmax: 50. +# Wind speed (m/s) min and max: +wspmin: 0. +wspmax: 100. + +# netcdf format when saving outputs +fnetcdf: 'NETCDF4' + +# Parameters for ProcSat_Altimeter.py +# target space for collocation. Model (1), Altimeter (2), I don't know (0) +tspace: 2 +# Maximum distance (m) for pooling and averaging the data, See: https://doi.org/10.3390/rs15082203 +dlim: 5000.0 +# Maximum temporal distance (s) for pooling and averaging the data +maxti: 1 +# lat lon interval to read AODN altimeter data files (bins). Smaller areas read faster. +latmin: -78.0 +latmax: 81.0 +lonmin: 0. +lonmax: 360. +# Apply Quality Control. 1(yes), 0(no) +qc: 1 +# Minimum water depth, in meters +mindepth: 10. 
+# Minimum distance from the coast, in Km +mindfc: 5. +# Max RMS of the band significant wave height +max_swh_rms: 1.5 +# Max RMS of the backscatter coefficient +max_sig0_rms: 0.8 +# Max SWH Ku band quality control +max_swh_qc: 2.0 +# Processing. Number of procs for parallelization +npcs: 40 +# power of initial array 10**pia (size) that will be used to allocate satellite data (faster than append) +pia: 10 diff --git a/hr-eval/evalsumconfig.json b/hr-eval/evalsumconfig.json index 2fb1d70..8041f9a 100644 --- a/hr-eval/evalsumconfig.json +++ b/hr-eval/evalsumconfig.json @@ -1,23 +1,27 @@ { "directories": { - "HR3a": "/scratch2/NCEPDEV/marine/Ghazal.Mohammadpour/transfer/validation/WW3-tools/ww3tools/ORION/WW3-tools/ww3tools/output/HR3a/hurricane", - "HR1": "/scratch2/NCEPDEV/marine/Ghazal.Mohammadpour/transfer/validation/WW3-tools/ww3tools/ORION/WW3-tools/ww3tools/output/HR1/hurricane", - "HR2": "/scratch2/NCEPDEV/marine/Ghazal.Mohammadpour/transfer/validation/WW3-tools/ww3tools/ORION/WW3-tools/ww3tools/output/HR2/hurricane", - "GFSv16": "/scratch2/NCEPDEV/marine/Ghazal.Mohammadpour/transfer/validation/WW3-tools/ww3tools/ORION/WW3-tools/ww3tools/output/GFSv16/hurricane", - "MULTI1": "/scratch2/NCEPDEV/marine/Ghazal.Mohammadpour/transfer/validation/WW3-tools/ww3tools/ORION/WW3-tools/ww3tools/output/MULTI1/hurricane", - "HR3b": "/scratch2/NCEPDEV/marine/Ghazal.Mohammadpour/transfer/validation/WW3-tools/ww3tools/ORION/WW3-tools/ww3tools/output/HR3b/hurricane" + "GFSv16": "/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/processsatdata_ursa/outcombine", + "Retrotests": "/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/processsatdata_ursa/outcombine" }, "filenames": [ - "filtered_3h_to_24h.nc", - "filtered_24h_to_48h.nc", - "filtered_48h_to_72h.nc", - "filtered_72h_to_96h.nc", - "filtered_96h_to_120h.nc", - "filtered_120h_to_144h.nc", - "filtered_144h_to_168h.nc" + "combined_day01", + "combined_day02", + "combined_day03", + "combined_day04", + "combined_day05", + "combined_day06", 
+ "combined_day07", + "combined_day08", + "combined_day09", + "combined_day10", + "combined_day11", + "combined_day12", + "combined_day13", + "combined_day14", + "combined_day15", + "combined_day16" ], - "satellite_name": "CRYOSAT", - "season": "Hurricane", - "output_dir": "./plots/hurricane" + "satellite_name": "CRYOSAT2", + "season": "winter", + "output_dir": "/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/processsatdata_ursa/outeval/winter" } - diff --git a/hr-eval/makecombinemonthly.sh b/hr-eval/makecombinemonthly.sh new file mode 100644 index 0000000..a13791e --- /dev/null +++ b/hr-eval/makecombinemonthly.sh @@ -0,0 +1,84 @@ +#!/bin/bash +#SBATCH --nodes=1 +#SBATCH -q batch +#SBATCH -t 08:00:00 +#SBATCH -A marine-cpu +#SBATCH -J makecombinemonthly +#SBATCH -o makecombinemonthly.o%j +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --mem=64G + +# ================= USER'S INPUT ======================= +MACHINE="ursa" + +satoutdir=/scratch3/NCEPDEV/climate/Jessica.Meixner/WaveEvaluation/processsatdata/out # processed satellite data need to be combined +basedir=/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/processsatdata/combineoutmonthly # output combined data + +SATS=('SENTINEL6A') + +months=("08" "09" "10" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "01" "02" "03" "04" "05") +nextmonths=("08" "09" "10" "04" "05" "06" "07" "08" "09" "10" "11" "12" "01" "02" "03" "04" "05" "06") +years=("2022" "2022" "2022" "2024" "2024" "2024" "2024" "2024" "2024" "2024" "2024" "2024" "2024" "2025" "2025" "2025" "2025" "2025") +nextyears=("2022" "2022" "2022" "2024" "2024" "2024" "2024" "2024" "2024" "2024" "2024" "2024" "2025" "2025" "2025" "2025" "2025" "2025") +# ====================================================== + +if [[ "$MACHINE" == "ursa" ]]; then + module use /contrib/spack-stack/spack-stack-1.9.2/envs/ue-oneapi-2024.2.1/install/modulefiles/Core + module use 
/contrib/spack-stack/spack-stack-1.9.2/envs/ue-oneapi-2024.2.1/install/modulefiles/intel-oneapi-mpi/2021.13-haww6b3/gcc/12.4.0 + module load stack-oneapi/2024.2.1 + module load stack-intel-oneapi-mpi/2021.13 + module load cdo/2.4.4 + +elif [[ "$MACHINE" == "hercules" ]]; then + module use /apps/contrib/spack-stack/spack-stack-1.9.2/envs/ue-oneapi-2024.1.0/install/modulefiles/Core + module use /apps/contrib/spack-stack/spack-stack-1.9.2/envs/ue-oneapi-2024.1.0/install/modulefiles/intel-oneapi-mpi/2021.13-sqiixt7/gcc/13.3.0 + module load stack-oneapi/2024.2.1 + module load stack-intel-oneapi-mpi/2021.13 + module load cdo/2.4.4 + +else + echo "ERROR: No module configuration defined for machine $MACHINE" + exit 1 +fi + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK:-1} + +# check directories +if [[ ! -d "$satoutdir" ]]; then + echo "ERROR: Satellite input directory does not exist: $satoutdir" + exit 1 +fi + +if [[ ! -d "$basedir" ]]; then + echo "Output directory does not exist → creating: $basedir" + mkdir -p "$basedir" || { + echo "ERROR: Failed to create basedir: $basedir" + exit 1 + } +fi + +for SAT in "${SATS[@]}"; do + + echo "" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "Processing satellite: ${SAT}" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + + cd ${basedir} + for i in ${!months[@]}; do + month=${months[$i]} + year=${years[$i]} + nextmonth=${nextmonths[$i]} + nextyear=${nextyears[$i]} + date=${year}${month} + mkdir ${date}${SAT} + cd ${date}${SAT} + cp ${satoutdir}/${SAT}/AltimeterAlongTrack_ww3tools_${SAT}_${date}*.nc . + cp ${satoutdir}/${SAT}/AltimeterAlongTrack_ww3tools_${SAT}_${nextyear}${nextmonth}01*.nc . + cdo mergetime AltimeterAlongTrack_ww3tools_${SAT}_*.nc Altimeter_${SAT}_${date}.nc + mv Altimeter_${SAT}_${date}.nc ${basedir}/ + cd ${basedir} + done +done +echo "All processing completed." 
diff --git a/hr-eval/makeprocsatsubmit.py b/hr-eval/makeprocsatsubmit.py index 70588c4..718cbac 100644 --- a/hr-eval/makeprocsatsubmit.py +++ b/hr-eval/makeprocsatsubmit.py @@ -1,54 +1,176 @@ import datetime as dt from dateutil.relativedelta import relativedelta import os +import sys -rootdir = os.path.join('/work2/noaa/marine/jmeixner/processsatdata', 'jobsubs') +# ================================================ +# USER-EDITABLE CONFIGURATION +# ================================================ +MACHINE = "ursa" # or orion/herculus -startdate = dt.datetime(2019,12,1) -enddate = dt.datetime(2023,12,31) +ROOTDIR = "/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/processsatdata/jobsubs" # output jobcards directory +THISDIR = "/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/WW3-tools/hr-eval" # working directory +PATHTOWW3TOOLS = "/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/WW3-tools/ww3tools" # ww3tools directory (ProcSat_Altimeter.py) +OUT_BASE = "/scratch4/NCEPDEV/marine/Ming.Chen/wave_eval/processsatdata/out" # output directory for processed data (origional defined in .yaml) -nowdate = startdate +STARTDATE = "2024-11-15" # start date with formats YYYY-MM-DD or YYYYMMDD +ENDDATE = "2025-01-15" # end date with formats YYYY-MM-DD or YYYYMMDD + +SATELLITES = "JASON3,CRYOSAT2,SARAL,SENTINEL3A" # satellites using comma or space separated + +# SLURM settings +SBATCH_QUEUE = "batch" +SBATCH_ACCOUNT = "marine-cpu" +SBATCH_WALLTIME = "08:00:00" + +SBATCH_EXCLUSIVE = True # True: exclusive mode (whole node) + + # if SBATCH_EXCLUSIVE = True, the settings below are ignored +SBATCH_NODES = "1" +SBATCH_NTASKS = "1" +SBATCH_CPUS_PER_TASK = "4" +SBATCH_MEM = "16G" + +SET_THREAD_ENVS = True # Set OMP_NUM_THREADS, MKL_NUM_THREADS, etc. 
+ +# =============================================== + +MACHINE = MACHINE.strip().lower() + +# Module commands (different machines have different module builds and directories) +if MACHINE == "ursa": + MODULE_USE_PATH = "/scratch3/NCEPDEV/climate/Jessica.Meixner/general/modulefiles" + MODULE_LOAD = "ww3tools" + YAML_CONFIG_SUBDIR = "configs_ursa" + +elif MACHINE in ("orion", "hercules"): + MODULE_USE_PATH = "/work2/noaa/marine/jmeixner/general/modulefiles" + MODULE_LOAD = "ww3tools" + YAML_CONFIG_SUBDIR = "configs_orion" + +else: + print(f"ERROR: Unsupported MACHINE='{MACHINE}'. Use Ursa, Orion, or Hercules.", file=sys.stderr) + sys.exit(1) + +# Check required directories exist +if not os.path.isdir(THISDIR): + print(f"ERROR: THISDIR does not exist: {THISDIR}", file=sys.stderr) + sys.exit(1) + +if not os.path.isdir(PATHTOWW3TOOLS): + print(f"ERROR: PATHTOWW3TOOLS does not exist: {PATHTOWW3TOOLS}", file=sys.stderr) + sys.exit(1) + +# Create output directory if missing +if not os.path.isdir(ROOTDIR): + try: + os.makedirs(ROOTDIR) + print(f"Created output directory: {ROOTDIR}") + except Exception as e: + print(f"ERROR: Cannot create ROOTDIR: {e}", file=sys.stderr) + sys.exit(1) + +# Parse satellite list +sats = [s.strip() for s in SATELLITES.replace(",", " ").split() if s.strip()] +if not sats: + print("ERROR: No valid satellites provided", file=sys.stderr) + sys.exit(1) + +# Parse dates +def parse_date(s): + s = s.strip() + for fmt in ("%Y-%m-%d", "%Y%m%d"): + try: + return dt.datetime.strptime(s, fmt) + except ValueError: + continue + print(f"ERROR: Invalid date format: '{s}'. 
Use YYYY-MM-DD or YYYYMMDD.", file=sys.stderr) + sys.exit(1) + +# Generate date pairs (15-day steps + monthly overlap pattern) dates1 = [] dates2 = [] -while nowdate <= enddate: - dates1.append(nowdate.strftime('%Y%m%d')) - dates2.append((nowdate + dt.timedelta(days=15)).strftime('%Y%m%d')) - dates1.append((nowdate + dt.timedelta(days=15)).strftime('%Y%m%d')) - nowdate = nowdate + relativedelta(months=+1) - dates2.append(nowdate.strftime('%Y%m%d')) - - -print(dates1) -print(dates2) -satelites=['JASON3', 'CRYOSAT2', 'SARAL', 'SENTINEL3A'] #JASON3,JASON2,CRYOSAT2,JASON1,HY2,SARAL,SENTINEL3A,ENVISAT,ERS1,ERS2,GEOSAT,GFO,TOPEX,SENTINEL3B,CFOSAT +current = parse_date(STARTDATE) +enddate = parse_date(ENDDATE) + +while current <= enddate: + d1 = current.strftime("%Y%m%d") + d2 = (current + dt.timedelta(days=15)).strftime("%Y%m%d") + dates1.append(d1) + dates2.append(d2) + + dates1.append(d2) + current += relativedelta(months=+1) + dates2.append(current.strftime("%Y%m%d")) + +job_count = 0 + for i in range(len(dates1)): - for j in range(len(satelites)): - outfile = os.path.join(rootdir, f"job_{satelites[j]}_{dates1[i]}.sh") - with open(outfile, 'w') as f: - f.write('#!/bin/bash\n') - sbatch = f"""#SBATCH --nodes=1 -#SBATCH -q batch -#SBATCH -t 08:00:00 -#SBATCH -A marine-cpu -#SBATCH -J procsat_{satelites[j]}_{dates1[i]} -#SBATCH -o run_{satelites[j]}_{dates1[i]}.o%j -#SBATCH --partition=orion -#SBATCH --exclusive - - -module use /work2/noaa/marine/jmeixner/general/modulefiles -module load ww3tools - -ThisDir=/work2/noaa/marine/jmeixner/processsatdata -PathToWW3TOOLS=/work2/noaa/marine/jmeixner/processsatdata/ww3-tools/ww3tools - -SAT={satelites[j]} - -IDATE={dates1[i]}00 -EDATE={dates2[i]}00 - -""" - f.write(sbatch) - f.write('YAMLFILE=${ThisDir}/configs/${SAT}.yaml \n') - f.write('python ${PathToWW3TOOLS}/ProcSat_Altimeter.py --satelite ${SAT} --initdate ${IDATE} --enddate ${EDATE} --timestep 1.0 --yaml ${YAMLFILE} \n') + for sat in sats: + jobname = 
f"job_{sat}_{dates1[i]}.sh" + filepath = os.path.join(ROOTDIR, jobname) + + with open(filepath, "w", encoding="utf-8") as f: + f.write("#!/bin/bash\n\n") + + # Common SLURM directives + f.write(f"#SBATCH --nodes={SBATCH_NODES}\n") + f.write(f"#SBATCH -q {SBATCH_QUEUE}\n") + f.write(f"#SBATCH -t {SBATCH_WALLTIME}\n") + f.write(f"#SBATCH -A {SBATCH_ACCOUNT}\n") + f.write(f"#SBATCH -J procsat_{sat}_{dates1[i]}\n") + f.write(f"#SBATCH -o run_{sat}_{dates1[i]}.o%j\n") + + # Exclusive vs explicit resources + if SBATCH_EXCLUSIVE: + f.write("#SBATCH --exclusive\n") + else: + f.write(f"#SBATCH --ntasks={SBATCH_NTASKS}\n") + f.write(f"#SBATCH --cpus-per-task={SBATCH_CPUS_PER_TASK}\n") + f.write(f"#SBATCH --mem={SBATCH_MEM}\n") + + f.write("\n") + + # Module environment + if MODULE_USE_PATH.strip(): + f.write(f"module use {MODULE_USE_PATH}\n") + if MODULE_LOAD.strip(): + f.write(f"module load {MODULE_LOAD}\n") + if MODULE_USE_PATH.strip() or MODULE_LOAD.strip(): + f.write("\n") + + # Thread environment control (only in shared mode) + if SET_THREAD_ENVS and not SBATCH_EXCLUSIVE: + f.write("# Control number of threads for performance & memory\n") + f.write(f"export OMP_NUM_THREADS={SBATCH_CPUS_PER_TASK}\n") + f.write(f"export MKL_NUM_THREADS={SBATCH_CPUS_PER_TASK}\n") + f.write(f"export NUMEXPR_NUM_THREADS={SBATCH_CPUS_PER_TASK}\n") + f.write(f"export OPENBLAS_NUM_THREADS={SBATCH_CPUS_PER_TASK}\n") + f.write("\n") + + # Job variables + f.write(f'ThisDir="{THISDIR}"\n') + f.write(f'PathToWW3TOOLS="{PATHTOWW3TOOLS}"\n') + f.write(f'SAT="{sat}"\n') + f.write(f'IDATE="{dates1[i]}00"\n') + f.write(f'EDATE="{dates2[i]}00"\n\n') + + # The processing command + f.write(f'YAMLFILE="${{ThisDir}}/{YAML_CONFIG_SUBDIR}/${{SAT}}.yaml"\n') + f.write(f'OUT_BASE="{OUT_BASE}"\n\n') + f.write('python "${PathToWW3TOOLS}/ProcSat_Altimeter.py" \\\n') + f.write(' --satelite "${SAT}" \\\n') + f.write(' --initdate "${IDATE}" \\\n') + f.write(' --enddate "${EDATE}" \\\n') + f.write(' --timestep 1.0 
\\\n') + f.write(' --yaml "${YAMLFILE}" \\\n') + f.write(' --out_base "${OUT_BASE}"\n') + + # Make executable + os.chmod(filepath, 0o755) + job_count += 1 + print(f"Created: {jobname}") + +print(f"\nDone. Generated {job_count} job script(s) in:") +print(ROOTDIR) diff --git a/parm/configs_orion/CRYOSAT2.yaml b/parm/configs_orion/CRYOSAT2.yaml new file mode 100755 index 0000000..2d5856b --- /dev/null +++ b/parm/configs_orion/CRYOSAT2.yaml @@ -0,0 +1,56 @@ +# Configuration file for WW3-tools +# https://github.com/NOAA-EMC/WW3-tools + +# text string for identification. This will be included in the name of the output files. +ftag: ww3tools + +# Data paths where the observations are saved +path_alt: /work/noaa/marine/jmeixner/Data/AODN/altimeter +path_ndbc: /work/noaa/marine/ricardo.campos/data/buoys/NDBC/ncformat/wparam +path_copernicus: /work/noaa/marine/ricardo.campos/data/buoys/Copernicus/wtimeseries + +# WAVEWATCHIII field output, for the scripts to obtain the latitude and longitude +# grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ww3gfs_met5_2022061600.nc +grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ops.glwu.20220601.t19z.nc +# Grid format: Unstructured (0) or Structured (1) +grid_format: 1 + +# General quality control parameters. Basic range test +# Significant wave height (m) min and max: +hsmin: 0. +hsmax: 50. +# Wind speed (m/s) min and max: +wspmin: 0. +wspmax: 100. + +# netcdf format when saving outputs +fnetcdf: 'NETCDF4' + +# Parameters for ProcSat_Altimeter.py +# target space for collocation. Model (1), Altimeter (2), I don't know (0) +tspace: 2 +# Maximum distance (m) for pooling and averaging the data, See: https://doi.org/10.3390/rs15082203 +dlim: 5000.0 +# Maximum temporal distance (s) for pooling and averaging the data +maxti: 1 +# lat lon interval to read AODN altimeter data files (bins). Smaller areas read faster. +latmin: -88.0 +latmax: 88.0 +lonmin: 0. +lonmax: 360. +# Apply Quality Control. 
1(yes), 0(no) +qc: 1 +# Minimum water depth, in meters +mindepth: 10. +# Minimum distance from the coast, in Km +mindfc: 5. +# Max RMS of the band significant wave height +max_swh_rms: 1.5 +# Max RMS of the backscatter coefficient +max_sig0_rms: 0.8 +# Max SWH Ku band quality control +max_swh_qc: 2.0 +# Processing. Number of procs for parallelization +npcs: 40 +# power of initial array 10**pia (size) that will be used to allocate satellite data (faster than append) +pia: 10 diff --git a/parm/configs_orion/JASON3.yaml b/parm/configs_orion/JASON3.yaml new file mode 100755 index 0000000..7832dc3 --- /dev/null +++ b/parm/configs_orion/JASON3.yaml @@ -0,0 +1,56 @@ +# Configuration file for WW3-tools +# https://github.com/NOAA-EMC/WW3-tools + +# text string for identification. This will be included in the name of the output files. +ftag: ww3tools + +# Data paths where the observations are saved +path_alt: /work/noaa/marine/jmeixner/Data/AODN/altimeter +path_ndbc: /work/noaa/marine/ricardo.campos/data/buoys/NDBC/ncformat/wparam +path_copernicus: /work/noaa/marine/ricardo.campos/data/buoys/Copernicus/wtimeseries + +# WAVEWATCHIII field output, for the scripts to obtain the latitude and longitude +# grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ww3gfs_met5_2022061600.nc +grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ops.glwu.20220601.t19z.nc +# Grid format: Unstructured (0) or Structured (1) +grid_format: 1 + +# General quality control parameters. Basic range test +# Significant wave height (m) min and max: +hsmin: 0. +hsmax: 50. +# Wind speed (m/s) min and max: +wspmin: 0. +wspmax: 100. + +# netcdf format when saving outputs +fnetcdf: 'NETCDF4' + +# Parameters for ProcSat_Altimeter.py +# target space for collocation. 
Model (1), Altimeter (2), I don't know (0) +tspace: 2 +# Maximum distance (m) for pooling and averaging the data, See: https://doi.org/10.3390/rs15082203 +dlim: 5000.0 +# Maximum temporal distance (s) for pooling and averaging the data +maxti: 1 +# lat lon interval to read AODN altimeter data files (bins). Smaller areas read faster. +latmin: -66.15 +latmax: 66.15 +lonmin: 0. +lonmax: 360. +# Apply Quality Control. 1(yes), 0(no) +qc: 1 +# Minimum water depth, in meters +mindepth: 10. +# Minimum distance from the coast, in Km +mindfc: 5. +# Max RMS of the band significant wave height +max_swh_rms: 1.5 +# Max RMS of the backscatter coefficient +max_sig0_rms: 0.8 +# Max SWH Ku band quality control +max_swh_qc: 2.0 +# Processing. Number of procs for parallelization +npcs: 40 +# power of initial array 10**pia (size) that will be used to allocate satellite data (faster than append) +pia: 10 diff --git a/parm/configs_orion/SARAL.yaml b/parm/configs_orion/SARAL.yaml new file mode 100755 index 0000000..73b75c0 --- /dev/null +++ b/parm/configs_orion/SARAL.yaml @@ -0,0 +1,56 @@ +# Configuration file for WW3-tools +# https://github.com/NOAA-EMC/WW3-tools + +# text string for identification. This will be included in the name of the output files. +ftag: ww3tools + +# Data paths where the observations are saved +path_alt: /work/noaa/marine/jmeixner/Data/AODN/altimeter +path_ndbc: /work/noaa/marine/ricardo.campos/data/buoys/NDBC/ncformat/wparam +path_copernicus: /work/noaa/marine/ricardo.campos/data/buoys/Copernicus/wtimeseries + +# WAVEWATCHIII field output, for the scripts to obtain the latitude and longitude +# grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ww3gfs_met5_2022061600.nc +grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ops.glwu.20220601.t19z.nc +# Grid format: Unstructured (0) or Structured (1) +grid_format: 1 + +# General quality control parameters. Basic range test +# Significant wave height (m) min and max: +hsmin: 0. +hsmax: 50. 
+# Wind speed (m/s) min and max: +wspmin: 0. +wspmax: 100. + +# netcdf format when saving outputs +fnetcdf: 'NETCDF4' + +# Parameters for ProcSat_Altimeter.py +# target space for collocation. Model (1), Altimeter (2), I don't know (0) +tspace: 2 +# Maximum distance (m) for pooling and averaging the data, See: https://doi.org/10.3390/rs15082203 +dlim: 5000.0 +# Maximum temporal distance (s) for pooling and averaging the data +maxti: 1 +# lat lon interval to read AODN altimeter data files (bins). Smaller areas read faster. +latmin: -81.49 +latmax: 81.45 +lonmin: 0. +lonmax: 360. +# Apply Quality Control. 1(yes), 0(no) +qc: 1 +# Minimum water depth, in meters +mindepth: 10. +# Minimum distance from the coast, in Km +mindfc: 5. +# Max RMS of the band significant wave height +max_swh_rms: 1.5 +# Max RMS of the backscatter coefficient +max_sig0_rms: 0.8 +# Max SWH Ku band quality control +max_swh_qc: 2.0 +# Processing. Number of procs for parallelization +npcs: 40 +# power of initial array 10**pia (size) that will be used to allocate satellite data (faster than append) +pia: 10 diff --git a/parm/configs_orion/SENTINEL3A.yaml b/parm/configs_orion/SENTINEL3A.yaml new file mode 100755 index 0000000..8b2e1e1 --- /dev/null +++ b/parm/configs_orion/SENTINEL3A.yaml @@ -0,0 +1,56 @@ +# Configuration file for WW3-tools +# https://github.com/NOAA-EMC/WW3-tools + +# text string for identification. This will be included in the name of the output files. 
+ftag: ww3tools + +# Data paths where the observations are saved +path_alt: /work/noaa/marine/jmeixner/Data/AODN/altimeter +path_ndbc: /work/noaa/marine/ricardo.campos/data/buoys/NDBC/ncformat/wparam +path_copernicus: /work/noaa/marine/ricardo.campos/data/buoys/Copernicus/wtimeseries + +# WAVEWATCHIII field output, for the scripts to obtain the latitude and longitude +# grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ww3gfs_met5_2022061600.nc +grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ops.glwu.20220601.t19z.nc +# Grid format: Unstructured (0) or Structured (1) +grid_format: 1 + +# General quality control parameters. Basic range test +# Significant wave height (m) min and max: +hsmin: 0. +hsmax: 50. +# Wind speed (m/s) min and max: +wspmin: 0. +wspmax: 100. + +# netcdf format when saving outputs +fnetcdf: 'NETCDF4' + +# Parameters for ProcSat_Altimeter.py +# target space for collocation. Model (1), Altimeter (2), I don't know (0) +tspace: 2 +# Maximum distance (m) for pooling and averaging the data, See: https://doi.org/10.3390/rs15082203 +dlim: 5000.0 +# Maximum temporal distance (s) for pooling and averaging the data +maxti: 1 +# lat lon interval to read AODN altimeter data files (bins). Smaller areas read faster. +latmin: -78.0 +latmax: 81.0 +lonmin: 0. +lonmax: 360. +# Apply Quality Control. 1(yes), 0(no) +qc: 1 +# Minimum water depth, in meters +mindepth: 10. +# Minimum distance from the coast, in Km +mindfc: 5. +# Max RMS of the band significant wave height +max_swh_rms: 1.5 +# Max RMS of the backscatter coefficient +max_sig0_rms: 0.8 +# Max SWH Ku band quality control +max_swh_qc: 2.0 +# Processing. 
Number of procs for parallelization +npcs: 40 +# power of initial array 10**pia (size) that will be used to allocate satellite data (faster than append) +pia: 10 diff --git a/parm/configs_ursa/CRYOSAT2.yaml b/parm/configs_ursa/CRYOSAT2.yaml new file mode 100755 index 0000000..2dae6b2 --- /dev/null +++ b/parm/configs_ursa/CRYOSAT2.yaml @@ -0,0 +1,56 @@ +# Configuration file for WW3-tools +# https://github.com/NOAA-EMC/WW3-tools + +# text string for identification. This will be included in the name of the output files. +ftag: ww3tools + +# Data paths where the observations are saved +path_alt: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/AODN/altimeter +path_ndbc: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/NDBC/ncformat/wparam +path_copernicus: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/Copernicus/wtimeseries + +# WAVEWATCHIII field output, for the scripts to obtain the latitude and longitude +# grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ww3gfs_met5_2022061600.nc +grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ops.glwu.20220601.t19z.nc +# Grid format: Unstructured (0) or Structured (1) +grid_format: 1 + +# General quality control parameters. Basic range test +# Significant wave height (m) min and max: +hsmin: 0. +hsmax: 50. +# Wind speed (m/s) min and max: +wspmin: 0. +wspmax: 100. + +# netcdf format when saving outputs +fnetcdf: 'NETCDF4' + +# Parameters for ProcSat_Altimeter.py +# target space for collocation. Model (1), Altimeter (2), I don't know (0) +tspace: 2 +# Maximum distance (m) for pooling and averaging the data, See: https://doi.org/10.3390/rs15082203 +dlim: 5000.0 +# Maximum temporal distance (s) for pooling and averaging the data +maxti: 1 +# lat lon interval to read AODN altimeter data files (bins). Smaller areas read faster. +latmin: -88.0 +latmax: 88.0 +lonmin: 0. +lonmax: 360. +# Apply Quality Control. 1(yes), 0(no) +qc: 1 +# Minimum water depth, in meters +mindepth: 10. 
+# Minimum distance from the coast, in Km +mindfc: 5. +# Max RMS of the band significant wave height +max_swh_rms: 1.5 +# Max RMS of the backscatter coefficient +max_sig0_rms: 0.8 +# Max SWH Ku band quality control +max_swh_qc: 2.0 +# Processing. Number of procs for parallelization +npcs: 40 +# power of initial array 10**pia (size) that will be used to allocate satellite data (faster than append) +pia: 10 diff --git a/parm/configs_ursa/JASON3.yaml b/parm/configs_ursa/JASON3.yaml new file mode 100755 index 0000000..670bc37 --- /dev/null +++ b/parm/configs_ursa/JASON3.yaml @@ -0,0 +1,56 @@ +# Configuration file for WW3-tools +# https://github.com/NOAA-EMC/WW3-tools + +# text string for identification. This will be included in the name of the output files. +ftag: ww3tools + +# Data paths where the observations are saved +path_alt: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/AODN/altimeter +path_ndbc: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/NDBC/ncformat/wparam +path_copernicus: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/Copernicus/wtimeseries + +# WAVEWATCHIII field output, for the scripts to obtain the latitude and longitude +# grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ww3gfs_met5_2022061600.nc +grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ops.glwu.20220601.t19z.nc +# Grid format: Unstructured (0) or Structured (1) +grid_format: 1 + +# General quality control parameters. Basic range test +# Significant wave height (m) min and max: +hsmin: 0. +hsmax: 50. +# Wind speed (m/s) min and max: +wspmin: 0. +wspmax: 100. + +# netcdf format when saving outputs +fnetcdf: 'NETCDF4' + +# Parameters for ProcSat_Altimeter.py +# target space for collocation. 
Model (1), Altimeter (2), I don't know (0) +tspace: 2 +# Maximum distance (m) for pooling and averaging the data, See: https://doi.org/10.3390/rs15082203 +dlim: 5000.0 +# Maximum temporal distance (s) for pooling and averaging the data +maxti: 1 +# lat lon interval to read AODN altimeter data files (bins). Smaller areas read faster. +latmin: -66.15 +latmax: 66.15 +lonmin: 0. +lonmax: 360. +# Apply Quality Control. 1(yes), 0(no) +qc: 1 +# Minimum water depth, in meters +mindepth: 10. +# Minimum distance from the coast, in Km +mindfc: 5. +# Max RMS of the band significant wave height +max_swh_rms: 1.5 +# Max RMS of the backscatter coefficient +max_sig0_rms: 0.8 +# Max SWH Ku band quality control +max_swh_qc: 2.0 +# Processing. Number of procs for parallelization +npcs: 40 +# power of initial array 10**pia (size) that will be used to allocate satellite data (faster than append) +pia: 10 diff --git a/parm/configs_ursa/SARAL.yaml b/parm/configs_ursa/SARAL.yaml new file mode 100755 index 0000000..279330c --- /dev/null +++ b/parm/configs_ursa/SARAL.yaml @@ -0,0 +1,56 @@ +# Configuration file for WW3-tools +# https://github.com/NOAA-EMC/WW3-tools + +# text string for identification. This will be included in the name of the output files. +ftag: ww3tools + +# Data paths where the observations are saved +path_alt: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/AODN/altimeter +path_ndbc: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/NDBC/ncformat/wparam +path_copernicus: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/Copernicus/wtimeseries + +# WAVEWATCHIII field output, for the scripts to obtain the latitude and longitude +# grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ww3gfs_met5_2022061600.nc +grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ops.glwu.20220601.t19z.nc +# Grid format: Unstructured (0) or Structured (1) +grid_format: 1 + +# General quality control parameters. 
Basic range test +# Significant wave height (m) min and max: +hsmin: 0. +hsmax: 50. +# Wind speed (m/s) min and max: +wspmin: 0. +wspmax: 100. + +# netcdf format when saving outputs +fnetcdf: 'NETCDF4' + +# Parameters for ProcSat_Altimeter.py +# target space for collocation. Model (1), Altimeter (2), I don't know (0) +tspace: 2 +# Maximum distance (m) for pooling and averaging the data, See: https://doi.org/10.3390/rs15082203 +dlim: 5000.0 +# Maximum temporal distance (s) for pooling and averaging the data +maxti: 1 +# lat lon interval to read AODN altimeter data files (bins). Smaller areas read faster. +latmin: -81.49 +latmax: 81.45 +lonmin: 0. +lonmax: 360. +# Apply Quality Control. 1(yes), 0(no) +qc: 1 +# Minimum water depth, in meters +mindepth: 10. +# Minimum distance from the coast, in Km +mindfc: 5. +# Max RMS of the band significant wave height +max_swh_rms: 1.5 +# Max RMS of the backscatter coefficient +max_sig0_rms: 0.8 +# Max SWH Ku band quality control +max_swh_qc: 2.0 +# Processing. Number of procs for parallelization +npcs: 40 +# power of initial array 10**pia (size) that will be used to allocate satellite data (faster than append) +pia: 10 diff --git a/parm/configs_ursa/SENTINEL3A.yaml b/parm/configs_ursa/SENTINEL3A.yaml new file mode 100755 index 0000000..dcfd7a9 --- /dev/null +++ b/parm/configs_ursa/SENTINEL3A.yaml @@ -0,0 +1,56 @@ +# Configuration file for WW3-tools +# https://github.com/NOAA-EMC/WW3-tools + +# text string for identification. This will be included in the name of the output files. 
+ftag: ww3tools + +# Data paths where the observations are saved +path_alt: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/AODN/altimeter +path_ndbc: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/NDBC/ncformat/wparam +path_copernicus: /scratch3/NCEPDEV/climate/Jessica.Meixner/Data/buoys/Copernicus/wtimeseries + +# WAVEWATCHIII field output, for the scripts to obtain the latitude and longitude +# grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ww3gfs_met5_2022061600.nc +grid_info: /work/noaa/marine/ricardo.campos/work/ww3tools/tests/ops.glwu.20220601.t19z.nc +# Grid format: Unstructured (0) or Structured (1) +grid_format: 1 + +# General quality control parameters. Basic range test +# Significant wave height (m) min and max: +hsmin: 0. +hsmax: 50. +# Wind speed (m/s) min and max: +wspmin: 0. +wspmax: 100. + +# netcdf format when saving outputs +fnetcdf: 'NETCDF4' + +# Parameters for ProcSat_Altimeter.py +# target space for collocation. Model (1), Altimeter (2), I don't know (0) +tspace: 2 +# Maximum distance (m) for pooling and averaging the data, See: https://doi.org/10.3390/rs15082203 +dlim: 5000.0 +# Maximum temporal distance (s) for pooling and averaging the data +maxti: 1 +# lat lon interval to read AODN altimeter data files (bins). Smaller areas read faster. +latmin: -78.0 +latmax: 81.0 +lonmin: 0. +lonmax: 360. +# Apply Quality Control. 1(yes), 0(no) +qc: 1 +# Minimum water depth, in meters +mindepth: 10. +# Minimum distance from the coast, in Km +mindfc: 5. +# Max RMS of the band significant wave height +max_swh_rms: 1.5 +# Max RMS of the backscatter coefficient +max_sig0_rms: 0.8 +# Max SWH Ku band quality control +max_swh_qc: 2.0 +# Processing. 
Number of procs for parallelization +npcs: 40 +# power of initial array 10**pia (size) that will be used to allocate satellite data (faster than append) +pia: 10 diff --git a/ush/combineSatInterpOut.py b/ush/combineSatInterpOut.py new file mode 100644 index 0000000..beb8b7c --- /dev/null +++ b/ush/combineSatInterpOut.py @@ -0,0 +1,310 @@ +import numpy as np +import netCDF4 as nc +import datetime as dt +import os +import xarray as xr +import glob + +''' +Create combined NetCDF files for easier post processing. +''' + +# ================================================ +# =========== User-Defined Variables ============= +# ================================================ + +models = ['retrov17_01', 'GFSv16'] +WORKDIR = "/scratch3/NCEPDEV/marine/Ming.Chen/ursa/ww3tools" + +satellites = ['JASON3', 'CRYOSAT2', 'SARAL', 'SENTINEL3A'] + +# Custom date range (only used if use_seasons = False) +startdate = dt.datetime(2024, 11, 15, 12) # yyyy,m,d,h +enddate = dt.datetime(2024, 12, 17, 18) +interval_hours = 6 # interval between cycles in HOURS (e.g. 
6, 12, 24, 72) + +max_forecast_day = 16 + +force_season = "winter" # None = auto from data; or user override season as winter, summer, hurricane + +selected_years = ['2024'] # ['2024'], ['2024','2025'], [] = all + +output_all_in_one = True +output_per_day = True + +# ================================================= + +INPUTDIR_BASE = os.path.join(WORKDIR, "processsatdata", "outinterp") +OUTDIR = os.path.join(WORKDIR, "processsatdata", "outcombine") + +def determine_season(dt_obj): + month = dt_obj.month + day = dt_obj.day + + # Atlantic hurricane season (NOAA): June 1 – Nov 30 + if (month == 6 and day >= 1) or month in [7,8,9,10] or (month == 11 and day <= 30): + return "hurricane" + + # Meteorological winter: Dec–Feb + if month in [12, 1, 2]: + return "winter" + + # Meteorological summer: Jun–Aug + if month in [6, 7, 8]: + return "summer" + + return "other" + +def get_inputdir(model): + return os.path.join(INPUTDIR_BASE, model) + +def get_grids_for_model(model): + if model == "multi1": + return ['global.0p50', 'alaska.0p16', 'atlocn.0p16', 'epacif.0p16', 'wcoast.0p16', + 'alaska.0p06', 'atlocn.0p06', 'wcoast.0p06'] + elif model == "GFSv16": + return ['global.0p25'] + elif model == "retrov17_01": + return ['global.0p25'] + else: + return ['global.0p25'] + +def get_max_forecast_days(model, season): + return max_forecast_day + +def main(): + if not os.path.isdir(OUTDIR): + os.makedirs(OUTDIR) + + now = startdate + all_cycles = [] + while now <= enddate: + all_cycles.append(now) + now += dt.timedelta(hours=interval_hours) + + if selected_years: + cycles_to_process = [c for c in all_cycles if str(c.year) in selected_years] + else: + cycles_to_process = all_cycles + + if not cycles_to_process: + print("No cycles after year filter.") + return + + print(f"Processing {len(cycles_to_process)} cycles (interval {interval_hours}h)") + + for model in models: + print(f"\n=== Model: {model} ===") + grids = get_grids_for_model(model) + inputdir = get_inputdir(model) + + if not 
os.path.isdir(inputdir): + print(f" Directory not found: {inputdir}") + continue + + cycles_by_season = {} + for cycle_dt in cycles_to_process: + season = force_season if force_season else determine_season(cycle_dt) + cycles_by_season.setdefault(season, []).append(cycle_dt.strftime('%Y%m%d%H')) + + for season_name, date_strs in cycles_by_season.items(): + print(f" → {season_name}: {len(date_strs)} cycles") + + endday = get_max_forecast_days(model, season_name) + + years = sorted(set(d[:4] for d in date_strs)) + + for sat in satellites: + if len(years) == 1: + y = years[0] + y_dates = date_strs + data = collect_data(model, sat, y_dates, grids, inputdir, season_name) + if not data.get('time'): + continue + arr = {k: np.array(v) for k, v in data.items()} + adjust_wind(model, arr['fhrs'], arr['model_wnd']) + write_outputs(arr, model, sat, season_name, endday, year=y) + else: + for y in years: + y_dates = [d for d in date_strs if d.startswith(y)] + data = collect_data(model, sat, y_dates, grids, inputdir, season_name) + if not data.get('time'): + continue + arr = {k: np.array(v) for k, v in data.items()} + adjust_wind(model, arr['fhrs'], arr['model_wnd']) + write_outputs(arr, model, sat, season_name, endday, year=y) + + +def collect_data(model, sat, date_list, grids, inputdir, season_name): + collected = { + 'time':[], 'lats':[], 'lons':[], 'fhrs':[], 'fhrsall':[], + 'obs_hs':[], 'obs_hs_cal':[], 'obs_wnd':[], 'obs_wnd_cal':[], + 'model_hs':[], 'model_wnd':[] + } + for dstr in date_list: + cycle = process_one_cycle(model, sat, dstr, grids, inputdir, season_name) + if cycle: + for k in collected: + collected[k].extend(cycle[k]) + return collected + + +def process_one_cycle(model, sat, date_str, grids, inputdir, season_name): + base = None + for g_idx, grid in enumerate(grids): + # Adjust filename pattern if your files include season name or not + # Here assuming no season in filename: + fname = f"{model}_{grid}_{date_str}_{sat}.nc" + + path = os.path.join(inputdir, 
fname) + if not os.path.exists(path): + continue + + with nc.Dataset(path) as ds: + t = np.array(ds.variables['time'][:]) + fh = np.array(ds.variables['fcst_hr'][:]) + la = np.array(ds.variables['latitude'][:]) + lo = np.array(ds.variables['longitude'][:]) + ohs = np.array(ds.variables['obs_hs'][:]) + ohc = np.array(ds.variables['obs_hs_cal'][:]) + ow = np.array(ds.variables['obs_wnd'][:]) + owc = np.array(ds.variables['obs_wnd_cal'][:]) + mhs = np.array(ds.variables['model_hs'][:]) + mw = np.array(ds.variables['model_wnd'][:]) + + ict = ds.getncattr('initial_condition_time') + fha = (t - ict) / 3600.0 + + if g_idx == 0: + base = { + 'time':t, 'lats':la, 'lons':lo, + 'fhrs':fh, 'fhrsall':fha, + 'obs_hs':ohs, 'obs_hs_cal':ohc, + 'obs_wnd':ow, 'obs_wnd_cal':owc, + 'model_hs':mhs, 'model_wnd':mw + } + else: + if base is None: continue + base['model_hs'] = np.where(~np.isnan(mhs), mhs, base['model_hs']) + base['model_wnd'] = np.where(~np.isnan(mw), mw, base['model_wnd']) + + return base + + +def adjust_wind(model, fhrs, model_wnd): + if model in ["HR1", "HR2"]: + model_wnd[fhrs < 3] = np.nan + elif model in ["HR3a", "HR3b"]: + model_wnd[fhrs < 1] = np.nan + + +def write_outputs(arr, model, sat, season, endday, year): + year_str = f"_{year}" if year else "" + + def save(filename_suffix, t, la, lo, fh, ohs, ohc, ow, owc, mhs, mw): + # All variables as double (float64) + ds = xr.Dataset({ + 'time': xr.DataArray( + t.astype(np.float64), + dims=['time'], + name='time', + attrs={ + 'standard_name': 'time', + 'units': 'seconds since 1970-01-01 00:00:00', + 'calendar': 'standard', + 'axis': 'T' + } + ), + 'latitude': xr.DataArray( + la.astype(np.float64), + dims=['time'], + attrs={'units': 'degree_north'} + ), + 'longitude': xr.DataArray( + lo.astype(np.float64), + dims=['time'], + attrs={'units': 'degree_east'} + ), + 'model_hs': xr.DataArray( + mhs.astype(np.float64), + dims=['time'], + attrs={'units': 'm'} + ), + 'model_wnd': xr.DataArray( + mw.astype(np.float64), + 
dims=['time'], + attrs={'units': 'm'} + ), + 'obs_hs': xr.DataArray( + ohs.astype(np.float64), + dims=['time'], + attrs={'units': 'm'} + ), + 'obs_hs_cal': xr.DataArray( + ohc.astype(np.float64), + dims=['time'], + attrs={'units': 'm'} + ), + 'obs_wnd': xr.DataArray( + ow.astype(np.float64), + dims=['time'], + attrs={'units': 'm/s'} + ), + 'obs_wnd_cal': xr.DataArray( + owc.astype(np.float64), + dims=['time'], + attrs={'units': 'm/s'} + ), + 'fcst_hr': xr.DataArray( + fh.astype(np.float64), + dims=['time'], + attrs={ + 'description': 'Forecast hour relative to initial condition time', + 'units': 'hours' + } + ), + }) + + # Explicitly set _FillValue = NaN for all variables + encoding = { + var: {'_FillValue': np.nan, 'dtype': 'float64'} + for var in ds.variables + } + + # Global attributes + ds.attrs['satellite_name'] = sat + ds.attrs['model_name'] = model + + fullpath = os.path.join(OUTDIR, filename_suffix) + ds.to_netcdf( + fullpath, + format='NETCDF4', + encoding=encoding, + engine='netcdf4' + ) + print(f" Wrote: {os.path.basename(fullpath)} ({len(t)} points)") + + if output_all_in_one: + fname = f"combined_{model}_{season}_{sat}.nc" + save(fname, arr['time'], arr['lats'], arr['lons'], + arr['obs_hs'], arr['obs_hs_cal'], arr['obs_wnd'], arr['obs_wnd_cal'], + arr['model_hs'], arr['model_wnd'], arr['fhrs']) + + if output_per_day: + day0 = 0 + day = 1 + while day <= endday: + idx = (arr['fhrsall'] > day0*24) & (arr['fhrsall'] <= day*24) + if np.any(idx): + fname = f"combined_day{day:02d}_{model}_{season}_{sat}.nc" + save(fname, + arr['time'][idx], arr['lats'][idx], arr['lons'][idx], + arr['obs_hs'][idx], arr['obs_hs_cal'][idx], + arr['obs_wnd'][idx], arr['obs_wnd_cal'][idx], + arr['model_hs'][idx], arr['model_wnd'][idx], arr['fhrs'][idx]) + day0 = day + day += 1 + + +if __name__ == '__main__': + main() diff --git a/ush/makecombinemonthly.sh b/ush/makecombinemonthly.sh new file mode 100644 index 0000000..7c50ea3 --- /dev/null +++ b/ush/makecombinemonthly.sh @@ -0,0 
+1,116 @@ +#!/bin/bash +#SBATCH --nodes=1 +#SBATCH -q batch +#SBATCH -t 08:00:00 +#SBATCH -A marine-cpu +#SBATCH -J makecombinemonthly +#SBATCH -o makecombinemonthly.o%j +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=8 +#SBATCH --mem=64G + +set -euo pipefail + +# ================= USER'S INPUT ======================= +MACHINE="ursa" +WORKDIR="/scratch3/NCEPDEV/marine/Ming.Chen/ursa/ww3tools" + +satoutdir="/scratch3/NCEPDEV/climate/Jessica.Meixner/WaveEvaluation/processsatdata/out" # if satoutdir="", it will use default directory as ${WORKDIR}/processsatdata/out; or user can input directory + +SATS=("JASON3" "CRYOSAT2") + +NTHREADS=8 # Threads for CDO (usually match cpus-per-task) +# ====================================================== +basedir="${WORKDIR}/processsatdata/combineoutmonthly" + +if [[ -z "${satoutdir}" ]]; then + satoutdir="${WORKDIR}/processsatdata/out" +fi + +if [[ "$MACHINE" == "ursa" ]]; then + module use /contrib/spack-stack/spack-stack-1.9.2/envs/ue-oneapi-2024.2.1/install/modulefiles/Core + module use /contrib/spack-stack/spack-stack-1.9.2/envs/ue-oneapi-2024.2.1/install/modulefiles/intel-oneapi-mpi/2021.13-haww6b3/gcc/12.4.0 + module load stack-oneapi/2024.2.1 + module load stack-intel-oneapi-mpi/2021.13 + module load cdo/2.4.4 +elif [[ "$MACHINE" == "hercules" ]]; then + module use /apps/contrib/spack-stack/spack-stack-1.9.2/envs/ue-oneapi-2024.1.0/install/modulefiles/Core + module use /apps/contrib/spack-stack/spack-stack-1.9.2/envs/ue-oneapi-2024.1.0/install/modulefiles/intel-oneapi-mpi/2021.13-sqiixt7/gcc/13.3.0 + module load stack-oneapi/2024.2.1 + module load stack-intel-oneapi-mpi/2021.13 + module load cdo/2.4.4 +else + echo "ERROR: No module configuration defined for machine $MACHINE" + exit 1 +fi + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK:-1} + +# check directories +if [[ ! -d "$satoutdir" ]]; then + echo "ERROR: Satellite input directory does not exist: $satoutdir" + exit 1 +fi + +if [[ ! 
-d "$basedir" ]]; then + echo "Output directory does not exist → creating: $basedir" + mkdir -p "$basedir" +fi + +for SAT in "${SATS[@]}"; do + + echo "" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "Processing satellite: ${SAT}" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + + in_dir="${satoutdir}/${SAT}" + if [[ ! -d "$in_dir" ]]; then + echo "WARNING: input directory not found for $SAT: $in_dir -> skip" + continue + fi + + shopt -s nullglob + all_files=( "${in_dir}/AltimeterAlongTrack_ww3tools_${SAT}_"*.nc ) + shopt -u nullglob + + if (( ${#all_files[@]} == 0 )); then + echo "WARNING: no input files found for $SAT in $in_dir" + continue + fi + + # Extract unique YYYYMM from filenames + months=$( + printf "%s\n" "${all_files[@]}" \ + | sed -E 's|.*_([0-9]{6})[0-9]{4}to.*|\1|' \ + | sort -u + ) + + # Merge each month + while read -r ym; do + [[ -z "$ym" ]] && continue + + shopt -s nullglob + month_files=( "${in_dir}/AltimeterAlongTrack_ww3tools_${SAT}_${ym}"*.nc ) + shopt -u nullglob + + if (( ${#month_files[@]} == 0 )); then + echo " $ym: no files -> skip" + continue + fi + + # Deterministic order + IFS=$'\n' month_files_sorted=($(printf "%s\n" "${month_files[@]}" | sort)) + unset IFS + + outfile="${basedir}/Altimeter_${SAT}_${ym}.nc" + echo " $ym -> $(basename "$outfile") (nfiles=${#month_files_sorted[@]})" + + # Combine along time dimension (correct for along-track data) + cdo -O -P "${NTHREADS}" mergetime "${month_files_sorted[@]}" "$outfile" + + done <<< "$months" +done + +echo "" +echo "All processing completed." 
diff --git a/ush/makeprocsatsubmit.py b/ush/makeprocsatsubmit.py new file mode 100644 index 0000000..b45232e --- /dev/null +++ b/ush/makeprocsatsubmit.py @@ -0,0 +1,182 @@ +import datetime as dt +from dateutil.relativedelta import relativedelta +import os +import sys + +# ================================================ +# USER-EDITABLE CONFIGURATION +# ================================================ +MACHINE = "ursa" # or orion/herculus +WORKDIR = "/scratch3/NCEPDEV/marine/Ming.Chen/ursa/ww3tools" # working directory including WW3-tools and processeddata + +STARTDATE = "2024-11-15" # start date with formats YYYY-MM-DD or YYYYMMDD +ENDDATE = "2025-01-15" # end date with formats YYYY-MM-DD or YYYYMMDD + +SATELLITES = "JASON3,CRYOSAT2" # satellites using comma or space separated + +# SLURM settings +SBATCH_QUEUE = "batch" +SBATCH_ACCOUNT = "marine-cpu" +SBATCH_WALLTIME = "08:00:00" + +SBATCH_EXCLUSIVE = False # True: exclusive mode (whole node) + + # if SBATCH_EXCLUSIVE = True, the settings below are ignored +SBATCH_NODES = "1" +SBATCH_NTASKS = "1" +SBATCH_CPUS_PER_TASK = "4" +SBATCH_MEM = "64G" + +SET_THREAD_ENVS = True # Set OMP_NUM_THREADS, MKL_NUM_THREADS, etc. 
+ +# =============================================== + +ROOTDIR = os.path.join(WORKDIR, "processsatdata", "jobsubs") # output jobcards directory +THISDIR = os.path.join(WORKDIR, "WW3-tools", "parm") # config directory +PATHTOWW3TOOLS = os.path.join(WORKDIR, "WW3-tools", "ww3tools") # ww3tools directory (ProcSat_Altimeter.py) +OUT_BASE = os.path.join(WORKDIR, "processsatdata", "out") # output directory for processed data (origional defined in .yaml) + +MACHINE = MACHINE.strip().lower() + +# Module commands (different machines have different module builds and directories) +if MACHINE == "ursa": + MODULE_USE_PATH = "/scratch3/NCEPDEV/climate/Jessica.Meixner/general/modulefiles" + MODULE_LOAD = "ww3tools" + YAML_CONFIG_SUBDIR = "configs_ursa" + +elif MACHINE in ("orion", "hercules"): + MODULE_USE_PATH = "/work2/noaa/marine/jmeixner/general/modulefiles" + MODULE_LOAD = "ww3tools" + YAML_CONFIG_SUBDIR = "configs_orion" + +else: + print(f"ERROR: Unsupported MACHINE='{MACHINE}'. Use Ursa, Orion, or Hercules.", file=sys.stderr) + sys.exit(1) + +# Check required directories exist +if not os.path.isdir(THISDIR): + print(f"ERROR: THISDIR does not exist: {THISDIR}", file=sys.stderr) + sys.exit(1) + +if not os.path.isdir(PATHTOWW3TOOLS): + print(f"ERROR: PATHTOWW3TOOLS does not exist: {PATHTOWW3TOOLS}", file=sys.stderr) + sys.exit(1) + +# Create output directory if missing +if not os.path.isdir(ROOTDIR): + try: + os.makedirs(ROOTDIR) + print(f"Created output directory: {ROOTDIR}") + except Exception as e: + print(f"ERROR: Cannot create ROOTDIR: {e}", file=sys.stderr) + sys.exit(1) + +# Parse satellite list +sats = [s.strip() for s in SATELLITES.replace(",", " ").split() if s.strip()] +if not sats: + print("ERROR: No valid satellites provided", file=sys.stderr) + sys.exit(1) + +# Parse dates +def parse_date(s): + s = s.strip() + for fmt in ("%Y-%m-%d", "%Y%m%d"): + try: + return dt.datetime.strptime(s, fmt) + except ValueError: + continue + print(f"ERROR: Invalid date format: 
'{s}'. Use YYYY-MM-DD or YYYYMMDD.", file=sys.stderr) + sys.exit(1) + +# Generate date pairs (15-day steps + monthly overlap pattern) +dates1 = [] +dates2 = [] + +current = parse_date(STARTDATE) +enddate = parse_date(ENDDATE) + +while current <= enddate: + d1 = current.strftime("%Y%m%d") + d2 = (current + dt.timedelta(days=15)).strftime("%Y%m%d") + dates1.append(d1) + dates2.append(d2) + + dates1.append(d2) + current += relativedelta(months=+1) + dates2.append(current.strftime("%Y%m%d")) + +job_count = 0 + +for i in range(len(dates1)): + for sat in sats: + jobname = f"job_{sat}_{dates1[i]}.sh" + filepath = os.path.join(ROOTDIR, jobname) + + with open(filepath, "w", encoding="utf-8") as f: + f.write("#!/bin/bash\n\n") + + # Common SLURM directives + f.write(f"#SBATCH --nodes={SBATCH_NODES}\n") + f.write(f"#SBATCH -q {SBATCH_QUEUE}\n") + f.write(f"#SBATCH -t {SBATCH_WALLTIME}\n") + f.write(f"#SBATCH -A {SBATCH_ACCOUNT}\n") + f.write(f"#SBATCH -J procsat_{sat}_{dates1[i]}\n") + f.write(f"#SBATCH -o run_{sat}_{dates1[i]}.o%j\n") + + # Exclusive vs explicit resources + if SBATCH_EXCLUSIVE: + f.write("#SBATCH --exclusive\n") + else: + f.write(f"#SBATCH --ntasks={SBATCH_NTASKS}\n") + f.write(f"#SBATCH --cpus-per-task={SBATCH_CPUS_PER_TASK}\n") + f.write(f"#SBATCH --mem={SBATCH_MEM}\n") + + f.write("\n") + + # Module environment + if MODULE_USE_PATH.strip(): + f.write(f"module use {MODULE_USE_PATH}\n") + if MODULE_LOAD.strip(): + f.write(f"module load {MODULE_LOAD}\n") + if MODULE_USE_PATH.strip() or MODULE_LOAD.strip(): + f.write("\n") + + # Thread environment control (only in shared mode) + if SET_THREAD_ENVS and not SBATCH_EXCLUSIVE: + f.write("# Control number of threads for performance & memory\n") + f.write(f"export OMP_NUM_THREADS={SBATCH_CPUS_PER_TASK}\n") + f.write(f"export MKL_NUM_THREADS={SBATCH_CPUS_PER_TASK}\n") + f.write(f"export NUMEXPR_NUM_THREADS={SBATCH_CPUS_PER_TASK}\n") + f.write(f"export OPENBLAS_NUM_THREADS={SBATCH_CPUS_PER_TASK}\n") + f.write("\n") + 
+ # Job variables + f.write(f'ThisDir="{THISDIR}"\n') + f.write(f'PathToWW3TOOLS="{PATHTOWW3TOOLS}"\n') + f.write(f'SAT="{sat}"\n') + f.write(f'IDATE="{dates1[i]}00"\n') + f.write(f'EDATE="{dates2[i]}00"\n\n') + + # The processing command + f.write(f'YAMLFILE="${{ThisDir}}/{YAML_CONFIG_SUBDIR}/${{SAT}}.yaml"\n') + f.write(f'OUT_BASE="{OUT_BASE}"\n\n') + f.write('python "${PathToWW3TOOLS}/ProcSat_Altimeter.py" \\\n') + f.write(' --satelite "${SAT}" \\\n') + f.write(' --initdate "${IDATE}" \\\n') + f.write(' --enddate "${EDATE}" \\\n') + f.write(' --timestep 1.0 \\\n') + f.write(' --yaml "${YAMLFILE}" \\\n') + f.write(' --out_base "${OUT_BASE}"\n') + + # Make executable + os.chmod(filepath, 0o755) + job_count += 1 + print(f"Created: {jobname}") + +ush = os.path.join(WORKDIR, "WW3-tools", "ush", "run_all_jobs.sh") +os.makedirs(ROOTDIR, exist_ok=True) +cmd = f"cp {ush} {ROOTDIR}" +os.system(cmd) + +print(f"\nDone. Generated {job_count} job script(s) in:") +print(ROOTDIR) diff --git a/ush/makesubmitinterp.py b/ush/makesubmitinterp.py new file mode 100644 index 0000000..7acfa07 --- /dev/null +++ b/ush/makesubmitinterp.py @@ -0,0 +1,225 @@ +import datetime as dt +from dateutil.relativedelta import relativedelta +import os +import re +import glob + +## ===================== Setting (modified as needed) ========================= +MACHINE = "ursa" # machine name ursa/orion/hercules +WORKDIR = "/scratch3/NCEPDEV/marine/Ming.Chen/ursa/ww3tools" + +MODEL_BASE = "/scratch3/NCEPDEV/climate/Jessica.Meixner/Data/gfsv16" +SAT_BASE = "/scratch3/NCEPDEV/climate/Jessica.Meixner/WaveEvaluation/processsatdata/combineoutmonthly" # if empty, the default directory will be used as WORKDIR/processsatdata/combineoutmonthly + +# satellite and model settings +satellites=['JASON3', 'CRYOSAT2'] +model="GFSv16" # now only support model of GFSv16 and retrov17_01 +tz_list = ["00","06","12","18"] +grid = "global.0p25" + +# Slurm settings +SBATCH_ACCOUNT = "marine-cpu" +SBATCH_QUEUE = "batch" 
+SBATCH_TIME = "08:00:00" +SBATCH_NODES = 1 +SBATCH_NTASKS = 1 +SBATCH_CPUS_PER_TASK = 4 +SBATCH_MEM = "16G" +SET_THREAD_ENVS = True + +## --------------- Machine-specific configuration ------------------ + +if MACHINE == "ursa": + MODULE_USE_PATH = "/scratch4/NCEPDEV/marine/Saeideh.Banihashemi/installs/python-modules/" + MODULE_LOAD = "Ursa_ENV" +elif MACHINE in ("orion", "hercules"): + MODULE_USE_PATH = "/work2/noaa/marine/jmeixner/general/modulefiles" + MODULE_LOAD = "ww3tools" +else: + print(f"ERROR: Unsupported MACHINE='{MACHINE}'. Use Ursa, Orion, or Hercules.", file=sys.stderr) + sys.exit(1) + +## --------------- Directory settings ------------------------------ +rootdir = os.path.join(WORKDIR, "processsatdata", "jobinterp") + +if not SAT_BASE or not SAT_BASE.strip(): + SAT_BASE = os.path.join(WORKDIR, "processsatdata", "combineoutmonthly") + +OUTDIR_BASE = os.path.join(WORKDIR, "processsatdata", "outinterp", model) + +PROC_SCRIPT = os.path.join(WORKDIR, "WW3-tools", "ww3tools", "ProcSat_interpolation.py") + +## --------------- Checking inputs and settings -------------------- + +jobdir = os.path.join(rootdir, model) + +if not os.path.isdir(rootdir): + os.makedirs(rootdir, exist_ok=True) + +if not os.path.isdir(jobdir): + os.makedirs(jobdir, exist_ok=True) + +if not os.path.isdir(OUTDIR_BASE): + os.makedirs(OUTDIR_BASE, exist_ok=True) + +re_gfs = re.compile(r"^gfs\.(\d{8})$") +re_sat = re.compile(r"^Altimeter_(.+)_(\d{6})\.nc$") + +def discover_model_dates(model_base: str): + """ + Discover available model dates from folders: + gfs.YYYYMMDD + Returns a sorted list of YYYYMMDD strings. 
+ """ + dates = [] + for name in sorted(os.listdir(model_base)): + if re.match(r"^gfs\.\d{8}$", name): + yyyymmdd = name.split(".")[1] + dates.append(yyyymmdd) + return sorted(dates) + +def sat_month_available_all(sat_base: str, satellites, yyyymm: str) -> bool: + """ + Return True only if ALL satellites have monthly files for yyyymm: + Altimeter_{sat}_{yyyymm}.nc + """ + for sat in satellites: + fname = os.path.join(sat_base, f"Altimeter_{sat}_{yyyymm}.nc") + if not os.path.isfile(fname): + return False + return True + + +cdates = discover_model_dates(MODEL_BASE) + +total = len(cdates) +covered = 0 +missing = 0 + +for cdate in cdates: + yyyymm = cdate[:6] + if sat_month_available_all(SAT_BASE, satellites, yyyymm): + covered += 1 + else: + missing += 1 + +print("Satellite coverage verification:") +print(f" Total model dates : {total}") +print(f" Dates fully covered : {covered}") +print(f" Dates missing satellites : {missing}") + +# ------------------- Write jobcards ----------------------------------- +written = 0 +skipped_no_sat_all = 0 +skipped_no_model_gribs = 0 +missing_cycles = [] + +for cdate in cdates: + yyyymm = cdate[:6] + + sat_ok = True + for sat in satellites: + fname = os.path.join(SAT_BASE, f"Altimeter_{sat}_{yyyymm}.nc") + if not os.path.isfile(fname): + sat_ok = False + break + if not sat_ok: + skipped_no_sat_all += 1 + continue + + for tz in tz_list: + if model == "GFSv16": + model_gridded_dir = os.path.join(MODEL_BASE, f"gfs.{cdate}", tz, "wave", "gridded") + MODEL_DATA_PATTERN_TEMPLATE = "gfswave.t{tz}z.{grid}.f*.grib2" + elif model == "retrov17_01": + model_gridded_dir = os.path.join(MODEL_BASE, f"gfs.{cdate}", tz, "products", "wave", "gridded","global.0p25") + MODEL_DATA_PATTERN_TEMPLATE = "gfs.t{tz}z.{grid}.f*.grib2" + else: + print(f"ERROR: Unsupported Model.", file=sys.stderr) + sys.exit(1) + + if not os.path.isdir(model_gridded_dir): + skipped_no_model_gribs += 1 + missing_cycles.append(f"{cdate}{tz}") + continue + + pattern = 
MODEL_DATA_PATTERN_TEMPLATE.format(tz=tz, grid=grid) + gribs = glob.glob(os.path.join(model_gridded_dir, pattern)) + if len(gribs) == 0: + skipped_no_model_gribs += 1 + missing_cycles.append(f"{cdate}{tz}") + continue + + cdate_full = f"{cdate}{tz}" + outfile = os.path.join(jobdir, f"job_{model}_{grid}_{cdate_full}.sh") + outlog = os.path.join(jobdir, f"run_{model}_{grid}_{cdate_full}.o%j") + + with open(outfile, "w") as f: + f.write("#!/bin/bash\n") + f.write(f"#SBATCH --nodes={SBATCH_NODES}\n") + f.write(f"#SBATCH --ntasks={SBATCH_NTASKS}\n") + f.write(f"#SBATCH --cpus-per-task={SBATCH_CPUS_PER_TASK}\n") + f.write(f"#SBATCH --mem={SBATCH_MEM}\n") + f.write(f"#SBATCH -q {SBATCH_QUEUE}\n") + f.write(f"#SBATCH -t {SBATCH_TIME}\n") + f.write(f"#SBATCH -A {SBATCH_ACCOUNT}\n") + f.write(f"#SBATCH -J procsat_{model}_{grid}_{cdate_full}\n") + f.write(f"#SBATCH -o {outlog}\n") + + f.write(f"module use {MODULE_USE_PATH}\n") + f.write(f"module load {MODULE_LOAD}\n\n") + + f.write("set -euo pipefail\n") + f.write("set -x\n\n") + + if SET_THREAD_ENVS: + f.write("export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK\n") + f.write("export MKL_NUM_THREADS=$SLURM_CPUS_PER_TASK\n\n") + + f.write(f"MODEL={model}\n") + f.write(f"GRID={grid}\n") + f.write(f"DATE={cdate}\n") + f.write(f"TZ={tz}\n") + f.write(f"YYYYMM={yyyymm}\n") + f.write(f"OUTDIR={OUTDIR_BASE}\n") + f.write("mkdir -p ${OUTDIR}\n\n") + + if model == "GFSv16": + f.write(f"MODEL_DATA_DIR={MODEL_BASE}/gfs.${{DATE}}/${{TZ}}/wave/gridded\n") + elif model == "retrov17_01": + f.write(f"MODEL_DATA_DIR={MODEL_BASE}/gfs.${{DATE}}/${{TZ}}/products/wave/gridded/global.0p25\n") + else: + print(f"ERROR: Unsupported Model.", file=sys.stderr) + sys.exit(1) + + f.write(f"MODEL_DATA_PATTERN='{pattern}'\n\n") + + for sat in satellites: + f.write(f"SAT={sat}\n") + f.write(f"SATELLITE_FILE={SAT_BASE}/Altimeter_{sat}_${{YYYYMM}}.nc\n") + f.write(f"OUTPUT_FILE=${{MODEL}}_${{GRID}}_{cdate_full}_{sat}.nc\n") + f.write( + f"python {PROC_SCRIPT} " + 
f"-t grib2 -d $MODEL_DATA_DIR -p $MODEL_DATA_PATTERN " + f"-s $SATELLITE_FILE -o $OUTDIR -f $OUTPUT_FILE -m $MODEL\n\n" + ) + + os.chmod(outfile, 0o750) + written += 1 + +print("\nJobcard generation summary:") +print(f" Jobcards written : {written}") +print(f" Dates skipped (month missing ≥1 satellite) : {skipped_no_sat_all}") +print(f" Cycles skipped (missing model dir or GRIBs) : {skipped_no_model_gribs}") + +if missing_cycles: + for c in sorted(missing_cycles): + print(f" {c}") + +ush = os.path.join(WORKDIR, "WW3-tools", "ush", "run_all_jobs.sh") +os.makedirs(jobdir, exist_ok=True) +cmd = f"cp {ush} {jobdir}" +os.system(cmd) + + +print(f" Jobcards directory : {jobdir}") diff --git a/ush/run_all_jobs.sh b/ush/run_all_jobs.sh new file mode 100755 index 0000000..eec8994 --- /dev/null +++ b/ush/run_all_jobs.sh @@ -0,0 +1,114 @@ +#!/bin/bash +set -euo pipefail + +# ============================================================================= +# Auto-detect script directory (important!) +# ============================================================================= +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "${SCRIPT_DIR}" + +# ============================================================================= +# Configuration +# ============================================================================= +JOB_GLOB="job_*.sh" +BATCH=20 +MAX_WAIT_SECONDS=2400 # 40 minutes + +# Log file +TIMESTAMP=$(date +"%Y%m%d_%H%M%S") +LOG_FILE="${SCRIPT_DIR}/run_all_jobs_${TIMESTAMP}.log" + +# ============================================================================= +# Logging setup +# ============================================================================= +echo "Starting master script at $(date)" | tee "${LOG_FILE}" +echo "Running inside directory: ${SCRIPT_DIR}" | tee -a "${LOG_FILE}" +echo "Job glob: ${JOB_GLOB}" | tee -a "${LOG_FILE}" +echo "Batch size: ${BATCH}" | tee -a "${LOG_FILE}" +echo "Max wait per batch: $((MAX_WAIT_SECONDS/60)) minutes" | tee 
-a "${LOG_FILE}"
+echo "===========================================" | tee -a "${LOG_FILE}"
+
+exec > >(tee -a "${LOG_FILE}") 2>&1
+
+# =============================================================================
+# Find job scripts
+# =============================================================================
+mapfile -t JOB_SCRIPTS < <(ls ${JOB_GLOB} 2>/dev/null | sort -V)
+
+if [[ ${#JOB_SCRIPTS[@]} -eq 0 ]]; then
+    echo "ERROR: No job scripts found matching ${JOB_GLOB}" >&2
+    exit 1
+fi
+
+echo "FOUND ${#JOB_SCRIPTS[@]} job scripts:"
+printf '%5s %s\n' "#" "Filename"
+printf '%5s %s\n' "-----" "------------------------------"
+for i in "${!JOB_SCRIPTS[@]}"; do
+    printf '%5d %s\n' $((i+1)) "${JOB_SCRIPTS[$i]}"
+done
+echo "---------------------------------------"
+echo
+
+# =============================================================================
+# Submit in batches
+# =============================================================================
+total=${#JOB_SCRIPTS[@]}
+batch_id=1
+idx=0
+
+while [[ $idx -lt $total ]]; do
+    echo "=================================================="
+    echo "Batch ${batch_id}: submitting jobs $((idx+1)) to $((idx+BATCH < total ? idx+BATCH : total))"
+    echo "=================================================="
+
+    job_ids=()
+
+    for ((k=0; k<BATCH && idx<total; k++)); do
+        script="${JOB_SCRIPTS[$idx]}"
+        idx=$((idx+1))
+        submit_out=$(sbatch "${script}")
+        # sbatch prints "Submitted batch job <jobid>"; take the trailing number
+        job_id=$(echo "${submit_out}" | grep -oE '[0-9]+$' || true)
+        if [[ -n "${job_id}" ]]; then
+            job_ids+=("${job_id}")
+            echo "Submitted ${script} -> JobID: ${job_id}"
+        else
+            echo "WARNING: Failed to parse JobID (${submit_out})"
+        fi
+    done
+
+    # Wait for batch
+    if [[ ${#job_ids[@]} -gt 0 ]]; then
+        echo "Waiting for ${#job_ids[@]} jobs to finish..."
+
+        start_time=$(date +%s)
+        while true; do
+            remaining=$(squeue -h -j "$(IFS=,; echo "${job_ids[*]}")" --states=PENDING,RUNNING 2>/dev/null | wc -l || echo 0)
+
+            if [[ $remaining -eq 0 ]]; then
+                echo "Batch ${batch_id} completed."
+                break
+            fi
+
+            elapsed=$(( $(date +%s) - start_time ))
+            if [[ $elapsed -ge $MAX_WAIT_SECONDS ]]; then
+                echo "Batch ${batch_id} timeout reached. Continue next batch."
+ squeue -h -j "$(IFS=,; echo "${job_ids[*]}")" -o "%i %j %T %R" 2>/dev/null || true + break + fi + + sleep 30 + done + fi + + echo + batch_id=$((batch_id+1)) +done + +echo "==================================================" +echo "ALL JOBS SUBMITTED" +echo "Finished at $(date)" +echo "Log: ${LOG_FILE}" +echo "==================================================" diff --git a/ww3tools/ProcSat_Altimeter.py b/ww3tools/ProcSat_Altimeter.py index d05dca4..5aa3c2b 100755 --- a/ww3tools/ProcSat_Altimeter.py +++ b/ww3tools/ProcSat_Altimeter.py @@ -106,7 +106,7 @@ def along_track(AODN,atime,wconfig): # plot(AODN['LATITUDE'][indt],AODN['LONGITUDE'][indt],'.') lat=np.array(AODN['LATITUDE'][indt]) lon=np.array(AODN['LONGITUDE'][indt]); lon[lon>360]=lon[lon>360]-360. - + # Coordinates of a point firstcoord = (lat[0], lon[0]) segtrack = list(zip(lat, lon)) @@ -269,7 +269,7 @@ def gridded(AODN,atime,wconfig): ii=ii+len(indpqq) del auxfhsk, auxstdhsk, auxcounthsk, auxfhskcal, auxfwnd, auxfwndcal, indpqq - + del indt print('PyResample kdtree, hourly time, '+repr(t)+' of '+repr(atime.shape[0])) @@ -339,14 +339,14 @@ def savesat(AODN,wconfig,altsel): datein = datetime.utcfromtimestamp(AODN['TIME'].iloc[0]).strftime('%Y%m%d%H') datefin = datetime.utcfromtimestamp(AODN['TIME'].iloc[-1]).strftime('%Y%m%d%H') - #create path_out directory if it does not exist: + #create path_out directory if it does not exist: if not os.path.isdir(wconfig['path_out']): os.makedirs(wconfig['path_out']) fname=wconfig['path_out']+"Altimeter"+smethod+"_"+wconfig['ftag']+"_"+altsel+"_"+datein+"to"+datefin # Save netcdf - ncfile = nc.Dataset(fname+".nc", "w", format=wconfig['fnetcdf']) + ncfile = nc.Dataset(fname+".nc", "w", format=wconfig['fnetcdf']) ncfile.history=hmsg # create dimensions. 
ncfile.createDimension('time' , len(AODN['TIME'])) @@ -448,25 +448,34 @@ def savesat(AODN,wconfig,altsel): start = timeit.default_timer() ap = argparse.ArgumentParser() - ap.add_argument('-s', '--satelite', help="Satelite Name, one of JASON3,JASON2,CRYOSAT2,JASON1,HY2,SARAL,SENTINEL3A,ENVISAT,ERS1,ERS2,GEOSAT,GFO,TOPEX,SENTINEL3B,CFOSAT",required=True) + ap.add_argument('-s', '--satelite', help="Satelite Name, one of JASON3,JASON2,CRYOSAT2,JASON1,HY2,SARAL,SENTINEL3A,ENVISAT,ERS1,ERS2,GEOSAT,GFO,TOPEX,SENTINEL3B,CFOSAT",required=True) ap.add_argument('-i', '--initdate', help="Initial Date (YYYYMMDDHH)", default='1985010100') ap.add_argument('-e', '--enddate', help="Final Date (YYYYMMDDHH)", default=datetime.utcnow().strftime("%Y%m%d%H")) ap.add_argument('-t', '--timestep', help="time step (in seconds) to build the final time array (regular)", default=float(3600.)) - ap.add_argument('-y', '--yaml', help="WW3 tools yaml file", default='ww3tools.yaml') + ap.add_argument('-y', '--yaml', help="WW3 tools yaml file", default='ww3tools.yaml') + ap.add_argument('-o', '--out_base', help="Base output directory. 
Final output will be out_base/<satelite>/", default=None)
     MyArgs = ap.parse_args()
-    
+
     # WW3-tools configuration file
     wconfig=wread.readconfig(MyArgs.yaml)
-    # Set Satelite
+    # Optional override of output path from command line
+    if MyArgs.out_base:
+        wconfig['path_out'] = os.path.join(MyArgs.out_base, MyArgs.satelite) + os.sep
+    else:
+        # Ensure path_out ends with a separator to avoid string-concat path bugs
+        if 'path_out' in wconfig and wconfig['path_out'] and not wconfig['path_out'].endswith(os.sep):
+            wconfig['path_out'] = wconfig['path_out'] + os.sep
+
+    # Set Satelite
     altsel = MyArgs.satelite
     # Set date interval
     datemin = MyArgs.initdate
     datemax = MyArgs.enddate
     # Default time step (in seconds) to build the final time array (regular)
     time_step=np.double(MyArgs.timestep)
-    
+
     # read and organize AODN altimeter data for the mission and domain of interest
     AODN = wread.aodn_altimeter(altsel,wconfig,datemin,datemax)
     # AODN.to_csv(wconfig['path_out']+"AODN_altimeterSelection_"+datemin+"to"+datemax+".csv", index=False)