From 83d606a7b952511f657d6b54e799914b495b54b5 Mon Sep 17 00:00:00 2001 From: Ming Chen Date: Mon, 12 Jan 2026 16:23:22 +0000 Subject: [PATCH 1/2] add global bias and RMSE capabilities --- ww3tools/pvalstats.py | 600 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 560 insertions(+), 40 deletions(-) diff --git a/ww3tools/pvalstats.py b/ww3tools/pvalstats.py index cfe6542..39cabfa 100755 --- a/ww3tools/pvalstats.py +++ b/ww3tools/pvalstats.py @@ -55,7 +55,7 @@ levels = np.array(np.linspace(0.1,2.3,31)).round(2) gmp = GlobalMapPlot(gdata=gsmooth(RmseGefsControl,1),latm=lat,lonm=lon, latmin=-67.,latmax=67.,palette = palette, levels=levels,ftag=figname,pextend=pextend) - gmp.plot() + gmp.plot() USAGE: Class ModelObsPlot @@ -123,7 +123,7 @@ # Font size and style sl=13 -matplotlib.rcParams.update({'font.size': sl}); plt.rc('font', size=sl) +matplotlib.rcParams.update({'font.size': sl}); plt.rc('font', size=sl) matplotlib.rc('xtick', labelsize=sl); matplotlib.rc('ytick', labelsize=sl); matplotlib.rcParams.update({'font.size': sl}) def interp_nan(data,lmt=10**50): @@ -151,15 +151,15 @@ def interp_nan(data,lmt=10**50): class ModelObsPlot: - def __init__(self,model=None,obs=None,time=None, - fctime=None,fctintervals=None,fctxt=None,fctxticks=None,fctunits=None,month=None,linreg=None,axisnames=None, - mlabels=[],vaxisname=None,linestyle=None,linestyle_html=None,marker=None,color=None,ftag='',figformat=None): + def __init__(self,model=None,obs=None,time=None,fctime=None,fctintervals=None,fctxt=None, + fctxticks=None,fctunits=None,month=None,linreg=None,axisnames=None,mlabels=[],vaxisname=None, + linestyle=None,linestyle_html=None,marker=None,color=None,ftag='',figformat=None, mtitle=None): """ Initialization of the main object used for the plot functions below. model and obs are mandatory. time is mandatory for the timeseries plot only. 
- Optional: + Optional: :param axisnames: adds specific model and observation axis names to qqplot and scatterplot, for ex axisnames=["WW3","Buoy"] :param vaxisname: adds specific axis names when only one axis is need, for timeseries (y-axis), month (y-axis), and pdf (x-axis), @@ -175,12 +175,13 @@ def __init__(self,model=None,obs=None,time=None, raise ValueError("Missing required input arguments.") return else: - self.model = np.array(np.atleast_2d(model)).astype('float') + self.model = np.array(np.atleast_2d(model)).astype('float') self.obs = np.array(np.atleast_2d(obs)).astype('float') self.figformat = figformat self.ftag = ftag self.mlabels = mlabels + self.mtitle = mtitle self.linreg = linreg self.vaxisname = vaxisname if vaxisname is not None else "Model and Observations" self.axisnames = axisnames if axisnames is not None else ["Model","Observations"] @@ -349,7 +350,7 @@ def timeseries(self): p.yaxis.axis_label = self.vaxisname else: p.yaxis.axis_label = "Model and Observations" - + output_file(self.ftag + 'TimeSeries.html'); show(p); del p def qqplot(self): @@ -400,9 +401,9 @@ def qqplot(self): ax.plot(aux,aregr,color=self.color[i],ls='-',linewidth=1.,alpha=0.8,zorder=4) ax.plot(aux,aregr,color='k',ls=':',linewidth=0.7,alpha=0.7,zorder=4) if np.size(self.mlabels)>0: - print(self.ftag+"QQplot "+self.mlabels[i]+": Slope "+np.str(np.round(float(r.slope),5))+", Intercept "+np.str(np.round(float(r.intercept),5))) + print(self.ftag+"QQplot "+self.mlabels[i]+": Slope "+str(np.round(float(r.slope),5))+", Intercept "+str(np.round(float(r.intercept),5))) else: - print(self.ftag+"QQplot: Slope "+np.str(np.round(float(r.slope),5))+", Intercept "+np.str(np.round(float(r.intercept),5))) + print(self.ftag+"QQplot: Slope "+str(np.round(float(r.slope),5))+", Intercept "+str(np.round(float(r.intercept),5))) del a,b,r,aregr @@ -416,19 +417,21 @@ def qqplot(self): plt.ylim(ymax = aux.max(), ymin = aux.min()) plt.xlim(xmax = aux.max(), xmin = aux.min()) - 
plt.locator_params(axis='y', nbins=7) ; plt.locator_params(axis='x', nbins=7) + plt.locator_params(axis='y', nbins=7) ; plt.locator_params(axis='x', nbins=7) ax.set_xlabel(self.axisnames[1]); ax.set_ylabel(self.axisnames[0]) if np.size(self.mlabels)>0: plt.legend(loc="upper left",fontsize=sl-2) + if self.mtitle is not None: + ax.set_title("Q-Q plot: " + self.mtitle, fontsize=sl-2) + plt.tight_layout() plt.savefig(self.ftag+'QQplot.png', dpi=200, facecolor='w', edgecolor='w',orientation='portrait', format='png',transparent=False, bbox_inches='tight', pad_inches=0.1) plt.close(fig1); del fig1, ax - - def scatterplot(self, dwscl='no'): + def scatterplot(self, dwscl='no', nsample=30000, method='uniform'): ''' Scatter plot. Inputs: @@ -440,6 +443,13 @@ def scatterplot(self, dwscl='no'): - ftag: Path to save the figure. Default is the current directory. - dwscl: Whether to apply downsampling. Default is 'no'. If set to 'no', downsampling is not applied. If set to any other value, downsampling is applied. + - nsample: Target number of samples after downsampling. Default is 30,000 + - method: Downsampling method. Options are: + - 'uniform': Evenly spaced sampling (default, original method) + - 'uniform_max': Selects the largest observation value in each evenly spaced sector. + - 'random': Random sampling of point + - 'datadensity': Sampling inversely proportional to data density + - 'quantile': Sampling equally for quantile bins Output: png figure saved in the local directory where python is running or in the path given through ftag. 
Example: from pvalstats import ModelObsPlot @@ -452,13 +462,74 @@ def scatterplot(self, dwscl='no'): mop.scatterplot() ''' - if self.obs[0,:].shape[0] > 50000 and dwscl != 'no': - sk = int(np.round(float(self.obs[0,:].shape[0]) / 30000., 0)) + total_points = self.obs[0,:].shape[0] + + # Downsampling logic + if total_points > 50000 and dwscl != 'no': + if nsample >= total_points: + obs_sampled = self.obs[0,:] + model_sampled = self.model + else: + if method == 'uniform': + sk = int(np.round(float(total_points) / nsample, 0)) + obs_sampled = self.obs[0,::sk] + model_sampled = self.model[:,::sk] + elif method == 'uniform_max': + sector_size = total_points // nsample + indices = [] + for i in range(nsample): + start = i * sector_size + end = min((i + 1) * sector_size, total_points) + sector_obs = self.obs[0, start:end] + if len(sector_obs) > 0: + max_idx = start + np.argmax(sector_obs) + indices.append(max_idx) + indices = np.array(indices) + obs_sampled = self.obs[0, indices] + model_sampled = self.model[:, indices] + elif method == 'random': + indices = np.random.choice(total_points, size=nsample, replace=False) + obs_sampled = self.obs[0, indices] + model_sampled = self.model[:, indices] + elif method == 'datadensity': + kde = gaussian_kde(self.obs[0,:]) + densities = kde(self.obs[0,:]) + probs = 1 / (densities + 1e-6) + probs /= probs.sum() + indices = np.random.choice(total_points, size=nsample, p=probs, replace=False) + obs_sampled = self.obs[0, indices] + model_sampled = self.model[:, indices] + elif method == 'quantile': + n_bins = nsample // 1000 + points_per_bin = nsample // n_bins + obs = self.obs[0,:] + quantiles = np.nanpercentile(obs, np.linspace(0, 100, n_bins + 1)) + indices = [] + for i in range(n_bins): + bin_mask = (obs >= quantiles[i]) & (obs < quantiles[i + 1]) + bin_indices = np.where(bin_mask)[0] + if len(bin_indices) > 0: + sampled = np.random.choice(bin_indices, size=min(points_per_bin, len(bin_indices)), replace=False) + 
indices.extend(sampled) + obs_sampled = obs[indices] + model_sampled = self.model[:, indices] + else: + raise ValueError("Invalid method. Choose 'uniform', 'random', 'datadensity', or 'quantile'.") else: - sk = 1 + obs_sampled = self.obs[0,:] + model_sampled = self.model + +# if self.obs[0,:].shape[0] > 50000 and dwscl != 'no': +# sk = int(np.round(float(self.obs[0,:].shape[0]) / 30000., 0)) +# else: +# sk = 1 - a = math.floor(np.nanmin(np.append(self.obs[:,::sk], self.model[:,::sk])) * 100.) / 100. - b = math.ceil(np.nanmax(np.append(self.obs[:,::sk], self.model[:,::sk])) * 100.) / 100. +# a = math.floor(np.nanmin(np.append(self.obs[:,::sk], self.model[:,::sk])) * 100.) / 100. +# b = math.ceil(np.nanmax(np.append(self.obs[:,::sk], self.model[:,::sk])) * 100.) / 100. + + # Determine plot limits + a = math.floor(np.nanmin(np.append(obs_sampled, model_sampled)) * 100.) / 100. + b = math.ceil(np.nanmax(np.append(obs_sampled, model_sampled)) * 100.) / 100. famin = a - 0.1 * a famax = b + 0.1 * a aux = np.linspace(famin, famax, 100) @@ -469,8 +540,12 @@ def scatterplot(self, dwscl='no'): ax = fig1.add_subplot(111) for i in range(0, self.model.shape[0]): - b = np.array(self.obs[0,::sk]) - a = np.array(self.model[i,::sk]) + # b = np.array(self.obs[0,::sk]) + # a = np.array(self.model[i,::sk]) + + b = np.array(obs_sampled) + a = np.array(model_sampled[i,:]) + ind = np.where((a*b) > -999.)[0] a = np.copy(a[ind]) b = np.copy(b[ind]) @@ -502,11 +577,11 @@ def scatterplot(self, dwscl='no'): ax.plot(aux, aregr, color=self.color[i], ls='-', linewidth=1., alpha=0.8, zorder=4) ax.plot(aux, aregr, color='k', ls=':', linewidth=0.7, alpha=0.7, zorder=4) if np.size(self.mlabels) > 0: - print(self.ftag + "ScatterPlot " + self.mlabels[i] + ": Slope " + np.str(np.round(float(r.slope), 5)) - + ", Intercept " + np.str(np.round(float(r.intercept), 5))) + print(self.ftag + "ScatterPlot " + self.mlabels[i] + ": Slope " + str(np.round(float(r.slope), 5)) + + ", Intercept " + 
str(np.round(float(r.intercept), 5))) else: - print(self.ftag + "ScatterPlot: Slope " + np.str(np.round(float(r.slope), 5)) + ", Intercept " - + np.str(np.round(float(r.intercept), 5))) + print(self.ftag + "ScatterPlot: Slope " + str(np.round(float(r.slope), 5)) + ", Intercept " + + str(np.round(float(r.intercept), 5))) del r, aregr del a, b @@ -520,10 +595,15 @@ def scatterplot(self, dwscl='no'): plt.grid(c='grey', ls=':', alpha=0.5, zorder=1) for i in np.array([50, 80, 95, 99]): - plt.axvline(x=np.nanpercentile(self.obs, int(i)), ls='--', color='grey', linewidth=1., alpha=0.7, zorder=1) - plt.text(np.nanpercentile(self.obs, int(i)), ((famax - famin) / 15) + famin, str(int(i)) + 'th', color='dimgrey', + #plt.axvline(x=np.nanpercentile(self.obs, int(i)), ls='--', color='grey', linewidth=1., alpha=0.7, zorder=1) + #plt.text(np.nanpercentile(self.obs, int(i)), ((famax - famin) / 15) + famin, str(int(i)) + 'th', color='dimgrey', + # fontsize=sl - 7, zorder=4) + #plt.text(np.nanpercentile(self.obs, int(i)), ((famax - famin) / 1.05) + famin, str(int(i)) + 'th', + # color='dimgrey', fontsize=sl - 7, zorder=4) + plt.axvline(x=np.nanpercentile(obs_sampled, int(i)), ls='--', color='grey', linewidth=1., alpha=0.7, zorder=1) + plt.text(np.nanpercentile(obs_sampled, int(i)), ((famax - famin) / 15) + famin, str(int(i)) + 'th', color='dimgrey', fontsize=sl - 7, zorder=4) - plt.text(np.nanpercentile(self.obs, int(i)), ((famax - famin) / 1.05) + famin, str(int(i)) + 'th', + plt.text(np.nanpercentile(obs_sampled, int(i)), ((famax - famin) / 1.05) + famin, str(int(i)) + 'th', color='dimgrey', fontsize=sl - 7, zorder=4) plt.gca().set_xlim(left=famin, right=famax) @@ -625,7 +705,7 @@ def taylordiagram(self): if i==0: sm.taylor_diagram(sdev.take([0,i+1]), crmsd.take([0,i+1]), ccoef.take([0,i+1]), locationColorBar = 'EastOutside', markerColor = self.color[i], # markerLegend = 'on', markerLabel = list(label.take([0,i+1])), - styleOBS = '-', colOBS = 'k', markerobs = 'o', titleOBS = 
'Obs', + styleOBS = '-', colOBS = 'k', markerobs = 'o', titleOBS = 'Obs', markerSize = 17, tickRMS = tRMS, axismax = axmax, alpha=0.7, tickSTD = tSTD, tickRMSangle = 87, showlabelsRMS = 'on', colRMS='dimgrey', styleRMS=':', widthRMS=1.0, titleRMS='off', @@ -634,17 +714,19 @@ def taylordiagram(self): else: sm.taylor_diagram(sdev.take([0,i+1]), crmsd.take([0,i+1]), ccoef.take([0,i+1]), overlay = 'on', locationColorBar = 'EastOutside', markerColor = self.color[i], - # markerLegend = 'on', markerLabel = list(label.take([0,i+1])), - styleOBS = '-', colOBS = 'k', markerobs = 'o', titleOBS = 'Obs', + # markerLegend = 'on', markerLabel = list(label.take([0,i+1])), + styleOBS = '-', colOBS = 'k', markerobs = 'o', titleOBS = 'Obs', markerSize = 17, tickRMS = tRMS, axismax = axmax, alpha=0.5, tickSTD = tSTD, tickRMSangle = 87, showlabelsRMS = 'on', colRMS='dimgrey', styleRMS=':', widthRMS=1.0, titleRMS='off', colSTD='k', styleSTD='-.', widthSTD=1.0, titleSTD ='on', colCOR='k', styleCOR='--', widthCOR=1.0, titleCOR='on') - if np.size(self.mlabels)>0: + if np.size(self.mlabels)>0: ax.text(fp1, fp2-(fp3*i), label[i+1], verticalalignment='bottom', horizontalalignment='right', transform=ax.transAxes,color=self.color[i], fontsize=fp4) + if self.mtitle is not None: + ax.set_title("Taylor Diagram: " + self.mtitle, fontsize=fp4, y=1.1) plt.tight_layout() plt.savefig(self.ftag+'TaylorDiagram.png', dpi=250, facecolor='w', edgecolor='w',orientation='portrait', format='png',transparent=False, bbox_inches='tight', pad_inches=0.1) @@ -686,7 +768,7 @@ def combinerrors(self): plt.grid(c='grey', ls=':', alpha=0.5,zorder=1) plt.axvline(x=0,ls='--',color='dimgrey',linewidth=1.,alpha=0.9,zorder=2) - plt.locator_params(axis='y', nbins=5) ; plt.locator_params(axis='x', nbins=5) + plt.locator_params(axis='y', nbins=5) ; plt.locator_params(axis='x', nbins=5) ax.set_xlabel("Normalized Bias"); ax.set_ylabel("Scatter Index") if (np.nanmax(fauxnb)-np.nanmin(fauxnb))<0.001: plt.ticklabel_format(axis="x", 
style="sci", scilimits=(0,0)) @@ -748,14 +830,14 @@ def pdf(self,result="no"): del de plt.grid(c='grey', ls=':', alpha=0.5,zorder=1) - plt.locator_params(axis='y', nbins=7) ; plt.locator_params(axis='x', nbins=7) + plt.locator_params(axis='y', nbins=7) ; plt.locator_params(axis='x', nbins=7) ax.set_ylabel("Probability Density") ax.set_xlabel(self.vaxisname) if np.size(self.mlabels)>0: plt.legend(fontsize=sl-2) - plt.tight_layout(); plt.axis('tight'); plt.ylim(ymin = -0.005) + plt.tight_layout(); plt.axis('tight'); plt.ylim(ymin = -0.005) plt.savefig(self.ftag+'PDF.png', dpi=200, facecolor='w', edgecolor='w',orientation='portrait', format='png',transparent=False, bbox_inches='tight', pad_inches=0.1) ax.set_yscale( "log" ); plt.axis('tight');plt.ylim(ymin = 0.001);plt.ylim(ymax = np.nanmax(mmax)) plt.savefig(self.ftag+'PDFlogscale.png', dpi=200, facecolor='w', edgecolor='w',orientation='portrait', format='png',transparent=False, bbox_inches='tight', pad_inches=0.1) @@ -900,7 +982,7 @@ def errxfctime(self,result="no"): # --- Ensemble Validation --- - # Rank Histogram (Talagrand diagram) + # Rank Histogram (Talagrand diagram) # https://journals.ametsoc.org/view/journals/mwre/129/3/1520-0493_2001_129_0550_iorhfv_2.0.co_2.xml def rankhist(self,result="no"): @@ -930,7 +1012,7 @@ def rankhist(self,result="no"): # plt.ylim(ymax = 1.01, ymin = -0.01) ax1.set_xticks(np.arange(1,13,1)) ax1.set_xlabel('Rank',size=sl); ax1.set_ylabel('Probability',size=sl) - plt.tight_layout(); # plt.axis('tight') + plt.tight_layout(); # plt.axis('tight') plt.grid(c='grey', ls='--', alpha=0.3) plt.savefig(self.ftag+'RankHistogram.png', dpi=200, facecolor='w', edgecolor='w',orientation='portrait', format='png',transparent=False, bbox_inches='tight', pad_inches=0.1) plt.close(fig1); del fig1, ax1, result @@ -1130,7 +1212,7 @@ def brierxfctime(self,threshold=None,result="no"): for j in range(0,obs.shape[0]): atrue_binary = 1 if obs[j] >= threshold else 0 abinary_forecasts = [1 if auxv >= threshold 
else 0 for auxv in model[:,j]] - atrue_binary = [atrue_binary]*len(abinary_forecasts) + atrue_binary = [atrue_binary]*len(abinary_forecasts) true_binary=np.append(true_binary,atrue_binary) binary_forecasts=np.append(binary_forecasts,abinary_forecasts) fprob_forecasts=np.append(fprob_forecasts,prob_forecasts) @@ -1183,7 +1265,7 @@ def gsmooth(data,gflev=1): """ Gaussian filter for spatial smoothness. data is a 2-dimensional field. data.shape must be equal to 2. - gflev is the level of smoothing. + gflev is the level of smoothing. """ from scipy.ndimage.filters import gaussian_filter data=np.array(data) @@ -1207,7 +1289,7 @@ def __init__(self,lat=None,lon=None,gdata=None,latmin=-85.,latmax=85., self.levels = levels self.pextend = pextend self.contcolor = contcolor - self.sl = sl + self.sl = sl self.ftag = ftag if ftag is not None else str(os.getcwd()) + '/GlobalMap.png' self.palette = palette if palette is not None else plt.cm.jet @@ -1226,7 +1308,7 @@ def plot(self): aux=np.nanpercentile(np.abs(self.gdata), 99.) self.levels = np.linspace(-aux,aux, 101); del aux else: - self.levels = np.linspace(0, np.nanpercentile(self.gdata, 99.), 101) + self.levels = np.linspace(0, np.nanpercentile(self.gdata, 99.), 101) plt.figure(figsize=(7, 4)) # ax = plt.axes(projection=ccrs.PlateCarree(central_longitude=-90)) @@ -1253,4 +1335,442 @@ def plot(self): plt.close('all') del ax +#------------------------------------------------------------- +#-------- global bias and rmse ------------------------------- +class GlobalSkillMap: + """ + GlobalSkillMap + ------------- + Compute and plot gridded global Bias and RMSE from collocated point matchups. 
+ + Designed to reproduce 'global_plot.py' style: + - lat/lon binning (centers) + - bias = mean(model - obs) + - rmse = sqrt(mean((model - obs)^2)) + - min_count masking + - pcolormesh map with fixed vmin/vmax (no cartopy) + + Parameters + ---------- + lat, lon : array-like, shape (N,) + model : array-like, shape (N,) or (M,N) + obs : array-like, shape (N,) + mlabels : list[str] or None + """ + def __init__(self, lat, lon, model, obs, mlabels=None): + self.lat = np.asarray(lat, dtype=float).ravel() + self.lon = np.asarray(lon, dtype=float).ravel() + self.obs = np.asarray(obs, dtype=float).ravel() + + model = np.asarray(model, dtype=float) + if model.ndim == 1: + model = model.reshape(1, -1) + elif model.ndim != 2: + raise ValueError("model must be shape (N,) or (M,N)") + self.model = model + + N = self.obs.size + if self.lat.size != N or self.lon.size != N or self.model.shape[1] != N: + raise ValueError("lat/lon/obs/model length mismatch") + + self.M = self.model.shape[0] + if mlabels is None: + self.mlabels = [f"Model{i+1}" for i in range(self.M)] + else: + if len(mlabels) != self.M: + raise ValueError("len(mlabels) must equal number of models (M)") + self.mlabels = list(mlabels) + + # ----------------------------- + # QC / filtering + # ----------------------------- + def apply_qc( + self, + latmin=None, + latmax=None, + lon_0_360=True, + model_min=None, + model_max=None, + obs_min=None, + obs_max=None, + extra_mask=None, + model_index_for_mask=None, + ): + """ + Return filtered copies of (lat, lon, model, obs). + This lets you match global_plot.py QC exactly. 
+ + Parameters + ---------- + latmin, latmax : float or None + lon_0_360 : bool + model_min/max : float or None + obs_min/max : float or None + extra_mask : boolean array (N,) or None + """ + lat = self.lat.copy() + lon = self.lon.copy() + obs = self.obs.copy() + model = self.model.copy() + + if lon_0_360: + lon = (lon + 360.0) % 360.0 + + mask = np.isfinite(lat) & np.isfinite(lon) & np.isfinite(obs) + if model_index_for_mask is None: + mask &= np.all(np.isfinite(model), axis=0) + else: + mi = int(model_index_for_mask) + if mi < 0 or mi >= model.shape[0]: + raise ValueError("model_index_for_mask out of range") + mask &= np.isfinite(model[mi, :]) + + if latmin is not None: + mask &= (lat >= latmin) + if latmax is not None: + mask &= (lat <= latmax) + + if obs_min is not None: + mask &= (obs >= obs_min) + if obs_max is not None: + mask &= (obs <= obs_max) + + if model_min is not None: + if model_index_for_mask is None: + mask &= np.all(model >= model_min, axis=0) + else: + mask &= (model[int(model_index_for_mask), :] >= model_min) + if model_max is not None: + if model_index_for_mask is None: + mask &= np.all(model <= model_max, axis=0) + else: + mask &= (model[int(model_index_for_mask), :] <= model_max) + + if extra_mask is not None: + extra_mask = np.asarray(extra_mask, dtype=bool).ravel() + if extra_mask.size != obs.size: + raise ValueError("extra_mask must have shape (N,)") + mask &= extra_mask + + lat_f = lat[mask] + lon_f = lon[mask] + obs_f = obs[mask] + model_f = model[:, mask] + return lat_f, lon_f, model_f, obs_f + + # ----------------------------- + # Gridding + # ----------------------------- + @staticmethod + def _bin_centers(x, d): + # center bins like floor(x/d)*d + d/2 + return (np.floor(x / d) * d) + (d / 2.0) + + @staticmethod + def _centers_to_edges(centers, d): + centers = np.asarray(centers, dtype=float) + centers = np.sort(np.unique(centers)) + # edges length = n+1 + return np.concatenate(([centers[0] - d / 2.0], centers + d / 2.0)) + + def 
gridded_bias_rmse( + self, + dlat=1.0, + dlon=1.0, + lon_0_360=True, + min_count=10, + latmin=-60.0, + latmax=60.0, + qc_kwargs=None, + ): + """ + Compute gridded bias/rmse/count per (lat,lon) bin. + + Returns + ------- + lat_centers : (nlat,) + lon_centers : (nlon,) + bias : (M, nlat, nlon) + rmse : (M, nlat, nlon) + count : (M, nlat, nlon) + """ + qc_kwargs = qc_kwargs or {} + lat, lon, model, obs = self.apply_qc( + latmin=latmin, + latmax=latmax, + lon_0_360=lon_0_360, + **qc_kwargs, + ) + + if lat.size == 0: + raise ValueError("No data left after QC/filtering.") + + lat_bin = self._bin_centers(lat, dlat) + lon_bin = self._bin_centers(lon, dlon) + + lat_centers = np.sort(np.unique(lat_bin)) + lon_centers = np.sort(np.unique(lon_bin)) + + nlat = lat_centers.size + nlon = lon_centers.size + M = model.shape[0] + + lat_index = {v: i for i, v in enumerate(lat_centers)} + lon_index = {v: j for j, v in enumerate(lon_centers)} + + # accumulators + sum_err = np.zeros((M, nlat, nlon), dtype=float) + sum_err2 = np.zeros((M, nlat, nlon), dtype=float) + cnt = np.zeros((M, nlat, nlon), dtype=float) + + # vectorized mapping to indices + ii = np.fromiter((lat_index[v] for v in lat_bin), dtype=int, count=lat_bin.size) + jj = np.fromiter((lon_index[v] for v in lon_bin), dtype=int, count=lon_bin.size) + + # accumulate per model + for m in range(M): + err = model[m, :] - obs + # all arrays are 1D same length + np.add.at(sum_err[m], (ii, jj), err) + np.add.at(sum_err2[m], (ii, jj), err * err) + np.add.at(cnt[m], (ii, jj), 1.0) + + bias = np.full((M, nlat, nlon), np.nan, dtype=float) + rmse = np.full((M, nlat, nlon), np.nan, dtype=float) + + valid = cnt >= float(min_count) + bias[valid] = (sum_err[valid] / cnt[valid]) + rmse[valid] = np.sqrt(sum_err2[valid] / cnt[valid]) + + return lat_centers, lon_centers, bias, rmse, cnt + + def global_plot_like_arrays( + self, + dlat=1.0, + dlon=1.0, + lon_0_360=True, + min_count=10, + latmin=-60.0, + latmax=60.0, + qc_kwargs=None, + ): + """ + 
Convenience: returns mesh edges + grids for pcolormesh. + """ + latc, lonc, bias, rmse, cnt = self.gridded_bias_rmse( + dlat=dlat, + dlon=dlon, + lon_0_360=lon_0_360, + min_count=min_count, + latmin=latmin, + latmax=latmax, + qc_kwargs=qc_kwargs, + ) + + lon_edges = self._centers_to_edges(lonc, dlon) + lat_edges = self._centers_to_edges(latc, dlat) + LON_edges, LAT_edges = np.meshgrid(lon_edges, lat_edges) + + return LON_edges, LAT_edges, latc, lonc, bias, rmse, cnt + + # ----------------------------- + # Plotting (global_plot.py style) + # ----------------------------- + def plot_global_like_global_plot( + self, + metric="bias", # "bias" or "rmse" + model_index=0, + dlat=1.0, + dlon=1.0, + lon_0_360=True, + min_count=10, + latmin=-60.0, + latmax=60.0, + qc_kwargs=None, + vmax=None, # required for exact matching scale + cmap=None, # default matches global_plot choices + title=None, + cbar_label=None, + outfile=None, # if provided, saves figure + dpi=150, + figsize=(14, 4), + show=False, + use_pandas=True, + required_mask_arrays=None, + ): + """ + Produce a pcolormesh plot matching global_plot.py style. 
+ + Notes + ----- + - For bias: uses symmetric vmin/vmax = (-vmax, +vmax) with RdBu_r + - For rmse: uses vmin=0, vmax=vmax with jet + """ + def _grid_pandas(lat, lon, model_1d, obs_1d, dlat, dlon, min_count): + import pandas as _pd + import numpy as _np + + df = _pd.DataFrame({ + "latitude": _np.asarray(lat, dtype=float), + "longitude": _np.asarray(lon, dtype=float), + "model": _np.asarray(model_1d, dtype=float), + "obs": _np.asarray(obs_1d, dtype=float), + }) + + # Drop rows with missing critical values (exactly like global_plot.py) + df = df.dropna(subset=["latitude", "longitude", "model", "obs"]) + + # Errors + df["err"] = df["model"] - df["obs"] + + # Bin centers (floor + half-bin) + lon_norm = (df["longitude"].to_numpy() + 360.0) % 360.0 + df["lat_bin"] = (_np.floor(df["latitude"].to_numpy() / dlat) * dlat) + dlat / 2.0 + df["lon_bin"] = (_np.floor(lon_norm / dlon) * dlon) + dlon / 2.0 + + grouped = df.groupby(["lat_bin", "lon_bin"]) + + stats = grouped.agg( + count=("err", "size"), + bias=("err", "mean"), + mse=("err", lambda x: _np.mean(_np.asarray(x, dtype=float) ** 2)), + ).reset_index() + + stats["rmse"] = _np.sqrt(stats["mse"]) + + lat_centers = _np.sort(stats["lat_bin"].unique()) + lon_centers = _np.sort(stats["lon_bin"].unique()) + + nlat = len(lat_centers) + nlon = len(lon_centers) + + bias_grid = _np.full((nlat, nlon), _np.nan, dtype=float) + rmse_grid = _np.full((nlat, nlon), _np.nan, dtype=float) + count_grid = _np.full((nlat, nlon), _np.nan, dtype=float) + + lat_index = {v: i for i, v in enumerate(lat_centers)} + lon_index = {v: j for j, v in enumerate(lon_centers)} + + for _, row in stats.iterrows(): + i = lat_index[row["lat_bin"]] + j = lon_index[row["lon_bin"]] + bias_grid[i, j] = row["bias"] + rmse_grid[i, j] = row["rmse"] + count_grid[i, j] = row["count"] + + # Build edges for pcolormesh + lon_edges = _np.concatenate(([lon_centers[0] - dlon / 2.0], lon_centers + dlon / 2.0)) + lat_edges = _np.concatenate(([lat_centers[0] - dlat / 2.0], 
lat_centers + dlat / 2.0)) + LON_edges, LAT_edges = _np.meshgrid(lon_edges, lat_edges) + + # Mask bins with too few obs + bias_plot = _np.where(count_grid >= min_count, bias_grid, _np.nan) + rmse_plot = _np.where(count_grid >= min_count, rmse_grid, _np.nan) + + return LON_edges, LAT_edges, bias_plot, rmse_plot, count_grid + + if metric not in ("bias", "rmse"): + raise ValueError("metric must be 'bias' or 'rmse'") + + if model_index < 0 or model_index >= self.M: + raise ValueError("model_index out of range") + + if vmax is None: + raise ValueError("vmax must be provided to match global_plot.py scaling") + + # Build gridded fields + if use_pandas: + # QC like global_plot.py + optional additional required arrays (e.g., wind, fcst_hr) + qc_kwargs2 = dict(qc_kwargs or {}) + qc_kwargs2.setdefault("latmin", latmin) + qc_kwargs2.setdefault("latmax", latmax) + qc_kwargs2.setdefault("lon_0_360", lon_0_360) + + extra_mask = None + if required_mask_arrays is not None: + # Require all provided arrays to be finite (mimics df.dropna on those fields) + extra_mask = np.ones(self.obs.size, dtype=bool) + for _k, _arr in dict(required_mask_arrays).items(): + _a = np.asarray(_arr, dtype=float).ravel() + if _a.size != self.obs.size: + raise ValueError(f"required_mask_arrays['{_k}'] length mismatch") + extra_mask &= np.isfinite(_a) + qc_kwargs2["extra_mask"] = extra_mask + + lat_f, lon_f, model_f, obs_f = self.apply_qc( + model_index_for_mask=model_index, + **qc_kwargs2, + ) + model_1d = model_f[model_index, :] + + # Also enforce lon normalization here (global_plot does it before binning) + if lon_0_360: + lon_f = (lon_f + 360.0) % 360.0 + + LONe, LATe, bias_plot, rmse_plot, cnt = _grid_pandas( + lat_f, lon_f, model_1d, obs_f, dlat, dlon, min_count + ) + field = bias_plot if metric == "bias" else rmse_plot + + else: + LONe, LATe, latc, lonc, bias, rmse, cnt = self.global_plot_like_arrays( + dlat=dlat, + dlon=dlon, + lon_0_360=lon_0_360, + min_count=min_count, + latmin=latmin, + 
latmax=latmax, + qc_kwargs=qc_kwargs, + ) + field = bias[model_index] if metric == "bias" else rmse[model_index] + + if metric == "bias": + vmin, vmax2 = -float(vmax), float(vmax) + if cmap is None: + cmap = "RdBu_r" + if cbar_label is None: + cbar_label = "Bias (model − obs)" + else: + vmin, vmax2 = 0.0, float(vmax) + if cmap is None: + cmap = "jet" + if cbar_label is None: + cbar_label = "RMSE" + + fig = plt.figure(figsize=figsize, dpi=dpi) + ax = plt.gca() + + pcm = ax.pcolormesh( + LONe, LATe, field, + shading="auto", + cmap=cmap, + vmin=vmin, + vmax=vmax2, + ) + + cb = plt.colorbar(pcm, ax=ax, orientation="horizontal" if metric == "rmse" else "vertical") + cb.set_label(cbar_label) + + if title: + ax.set_title(title) + + ax.set_xlabel("Longitude") + ax.set_ylabel("Latitude") + if lon_0_360: + ax.set_xlim(0, 360) + ax.set_ylim(latmin, latmax) + + # mimic global_plot style: no coastlines, no grid by default + ax.grid(False) + + if outfile is not None: + outdir = os.path.dirname(outfile) + if outdir: + os.makedirs(outdir, exist_ok=True) + plt.savefig(outfile, bbox_inches="tight") + plt.close(fig) + else: + if show: + plt.show() + return From 25850e9b4ff81adee625a59500ca6728db15cf4f Mon Sep 17 00:00:00 2001 From: Ming Chen Date: Mon, 12 Jan 2026 16:33:00 +0000 Subject: [PATCH 2/2] add instructions and examples --- ww3tools/pvalstats.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ww3tools/pvalstats.py b/ww3tools/pvalstats.py index 39cabfa..b5f6eef 100755 --- a/ww3tools/pvalstats.py +++ b/ww3tools/pvalstats.py @@ -1337,6 +1337,29 @@ def plot(self): #------------------------------------------------------------- #-------- global bias and rmse ------------------------------- +#------------------------------------------------------------- +# +# How to plot: +# 1. create global map class +# example: +# gsm = GlobalSkillMap(lat=lat, lon=lon, model=model_hs, obs=obs_hs, mlabels=[model_key]) +# 2. 
plot using gsm
+# example:
+# gsm.plot_global_like_global_plot(
+# metric="bias",
+# model_index=0,
+# dlat=DLAT,
+# dlon=DLON,
+# lon_0_360=True,
+# min_count=MIN_COUNT,
+# latmin=LATMIN,
+# latmax=LATMAX,
+# qc_kwargs=QC_HS,
+# vmax=HS_BIAS_VMAX,
+# title=f"Hs Bias – (unknown) ({model_key} vs {satellite_name})",
+# outfile=os.path.join(out_model_dir, f"plot_Hs_(unknown)_{satellite_name}_global_Bias.png"),
+# )
+
 class GlobalSkillMap:
     """
     GlobalSkillMap