plot updates for FR #448

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged

jpata merged 6 commits into main from jp_20260108_finalreading

Jan 15, 2026

mlpf/plotting/data_preparation.py

Original file line number
Diff line number
Diff line change
@@ -45,6 +45,8 @@ def load_nano(fn):
             "PuppiMET_phi",
             "RawPFMET_pt",
             "RawPFMET_phi",
+            "RawPuppiMET_pt",
+            "RawPuppiMET_phi",
             "Pileup_nPU",
             "Pileup_nTrueInt",
             "GenVtx_z",
@@ Expand Down @@

mlpf/plotting/plot_loss_curves.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -65,8 +65,8 @@ def loss_plot(epochs, train, test, margin=0.05, smoothing=False, ylabel="", titl
  
        l0 = "Training"

        l1 = "Validation"

        p0 = plt.plot(epochs, train, alpha=alpha, label=l0, marker="o", ls="--")

        p1 = plt.plot(epochs, test, alpha=alpha, label=l1, marker="x")

        p0 = plt.plot(epochs, train, alpha=alpha, label=l0, marker="o", ls="--", lw=3, ms=10)

        p1 = plt.plot(epochs, test, alpha=alpha, label=l1, marker="x", lw=3, ms=10)

        if smoothing:

            train_smooth = np.convolve(train[~np.isnan(train)], np.ones(5) / 5, mode="valid")

    @@ -82,17 +82,23 @@ def loss_plot(epochs, train, test, margin=0.05, smoothing=False, ylabel="", titl
  
        if last_valid_loss is not np.nan:

            plt.ylim(last_valid_loss * (1.0 - margin), last_valid_loss * (1.0 + margin))

        plt.legend(loc=3, frameon=False, fontsize=30)

        plt.xlabel("epoch")

        plt.legend(loc=3, frameon=False, fontsize=35)

        plt.xlabel("Epoch")

        plt.ylabel(ylabel)

        mplhep.cms.label("", data=False, rlabel="Run 3 configuration")

        mplhep.cms.label(llabel="Simulation", rlabel="Run 3 (14 TeV)")

        return fig, ax

    @click.command()

    @click.option("--input-dirs", "-i", multiple=True, required=True, help="Input directories containing history/epoch_*.json files.")

    @click.option("--labels", "-l", multiple=True, help="Labels for each input directory. Must be the same number as input-dirs.")

    @click.option("--output-dir", "-o", required=True, type=str, help="Output directory for plots.")

    @click.option(

        "--input-dirs",

        "-i",

        multiple=True,

        help="Input directories containing history/epoch_*.json files.",

        default=["/mnt/work/huggingface/particleflow/cms/v2.6.0pre1/pyg-cms_20250722_101813_274478"],

    )

    @click.option("--labels", "-l", multiple=True, help="Labels for each input directory. Must be the same number as input-dirs.", default=["v2.6.0pre1"])

    @click.option("--output-dir", "-o", type=str, help="Output directory for plots.", default="plots")

    def main(input_dirs, labels, output_dir):

        """

        Generates loss curve plots from training history files.

    @@ -123,7 +129,7 @@ def main(input_dirs, labels, output_dir):
  
            # Total loss

            fig, ax = loss_plot(

                history.index + 1, history["train_Total"].values, history["valid_Total"].values, margin=0.1, ylabel="Total loss", title=label

                history.index + 1, history["train_Total"].values, history["valid_Total"].values, margin=0.15, ylabel="Total Loss [a.u.]", title=label

            )

            plt.xticks(range(1, len(history) + 1))

            plt.savefig(output_path / f"{label}_loss.pdf")

    @@ -136,7 +142,7 @@ def main(input_dirs, labels, output_dir):
  
                    history["train_Classification"].values * 100,

                    history["valid_Classification"].values * 100,

                    margin=0.05,

                    ylabel="Particle ID loss x100",

                    ylabel="Particle ID Loss x100 [a.u.]",

                    title=label,

                )

                plt.xticks(range(1, len(history) + 1))

    @@ -149,8 +155,8 @@ def main(input_dirs, labels, output_dir):
  
                    history.index + 1,

                    history["train_Classification_binary"].values,

                    history["valid_Classification_binary"].values,

                    margin=0.1,

                    ylabel="Binary classification loss",

                    margin=0.15,

                    ylabel="Binary Classification Loss [a.u.]",

                    title=label,

                )

                plt.xticks(range(1, len(history) + 1))

    @@ -162,7 +168,7 @@ def main(input_dirs, labels, output_dir):
  
            if all([f"train_{loss}" in history.columns for loss in reg_losses]):

                reg_loss = sum([history[f"train_{loss}"].values for loss in reg_losses])

                val_reg_loss = sum([history[f"valid_{loss}"].values for loss in reg_losses])

                fig, ax = loss_plot(history.index + 1, reg_loss, val_reg_loss, margin=0.2, ylabel="Regression loss", title=label)

                fig, ax = loss_plot(history.index + 1, reg_loss, val_reg_loss, margin=0.2, ylabel="Regression Loss [a.u.]", title=label)

                plt.xticks(range(1, len(history) + 1))

                plt.savefig(output_path / f"{label}_reg_loss.pdf")

                plt.close(fig)

    @@ -174,7 +180,7 @@ def main(input_dirs, labels, output_dir):
  
                    history["train_ispu"].values,

                    history["valid_ispu"].values,

                    margin=0.5,

                    ylabel="PU classification loss",

                    ylabel="PU Classification Loss [a.u.]",

                    title=label,

                )

                plt.xticks(range(1, len(history) + 1))

    @@ -186,10 +192,10 @@ def main(input_dirs, labels, output_dir):
  
            # fig.suptitle(label, fontsize=35)

            loss_info = [

                {"name": "Total", "label": "Total Loss"},

                {"name": "Classification", "label": "PID Classification Loss"},

                {"name": "Classification_binary", "label": "Binary Classification Loss"},

                {"name": "Regression", "label": "Regression Loss"},

                {"name": "Total", "label": "Total Loss [a.u.]"},

                {"name": "Classification", "label": "PID Classification Loss [a.u.]"},

                {"name": "Classification_binary", "label": "Binary Classification Loss [a.u.]"},

                {"name": "Regression", "label": "Regression Loss [a.u.]"},

            ]

            for i, (ax, info) in enumerate(zip(axs, loss_info)):

    @@ -238,10 +244,10 @@ def main(input_dirs, labels, output_dir):
  
                    continue

                label = labels[i] if labels else Path(input_dirs[i]).name

                plt.plot(history.index + 1, history["valid_Total"], marker="o", label=label)

            plt.ylabel("Total valid. loss")

            plt.xlabel("epoch")

            plt.ylabel("Total Valid. Loss [a.u.]")

            plt.xlabel("Epoch")

            plt.legend(loc="best")

            mplhep.cms.label("", data=False, rlabel="Run 3 configuration")

            mplhep.cms.label(llabel="Simulation", rlabel="Run 3 (14 TeV)")

            plt.savefig(output_path / "loss_comparison.pdf")

            plt.close()

mlpf/plotting/plot_met_validation.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -58,15 +58,15 @@ def plot_met_distribution(
  
        h2 = to_bh(data_mlpf["PuppiMET_pt"], bins)

        plt.sca(a0)

        x0 = mplhep.histplot(h0, histtype="step", lw=2, label="Gen.", ls="--", color=gen_color)

        x0 = mplhep.histplot(h0, histtype="step", lw=2, label="Gen", ls="--", color=gen_color)

        x1 = mplhep.histplot(h1, histtype="step", lw=2, label="PF-PUPPI", ls=pf_linestyle, color=pf_color)

        x2 = mplhep.histplot(h2, histtype="step", lw=2, label=mlpf_label, ls=mlpf_linestyle, color=mlpf_color)

        if logy:

            plt.yscale("log")

            a0.set_ylim(bottom=1, top=a0.get_ylim()[1] * 1000)

        mplhep.cms.label("", data=False, com=tev, year="Run 3", ax=a0)

        mplhep.cms.label(llabel="Simulation", rlabel="Run 3 (13.6 TeV)", ax=a0)

        a0.text(

            sample_label_coords[0],

            sample_label_coords[1],

    @@ -127,7 +127,7 @@ def met_response_plot(
  
        ax = plt.axes()

        b = np.linspace(0, 5, 101)

        mplhep.cms.label("", data=False, com=tev, year="Run 3", ax=ax)

        mplhep.cms.label(llabel="Simulation", rlabel="Run 3 (13.6 TeV)", ax=ax)

        ax.text(

            sample_label_coords[0],

            sample_label_coords[1],

    @@ -325,7 +325,7 @@ def plot_met_response_vs_pu(resp_pf, resp_mlpf, data_pf, data_mlpf, output_dir,
  
        ax.set_xlabel("True $N_{PV}$")

        ax.set_ylabel("MET response resolution (IQR/med.)")

        ax.set_ylim(0, 1.5)

        mplhep.cms.label(ax=ax, data=False, com=tev, year="Run 3")

        mplhep.cms.label(llabel="Simulation", rlabel="Run 3 (13.6 TeV)", ax=ax)

        ax.text(

            kwargs["sample_label_coords"][0],

            kwargs["sample_label_coords"][1],

    @@ -368,16 +368,16 @@ def make_plots(input_pf_parquet, input_mlpf_parquet, output_dir, sample_name, te
  
        # plotting style variables

        legend_fontsize = 30

        legend_loc = (0.5, 0.55)

        legend_loc = (0.5, 0.5)

        legend_loc_scalereso = (0.50, 0.65)

        legend_loc_met_response = (0.3, 0.45)

        sample_label_fontsize = 30

        sample_label_fontsize = 35

        addtext_fontsize = 25

        jet_label_coords_single = 0.02, 0.88

        sample_label_coords = 0.02, 0.96

        gen_color = "#648df4"

        pf_color = "#f3a041"

        mlpf_color = "#d23b3d"

        jet_label_coords_single = 0.05, 0.88

        sample_label_coords = 0.05, 0.95

        gen_color = "#9C9CA1"

        pf_color = "#5790FC"

        mlpf_color = "#E42536"

        pf_linestyle = "-."

        mlpf_linestyle = "-"

        pf_label = "PF-PUPPI"

    @@ -511,7 +511,7 @@ def varbins(*args):
  
        ax.set_xscale("log")

        ax.set_ylim(0.0, 4.0)

        plt.axhline(1.0, color="black", ls="--")

        mplhep.cms.label(ax=ax, data=False, com=tev, year="Run 3")

        mplhep.cms.label(llabel="Simulation", rlabel="Run 3 (13.6 TeV)", ax=ax)

        ax.text(

            sample_label_coords[0], sample_label_coords[1], plot_sample_name, transform=ax.transAxes, fontsize=sample_label_fontsize, ha="left", va="top"

        )

    @@ -533,7 +533,7 @@ def varbins(*args):
  
        ax.legend(fontsize=legend_fontsize, loc=legend_loc_scalereso)

        ax.set_xscale("log")

        ax.set_ylim(0.0, 2.0)

        mplhep.cms.label(ax=ax, data=False, com=tev, year="Run 3")

        mplhep.cms.label(llabel="Simulation", rlabel="Run 3 (13.6 TeV)", ax=ax)

        ax.text(

            sample_label_coords[0], sample_label_coords[1], plot_sample_name, transform=ax.transAxes, fontsize=sample_label_fontsize, ha="left", va="top"

        )

mlpf/plotting/plot_validation.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -67,14 +67,14 @@ def make_plots(input_pf_parquet, input_mlpf_parquet, corrections_file, output_di
  
        legend_loc_effpur = (0.5, 0.68)

        legend_loc_scalereso = (0.40, 0.50)

        legend_loc_jet_response = (0.05, 0.55)

        sample_label_fontsize = 30

        sample_label_fontsize = 35

        addtext_fontsize = 25

        jet_label_coords = 0.02, 0.86

        jet_label_coords_single = 0.02, 0.88

        sample_label_coords = 0.02, 0.96

        gen_color = "#648df4"

        pf_color = "#f3a041"

        mlpf_color = "#d23b3d"

        jet_label_coords = 0.05, 0.85

        jet_label_coords_single = 0.05, 0.87

        sample_label_coords = 0.05, 0.95

        gen_color = "#9C9CA1"

        pf_color = "#5790FC"

        mlpf_color = "#E42536"

        gen_linestyle = "--"

        pf_linestyle = "-."

        mlpf_linestyle = "-"

    @@ -184,9 +184,9 @@ def plot_kinematic_distribution(
  
            h2 = to_bh(awkward.flatten(data_mlpf[f"{jet_prefix}_{variable_reco}"][mlpf_jet_mask]), bins)

            plt.sca(a0)

            x0 = mplhep.histplot(h0, histtype="step", lw=2, label="Gen.", ls=gen_linestyle)

            x1 = mplhep.histplot(h1, histtype="step", lw=2, label=pf_label, ls=pf_linestyle)

            x2 = mplhep.histplot(h2, histtype="step", lw=2, label=mlpf_label, ls=mlpf_linestyle)

            x0 = mplhep.histplot(h0, histtype="step", lw=2, label="Gen", ls=gen_linestyle, color=gen_color)

            x1 = mplhep.histplot(h1, histtype="step", lw=2, label=pf_label, ls=pf_linestyle, color=pf_color)

            x2 = mplhep.histplot(h2, histtype="step", lw=2, label=mlpf_label, ls=mlpf_linestyle, color=mlpf_color)

            if logy:

                plt.yscale("log")

notebooks/cms/cms-validate-root-vs-postprocessing.ipynb

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -53,20 +53,21 @@
  
       "outputs": [],

       "source": [

        "matplotlib.rcParams['axes.labelsize'] = 35\n",

        "legend_fontsize = 30\n",

        "legend_fontsize = 35\n",

        "sample_label_fontsize = 30\n",

        "addtext_fontsize = 25\n",

        "addtext_fontsize = 30\n",

        "\n",

        "jet_label_coords = 0.02, 0.82\n",

        "jet_label_coords_single = 0.02, 0.86\n",

        "sample_label_coords = 0.02, 0.96\n",

        "jet_label_coords = 0.05, 0.80\n",

        "jet_label_coords_single = 0.05, 0.86\n",

        "sample_label_coords = 0.05, 0.95\n",

        "jet_label_ak4 = \"AK4 ref. jets, $p_T$ > 3 GeV\"\n",

        "particle_label = \"$p_T$ > 0.5 GeV, $|\\eta|$ < 5\"\n",

        "legend_loc = (0.54, 0.48)\n",

        "\n",

        "default_cycler = plt.rcParams['axes.prop_cycle']\n",

        "\n",

        "pythia_color = \"#648df4\"\n",

        "target_color = \"#944b8a\"\n",

        "pythia_color = \"#9C9CA1\"\n",

        "target_color = \"#964A8B\"\n",

        "\n",

        "pythia_linestyle = \"--\"\n",

        "target_linestyle = \"-\""

    @@ -104,7 +105,7 @@
  
       "outputs": [],

       "source": [

        "#files = [pickle.load(open(fn, \"rb\")) for fn in glob.glob(\"/local/joosep/mlpf/cms/20250508_cmssw_15_0_5_d3c6d1/validation_plots/out*.pkl\")]\n",

        "files = [pickle.load(open(fn, \"rb\")) for fn in glob.glob(\"/mnt/work/particleflow/20250508_cmssw_15_0_5_d3c6d1/validation_plots/out*.pkl\")]\n",

        "files = [pickle.load(open(fn, \"rb\")) for fn in glob.glob(\"/mnt/work/particleflow/CMSSW_15_0_5_mlpf_v2.6.0pre1_puppi_2372e2/validation_plots/out*.pkl\")]\n",

        "ret = reduce(add_results, files, {})\n",

        "\n",

        "sample_keys = sorted(set([\"/\".join(k.split(\"/\")[0:2]) for k in ret.keys() if not k.startswith(\"combined\")]))\n",

    @@ -139,7 +140,7 @@
  
        "}\n",

        "\n",

        "def sample_label(ax, sample, additional_text=\"\"):\n",

        "    plt.text(sample_label_coords[0], sample_label_coords[1], EVALUATION_DATASET_NAMES[sample_labels[sample.replace(\"combined/\", \"\")]] + \"\\n\" + additional_text, ha=\"left\", va=\"top\", transform=ax.transAxes)"

        "    plt.text(sample_label_coords[0], sample_label_coords[1], EVALUATION_DATASET_NAMES[sample_labels[sample.replace(\"combined/\", \"\")]] + \"\\n\" + additional_text, ha=\"left\", va=\"top\", transform=ax.transAxes, fontsize=sample_label_fontsize)"

       ]

      },

      {

    @@ -167,16 +168,6 @@
  
        "    plt.savefig(\"{}_particle_met_2d.pdf\".format(sample.replace(\"/\", \"_\")), bbox_inches=\"tight\")"

       ]

      },

      {

       "cell_type": "code",

       "execution_count": null,

       "id": "44a895d1-f5f9-4f09-8134-126c34da6422",

       "metadata": {},

       "outputs": [],

       "source": [

        "sample_keys_combined"

       ]

      },

      {

       "cell_type": "code",

       "execution_count": null,

    @@ -195,7 +186,7 @@
  
        "    mplhep.histplot(h1, label=\"Target\", lw=2, color=target_color, ls=target_linestyle)\n",

        "    plt.xscale(\"log\")\n",

        "    plt.yscale(\"log\")\n",

        "    plt.legend(loc=(0.65, 0.7), fontsize=legend_fontsize)\n",

        "    plt.legend(loc=legend_loc, fontsize=legend_fontsize)\n",

        "    plt.ylim(1, 1e8)\n",

        "    mplhep.cms.label(\"\", data=False, com=14, year='Run 3')\n",

        "    sample_label(a0, sample)\n",

    @@ -272,13 +263,13 @@
  
        "    mplhep.histplot(h0, label=\"Pythia\", lw=2, color=pythia_color, ls=pythia_linestyle)\n",

        "    mplhep.histplot(h1, label=\"Target\", lw=2, color=target_color, ls=target_linestyle)\n",

        "    plt.xscale(\"log\")\n",

        "    plt.legend(fontsize=legend_fontsize)\n",

        "    plt.legend(fontsize=legend_fontsize, loc=legend_loc)\n",

        "    mplhep.cms.label(\"\", data=False, com=14, year='Run 3')\n",

        "    sample_label(a0, sample)\n",

        "    a0.text(jet_label_coords[0], jet_label_coords[1], jet_label_ak4, transform=a0.transAxes, fontsize=addtext_fontsize)\n",

        "    plt.yscale(\"log\")\n",

        "    plt.ylabel(\"Count\")\n",

        "    plt.ylim(1,1e8)\n",

        "    plt.ylim(100,1e8)\n",

        "\n",

        "    plt.sca(a1)\n",

        "    \n",

    @@ -311,7 +302,7 @@
  
        "    plt.figure()\n",

        "    ax = plt.axes()\n",

        "    mplhep.histplot(ret[f\"{sample}/jets_pt_ratio_target_pumask\"][bh.rebin(rebin)], yerr=False, label=\"Target\", lw=2, color=target_color, ls=target_linestyle)\n",

        "    plt.legend(fontsize=legend_fontsize)\n",

        "    plt.legend(loc=legend_loc, fontsize=legend_fontsize)\n",

        "    mplhep.cms.label(\"\", data=False, com=14, year='Run 3')\n",

        "    sample_label(ax, sample, jet_label_ak4)\n",

        "    a0.text(jet_label_coords_single[0], jet_label_coords_single[1], jet_label_ak4, transform=a0.transAxes, fontsize=addtext_fontsize)\n",

    @@ -336,7 +327,7 @@
  
        "    plt.sca(a0)\n",

        "    mplhep.histplot(ret[f\"{sample}/met_pythia\"][bh.rebin(rebin)], yerr=False, label=\"Pythia\", lw=2, color=pythia_color, ls=pythia_linestyle)\n",

        "    mplhep.histplot(ret[f\"{sample}/met_target_pumask\"][bh.rebin(rebin)], yerr=False, label=\"Target\", lw=2, color=target_color, ls=target_linestyle)\n",

        "    plt.legend(loc=(0.65, 0.7), fontsize=legend_fontsize)\n",

        "    plt.legend(loc=legend_loc, fontsize=legend_fontsize)\n",

        "    plt.yscale(\"log\")\n",

        "    plt.xscale(\"log\")\n",

        "    mplhep.cms.label(\"\", data=False, com=14, year='Run 3')\n",

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

plot updates for FR #448

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!

Uh oh!