Add systematic stack and example

f4cdd882 · Frank Sauerburger · 0839a695 · f4cdd882 · f4cdd882 · f4cdd882
Unverified Commit f4cdd882 authored 4 years ago by Frank Sauerburger
--- a/examples/SystematicsBand.ipynb
+++ b/examples/SystematicsBand.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import seaborn as sns\n",
+    "\n",
+    "from nnfwtbn import Variable, Process, Cut, hist, SystStack, DataStack, \\\n",
+    "                    McStack, TruthStack\n",
+    "from nnfwtbn import toydata"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = toydata.get()\n",
+    "df = df.compute()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df.assign(variation=\"NOMINAL\")\n",
+    "\n",
+    "df_up = df.assign(higgs_m=df.higgs_m + 1 * df.jet_1_eta.abs(),\n",
+    "                  variation=\"1up\")\n",
+    "\n",
+    "df_down = df.assign(higgs_m=df.higgs_m - 1 * df.jet_2_eta.abs(),\n",
+    "                    variation=\"1down\")\n",
+    "\n",
+    "df = pd.concat([df, df_up, df_down], sort=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "p_ztt = Process(r\"$Z\\rightarrow\\tau\\tau$\", range=(0, 0))\n",
+    "p_sig = Process(r\"Signal\", range=(1, 1))\n",
+    "\n",
+    "p_bkg_up = Process(r\"Up\", lambda d: d.variation == \"1up\")\n",
+    "p_bkg_down = Process(r\"Down\", lambda d: d.variation == \"1down\")\n",
+    "p_ztt_nom = Process(r\"$Z\\rightarrow\\tau\\tau$\", lambda d: (d.variation == \"NOMINAL\") & (d.fpid == 0))\n",
+    "p_sig_nom = Process(r\"Signal\", lambda d: (d.variation == \"NOMINAL\") & (d.fpid == 1))\n",
+    "\n",
+    "p_asimov = Process(r\"Asimov\", selection=lambda d: (d.fpid >= 0) & (d.variation == \"NOMINAL\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "c_nominal = Cut(lambda d: d.variation == \"NOMINAL\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "colors = [\"windows blue\", \"amber\", \"greyish\", \"faded green\", \"dusty purple\"]\n",
+    "\n",
+    "s_bkg = McStack(p_ztt, p_sig, palette=sns.xkcd_palette(colors))\n",
+    "\n",
+    "s_bkg_up = TruthStack(p_bkg_up,\n",
+    "                   histtype=\"step\",\n",
+    "                   color='k',\n",
+    "                   palette=sns.xkcd_palette(colors))\n",
+    "\n",
+    "s_bkg_down = TruthStack(p_bkg_down,\n",
+    "                     histtype=\"step\",\n",
+    "                     color='k',\n",
+    "                     linestyle='--',\n",
+    "                     palette=sns.xkcd_palette(colors))\n",
+    "\n",
+    "s_bkg_nom = McStack(p_ztt_nom, p_sig_nom, palette=sns.xkcd_palette(colors))\n",
+    "\n",
+    "s_bkg_syst = SystStack(p_ztt, p_sig, palette=sns.xkcd_palette(colors))\n",
+    "\n",
+    "s_data = DataStack(p_asimov)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "v_higgs_m = Variable(r\"$m^H$\", \"higgs_m\", \"GeV\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "c_vbf = Cut(lambda d: d.dijet_p4__M  > 400) & \\\n",
+    "        Cut(lambda d: d.jet_1_p4__Pt >= 30) & \\\n",
+    "        Cut(lambda d: d.is_dijet_centrality == 1) & \\\n",
+    "        Cut(lambda d: d.jet_0_p4__Eta * df.jet_1_p4__Eta < 0) & \\\n",
+    "        Cut(lambda d: (d.jet_0_p4__Eta - df.jet_1_p4__Eta).abs() > 3)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Plot nominal"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hist(c_nominal(df), v_higgs_m, 20, [s_bkg, s_data], range=(0, 200), selection=None,\n",
+    "     weight=\"weight\",  ratio_label=\"Data / SM\", ratio_range=(0.1, 1.9))\n",
+    "None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Envelop"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hist(df, v_higgs_m, 20, [s_bkg_nom, s_bkg_up, s_bkg_down], range=(0, 200), selection=None,\n",
+    "     weight=\"weight\",  ratio_label=\"Up / Down\", ratio_range=(0.1, 1.9))\n",
+    "None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Full band"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hist(df, v_higgs_m, 20, [s_bkg_syst, s_data], range=(0, 200), selection=None,\n",
+    "     weight=\"weight\",  ratio_label=\"Data / SM\", ratio_range=(0.1, 1.9))\n",
+    "None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
+%% Cell type:code id: tags:
+
+``` python
+import pandas as pd
+import seaborn as sns
+
+from nnfwtbn import Variable, Process, Cut, hist, SystStack, DataStack, \
+                    McStack, TruthStack
+from nnfwtbn import toydata
+```
+
+%% Cell type:code id: tags:
+
+``` python
+df = toydata.get()
+df = df.compute()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+df = df.assign(variation="NOMINAL")
+
+df_up = df.assign(higgs_m=df.higgs_m + 1 * df.jet_1_eta.abs(),
+                  variation="1up")
+
+df_down = df.assign(higgs_m=df.higgs_m - 1 * df.jet_2_eta.abs(),
+                    variation="1down")
+
+df = pd.concat([df, df_up, df_down], sort=False)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
+p_sig = Process(r"Signal", range=(1, 1))
+
+p_bkg_up = Process(r"Up", lambda d: d.variation == "1up")
+p_bkg_down = Process(r"Down", lambda d: d.variation == "1down")
+p_ztt_nom = Process(r"$Z\rightarrow\tau\tau$", lambda d: (d.variation == "NOMINAL") & (d.fpid == 0))
+p_sig_nom = Process(r"Signal", lambda d: (d.variation == "NOMINAL") & (d.fpid == 1))
+
+p_asimov = Process(r"Asimov", selection=lambda d: (d.fpid >= 0) & (d.variation == "NOMINAL"))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+c_nominal = Cut(lambda d: d.variation == "NOMINAL")
+```
+
+%% Cell type:code id: tags:
+
+``` python
+colors = ["windows blue", "amber", "greyish", "faded green", "dusty purple"]
+
+s_bkg = McStack(p_ztt, p_sig, palette=sns.xkcd_palette(colors))
+
+s_bkg_up = TruthStack(p_bkg_up,
+                   histtype="step",
+                   color='k',
+                   palette=sns.xkcd_palette(colors))
+
+s_bkg_down = TruthStack(p_bkg_down,
+                     histtype="step",
+                     color='k',
+                     linestyle='--',
+                     palette=sns.xkcd_palette(colors))
+
+s_bkg_nom = McStack(p_ztt_nom, p_sig_nom, palette=sns.xkcd_palette(colors))
+
+s_bkg_syst = SystStack(p_ztt, p_sig, palette=sns.xkcd_palette(colors))
+
+s_data = DataStack(p_asimov)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_higgs_m = Variable(r"$m^H$", "higgs_m", "GeV")
+```
+
+%% Cell type:code id: tags:
+
+``` python
+c_vbf = Cut(lambda d: d.dijet_p4__M  > 400) & \
+        Cut(lambda d: d.jet_1_p4__Pt >= 30) & \
+        Cut(lambda d: d.is_dijet_centrality == 1) & \
+        Cut(lambda d: d.jet_0_p4__Eta * df.jet_1_p4__Eta < 0) & \
+        Cut(lambda d: (d.jet_0_p4__Eta - df.jet_1_p4__Eta).abs() > 3)
+```
+
+%% Cell type:markdown id: tags:
+
+# Plot nominal
+
+%% Cell type:code id: tags:
+
+``` python
+hist(c_nominal(df), v_higgs_m, 20, [s_bkg, s_data], range=(0, 200), selection=None,
+     weight="weight",  ratio_label="Data / SM", ratio_range=(0.1, 1.9))
+None
+```
+
+%% Cell type:markdown id: tags:
+
+# Envelop
+
+%% Cell type:code id: tags:
+
+``` python
+hist(df, v_higgs_m, 20, [s_bkg_nom, s_bkg_up, s_bkg_down], range=(0, 200), selection=None,
+     weight="weight",  ratio_label="Up / Down", ratio_range=(0.1, 1.9))
+None
+```
+
+%% Cell type:markdown id: tags:
+
+# Full band
+
+%% Cell type:code id: tags:
+
+``` python
+hist(df, v_higgs_m, 20, [s_bkg_syst, s_data], range=(0, 200), selection=None,
+     weight="weight",  ratio_label="Data / SM", ratio_range=(0.1, 1.9))
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
--- a/nnfwtbn/__init__.py
+++ b/nnfwtbn/__init__.py
@@ -8,5 +8,5 @@ from nnfwtbn.plot import HistogramFactory, hist, confusion_matrix, roc, \
 from nnfwtbn.model import CrossValidator, ClassicalCV, MixedCV, \
                          Normalizer, EstimatorNormalizer, \
                          HepNet
-from nnfwtbn.stack import Stack, McStack, DataStack
+from nnfwtbn.stack import Stack, McStack, DataStack, SystStack, TruthStack
 from nnfwtbn.interface import TmvaBdt
--- a/nnfwtbn/stack.py
+++ b/nnfwtbn/stack.py
@@ -165,6 +165,66 @@ class Stack:
        """
        return len(self.processes)

+
+DEFAULT_VARIATION_VAR = 'variation'
+DEFAULT_NOMINAL = 'NOMINAL'
+
+class SystStack(Stack):
+    def __init__(self, df, *args, **kwds):
+        super().__init__(df, *args, **kwds)
+        self.variation_var = DEFAULT_VARIATION_VAR
+        self.skip_variations = {DEFAULT_NOMINAL}
+        self.nominal = DEFAULT_NOMINAL
+        
+    def get_total(self, df, *args, **kwds):
+        nom_idx = (df[self.variation_var] == self.nominal)
+        return super().get_total(df[nom_idx], *args, **kwds)
+    
+    def get_hist(self, df, *args, **kwds):
+        nom_idx = (df[self.variation_var] == self.nominal)
+        return super().get_hist(df[nom_idx], *args, **kwds)
+    
+    def get_stat_uncertainty(self, df, *args, **kwds):
+        nom_idx = (df[self.variation_var] == self.nominal)
+        return super().get_total_uncertainty(df[nom_idx], *args, **kwds)
+    
+    def get_syst_uncertainty(self, df, bins, variable, weight,
+                             include_outside=False):
+        nominal_hist = self.get_total(df, bins, variable, weight,
+                                      include_outside=include_outside)
+        
+        envelop = []
+        for variation in df[self.variation_var].unique():
+            if variation in self.skip_variations:
+                continue
+                
+            variation_hist = 0
+            
+            # Can't use super().get_total because it falls back to this get_hist()
+            for i in range(len(self.processes)):
+                var_idx = (df[self.variation_var] == variation)
+                variation_hist += super().get_hist(df[var_idx],
+                                                   i, bins, variable, weight,
+                                                   include_outside)
+            variation_hist -= nominal_hist
+            envelop.append(variation_hist)
+            # envelop[variation] = variation_hist
+            
+        envelop = da.asarray(envelop)
+        return da.sqrt((envelop**2).sum(axis=0))
+
+    def get_total_uncertainty(self, *args, **kwds):
+        return da.sqrt(
+            self.get_stat_uncertainty(*args, **kwds)**2 +
+            self.get_syst_uncertainty(*args, **kwds)**2
+        )
+    
+
+class TruthStack(Stack):
+    
+    def get_total_uncertainty(self, df, bins, *args, **kwds):
+        return bins[:-1] * 0
+
 class McStack(Stack):
    """
    Short-hand class for a Stack with only Monte-Carlo-like processes.