Skip to content
Snippets Groups Projects
Unverified Commit f4cdd882 authored by Frank Sauerburger's avatar Frank Sauerburger
Browse files

Add systematic stack and example

parent 0839a695
Branches 59-add-systematic-band-stack
No related tags found
1 merge request!54Resolve "Add systematic band stack"
Pipeline #12624 passed
%% Cell type:code id: tags:
``` python
import pandas as pd
import seaborn as sns
from nnfwtbn import Variable, Process, Cut, hist, SystStack, DataStack, \
McStack, TruthStack
from nnfwtbn import toydata
```
%% Cell type:code id: tags:
``` python
df = toydata.get()
df = df.compute()
```
%% Cell type:code id: tags:
``` python
df = df.assign(variation="NOMINAL")
df_up = df.assign(higgs_m=df.higgs_m + 1 * df.jet_1_eta.abs(),
variation="1up")
df_down = df.assign(higgs_m=df.higgs_m - 1 * df.jet_2_eta.abs(),
variation="1down")
df = pd.concat([df, df_up, df_down], sort=False)
```
%% Cell type:code id: tags:
``` python
p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
p_sig = Process(r"Signal", range=(1, 1))
p_bkg_up = Process(r"Up", lambda d: d.variation == "1up")
p_bkg_down = Process(r"Down", lambda d: d.variation == "1down")
p_ztt_nom = Process(r"$Z\rightarrow\tau\tau$", lambda d: (d.variation == "NOMINAL") & (d.fpid == 0))
p_sig_nom = Process(r"Signal", lambda d: (d.variation == "NOMINAL") & (d.fpid == 1))
p_asimov = Process(r"Asimov", selection=lambda d: (d.fpid >= 0) & (d.variation == "NOMINAL"))
```
%% Cell type:code id: tags:
``` python
c_nominal = Cut(lambda d: d.variation == "NOMINAL")
```
%% Cell type:code id: tags:
``` python
colors = ["windows blue", "amber", "greyish", "faded green", "dusty purple"]
s_bkg = McStack(p_ztt, p_sig, palette=sns.xkcd_palette(colors))
s_bkg_up = TruthStack(p_bkg_up,
histtype="step",
color='k',
palette=sns.xkcd_palette(colors))
s_bkg_down = TruthStack(p_bkg_down,
histtype="step",
color='k',
linestyle='--',
palette=sns.xkcd_palette(colors))
s_bkg_nom = McStack(p_ztt_nom, p_sig_nom, palette=sns.xkcd_palette(colors))
s_bkg_syst = SystStack(p_ztt, p_sig, palette=sns.xkcd_palette(colors))
s_data = DataStack(p_asimov)
```
%% Cell type:code id: tags:
``` python
v_higgs_m = Variable(r"$m^H$", "higgs_m", "GeV")
```
%% Cell type:code id: tags:
``` python
c_vbf = Cut(lambda d: d.dijet_p4__M > 400) & \
Cut(lambda d: d.jet_1_p4__Pt >= 30) & \
Cut(lambda d: d.is_dijet_centrality == 1) & \
Cut(lambda d: d.jet_0_p4__Eta * df.jet_1_p4__Eta < 0) & \
Cut(lambda d: (d.jet_0_p4__Eta - df.jet_1_p4__Eta).abs() > 3)
```
%% Cell type:markdown id: tags:
# Plot nominal
%% Cell type:code id: tags:
``` python
hist(c_nominal(df), v_higgs_m, 20, [s_bkg, s_data], range=(0, 200), selection=None,
weight="weight", ratio_label="Data / SM", ratio_range=(0.1, 1.9))
None
```
%% Cell type:markdown id: tags:
# Envelop
%% Cell type:code id: tags:
``` python
hist(df, v_higgs_m, 20, [s_bkg_nom, s_bkg_up, s_bkg_down], range=(0, 200), selection=None,
weight="weight", ratio_label="Up / Down", ratio_range=(0.1, 1.9))
None
```
%% Cell type:markdown id: tags:
# Full band
%% Cell type:code id: tags:
``` python
hist(df, v_higgs_m, 20, [s_bkg_syst, s_data], range=(0, 200), selection=None,
weight="weight", ratio_label="Data / SM", ratio_range=(0.1, 1.9))
None
```
%% Cell type:code id: tags:
``` python
```
......@@ -8,5 +8,5 @@ from nnfwtbn.plot import HistogramFactory, hist, confusion_matrix, roc, \
from nnfwtbn.model import CrossValidator, ClassicalCV, MixedCV, \
Normalizer, EstimatorNormalizer, \
HepNet
from nnfwtbn.stack import Stack, McStack, DataStack
from nnfwtbn.stack import Stack, McStack, DataStack, SystStack, TruthStack
from nnfwtbn.interface import TmvaBdt
......@@ -165,6 +165,66 @@ class Stack:
"""
return len(self.processes)
DEFAULT_VARIATION_VAR = 'variation'
DEFAULT_NOMINAL = 'NOMINAL'
class SystStack(Stack):
def __init__(self, df, *args, **kwds):
super().__init__(df, *args, **kwds)
self.variation_var = DEFAULT_VARIATION_VAR
self.skip_variations = {DEFAULT_NOMINAL}
self.nominal = DEFAULT_NOMINAL
def get_total(self, df, *args, **kwds):
nom_idx = (df[self.variation_var] == self.nominal)
return super().get_total(df[nom_idx], *args, **kwds)
def get_hist(self, df, *args, **kwds):
nom_idx = (df[self.variation_var] == self.nominal)
return super().get_hist(df[nom_idx], *args, **kwds)
def get_stat_uncertainty(self, df, *args, **kwds):
nom_idx = (df[self.variation_var] == self.nominal)
return super().get_total_uncertainty(df[nom_idx], *args, **kwds)
def get_syst_uncertainty(self, df, bins, variable, weight,
include_outside=False):
nominal_hist = self.get_total(df, bins, variable, weight,
include_outside=include_outside)
envelop = []
for variation in df[self.variation_var].unique():
if variation in self.skip_variations:
continue
variation_hist = 0
# Can't use super().get_total because it falls back to this get_hist()
for i in range(len(self.processes)):
var_idx = (df[self.variation_var] == variation)
variation_hist += super().get_hist(df[var_idx],
i, bins, variable, weight,
include_outside)
variation_hist -= nominal_hist
envelop.append(variation_hist)
# envelop[variation] = variation_hist
envelop = da.asarray(envelop)
return da.sqrt((envelop**2).sum(axis=0))
def get_total_uncertainty(self, *args, **kwds):
return da.sqrt(
self.get_stat_uncertainty(*args, **kwds)**2 +
self.get_syst_uncertainty(*args, **kwds)**2
)
class TruthStack(Stack):
def get_total_uncertainty(self, df, bins, *args, **kwds):
return bins[:-1] * 0
class McStack(Stack):
"""
Short-hand class for a Stack with only Monte-Carlo-like processes.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment