Compare revisions

Frank Sauerburger · Frank Sauerburger · Frank Sauerburger · Frank Sauerburger · Frank Sauerburger · Frank Sauerburger
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@ __pycache__
 *.h5
 doc/*
 .eggs/
+examples/*.html
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -18,7 +18,7 @@ unittest:
    - pip install -r requirements.txt
    - python setup.py test

-doc_build:
+build:doc:
  stage: build
  image: python:3.7
  script:
@@ -33,19 +33,46 @@ doc_build:
      - _public
    expire_in: 1 mos

-deploy:
+build:examples:
+  stage: build
+  image: python:3.7
+  script:
+    - python setup.py install
+    - cd examples
+    - pip install -r requirements.txt
+    - jupyter-nbconvert --ExecutePreprocessor.timeout=300 --execute --to html *.ipynb 
+  artifacts:
+    paths:
+      - examples/*.html
+      - examples/*.ipynb
+    expire_in: 1 mos
+
+deploy:examples:
+  stage: deploy
+  only:
+      - master
+  dependencies:
+    - build:examples
+  variables:
+    "CI_WEBSITE_DIR": "examples/"
+    "DFS_WEBSITE_DIR": "examples/"
+  image: gitlab-registry.cern.ch/ci-tools/ci-web-deployer
+  script:
+    - deploy-dfs
+
+deploy:doc:
  stage: deploy
  only:
      - master
  dependencies:
-    - doc_build
+    - build:doc
  variables:
    "CI_WEBSITE_DIR": "_public/"
  image: gitlab-registry.cern.ch/ci-tools/ci-web-deployer
  script:
    - deploy-dfs

-build_master:
+build:docker:master:
  stage: build
  variables:
    DOCKER_FILE: Dockerfile
@@ -57,7 +84,7 @@ build_master:
  tags:
    - docker-image-build

-build_commit:
+build:docker:commit:
  stage: build
  variables:
    DOCKER_FILE: Dockerfile

--- a/Classification.ipynb
+++ b/Classification.ipynb
--- a/ConfusionMatrix.ipynb
+++ b/ConfusionMatrix.ipynb
--- a/Histogram.ipynb
+++ b/Histogram.ipynb
--- a/HistogramFactory.ipynb
+++ b/HistogramFactory.ipynb
--- a/README.rst
+++ b/README.rst
@@ -5,11 +5,12 @@ Pure Python framework to train neural networks for high energy physics analysis.

 Examples
 --------
-
- * `Histogram <Histogram.ipynb>`_
- * `HistogramFactory <HistogramFactory.ipynb>`_
- * `ConfusionMatrix <ConfusionMatrix.ipynb>`_
- * `ROC <ROC.ipynb>`_
+ * `ToyData <https://nnfwtbn.web.cern.ch/examples/ToyData.html>`_
+ * `Histogram <https://nnfwtbn.web.cern.ch/examples/Histogram.html>`_
+ * `HistogramFactory <https://nnfwtbn.web.cern.ch/examples/HistogramFactory.html>`_
+ * `ConfusionMatrix <https://nnfwtbn.web.cern.ch/examples/ConfusionMatrix.html>`_
+ * `ROC <https://nnfwtbn.web.cern.ch/examples/ROC.html>`_
+ * `Classification <https://nnfwtbn.web.cern.ch/examples/Classification.html>`_


 Links

--- a/ROC.ipynb
+++ b/ROC.ipynb
--- a/examples/Classification.ipynb
+++ b/examples/Classification.ipynb
+%% Cell type:code id: tags:
+
+``` python
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from keras.models import Sequential
+from keras.layers import Dense, Dropout
+from keras.optimizers import SGD
+
+from nnfwtbn import Variable, Process, Cut, \
+                    HepNet, ClassicalCV, EstimatorNormalizer, \
+                    HistogramFactory, confusion_matrix, atlasify
+from nnfwtbn import toydata
+```
+
+%% Cell type:code id: tags:
+
+``` python
+df = toydata.get()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
+p_sig = Process(r"Signal", range=(1, 1))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist_factory = HistogramFactory(df, stacks=[[p_sig, p_ztt]],
+     weight="weight", color=[sns.color_palette()])
+```
+
+%% Cell type:markdown id: tags:
+
+# Cut-based
+
+%% Cell type:code id: tags:
+
+``` python
+hist_factory(Variable("$\Delta \eta^{jj}$",
+                      lambda d: (d.jet_1_eta - d.jet_2_eta).abs()),
+             bins=20, range=(0, 8))
+hist_factory(Variable("$m^{jj}$", "m_jj"),
+             bins=20, range=(0, 1500))
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+c_sr = Cut(lambda d: d.m_jj  > 400) & \
+        Cut(lambda d: d.jet_2_pt >= 30) & \
+        Cut(lambda d: d.jet_1_eta * d.jet_2_eta < 0) & \
+        Cut(lambda d: (d.jet_2_eta - d.jet_1_eta).abs() > 3)
+c_sr.label = "Signal"
+
+c_rest = (~c_sr)
+c_rest.label = "Rest"
+```
+
+%% Cell type:code id: tags:
+
+``` python
+confusion_matrix(df, [p_sig, p_ztt], [c_sr, c_rest],
+                 x_label="Signal", y_label="Region", annot=True, weight="weight")
+confusion_matrix(df, [p_sig, p_ztt], [c_sr, c_rest], normalize_rows=True,
+                 x_label="Signal", y_label="Region", annot=True, weight="weight")
+None
+```
+
+%% Cell type:markdown id: tags:
+
+# Neural Network
+
+%% Cell type:code id: tags:
+
+``` python
+df['dijet_deta'] = (df.jet_1_eta - df.jet_2_eta).abs()
+df['dijet_prod_eta'] = (df.jet_1_eta * df.jet_2_eta)
+input_var = ['dijet_prod_eta', 'm_jj', 'dijet_deta', 'higgs_pt', 'jet_2_pt', 'jet_1_eta', 'jet_2_eta', 'tau_eta']
+
+output_var = ['is_sig', 'is_ztt']
+```
+
+%% Cell type:code id: tags:
+
+``` python
+df["is_sig"] = p_sig.selection.idx_array(df)
+df["is_ztt"] = p_ztt.selection.idx_array(df)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+sns.pairplot(df.sample(n=1000), vars=input_var, hue="is_sig")
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+def model():
+    m = Sequential()
+    m.add(Dense(units=15, activation='relu', input_dim=len(input_var)))
+    m.add(Dense(units=5, activation='relu'))
+    m.add(Dense(units=2, activation='softmax'))
+
+    m.compile(loss='categorical_crossentropy',
+              optimizer=SGD(lr=0.1),
+              metrics=['categorical_accuracy'])
+
+    return m
+
+df['random'] = toydata.rng.random(size=len(df))
+cv = ClassicalCV(5, frac_var='random')
+
+net = HepNet(model, cv, EstimatorNormalizer, input_var, output_var)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+sig_wf = len(p_sig.selection(df).weight) / p_sig.selection(df).weight.sum()
+ztt_wf = len(p_ztt.selection(df).weight) / p_ztt.selection(df).weight.sum()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+net.fit(df, epochs=150, verbose=0, batch_size=2048,
+        weight=Variable("weight", lambda d: d.weight * (d.is_sig * sig_wf + d.is_ztt * ztt_wf)))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+sns.lineplot(x='epoch', y='loss', data=net.history, label="Training")
+sns.lineplot(x='epoch', y='val_loss', data=net.history, label="Validation")
+plt.ylabel("loss")
+atlasify("Internal")
+None
+```
+
+%% Cell type:markdown id: tags:
+
+## Accuracy
+
+%% Cell type:code id: tags:
+
+``` python
+sns.lineplot(x='epoch', y='categorical_accuracy', data=net.history, label="Training")
+sns.lineplot(x='epoch', y='val_categorical_accuracy', data=net.history, label="Validation")
+plt.ylabel("Accuracy")
+atlasify("Internal")
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+sns.lineplot(x='epoch', y='val_categorical_accuracy', data=net.history, hue="fold")
+atlasify("Internal", enlarge=1.6)
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+out = net.predict(df, cv='test')
+out['pred_sig'] = out.pred_is_sig >= 0.5
+```
+
+%% Cell type:code id: tags:
+
+``` python
+c_pred_sig = Process("Signal", lambda d: d.pred_is_sig >= 0.5)
+c_pred_ztt = Process(r"$Z\rightarrow\tau\tau$", lambda d: d.pred_is_sig < 0.5)
+
+confusion_matrix(out, [p_sig, p_ztt], [c_pred_sig, c_pred_ztt],
+                 x_label="Truth", y_label="Classification", annot=True, weight="weight")
+confusion_matrix(out, [p_sig, p_ztt], [c_pred_sig, c_pred_ztt], normalize_rows=True,
+                 x_label="Truth", y_label="Classification", annot=True, weight="weight")
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
+%% Cell type:code id: tags:
+
+``` python
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from keras.models import Sequential
+from keras.layers import Dense, Dropout
+from keras.optimizers import SGD
+
+from nnfwtbn import Variable, Process, Cut, \
+                    HepNet, ClassicalCV, EstimatorNormalizer, \
+                    HistogramFactory, confusion_matrix, atlasify
+from nnfwtbn import toydata
+```
+
+%% Cell type:code id: tags:
+
+``` python
+df = toydata.get()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
+p_sig = Process(r"Signal", range=(1, 1))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist_factory = HistogramFactory(df, stacks=[[p_sig, p_ztt]],
+     weight="weight", color=[sns.color_palette()])
+```
+
+%% Cell type:markdown id: tags:
+
+# Cut-based
+
+%% Cell type:code id: tags:
+
+``` python
+hist_factory(Variable("$\Delta \eta^{jj}$",
+                      lambda d: (d.jet_1_eta - d.jet_2_eta).abs()),
+             bins=20, range=(0, 8))
+hist_factory(Variable("$m^{jj}$", "m_jj"),
+             bins=20, range=(0, 1500))
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+c_sr = Cut(lambda d: d.m_jj  > 400) & \
+        Cut(lambda d: d.jet_2_pt >= 30) & \
+        Cut(lambda d: d.jet_1_eta * d.jet_2_eta < 0) & \
+        Cut(lambda d: (d.jet_2_eta - d.jet_1_eta).abs() > 3)
+c_sr.label = "Signal"
+
+c_rest = (~c_sr)
+c_rest.label = "Rest"
+```
+
+%% Cell type:code id: tags:
+
+``` python
+confusion_matrix(df, [p_sig, p_ztt], [c_sr, c_rest],
+                 x_label="Signal", y_label="Region", annot=True, weight="weight")
+confusion_matrix(df, [p_sig, p_ztt], [c_sr, c_rest], normalize_rows=True,
+                 x_label="Signal", y_label="Region", annot=True, weight="weight")
+None
+```
+
+%% Cell type:markdown id: tags:
+
+# Neural Network
+
+%% Cell type:code id: tags:
+
+``` python
+df['dijet_deta'] = (df.jet_1_eta - df.jet_2_eta).abs()
+df['dijet_prod_eta'] = (df.jet_1_eta * df.jet_2_eta)
+input_var = ['dijet_prod_eta', 'm_jj', 'dijet_deta', 'higgs_pt', 'jet_2_pt', 'jet_1_eta', 'jet_2_eta', 'tau_eta']
+
+output_var = ['is_sig', 'is_ztt']
+```
+
+%% Cell type:code id: tags:
+
+``` python
+df["is_sig"] = p_sig.selection.idx_array(df)
+df["is_ztt"] = p_ztt.selection.idx_array(df)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+sns.pairplot(df.sample(n=1000), vars=input_var, hue="is_sig")
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+def model():
+    m = Sequential()
+    m.add(Dense(units=15, activation='relu', input_dim=len(input_var)))
+    m.add(Dense(units=5, activation='relu'))
+    m.add(Dense(units=2, activation='softmax'))
+
+    m.compile(loss='categorical_crossentropy',
+              optimizer=SGD(lr=0.1),
+              metrics=['categorical_accuracy'])
+
+    return m
+
+df['random'] = toydata.rng.random(size=len(df))
+cv = ClassicalCV(5, frac_var='random')
+
+net = HepNet(model, cv, EstimatorNormalizer, input_var, output_var)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+sig_wf = len(p_sig.selection(df).weight) / p_sig.selection(df).weight.sum()
+ztt_wf = len(p_ztt.selection(df).weight) / p_ztt.selection(df).weight.sum()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+net.fit(df, epochs=150, verbose=0, batch_size=2048,
+        weight=Variable("weight", lambda d: d.weight * (d.is_sig * sig_wf + d.is_ztt * ztt_wf)))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+sns.lineplot(x='epoch', y='loss', data=net.history, label="Training")
+sns.lineplot(x='epoch', y='val_loss', data=net.history, label="Validation")
+plt.ylabel("loss")
+atlasify("Internal")
+None
+```
+
+%% Cell type:markdown id: tags:
+
+## Accuracy
+
+%% Cell type:code id: tags:
+
+``` python
+sns.lineplot(x='epoch', y='categorical_accuracy', data=net.history, label="Training")
+sns.lineplot(x='epoch', y='val_categorical_accuracy', data=net.history, label="Validation")
+plt.ylabel("Accuracy")
+atlasify("Internal")
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+sns.lineplot(x='epoch', y='val_categorical_accuracy', data=net.history, hue="fold")
+atlasify("Internal", enlarge=1.6)
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+out = net.predict(df, cv='test')
+out['pred_sig'] = out.pred_is_sig >= 0.5
+```
+
+%% Cell type:code id: tags:
+
+``` python
+c_pred_sig = Process("Signal", lambda d: d.pred_is_sig >= 0.5)
+c_pred_ztt = Process(r"$Z\rightarrow\tau\tau$", lambda d: d.pred_is_sig < 0.5)
+
+confusion_matrix(out, [p_sig, p_ztt], [c_pred_sig, c_pred_ztt],
+                 x_label="Truth", y_label="Classification", annot=True, weight="weight")
+confusion_matrix(out, [p_sig, p_ztt], [c_pred_sig, c_pred_ztt], normalize_rows=True,
+                 x_label="Truth", y_label="Classification", annot=True, weight="weight")
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
--- a/examples/ConfusionMatrix.ipynb
+++ b/examples/ConfusionMatrix.ipynb
+%% Cell type:code id: tags:
+
+``` python
+import pandas as pd
+import seaborn as sns
+
+from nnfwtbn import Variable, Process, Cut, confusion_matrix, HistogramFactory
+from nnfwtbn import toydata
+```
+
+%% Cell type:code id: tags:
+
+``` python
+df = toydata.get()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+p_sig = Process(r"Signal", range=(1, 1))
+p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+c_low = Cut(lambda d: d.m_jj  < 350, label="Low $m^{jj}$")
+c_mid = Cut(lambda d: (d.m_jj >= 350) & (d.m_jj  < 600), label="Mid $m^{jj}$")
+c_high = Cut(lambda d: d.m_jj  > 600, label="High $m^{jj}$")
+```
+
+%% Cell type:markdown id: tags:
+
+## Normalized columns
+
+%% Cell type:code id: tags:
+
+``` python
+confusion_matrix(df, [p_sig, p_ztt], [c_low, c_mid, c_high],
+                 y_label="Region", x_label="Truth Signal", annot=True, weight="weight")
+None
+```
+
+%% Cell type:markdown id: tags:
+
+## Normalized rows
+
+%% Cell type:code id: tags:
+
+``` python
+confusion_matrix(df, [p_sig, p_ztt], [c_low, c_mid, c_high], normalize_rows=True,
+                 y_label="Region", x_label="Truth Signal", annot=True, weight="weight")
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
+%% Cell type:code id: tags:
+
+``` python
+import pandas as pd
+import seaborn as sns
+
+from nnfwtbn import Variable, Process, Cut, confusion_matrix, HistogramFactory
+from nnfwtbn import toydata
+```
+
+%% Cell type:code id: tags:
+
+``` python
+df = toydata.get()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+p_sig = Process(r"Signal", range=(1, 1))
+p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+c_low = Cut(lambda d: d.m_jj  < 350, label="Low $m^{jj}$")
+c_mid = Cut(lambda d: (d.m_jj >= 350) & (d.m_jj  < 600), label="Mid $m^{jj}$")
+c_high = Cut(lambda d: d.m_jj  > 600, label="High $m^{jj}$")
+```
+
+%% Cell type:markdown id: tags:
+
+## Normalized columns
+
+%% Cell type:code id: tags:
+
+``` python
+confusion_matrix(df, [p_sig, p_ztt], [c_low, c_mid, c_high],
+                 y_label="Region", x_label="Truth Signal", annot=True, weight="weight")
+None
+```
+
+%% Cell type:markdown id: tags:
+
+## Normalized rows
+
+%% Cell type:code id: tags:
+
+``` python
+confusion_matrix(df, [p_sig, p_ztt], [c_low, c_mid, c_high], normalize_rows=True,
+                 y_label="Region", x_label="Truth Signal", annot=True, weight="weight")
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
--- a/examples/Histogram.ipynb
+++ b/examples/Histogram.ipynb
+%% Cell type:code id: tags:
+
+``` python
+import pandas as pd
+import seaborn as sns
+
+from nnfwtbn import Variable, Process, Cut, hist
+from nnfwtbn import toydata
+```
+
+%% Cell type:code id: tags:
+
+``` python
+df = toydata.get()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
+p_sig = Process(r"Signal", range=(1, 1))
+
+p_asimov = Process(r"Asimov", selection=lambda d: d.fpid >= 0)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_higgs_m = Variable(r"$m^H$", "higgs_m", "GeV")
+```
+
+%% Cell type:code id: tags:
+
+``` python
+c_vbf = Cut(lambda d: d.dijet_p4__M  > 400) & \
+        Cut(lambda d: d.jet_1_p4__Pt >= 30) & \
+        Cut(lambda d: d.is_dijet_centrality == 1) & \
+        Cut(lambda d: d.jet_0_p4__Eta * df.jet_1_p4__Eta < 0) & \
+        Cut(lambda d: (d.jet_0_p4__Eta - df.jet_1_p4__Eta).abs() > 3)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist(df, v_higgs_m, 20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(0, 200), selection=None,
+     weight="weight", color=[sns.color_palette(), ['black']],
+     histtype=[['stepfilled']*4 + ['step'], 'points'], ratio_label="Data / SM")
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist(df, v_higgs_m, 20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(0, 200), selection=None,
+     weight="weight", color=[sns.color_palette(), ['black']], y_log=True,
+     histtype=[['stepfilled']*4 + ['step'], 'points'], numerator=None)
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist(df, v_higgs_m, 20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(0, 200), selection=None,
+     weight="weight", color=[sns.color_palette(), ['black']],
+     y_log=True, y_min=1e-1, vlines=[80, {'x': 100, 'color': 'b'}],
+     histtype=[['stepfilled']*4 + ['step'], 'points'], ratio_label="MC / Data", numerator=0, denominator=1)
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist(df, v_higgs_m, 20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(40, 120), selection=None,
+     weight="weight", color=[sns.color_palette(), ['black']],
+     y_log=True, y_min=1e-1, vlines=[80, {'x': 100, 'color': 'b'}],
+     numerator=[p_asimov], denominator=[p_ztt],
+     histtype=[['stepfilled']*4 + ['step'], 'points'], ratio_label="Data / Bkg")
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist(df, v_higgs_m, 20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(40, 120), selection=None,
+     weight="weight", color=[sns.color_palette(), ['black']],
+     y_log=True, y_min=1e-1, vlines=[80, {'x': 100, 'color': 'b'}],
+     numerator=[p_asimov], denominator=[p_ztt], diff=True,
+     histtype=[['stepfilled']*4 + ['step'], 'points'], ratio_label="Data / Bkg")
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
+%% Cell type:code id: tags:
+
+``` python
+import pandas as pd
+import seaborn as sns
+
+from nnfwtbn import Variable, Process, Cut, hist
+from nnfwtbn import toydata
+```
+
+%% Cell type:code id: tags:
+
+``` python
+df = toydata.get()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
+p_sig = Process(r"Signal", range=(1, 1))
+
+p_asimov = Process(r"Asimov", selection=lambda d: d.fpid >= 0)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_higgs_m = Variable(r"$m^H$", "higgs_m", "GeV")
+```
+
+%% Cell type:code id: tags:
+
+``` python
+c_vbf = Cut(lambda d: d.dijet_p4__M  > 400) & \
+        Cut(lambda d: d.jet_1_p4__Pt >= 30) & \
+        Cut(lambda d: d.is_dijet_centrality == 1) & \
+        Cut(lambda d: d.jet_0_p4__Eta * df.jet_1_p4__Eta < 0) & \
+        Cut(lambda d: (d.jet_0_p4__Eta - df.jet_1_p4__Eta).abs() > 3)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist(df, v_higgs_m, 20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(0, 200), selection=None,
+     weight="weight", color=[sns.color_palette(), ['black']],
+     histtype=[['stepfilled']*4 + ['step'], 'points'], ratio_label="Data / SM")
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist(df, v_higgs_m, 20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(0, 200), selection=None,
+     weight="weight", color=[sns.color_palette(), ['black']], y_log=True,
+     histtype=[['stepfilled']*4 + ['step'], 'points'], numerator=None)
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist(df, v_higgs_m, 20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(0, 200), selection=None,
+     weight="weight", color=[sns.color_palette(), ['black']],
+     y_log=True, y_min=1e-1, vlines=[80, {'x': 100, 'color': 'b'}],
+     histtype=[['stepfilled']*4 + ['step'], 'points'], ratio_label="MC / Data", numerator=0, denominator=1)
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist(df, v_higgs_m, 20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(40, 120), selection=None,
+     weight="weight", color=[sns.color_palette(), ['black']],
+     y_log=True, y_min=1e-1, vlines=[80, {'x': 100, 'color': 'b'}],
+     numerator=[p_asimov], denominator=[p_ztt],
+     histtype=[['stepfilled']*4 + ['step'], 'points'], ratio_label="Data / Bkg")
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist(df, v_higgs_m, 20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(40, 120), selection=None,
+     weight="weight", color=[sns.color_palette(), ['black']],
+     y_log=True, y_min=1e-1, vlines=[80, {'x': 100, 'color': 'b'}],
+     numerator=[p_asimov], denominator=[p_ztt], diff=True,
+     histtype=[['stepfilled']*4 + ['step'], 'points'], ratio_label="Data / Bkg")
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
--- a/examples/HistogramFactory.ipynb
+++ b/examples/HistogramFactory.ipynb
+%% Cell type:code id: tags:
+
+``` python
+import pandas as pd
+import seaborn as sns
+
+from nnfwtbn import Variable, Process, Cut, hist, HistogramFactory
+from nnfwtbn import toydata
+```
+
+%% Cell type:code id: tags:
+
+``` python
+df = toydata.get()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
+p_sig = Process(r"Signal", range=(1, 1))
+p_asimov = Process(r"Asimov", selection=lambda d: d.fpid >= 0)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+c_vbf = Cut(lambda d: d.dijet_p4__M  > 400) & \
+        Cut(lambda d: d.jet_1_p4__Pt >= 30) & \
+        Cut(lambda d: d.is_dijet_centrality == 1) & \
+        Cut(lambda d: d.jet_0_p4__Eta * df.jet_1_p4__Eta < 0) & \
+        Cut(lambda d: (d.jet_0_p4__Eta - df.jet_1_p4__Eta).abs() > 3)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist_factory = HistogramFactory(df, bins=20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(0, 200), selection=None,
+     weight="weight", color=[sns.color_palette(), ['black']],
+     histtype=[['stepfilled']*4 + ['step'], 'points'])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_mmc = Variable(r"$m^H$", "higgs_m", "GeV")
+hist_factory(v_mmc)
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_tau_pT = Variable(r"$p_\mathrm{T}{\tau}$", "tau_pt", "GeV")
+hist_factory(v_tau_pT, bins=12, range=(0, 120))
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_lep_pT = Variable(r"$p_\mathrm{T}{\ell}$", "lep_pt", "GeV")
+hist_factory(v_lep_pT, bins=12, range=(0, 120))
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
+%% Cell type:code id: tags:
+
+``` python
+import pandas as pd
+import seaborn as sns
+
+from nnfwtbn import Variable, Process, Cut, hist, HistogramFactory
+from nnfwtbn import toydata
+```
+
+%% Cell type:code id: tags:
+
+``` python
+df = toydata.get()
+```
+
+%% Cell type:code id: tags:
+
+``` python
+p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
+p_sig = Process(r"Signal", range=(1, 1))
+p_asimov = Process(r"Asimov", selection=lambda d: d.fpid >= 0)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+c_vbf = Cut(lambda d: d.dijet_p4__M  > 400) & \
+        Cut(lambda d: d.jet_1_p4__Pt >= 30) & \
+        Cut(lambda d: d.is_dijet_centrality == 1) & \
+        Cut(lambda d: d.jet_0_p4__Eta * df.jet_1_p4__Eta < 0) & \
+        Cut(lambda d: (d.jet_0_p4__Eta - df.jet_1_p4__Eta).abs() > 3)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist_factory = HistogramFactory(df, bins=20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(0, 200), selection=None,
+     weight="weight", color=[sns.color_palette(), ['black']],
+     histtype=[['stepfilled']*4 + ['step'], 'points'])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_mmc = Variable(r"$m^H$", "higgs_m", "GeV")
+hist_factory(v_mmc)
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_tau_pT = Variable(r"$p_\mathrm{T}{\tau}$", "tau_pt", "GeV")
+hist_factory(v_tau_pT, bins=12, range=(0, 120))
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_lep_pT = Variable(r"$p_\mathrm{T}{\ell}$", "lep_pt", "GeV")
+hist_factory(v_lep_pT, bins=12, range=(0, 120))
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
--- a/examples/ROC.ipynb
+++ b/examples/ROC.ipynb
+%% Cell type:code id: tags:
+
+``` python
+import pandas as pd
+import seaborn as sns
+
+from nnfwtbn import Variable, Process, Cut, roc
+from nnfwtbn import toydata
+```
+
+%% Cell type:code id: tags:
+
+``` python
+df = toydata.get()
+df['noise'] = df.fpid + toydata.rng.normal(0, 0.3, size=len(df))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+p_bkg = Process(r"Background", range=(0, 0))
+p_sig = Process(r"Signal", range=(1, 1))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+import matplotlib.pyplot as plt
+fig, ax = plt.subplots()
+
+v_higgs_m = Variable(r"$m^H$", "higgs_m", "GeV")
+roc(df, p_sig, p_bkg, v_higgs_m, axes=ax, steps=400)
+roc(df, p_sig, p_bkg, Variable("Noise ID", "noise"), axes=ax)
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
+%% Cell type:code id: tags:
+
+``` python
+import pandas as pd
+import seaborn as sns
+
+from nnfwtbn import Variable, Process, Cut, roc
+from nnfwtbn import toydata
+```
+
+%% Cell type:code id: tags:
+
+``` python
+df = toydata.get()
+df['noise'] = df.fpid + toydata.rng.normal(0, 0.3, size=len(df))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+p_bkg = Process(r"Background", range=(0, 0))
+p_sig = Process(r"Signal", range=(1, 1))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+import matplotlib.pyplot as plt
+fig, ax = plt.subplots()
+
+v_higgs_m = Variable(r"$m^H$", "higgs_m", "GeV")
+roc(df, p_sig, p_bkg, v_higgs_m, axes=ax, steps=400)
+roc(df, p_sig, p_bkg, Variable("Noise ID", "noise"), axes=ax)
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
--- a/examples/ToyData.ipynb
+++ b/examples/ToyData.ipynb
+%% Cell type:code id: tags:
+
+``` python
+from nnfwtbn import Variable, Process, Cut, hist
+from nnfwtbn.toydata import generate, proposal, mcmc_step, vbfh_pdf, mcmc, ztt_pdf
+from pylorentz import Momentum4
+import numpy as np
+import pandas as pd
+import seaborn as sns
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# shuffle=False allows plotting the MC walk
+%time df = generate(10000, vbfh_frac=0.5, shuffle=False)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+import matplotlib.pyplot as plt
+```
+
+%% Cell type:markdown id: tags:
+
+# Markov chain walk
+
+%% Cell type:code id: tags:
+
+``` python
+idx = df.fpid == 1
+fig, ax = plt.subplots(2, 1, sharex=True)
+ax[0].set_title("VBFH")
+ax[0].plot(np.arange(sum(idx)), df.jet_1_pt[idx], label="Jet 1")
+ax[0].plot(np.arange(sum(idx)), df.jet_2_pt[idx], label="Jet 2")
+ax[0].set_ylabel(r"$p_{\mathrm{T}}$")
+ax[0].legend()
+
+ax[1].set_title(r"$Z\rightarrow\tau\tau$")
+ax[1].plot(np.arange(sum(~idx)), df.jet_1_pt[~idx], label="Jet 1")
+ax[1].plot(np.arange(sum(~idx)), df.jet_2_pt[~idx], label="Jet 2")
+ax[1].set_ylabel(r"$p_{\mathrm{T}}$")
+ax[1].legend()
+fig.tight_layout()
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+fig, ax = plt.subplots(2, 1, sharex=True)
+ax[0].set_title("VBFH")
+ax[0].plot(np.arange(sum(idx)), df.jet_1_eta[idx].abs(), label="Jet 1")
+ax[0].plot(np.arange(sum(idx)), df.jet_2_eta[idx].abs(), label="Jet 2")
+ax[0].set_ylabel(r"$|\eta_j|$")
+ax[0].legend()
+
+ax[1].set_title(r"$Z\rightarrow\tau\tau$")
+ax[1].plot(np.arange(sum(~idx)), df.jet_1_eta[~idx].abs(), label="Jet 1")
+ax[1].plot(np.arange(sum(~idx)), df.jet_2_eta[~idx].abs(), label="Jet 2")
+ax[1].set_ylabel(r"$|\eta_j|$")
+ax[1].legend()
+fig.tight_layout()
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+fig, ax = plt.subplots(1, 1)
+ax.plot(np.arange(sum(idx)), df.higgs_m[idx], label="VBFH")
+ax.plot(np.arange(sum(~idx)), df.higgs_m[~idx], label=r"$Z\rightarrow\tau\tau$")
+ax.set_ylabel(r"$m^H$")
+ax.legend()
+fig.tight_layout()
+None
+```
+
+%% Cell type:markdown id: tags:
+
+# Distributions
+
+%% Cell type:code id: tags:
+
+``` python
+p_vbfh = Process("VBFH", range=(1, 1))
+p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+jet_1_eta = Variable(r"$\eta^{j_1}$", "jet_1_eta")
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist(df, jet_1_eta, bins=20, range=(-6, 6), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+jet_2_eta = Variable(r"$\eta^{j_2}$", "jet_2_eta")
+hist(df, jet_2_eta, bins=20, range=(-6, 6), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+delta_eta_jets = Variable(r"$|\Delta\eta{jj}|$", lambda d: d.jet_1_eta.abs())
+hist(df, delta_eta_jets, bins=22, range=(-1, 10), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+jet_1_pt = Variable(r"$p_{\mathrm{T}}^{j_1}$", "jet_1_pt")
+jet_2_pt = Variable(r"$p_{\mathrm{T}}^{j_2}$", "jet_2_pt")
+hist(df, jet_1_pt, bins=50, range=(0, 1000), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+hist(df, jet_2_pt, bins=25, range=(0, 250), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_higgs_m = Variable(r"$m^H$", "higgs_m")
+hist(df, v_higgs_m, bins=20, range=(0, 200), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_weight = Variable(r"$w$", "weight")
+hist(df, v_weight, bins=20, range=(0, 2), stacks=[[p_ztt, p_vbfh]],
+     weight=lambda d: d.weight * 1 + 0, color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_mjj = Variable(r"$m^{jj}$", "m_jj")
+hist(df, v_mjj, bins=51, range=(-40, 2000), stacks=[[p_ztt, p_vbfh]],
+     weight=lambda d: d.weight * 1 + 0, color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+hist(df, v_mjj, bins=51, range=(-40, 2000), stacks=[[p_vbfh]],
+     weight=lambda d: d.weight * 1 + 0, color=[sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_lep_pt = Variable(r"$p^{\ell}_{\mathrm{T}}$", "lep_pt")
+hist(df, v_lep_pt, bins=30, range=(-10, 290), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_lep_eta = Variable(r"$|\eta^{\ell}|$", "lep_eta")
+hist(df, v_lep_eta, bins=30, range=(-5, 5), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_tau_pt = Variable(r"$p^{\tau}_{\mathrm{T}}$", "tau_pt")
+hist(df, v_tau_pt, bins=30, range=(-10, 290), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_tau_eta = Variable(r"$|\eta^{\tau}|$", "tau_eta")
+hist(df, v_tau_eta, bins=30, range=(-5, 5), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_met_pt = Variable(r"$E^{\mathrm{miss}}_{\mathrm{T}}$", "met_pt")
+hist(df, v_met_pt, bins=30, range=(-10, 290), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_lep_cent = Variable(r"$\eta^{\ell}$", "tau_centrality")
+hist(df, v_lep_cent, bins=22, range=(-0.05, 1.05), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_tau_cent = Variable(r"$\eta^{\tau}$", "tau_centrality")
+hist(df, v_tau_cent, bins=22, range=(-0.05, 1.05), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
+%% Cell type:code id: tags:
+
+``` python
+from nnfwtbn import Variable, Process, Cut, hist
+from nnfwtbn.toydata import generate, proposal, mcmc_step, vbfh_pdf, mcmc, ztt_pdf
+from pylorentz import Momentum4
+import numpy as np
+import pandas as pd
+import seaborn as sns
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# shuffle=False allows plotting the MC walk
+%time df = generate(10000, vbfh_frac=0.5, shuffle=False)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+import matplotlib.pyplot as plt
+```
+
+%% Cell type:markdown id: tags:
+
+# Markov chain walk
+
+%% Cell type:code id: tags:
+
+``` python
+idx = df.fpid == 1
+fig, ax = plt.subplots(2, 1, sharex=True)
+ax[0].set_title("VBFH")
+ax[0].plot(np.arange(sum(idx)), df.jet_1_pt[idx], label="Jet 1")
+ax[0].plot(np.arange(sum(idx)), df.jet_2_pt[idx], label="Jet 2")
+ax[0].set_ylabel(r"$p_{\mathrm{T}}$")
+ax[0].legend()
+
+ax[1].set_title(r"$Z\rightarrow\tau\tau$")
+ax[1].plot(np.arange(sum(~idx)), df.jet_1_pt[~idx], label="Jet 1")
+ax[1].plot(np.arange(sum(~idx)), df.jet_2_pt[~idx], label="Jet 2")
+ax[1].set_ylabel(r"$p_{\mathrm{T}}$")
+ax[1].legend()
+fig.tight_layout()
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+fig, ax = plt.subplots(2, 1, sharex=True)
+ax[0].set_title("VBFH")
+ax[0].plot(np.arange(sum(idx)), df.jet_1_eta[idx].abs(), label="Jet 1")
+ax[0].plot(np.arange(sum(idx)), df.jet_2_eta[idx].abs(), label="Jet 2")
+ax[0].set_ylabel(r"$|\eta_j|$")
+ax[0].legend()
+
+ax[1].set_title(r"$Z\rightarrow\tau\tau$")
+ax[1].plot(np.arange(sum(~idx)), df.jet_1_eta[~idx].abs(), label="Jet 1")
+ax[1].plot(np.arange(sum(~idx)), df.jet_2_eta[~idx].abs(), label="Jet 2")
+ax[1].set_ylabel(r"$|\eta_j|$")
+ax[1].legend()
+fig.tight_layout()
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+fig, ax = plt.subplots(1, 1)
+ax.plot(np.arange(sum(idx)), df.higgs_m[idx], label="VBFH")
+ax.plot(np.arange(sum(~idx)), df.higgs_m[~idx], label=r"$Z\rightarrow\tau\tau$")
+ax.set_ylabel(r"$m^H$")
+ax.legend()
+fig.tight_layout()
+None
+```
+
+%% Cell type:markdown id: tags:
+
+# Distributions
+
+%% Cell type:code id: tags:
+
+``` python
+p_vbfh = Process("VBFH", range=(1, 1))
+p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+jet_1_eta = Variable(r"$\eta^{j_1}$", "jet_1_eta")
+```
+
+%% Cell type:code id: tags:
+
+``` python
+hist(df, jet_1_eta, bins=20, range=(-6, 6), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+jet_2_eta = Variable(r"$\eta^{j_2}$", "jet_2_eta")
+hist(df, jet_2_eta, bins=20, range=(-6, 6), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+delta_eta_jets = Variable(r"$|\Delta\eta{jj}|$", lambda d: d.jet_1_eta.abs())
+hist(df, delta_eta_jets, bins=22, range=(-1, 10), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+jet_1_pt = Variable(r"$p_{\mathrm{T}}^{j_1}$", "jet_1_pt")
+jet_2_pt = Variable(r"$p_{\mathrm{T}}^{j_2}$", "jet_2_pt")
+hist(df, jet_1_pt, bins=50, range=(0, 1000), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+hist(df, jet_2_pt, bins=25, range=(0, 250), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_higgs_m = Variable(r"$m^H$", "higgs_m")
+hist(df, v_higgs_m, bins=20, range=(0, 200), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_weight = Variable(r"$w$", "weight")
+hist(df, v_weight, bins=20, range=(0, 2), stacks=[[p_ztt, p_vbfh]],
+     weight=lambda d: d.weight * 1 + 0, color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_mjj = Variable(r"$m^{jj}$", "m_jj")
+hist(df, v_mjj, bins=51, range=(-40, 2000), stacks=[[p_ztt, p_vbfh]],
+     weight=lambda d: d.weight * 1 + 0, color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+hist(df, v_mjj, bins=51, range=(-40, 2000), stacks=[[p_vbfh]],
+     weight=lambda d: d.weight * 1 + 0, color=[sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_lep_pt = Variable(r"$p^{\ell}_{\mathrm{T}}$", "lep_pt")
+hist(df, v_lep_pt, bins=30, range=(-10, 290), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_lep_eta = Variable(r"$|\eta^{\ell}|$", "lep_eta")
+hist(df, v_lep_eta, bins=30, range=(-5, 5), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_tau_pt = Variable(r"$p^{\tau}_{\mathrm{T}}$", "tau_pt")
+hist(df, v_tau_pt, bins=30, range=(-10, 290), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_tau_eta = Variable(r"$|\eta^{\tau}|$", "tau_eta")
+hist(df, v_tau_eta, bins=30, range=(-5, 5), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_met_pt = Variable(r"$E^{\mathrm{miss}}_{\mathrm{T}}$", "met_pt")
+hist(df, v_met_pt, bins=30, range=(-10, 290), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_lep_cent = Variable(r"$\eta^{\ell}$", "tau_centrality")
+hist(df, v_lep_cent, bins=22, range=(-0.05, 1.05), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+v_tau_cent = Variable(r"$\eta^{\tau}$", "tau_centrality")
+hist(df, v_tau_cent, bins=22, range=(-0.05, 1.05), stacks=[[p_ztt, p_vbfh]],
+     weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
+     histtype=[['stepfilled']*5])
+None
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```
--- a/examples/requirements.txt
+++ b/examples/requirements.txt
+keras
+tensorflow
+jupyter
--- a/nnfwtbn/tests/test_toydata.py
+++ b/nnfwtbn/tests/test_toydata.py
+
+import unittest
+import numpy as np
+from nnfwtbn.toydata import draw
+
+SEED = 798088218969
+
+class DrawTestCase(unittest.TestCase):
+    """
+    Test the implementation of draw().
+    """
+
+    @staticmethod
+    def _rng(seed=SEED):
+        """
+        Returns a new random number generator.
+        """
+        return np.random.Generator(np.random.PCG64(seed))
+
+    def setUp(self):
+        """
+        Instantiate a random number generator.
+        """
+        self.rng = DrawTestCase._rng()
+
+    @staticmethod
+    def _toy_pdf(x):
+        """
+        A toy PDF for testing: Normalized parabola between 0 and 1
+        """
+        return 3 * x**2
+
+    @staticmethod
+    def _toy_pdf2(x):
+        """
+        A toy PDF for testing: Normalized parabola between 1 and 11
+        """
+        return 3 * (x - 1)**2 / 1000
+
+    def test_draw_len(self):
+        """
+        Check that draw returns the number of samples given by the size
+        parameter.
+        """
+        
+        self.assertEqual(len(draw(self.rng, DrawTestCase._toy_pdf)), 1)
+        self.assertEqual(len(draw(self.rng, DrawTestCase._toy_pdf, size=10)), 10)
+        self.assertEqual(draw(self.rng, DrawTestCase._toy_pdf, size=(2, 5)).shape,
+                         (2, 5))
+
+    def test_draw_reproducible(self):
+        """
+        Check that draw returns the same array when called with identical
+        arguments.
+        """
+        rng1 = DrawTestCase._rng()
+        rng2 = DrawTestCase._rng()
+
+        sample1 = draw(rng1, DrawTestCase._toy_pdf, size=(10, 100))
+        sample2 = draw(rng2, DrawTestCase._toy_pdf, size=(10, 100))
+
+        self.assertTrue((sample1 == sample2).all())
+
+    def test_draw_seed(self):
+        """
+        Check that different arrays are returned when different seeds are
+        given.
+        """
+        rng1 = DrawTestCase._rng(42)
+        rng2 = DrawTestCase._rng(43)
+
+        sample1 = draw(rng1, DrawTestCase._toy_pdf, size=(10, 100))
+        sample2 = draw(rng2, DrawTestCase._toy_pdf, size=(10, 100))
+
+        self.assertFalse((sample1 == sample2).all())
+
+    def test_draw_limits(self):
+        """
+        Check that the returned numbers are withing the limit.
+        """
+        sample = draw(self.rng, DrawTestCase._toy_pdf, size=1000*1000)
+
+        self.assertGreaterEqual(sample.min(), 0) 
+        self.assertLess(sample.min(), 0.01) 
+
+        self.assertGreater(sample.max(), 0.9999) 
+        self.assertLessEqual(sample.max(), 1) 
+
+    def test_draw_limits_2(self):
+        """
+        Check that the returned numbers are withing the limit.
+        """
+        sample = draw(self.rng, DrawTestCase._toy_pdf2, lower=1, upper=11, size=1000*1000)
+
+        self.assertGreaterEqual(sample.min(), 1) 
+        self.assertLess(sample.min(), 1.1) 
+
+        self.assertGreater(sample.max(), 10.999) 
+        self.assertLessEqual(sample.max(), 11) 
--- a/nnfwtbn/toydata.py
+++ b/nnfwtbn/toydata.py
+"""
+This module implements method to generate a deterministic, physics-inspired
+toy dataset. The dataset is intended for documentations and examples. The
+module does not rely on external random number generators (seeding numpy
+might break user code).
+"""
+
+import math
+
+import numpy as np
+from scipy.integrate import quad
+from scipy.interpolate import interp1d
+import pandas as pd
+
+from pylorentz import Momentum4
+
+def draw(rng, pdf, size=1, lower=0, upper=1, N=100):
+    """
+    Draws a size-shaped random sample from the given PDF. The PDF must be
+    normalized to unity withing the given limits. 
+    """
+
+    grid = np.linspace(lower, upper, N)
+    cdf = np.array([quad(pdf, lower, x)[0] for x in grid])
+
+    inv_cdf = interp1d(cdf, grid, bounds_error=False,
+                       fill_value=(0, 1))
+
+    ys = rng.uniform(size=size)
+    return inv_cdf(ys)
+
+rng = np.random.RandomState(2221006818)  # random seed
+
+def augment(point):
+    jet_1_pt, jet_1_eta, jet_1_phi, jet_2_pt, jet_2_eta, jet_2_phi, met_phi, \
+        met_pt, tau_phi, tau_eta, tau_pt, lep_phi, lep_eta, lep_pt = point
+
+    jet_1 = Momentum4.m_eta_phi_pt(0, jet_1_eta, jet_1_phi, jet_1_pt)
+    jet_2 = Momentum4.m_eta_phi_pt(0, jet_2_eta, jet_2_phi, jet_2_pt)
+
+    tau = Momentum4.m_eta_phi_pt(1.8, tau_eta, tau_phi, tau_pt)
+    lep = Momentum4.m_eta_phi_pt(0.1, lep_eta, lep_phi, lep_pt)
+    met = Momentum4.m_eta_phi_pt(0.0, 0, met_phi, met_pt)
+
+    lep_centrality = np.exp(- 4 / (jet_1_eta - jet_2_eta)**2 * (lep_eta * (jet_1_eta + jet_2_eta)/2)**2)
+    tau_centrality = np.exp(- 4 / (jet_1_eta - jet_2_eta)**2 * (tau_eta * (jet_1_eta + jet_2_eta)/2)**2)
+
+    higgs = tau + lep + met
+
+    return jet_1_pt, jet_1_eta, jet_1_phi, jet_2_pt, jet_2_eta, jet_2_phi, met_phi, \
+        met_pt, tau_phi, tau_eta, tau_pt, lep_phi, lep_eta, lep_pt, \
+        higgs.p_t, higgs.eta, higgs.phi, higgs.m, (jet_1 + jet_2).m, \
+        lep_centrality, tau_centrality
+
+def generate(total, vbfh_frac=0.2, shuffle=True):
+    n_vbfh = math.ceil(vbfh_frac * total)
+    n_ztt = total - n_vbfh
+
+    df_vbfh = mcmc(n_vbfh, vbfh_pdf)
+    df_vbfh['fpid'] = 1
+    df_vbfh['weight'] = 1
+
+    df_ztt = mcmc(n_ztt, ztt_pdf)
+    df_ztt['fpid'] = 0
+    df_ztt['weight'] = 1
+
+    df =  pd.concat([df_vbfh, df_ztt])
+    if shuffle:
+        df = df.sample(frac=1, random_state=rng)
+
+    df.reset_index(drop=True, inplace=True)
+    return df
+
+
+def vbfh_pdf(point):
+    """
+    Returns the relative probability density at the given point. The function
+    is not properly normalized. The outer dimension of the point contains the
+    following values:
+      - jet_1_pt
+      - jet_1_eta
+      - jet_1_phi
+      - jet_2_pt
+      - jet_2_eta
+      - jet_2_phi
+      - met_phi
+      - met_pt
+      - tau_phi
+      - tau_eta
+      - tau_pt
+      - lep_phi
+      - lep_eta
+      - lep_pt
+    """
+    overall = 1.
+
+    point = augment(point)
+    jet_1_pt, jet_1_eta, jet_1_phi, jet_2_pt, jet_2_eta, jet_2_phi, met_phi, \
+        met_pt, tau_phi, tau_eta, tau_pt, lep_phi, lep_eta, lep_pt, \
+        higgs_pt, higgs_eta, higgs_phi, higgs_m, m_jj, lep_centrality, \
+        tau_centrality = point
+
+    # jet system
+    overall *= (jet_1_pt > jet_2_pt)
+    overall *= abs(jet_1_eta - jet_2_eta)
+    overall *= abs(jet_1_eta) * np.exp(-abs(jet_1_eta)**2 / 9)
+    overall *= abs(jet_2_eta) * np.exp(-abs(jet_2_eta)**2 / 9)
+
+    overall *= (jet_1_pt > 25)
+    overall *= (jet_2_pt > 25)
+    overall *= 1 / (jet_1_pt)
+    overall *= 1 / (jet_2_pt)
+
+    # Higgs system
+    overall *= np.exp(-0.5 * (higgs_m - 125)**2 / 15**2)
+
+    overall *= 1 / (1 + np.exp((m_jj-1000) / 500))
+    overall *= 1 / (1 + np.exp((m_jj-1000) / 500))
+
+    overall *= (met_pt > 0)
+    overall *= (lep_pt > 10)
+    overall *= (tau_pt > 20)
+
+    overall *= np.exp(-lep_pt / 180)
+    overall *= np.exp(-tau_pt / 180)
+    overall *= np.exp(-met_pt / 50)
+
+    overall *= (lep_centrality >= 0)
+    overall *= (lep_centrality < 1)
+    overall *= lep_centrality / 2 + 0.5
+    overall *= (tau_centrality >= 0)
+    overall *= (tau_centrality < 1)
+    overall *= tau_centrality / 2 + 0.5
+
+    return overall
+
+
+def ztt_pdf(point):
+    overall = 1.
+
+    point = augment(point)
+    jet_1_pt, jet_1_eta, jet_1_phi, jet_2_pt, jet_2_eta, jet_2_phi, met_phi, \
+        met_pt, tau_phi, tau_eta, tau_pt, lep_phi, lep_eta, lep_pt, \
+        higgs_pt, higgs_eta, higgs_phi, higgs_m, m_jj, lep_centrality, \
+        tau_centrality = point
+
+    # jet system
+    overall *= (jet_1_pt > jet_2_pt)
+    overall *= np.exp(-abs(jet_1_eta)**2 / 9)
+    overall *= np.exp(-abs(jet_2_eta)**2 / 9)
+
+    overall *= (jet_1_pt > 25)
+    overall *= (jet_2_pt > 25)
+    overall *= np.exp(-jet_1_pt / 100)
+    overall *= np.exp(-jet_2_pt / 60)
+
+    # Z system
+    overall *= np.exp(-0.5 * (higgs_m - 90)**2 / 10**2)
+
+    overall *= (met_pt > 0)
+    overall *= (lep_pt > 10)
+    overall *= (tau_pt > 20)
+    
+    overall *= np.exp(-lep_pt / 150)
+    overall *= np.exp(-tau_pt / 150)
+    overall *= np.exp(-met_pt / 50)
+
+    overall *= (lep_centrality >= 0)
+    overall *= (lep_centrality < 1)
+    overall *= (1 - lep_centrality / 2)
+    overall *= (tau_centrality >= 0)
+    overall *= (tau_centrality < 1)
+    overall *= (1 - tau_centrality / 2)
+
+    overall *= np.exp(-m_jj / 200)
+
+    return overall
+
+def proposal(point):
+    jet_1_pt, jet_1_eta, jet_1_phi, jet_2_pt, jet_2_eta, jet_2_phi, met_phi, \
+        met_pt, tau_phi, tau_eta, tau_pt, lep_phi, lep_eta, lep_pt = point
+
+    jet_1_pt = rng.normal(jet_1_pt, 10)
+    jet_2_pt = rng.normal(jet_2_pt, 10)
+    tau_pt = rng.normal(tau_pt, 10)
+    lep_pt = rng.normal(lep_pt, 10)
+    met_pt = rng.normal(met_pt, 10)
+
+    flip = rng.choice([1, -1])
+    jet_1_eta = rng.normal(jet_1_eta, 0.2) * flip
+    jet_2_eta = rng.normal(jet_2_eta, 0.2) * flip
+    tau_eta = rng.normal(tau_eta, 0.2) * flip
+    lep_eta = rng.normal(lep_eta, 0.2) * flip
+
+
+    rot = rng.uniform(0, 2* np.pi)
+    jet_1_phi = rng.normal(jet_1_phi + rot, 0.1) % (2 * np.pi)
+    jet_2_phi = rng.normal(jet_2_phi + rot, 0.1) % (2 * np.pi)
+    tau_phi = rng.normal(tau_phi + rot, 0.1) % (2 * np.pi)
+    lep_phi = rng.normal(lep_phi + rot, 0.1) % (2 * np.pi)
+    met_phi = rng.normal(met_phi + rot, 0.1) % (2 * np.pi)
+
+    return jet_1_pt, jet_1_eta, jet_1_phi, jet_2_pt, jet_2_eta, jet_2_phi, met_phi, \
+        met_pt, tau_phi, tau_eta, tau_pt, lep_phi, lep_eta, lep_pt
+
+def mcmc_step(x, pdf):
+    """
+    Perform a single step of Markov chain Monte Carlo.
+    """
+    while True:
+        new_x = proposal(x)
+        alpha = pdf(new_x) / pdf(x)
+        u = rng.random()
+
+        if u <= alpha:
+            return new_x
+
+
+def mcmc(length, pdf):
+    """
+    Generate length many samples.
+    """
+    point = 100, 1, 1, 50, 2, 1, 1, 1, 1, 1, 40, 1, 1, 30
+
+    for i in range(1000):
+        point = mcmc_step(point, pdf)
+
+    points = []
+    for i in range(length):
+        point = mcmc_step(point, pdf)
+        points.append(augment(point))
+
+    return pd.DataFrame(points,
+        columns=["jet_1_pt", "jet_1_eta", "jet_1_phi", "jet_2_pt", "jet_2_eta", "jet_2_phi",
+                 "met_phi", "met_pt", "tau_phi", "tau_eta", "tau_pt", "lep_phi", "lep_eta",
+                  "lep_pt", "higgs_pt", "higgs_eta", "higgs_phi", "higgs_m",
+                  "m_jj", "lep_centrality", "tau_centrality"])
+
+def get():
+    try:
+        df = pd.read_hdf(".toydata.h5", "main") 
+    except FileNotFoundError:
+        print("Cannot find cached data. Recreating toy data. This might take "
+              "some time...")
+        df = generate(10000)
+        df.to_hdf(".toydata.h5", "main")
+
+    return df
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ matplotlib
 seaborn
 tables
 pandas
+pylorentz
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,8 @@ setup(name='nnfwtbn',
                        "matplotlib",
                        "seaborn",
                        "tables",
-                        "pandas"],
+                        "pandas",
+                        "pylorentz"],
      test_suite='nnfwtbn.tests',
      description='Experimental neural network framework to be named.',
      url="https://gitlab.cern.ch/fsauerbu/nnfwtbn",
No results found